author		Sage Weil <sage@inktank.com>	2013-08-15 14:11:45 -0400
committer	Sage Weil <sage@inktank.com>	2013-08-15 14:11:45 -0400
commit		ee3e542fec6e69bc9fb668698889a37d93950ddf (patch)
tree		e74ee766a4764769ef1d3d45d266b4dea64101d3 /arch/powerpc
parent		fe2a801b50c0bb8039d627e5ae1fec249d10ff39 (diff)
parent		f1d6e17f540af37bb1891480143669ba7636c4cf (diff)
Merge remote-tracking branch 'linus/master' into testing
Diffstat (limited to 'arch/powerpc')
-rw-r--r--  arch/powerpc/Kconfig | 23
-rw-r--r--  arch/powerpc/Kconfig.debug | 14
-rw-r--r--  arch/powerpc/boot/dts/currituck.dts | 5
-rw-r--r--  arch/powerpc/boot/dts/fsl/interlaken-lac-portals.dtsi | 156
-rw-r--r--  arch/powerpc/boot/dts/fsl/interlaken-lac.dtsi | 45
-rw-r--r--  arch/powerpc/configs/c2k_defconfig | 2
-rw-r--r--  arch/powerpc/configs/g5_defconfig | 2
-rw-r--r--  arch/powerpc/configs/maple_defconfig | 2
-rw-r--r--  arch/powerpc/configs/mpc512x_defconfig | 27
-rw-r--r--  arch/powerpc/configs/mpc85xx_smp_defconfig | 1
-rw-r--r--  arch/powerpc/configs/pmac32_defconfig | 2
-rw-r--r--  arch/powerpc/configs/ppc64_defconfig | 4
-rw-r--r--  arch/powerpc/configs/ppc64e_defconfig | 2
-rw-r--r--  arch/powerpc/configs/ppc6xx_defconfig | 2
-rw-r--r--  arch/powerpc/configs/pseries_defconfig | 3
-rw-r--r--  arch/powerpc/include/asm/eeh.h | 66
-rw-r--r--  arch/powerpc/include/asm/eeh_event.h | 2
-rw-r--r--  arch/powerpc/include/asm/exception-64s.h | 8
-rw-r--r--  arch/powerpc/include/asm/hugetlb.h | 8
-rw-r--r--  arch/powerpc/include/asm/hw_irq.h | 7
-rw-r--r--  arch/powerpc/include/asm/ibmebus.h | 4
-rw-r--r--  arch/powerpc/include/asm/iommu.h | 33
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s.h | 6
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_64.h | 58
-rw-r--r--  arch/powerpc/include/asm/lppaca.h | 3
-rw-r--r--  arch/powerpc/include/asm/machdep.h | 11
-rw-r--r--  arch/powerpc/include/asm/mmu-hash64.h | 14
-rw-r--r--  arch/powerpc/include/asm/mmu_context.h | 2
-rw-r--r--  arch/powerpc/include/asm/module.h | 5
-rw-r--r--  arch/powerpc/include/asm/mpc5121.h | 1
-rw-r--r--  arch/powerpc/include/asm/mpc52xx_psc.h | 49
-rw-r--r--  arch/powerpc/include/asm/mpic.h | 5
-rw-r--r--  arch/powerpc/include/asm/mpic_timer.h | 46
-rw-r--r--  arch/powerpc/include/asm/mutex.h | 10
-rw-r--r--  arch/powerpc/include/asm/opal.h | 140
-rw-r--r--  arch/powerpc/include/asm/pci-bridge.h | 1
-rw-r--r--  arch/powerpc/include/asm/perf_event_server.h | 2
-rw-r--r--  arch/powerpc/include/asm/pgalloc-64.h | 6
-rw-r--r--  arch/powerpc/include/asm/pgtable-ppc64-64k.h | 3
-rw-r--r--  arch/powerpc/include/asm/pgtable-ppc64.h | 241
-rw-r--r--  arch/powerpc/include/asm/pgtable.h | 9
-rw-r--r--  arch/powerpc/include/asm/probes.h | 25
-rw-r--r--  arch/powerpc/include/asm/processor.h | 20
-rw-r--r--  arch/powerpc/include/asm/reg.h | 43
-rw-r--r--  arch/powerpc/include/asm/rtas.h | 4
-rw-r--r--  arch/powerpc/include/asm/smp.h | 4
-rw-r--r--  arch/powerpc/include/asm/switch_to.h | 23
-rw-r--r--  arch/powerpc/include/asm/tlbflush.h | 3
-rw-r--r--  arch/powerpc/include/asm/uaccess.h | 16
-rw-r--r--  arch/powerpc/include/asm/vdso.h | 2
-rw-r--r--  arch/powerpc/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/powerpc/include/uapi/asm/perf_event.h | 18
-rw-r--r--  arch/powerpc/include/uapi/asm/socket.h | 2
-rw-r--r--  arch/powerpc/kernel/Makefile | 4
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c | 10
-rw-r--r--  arch/powerpc/kernel/cacheinfo.c | 36
-rw-r--r--  arch/powerpc/kernel/cputable.c | 20
-rw-r--r--  arch/powerpc/kernel/crash_dump.c | 10
-rw-r--r--  arch/powerpc/kernel/eeh.c (renamed from arch/powerpc/platforms/pseries/eeh.c) | 266
-rw-r--r--  arch/powerpc/kernel/eeh_cache.c (renamed from arch/powerpc/platforms/pseries/eeh_cache.c) | 23
-rw-r--r--  arch/powerpc/kernel/eeh_dev.c (renamed from arch/powerpc/platforms/pseries/eeh_dev.c) | 0
-rw-r--r--  arch/powerpc/kernel/eeh_driver.c (renamed from arch/powerpc/platforms/pseries/eeh_driver.c) | 246
-rw-r--r--  arch/powerpc/kernel/eeh_event.c (renamed from arch/powerpc/platforms/pseries/eeh_event.c) | 134
-rw-r--r--  arch/powerpc/kernel/eeh_pe.c (renamed from arch/powerpc/platforms/pseries/eeh_pe.c) | 255
-rw-r--r--  arch/powerpc/kernel/eeh_sysfs.c (renamed from arch/powerpc/platforms/pseries/eeh_sysfs.c) | 22
-rw-r--r--  arch/powerpc/kernel/entry_64.S | 66
-rw-r--r--  arch/powerpc/kernel/exceptions-64s.S | 57
-rw-r--r--  arch/powerpc/kernel/hw_breakpoint.c | 3
-rw-r--r--  arch/powerpc/kernel/ibmebus.c | 22
-rw-r--r--  arch/powerpc/kernel/idle.c | 4
-rw-r--r--  arch/powerpc/kernel/io-workarounds.c | 11
-rw-r--r--  arch/powerpc/kernel/iommu.c | 323
-rw-r--r--  arch/powerpc/kernel/irq.c | 4
-rw-r--r--  arch/powerpc/kernel/kprobes.c | 20
-rw-r--r--  arch/powerpc/kernel/kvm.c | 9
-rw-r--r--  arch/powerpc/kernel/nvram_64.c | 20
-rw-r--r--  arch/powerpc/kernel/pci-common.c | 2
-rw-r--r--  arch/powerpc/kernel/pci-hotplug.c | 110
-rw-r--r--  arch/powerpc/kernel/pci_of_scan.c | 61
-rw-r--r--  arch/powerpc/kernel/proc_powerpc.c | 20
-rw-r--r--  arch/powerpc/kernel/process.c | 14
-rw-r--r--  arch/powerpc/kernel/prom.c | 42
-rw-r--r--  arch/powerpc/kernel/prom_init.c | 5
-rw-r--r--  arch/powerpc/kernel/ptrace.c | 30
-rw-r--r--  arch/powerpc/kernel/reloc_32.S | 3
-rw-r--r--  arch/powerpc/kernel/rtas.c | 4
-rw-r--r--  arch/powerpc/kernel/setup_64.c | 2
-rw-r--r--  arch/powerpc/kernel/signal_32.c | 70
-rw-r--r--  arch/powerpc/kernel/signal_64.c | 8
-rw-r--r--  arch/powerpc/kernel/smp.c | 12
-rw-r--r--  arch/powerpc/kernel/sysfs.c | 6
-rw-r--r--  arch/powerpc/kernel/time.c | 1
-rw-r--r--  arch/powerpc/kernel/tm.S | 38
-rw-r--r--  arch/powerpc/kernel/traps.c | 93
-rw-r--r--  arch/powerpc/kernel/udbg.c | 2
-rw-r--r--  arch/powerpc/kernel/vdso.c | 2
-rw-r--r--  arch/powerpc/kernel/vmlinux.lds.S | 3
-rw-r--r--  arch/powerpc/kvm/Makefile | 13
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu.c | 81
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_host.c | 23
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c | 8
-rw-r--r--  arch/powerpc/kvm/book3s_64_slb.S | 13
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 6
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_mmu.c | 14
-rw-r--r--  arch/powerpc/kvm/book3s_pr.c | 8
-rw-r--r--  arch/powerpc/kvm/booke.c | 2
-rw-r--r--  arch/powerpc/kvm/emulate.c | 3
-rw-r--r--  arch/powerpc/lib/sstep.c | 2
-rw-r--r--  arch/powerpc/math-emu/Makefile | 3
-rw-r--r--  arch/powerpc/math-emu/fre.c | 11
-rw-r--r--  arch/powerpc/math-emu/frsqrtes.c | 11
-rw-r--r--  arch/powerpc/math-emu/math.c | 14
-rw-r--r--  arch/powerpc/mm/44x_mmu.c | 6
-rw-r--r--  arch/powerpc/mm/Makefile | 8
-rw-r--r--  arch/powerpc/mm/gup.c | 18
-rw-r--r--  arch/powerpc/mm/hash_low_64.S | 21
-rw-r--r--  arch/powerpc/mm/hash_native_64.c | 207
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c | 67
-rw-r--r--  arch/powerpc/mm/hugepage-hash64.c | 175
-rw-r--r--  arch/powerpc/mm/hugetlbpage-hash64.c | 2
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c | 301
-rw-r--r--  arch/powerpc/mm/init_64.c | 9
-rw-r--r--  arch/powerpc/mm/mem.c | 63
-rw-r--r--  arch/powerpc/mm/mmap.c (renamed from arch/powerpc/mm/mmap_64.c) | 2
-rw-r--r--  arch/powerpc/mm/mmu_context_nohash.c | 15
-rw-r--r--  arch/powerpc/mm/numa.c | 71
-rw-r--r--  arch/powerpc/mm/pgtable.c | 8
-rw-r--r--  arch/powerpc/mm/pgtable_64.c | 414
-rw-r--r--  arch/powerpc/mm/subpage-prot.c | 48
-rw-r--r--  arch/powerpc/mm/tlb_hash64.c | 40
-rw-r--r--  arch/powerpc/mm/tlb_nohash.c | 2
-rw-r--r--  arch/powerpc/net/bpf_jit_comp.c | 19
-rw-r--r--  arch/powerpc/perf/core-book3s.c | 206
-rw-r--r--  arch/powerpc/perf/power7-pmu.c | 85
-rw-r--r--  arch/powerpc/perf/power8-pmu.c | 86
-rw-r--r--  arch/powerpc/platforms/44x/currituck.c | 43
-rw-r--r--  arch/powerpc/platforms/44x/iss4xx.c | 4
-rw-r--r--  arch/powerpc/platforms/512x/mpc5121_ads.c | 6
-rw-r--r--  arch/powerpc/platforms/512x/mpc512x.h | 12
-rw-r--r--  arch/powerpc/platforms/512x/mpc512x_generic.c | 4
-rw-r--r--  arch/powerpc/platforms/512x/mpc512x_shared.c | 31
-rw-r--r--  arch/powerpc/platforms/512x/pdm360ng.c | 4
-rw-r--r--  arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c | 12
-rw-r--r--  arch/powerpc/platforms/85xx/p5020_ds.c | 5
-rw-r--r--  arch/powerpc/platforms/85xx/p5040_ds.c | 5
-rw-r--r--  arch/powerpc/platforms/85xx/smp.c | 6
-rw-r--r--  arch/powerpc/platforms/85xx/t4240_qds.c | 5
-rw-r--r--  arch/powerpc/platforms/8xx/m8xx_setup.c | 14
-rw-r--r--  arch/powerpc/platforms/Kconfig | 57
-rw-r--r--  arch/powerpc/platforms/Kconfig.cputype | 2
-rw-r--r--  arch/powerpc/platforms/cell/beat_htab.c | 16
-rw-r--r--  arch/powerpc/platforms/cell/beat_interrupt.c | 2
-rw-r--r--  arch/powerpc/platforms/cell/smp.c | 2
-rw-r--r--  arch/powerpc/platforms/cell/spufs/inode.c | 2
-rw-r--r--  arch/powerpc/platforms/pasemi/Makefile | 1
-rw-r--r--  arch/powerpc/platforms/pasemi/cpufreq.c | 330
-rw-r--r--  arch/powerpc/platforms/powermac/Makefile | 2
-rw-r--r--  arch/powerpc/platforms/powermac/cpufreq_32.c | 721
-rw-r--r--  arch/powerpc/platforms/powermac/cpufreq_64.c | 746
-rw-r--r--  arch/powerpc/platforms/powermac/smp.c | 4
-rw-r--r--  arch/powerpc/platforms/powernv/Makefile | 1
-rw-r--r--  arch/powerpc/platforms/powernv/eeh-ioda.c | 916
-rw-r--r--  arch/powerpc/platforms/powernv/eeh-powernv.c | 390
-rw-r--r--  arch/powerpc/platforms/powernv/opal-wrappers.S | 3
-rw-r--r--  arch/powerpc/platforms/powernv/opal.c | 69
-rw-r--r--  arch/powerpc/platforms/powernv/pci-ioda.c | 64
-rw-r--r--  arch/powerpc/platforms/powernv/pci-p5ioc2.c | 11
-rw-r--r--  arch/powerpc/platforms/powernv/pci.c | 139
-rw-r--r--  arch/powerpc/platforms/powernv/pci.h | 35
-rw-r--r--  arch/powerpc/platforms/powernv/setup.c | 4
-rw-r--r--  arch/powerpc/platforms/powernv/smp.c | 4
-rw-r--r--  arch/powerpc/platforms/ps3/htab.c | 5
-rw-r--r--  arch/powerpc/platforms/pseries/Kconfig | 6
-rw-r--r--  arch/powerpc/platforms/pseries/Makefile | 4
-rw-r--r--  arch/powerpc/platforms/pseries/eeh_pseries.c | 67
-rw-r--r--  arch/powerpc/platforms/pseries/io_event_irq.c | 2
-rw-r--r--  arch/powerpc/platforms/pseries/iommu.c | 4
-rw-r--r--  arch/powerpc/platforms/pseries/lpar.c | 144
-rw-r--r--  arch/powerpc/platforms/pseries/nvram.c | 546
-rw-r--r--  arch/powerpc/platforms/pseries/pci_dlpar.c | 85
-rw-r--r--  arch/powerpc/platforms/pseries/ras.c | 11
-rw-r--r--  arch/powerpc/platforms/pseries/smp.c | 2
-rwxr-xr-x  arch/powerpc/relocs_check.pl | 10
-rw-r--r--  arch/powerpc/sysdev/Makefile | 2
-rw-r--r--  arch/powerpc/sysdev/cpm1.c | 1
-rw-r--r--  arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c | 161
-rw-r--r--  arch/powerpc/sysdev/mpic.c | 58
-rw-r--r--  arch/powerpc/sysdev/mpic_timer.c | 593
188 files changed, 7369 insertions, 3373 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c33e3ad2c8fd..dbd9d3c991e8 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -138,6 +138,7 @@ config PPC
 	select ARCH_USE_BUILTIN_BSWAP
 	select OLD_SIGSUSPEND
 	select OLD_SIGACTION if PPC32
+	select HAVE_DEBUG_STACKOVERFLOW
 
 config EARLY_PRINTK
 	bool
@@ -298,7 +299,7 @@ config HUGETLB_PAGE_SIZE_VARIABLE
 
 config MATH_EMULATION
 	bool "Math emulation"
-	depends on 4xx || 8xx || E200 || PPC_MPC832x || E500
+	depends on 4xx || 8xx || PPC_MPC832x || BOOKE
 	---help---
 	  Some PowerPC chips designed for embedded applications do not have
 	  a floating-point unit and therefore do not implement the
@@ -307,6 +308,10 @@ config MATH_EMULATION
 	  unit, which will allow programs that use floating-point
 	  instructions to run.
 
+	  This is also useful to emulate missing (optional) instructions
+	  such as fsqrt on cores that do have an FPU but do not implement
+	  them (such as Freescale BookE).
+
 config PPC_TRANSACTIONAL_MEM
 	bool "Transactional Memory support for POWERPC"
 	depends on PPC_BOOK3S_64
@@ -315,17 +320,6 @@ config PPC_TRANSACTIONAL_MEM
 	---help---
 	  Support user-mode Transactional Memory on POWERPC.
 
-config 8XX_MINIMAL_FPEMU
-	bool "Minimal math emulation for 8xx"
-	depends on 8xx && !MATH_EMULATION
-	help
-	  Older arch/ppc kernels still emulated a few floating point
-	  instructions such as load and store, even when full math
-	  emulation is disabled.  Say "Y" here if you want to preserve
-	  this behavior.
-
-	  It is recommended that you build a soft-float userspace instead.
-
 config IOMMU_HELPER
 	def_bool PPC64
 
@@ -341,7 +335,7 @@ config SWIOTLB
 
 config HOTPLUG_CPU
 	bool "Support for enabling/disabling CPUs"
-	depends on SMP && HOTPLUG && (PPC_PSERIES || \
+	depends on SMP && (PPC_PSERIES || \
 	PPC_PMAC || PPC_POWERNV || (PPC_85xx && !PPC_E500MC))
 	---help---
 	  Say Y here to be able to disable and re-enable individual
@@ -572,7 +566,7 @@ config SCHED_SMT
 config PPC_DENORMALISATION
 	bool "PowerPC denormalisation exception handling"
 	depends on PPC_BOOK3S_64
-	default "n"
+	default "y" if PPC_POWERNV
 	---help---
 	  Add support for handling denormalisation of single precision
 	  values.  Useful for bare metal only.  If unsure say Y here.
@@ -674,7 +668,6 @@ config SBUS
 
 config FSL_SOC
 	bool
-	select HAVE_CAN_FLEXCAN if NET && CAN
 
 config FSL_PCI
 	bool
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 863d877e0b5f..21c9f304e96c 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -28,13 +28,6 @@ config PRINT_STACK_DEPTH
 	  too small and stack traces cause important information to
 	  scroll off the screen.
 
-config DEBUG_STACKOVERFLOW
-	bool "Check for stack overflows"
-	depends on DEBUG_KERNEL
-	help
-	  This option will cause messages to be printed if free stack space
-	  drops below a certain limit.
-
 config HCALL_STATS
 	bool "Hypervisor call instrumentation"
 	depends on PPC_PSERIES && DEBUG_FS && TRACEPOINTS
@@ -147,6 +140,13 @@ choice
 	  enable debugging for the wrong type of machine your kernel
 	  _will not boot_.
 
+config PPC_EARLY_DEBUG_BOOTX
+	bool "BootX or OpenFirmware"
+	depends on BOOTX_TEXT
+	help
+	  Select this to enable early debugging for a machine using BootX
+	  or OpenFirmware.
+
 config PPC_EARLY_DEBUG_LPAR
 	bool "LPAR HV Console"
 	depends on PPC_PSERIES
diff --git a/arch/powerpc/boot/dts/currituck.dts b/arch/powerpc/boot/dts/currituck.dts
index b801dd06e573..d2c8a872308e 100644
--- a/arch/powerpc/boot/dts/currituck.dts
+++ b/arch/powerpc/boot/dts/currituck.dts
@@ -103,6 +103,11 @@
 		interrupts = <34 2>;
 	};
 
+	FPGA0: fpga@50000000 {
+		compatible = "ibm,currituck-fpga";
+		reg = <0x50000000 0x4>;
+	};
+
 	IIC0: i2c@00000000 {
 		compatible = "ibm,iic-currituck", "ibm,iic";
 		reg = <0x0 0x00000014>;
diff --git a/arch/powerpc/boot/dts/fsl/interlaken-lac-portals.dtsi b/arch/powerpc/boot/dts/fsl/interlaken-lac-portals.dtsi
new file mode 100644
index 000000000000..9cffccf4e07e
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/interlaken-lac-portals.dtsi
@@ -0,0 +1,156 @@
+/* T4240 Interlaken LAC Portal device tree stub with 24 portals.
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#address-cells = <0x1>;
+#size-cells = <0x1>;
+compatible = "fsl,interlaken-lac-portals";
+
+lportal0: lac-portal@0 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x0 0x1000>;
+};
+
+lportal1: lac-portal@1000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x1000 0x1000>;
+};
+
+lportal2: lac-portal@2000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x2000 0x1000>;
+};
+
+lportal3: lac-portal@3000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x3000 0x1000>;
+};
+
+lportal4: lac-portal@4000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x4000 0x1000>;
+};
+
+lportal5: lac-portal@5000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x5000 0x1000>;
+};
+
+lportal6: lac-portal@6000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x6000 0x1000>;
+};
+
+lportal7: lac-portal@7000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x7000 0x1000>;
+};
+
+lportal8: lac-portal@8000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x8000 0x1000>;
+};
+
+lportal9: lac-portal@9000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x9000 0x1000>;
+};
+
+lportal10: lac-portal@A000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0xA000 0x1000>;
+};
+
+lportal11: lac-portal@B000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0xB000 0x1000>;
+};
+
+lportal12: lac-portal@C000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0xC000 0x1000>;
+};
+
+lportal13: lac-portal@D000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0xD000 0x1000>;
+};
+
+lportal14: lac-portal@E000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0xE000 0x1000>;
+};
+
+lportal15: lac-portal@F000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0xF000 0x1000>;
+};
+
+lportal16: lac-portal@10000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x10000 0x1000>;
+};
+
+lportal17: lac-portal@11000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x11000 0x1000>;
+};
+
+lportal18: lac-portal@1200 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x12000 0x1000>;
+};
+
+lportal19: lac-portal@13000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x13000 0x1000>;
+};
+
+lportal20: lac-portal@14000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x14000 0x1000>;
+};
+
+lportal21: lac-portal@15000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x15000 0x1000>;
+};
+
+lportal22: lac-portal@16000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x16000 0x1000>;
+};
+
+lportal23: lac-portal@17000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x17000 0x1000>;
+};
diff --git a/arch/powerpc/boot/dts/fsl/interlaken-lac.dtsi b/arch/powerpc/boot/dts/fsl/interlaken-lac.dtsi
new file mode 100644
index 000000000000..e8208720ac0e
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/interlaken-lac.dtsi
@@ -0,0 +1,45 @@
+/*
+ * T4 Interlaken Look-aside Controller (LAC) device tree stub
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+lac: lac@229000 {
+	compatible = "fsl,interlaken-lac";
+	reg = <0x229000 0x1000>;
+	interrupts = <16 2 1 18>;
+};
+
+lac-hv@228000 {
+	compatible = "fsl,interlaken-lac-hv";
+	reg = <0x228000 0x1000>;
+	fsl,non-hv-node = <&lac>;
+};
diff --git a/arch/powerpc/configs/c2k_defconfig b/arch/powerpc/configs/c2k_defconfig
index 2a84fd7f631c..671a8f960afa 100644
--- a/arch/powerpc/configs/c2k_defconfig
+++ b/arch/powerpc/configs/c2k_defconfig
@@ -423,6 +423,8 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_DEBUG_STACK_USAGE=y
 CONFIG_BOOTX_TEXT=y
+CONFIG_PPC_EARLY_DEBUG=y
+CONFIG_PPC_EARLY_DEBUG_BOOTX=y
 CONFIG_KEYS=y
 CONFIG_KEYS_DEBUG_PROC_KEYS=y
 CONFIG_SECURITY=y
diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig
index 07b7f2af2dca..1ea22fc24ea8 100644
--- a/arch/powerpc/configs/g5_defconfig
+++ b/arch/powerpc/configs/g5_defconfig
@@ -284,6 +284,8 @@ CONFIG_DEBUG_MUTEXES=y
 CONFIG_LATENCYTOP=y
 CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_BOOTX_TEXT=y
+CONFIG_PPC_EARLY_DEBUG=y
+CONFIG_PPC_EARLY_DEBUG_BOOTX=y
 CONFIG_CRYPTO_NULL=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_ECB=m
diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig
index 02ac96b679b8..2a5afac29861 100644
--- a/arch/powerpc/configs/maple_defconfig
+++ b/arch/powerpc/configs/maple_defconfig
@@ -138,6 +138,8 @@ CONFIG_DEBUG_STACK_USAGE=y
 CONFIG_XMON=y
 CONFIG_XMON_DEFAULT=y
 CONFIG_BOOTX_TEXT=y
+CONFIG_PPC_EARLY_DEBUG=y
+CONFIG_PPC_EARLY_DEBUG_BOOTX=y
 CONFIG_CRYPTO_ECB=m
 CONFIG_CRYPTO_PCBC=m
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/mpc512x_defconfig b/arch/powerpc/configs/mpc512x_defconfig
index 0d0d981442fd..ee853a1b1b2c 100644
--- a/arch/powerpc/configs/mpc512x_defconfig
+++ b/arch/powerpc/configs/mpc512x_defconfig
@@ -1,7 +1,6 @@
-CONFIG_EXPERIMENTAL=y
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
-CONFIG_SPARSE_IRQ=y
+CONFIG_NO_HZ=y
 CONFIG_LOG_BUF_SHIFT=16
 CONFIG_BLK_DEV_INITRD=y
 # CONFIG_COMPAT_BRK is not set
@@ -9,6 +8,7 @@ CONFIG_SLAB=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 # CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
 # CONFIG_IOSCHED_CFQ is not set
 # CONFIG_PPC_CHRP is not set
 CONFIG_PPC_MPC512x=y
@@ -16,9 +16,7 @@ CONFIG_MPC5121_ADS=y
 CONFIG_MPC512x_GENERIC=y
 CONFIG_PDM360NG=y
 # CONFIG_PPC_PMAC is not set
-CONFIG_NO_HZ=y
 CONFIG_HZ_1000=y
-# CONFIG_MIGRATION is not set
 # CONFIG_SECCOMP is not set
 # CONFIG_PCI is not set
 CONFIG_NET=y
@@ -33,8 +31,6 @@ CONFIG_IP_PNP=y
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 CONFIG_CAN=y
-CONFIG_CAN_RAW=y
-CONFIG_CAN_BCM=y
 CONFIG_CAN_VCAN=y
 CONFIG_CAN_MSCAN=y
 CONFIG_CAN_DEBUG_DEVICES=y
@@ -46,7 +42,6 @@ CONFIG_DEVTMPFS_MOUNT=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_AMDSTD=y
@@ -60,7 +55,6 @@ CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_COUNT=1
 CONFIG_BLK_DEV_RAM_SIZE=8192
 CONFIG_BLK_DEV_XIP=y
-CONFIG_MISC_DEVICES=y
 CONFIG_EEPROM_AT24=y
 CONFIG_EEPROM_AT25=y
 CONFIG_SCSI=y
@@ -68,6 +62,7 @@ CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_CHR_DEV_SG=y
 CONFIG_NETDEVICES=y
+CONFIG_FS_ENET=y
 CONFIG_MARVELL_PHY=y
 CONFIG_DAVICOM_PHY=y
 CONFIG_QSEMI_PHY=y
@@ -83,10 +78,6 @@ CONFIG_STE10XP=y
 CONFIG_LSI_ET1011C_PHY=y
 CONFIG_FIXED_PHY=y
 CONFIG_MDIO_BITBANG=y
-CONFIG_NET_ETHERNET=y
-CONFIG_FS_ENET=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
 # CONFIG_WLAN is not set
 # CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 CONFIG_INPUT_EVDEV=y
@@ -106,14 +97,18 @@ CONFIG_GPIO_SYSFS=y
 CONFIG_GPIO_MPC8XXX=y
 # CONFIG_HWMON is not set
 CONFIG_MEDIA_SUPPORT=y
-CONFIG_VIDEO_DEV=y
 CONFIG_VIDEO_ADV_DEBUG=y
-# CONFIG_VIDEO_HELPER_CHIPS_AUTO is not set
-CONFIG_VIDEO_SAA711X=y
 CONFIG_FB=y
 CONFIG_FB_FSL_DIU=y
 # CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_FSL=y
+# CONFIG_USB_EHCI_HCD_PPC_OF is not set
+CONFIG_USB_STORAGE=y
+CONFIG_USB_GADGET=y
+CONFIG_USB_FSL_USB2=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_M41T80=y
 CONFIG_RTC_DRV_MPC5121=y
@@ -129,9 +124,7 @@ CONFIG_TMPFS=y
 CONFIG_JFFS2_FS=y
 CONFIG_UBIFS_FS=y
 CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
 CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig
index 165e6b32baef..152fa05b15e4 100644
--- a/arch/powerpc/configs/mpc85xx_smp_defconfig
+++ b/arch/powerpc/configs/mpc85xx_smp_defconfig
@@ -131,6 +131,7 @@ CONFIG_DUMMY=y
 CONFIG_FS_ENET=y
 CONFIG_UCC_GETH=y
 CONFIG_GIANFAR=y
+CONFIG_E1000E=y
 CONFIG_MARVELL_PHY=y
 CONFIG_DAVICOM_PHY=y
 CONFIG_CICADA_PHY=y
diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig
index 29767a8dfea5..a73626b09051 100644
--- a/arch/powerpc/configs/pmac32_defconfig
+++ b/arch/powerpc/configs/pmac32_defconfig
@@ -350,6 +350,8 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_XMON=y
 CONFIG_XMON_DEFAULT=y
 CONFIG_BOOTX_TEXT=y
+CONFIG_PPC_EARLY_DEBUG=y
+CONFIG_PPC_EARLY_DEBUG_BOOTX=y
 CONFIG_CRYPTO_NULL=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_MD4=m
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index aef3f71de5ad..0e8cfd09da2f 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -58,7 +58,7 @@ CONFIG_SCHED_SMT=y
 CONFIG_PPC_DENORMALISATION=y
 CONFIG_PCCARD=y
 CONFIG_ELECTRA_CF=y
-CONFIG_HOTPLUG_PCI=m
+CONFIG_HOTPLUG_PCI=y
 CONFIG_HOTPLUG_PCI_RPA=m
 CONFIG_HOTPLUG_PCI_RPA_DLPAR=m
 CONFIG_PACKET=y
@@ -398,6 +398,8 @@ CONFIG_FTR_FIXUP_SELFTEST=y
 CONFIG_MSI_BITMAP_SELFTEST=y
 CONFIG_XMON=y
 CONFIG_BOOTX_TEXT=y
+CONFIG_PPC_EARLY_DEBUG=y
+CONFIG_PPC_EARLY_DEBUG_BOOTX=y
 CONFIG_CRYPTO_NULL=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_PCBC=m
diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig
index 4b20f76172e2..0085dc4642c5 100644
--- a/arch/powerpc/configs/ppc64e_defconfig
+++ b/arch/powerpc/configs/ppc64e_defconfig
@@ -32,7 +32,7 @@ CONFIG_IRQ_ALL_CPUS=y
 CONFIG_SPARSEMEM_MANUAL=y
 CONFIG_PCI_MSI=y
 CONFIG_PCCARD=y
-CONFIG_HOTPLUG_PCI=m
+CONFIG_HOTPLUG_PCI=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
 CONFIG_XFRM_USER=m
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index be1cb6ea3a36..20ebfaf7234b 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -1264,6 +1264,8 @@ CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_DEBUG_STACK_USAGE=y
 CONFIG_XMON=y
 CONFIG_BOOTX_TEXT=y
+CONFIG_PPC_EARLY_DEBUG=y
+CONFIG_PPC_EARLY_DEBUG_BOOTX=y
 CONFIG_KEYS=y
 CONFIG_KEYS_DEBUG_PROC_KEYS=y
 CONFIG_SECURITY=y
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index c4dfbaf8b192..1d4b9763895d 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -53,7 +53,7 @@ CONFIG_PPC_64K_PAGES=y
 CONFIG_PPC_SUBPAGE_PROT=y
 CONFIG_SCHED_SMT=y
 CONFIG_PPC_DENORMALISATION=y
-CONFIG_HOTPLUG_PCI=m
+CONFIG_HOTPLUG_PCI=y
 CONFIG_HOTPLUG_PCI_RPA=m
 CONFIG_HOTPLUG_PCI_RPA_DLPAR=m
 CONFIG_PACKET=y
@@ -296,6 +296,7 @@ CONFIG_SQUASHFS=m
 CONFIG_SQUASHFS_XATTR=y
 CONFIG_SQUASHFS_LZO=y
 CONFIG_SQUASHFS_XZ=y
+CONFIG_PSTORE=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=y
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index a80e32b46c11..d3e5e9bc8f94 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -24,6 +24,7 @@
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/string.h>
+#include <linux/time.h>
 
 struct pci_dev;
 struct pci_bus;
@@ -52,6 +53,9 @@ struct device_node;
 
 #define EEH_PE_ISOLATED		(1 << 0)	/* Isolated PE */
 #define EEH_PE_RECOVERING	(1 << 1)	/* Recovering PE */
+#define EEH_PE_PHB_DEAD		(1 << 2)	/* Dead PHB */
+
+#define EEH_PE_KEEP		(1 << 8)	/* Keep PE on hotplug */
 
 struct eeh_pe {
 	int type;			/* PE type: PHB/Bus/Device */
@@ -59,8 +63,10 @@ struct eeh_pe {
 	int config_addr;		/* Traditional PCI address */
 	int addr;			/* PE configuration address */
 	struct pci_controller *phb;	/* Associated PHB */
+	struct pci_bus *bus;		/* Top PCI bus for bus PE */
 	int check_count;		/* Times of ignored error */
 	int freeze_count;		/* Times of froze up */
+	struct timeval tstamp;		/* Time on first-time freeze */
 	int false_positives;		/* Times of reported #ff's */
 	struct eeh_pe *parent;		/* Parent PE */
 	struct list_head child_list;	/* Link PE to the child list */
@@ -68,8 +74,8 @@ struct eeh_pe {
 	struct list_head child;		/* Child PEs */
 };
 
-#define eeh_pe_for_each_dev(pe, edev) \
-	list_for_each_entry(edev, &pe->edevs, list)
+#define eeh_pe_for_each_dev(pe, edev, tmp) \
+	list_for_each_entry_safe(edev, tmp, &pe->edevs, list)
 
 /*
  * The struct is used to trace EEH state for the associated
@@ -78,7 +84,13 @@ struct eeh_pe {
  * another tree except the currently existing tree of PCI
  * buses and PCI devices
  */
-#define EEH_DEV_IRQ_DISABLED	(1<<0)	/* Interrupt disabled */
+#define EEH_DEV_BRIDGE		(1 << 0)	/* PCI bridge */
+#define EEH_DEV_ROOT_PORT	(1 << 1)	/* PCIe root port */
+#define EEH_DEV_DS_PORT		(1 << 2)	/* Downstream port */
+#define EEH_DEV_IRQ_DISABLED	(1 << 3)	/* Interrupt disabled */
+#define EEH_DEV_DISCONNECTED	(1 << 4)	/* Removing from PE */
+
+#define EEH_DEV_SYSFS		(1 << 8)	/* Sysfs created */
 
 struct eeh_dev {
 	int mode;			/* EEH mode */
@@ -86,21 +98,23 @@ struct eeh_dev {
 	int config_addr;		/* Config address */
 	int pe_config_addr;		/* PE config address */
 	u32 config_space[16];		/* Saved PCI config space */
+	u8 pcie_cap;			/* Saved PCIe capability */
 	struct eeh_pe *pe;		/* Associated PE */
 	struct list_head list;		/* Form link list in the PE */
 	struct pci_controller *phb;	/* Associated PHB */
 	struct device_node *dn;		/* Associated device node */
 	struct pci_dev *pdev;		/* Associated PCI device */
+	struct pci_bus *bus;		/* PCI bus for partial hotplug */
 };
 
 static inline struct device_node *eeh_dev_to_of_node(struct eeh_dev *edev)
 {
-	return edev->dn;
+	return edev ? edev->dn : NULL;
 }
 
 static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev)
 {
-	return edev->pdev;
+	return edev ? edev->pdev : NULL;
 }
 
 /*
@@ -130,8 +144,9 @@ static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev)
 struct eeh_ops {
 	char *name;
 	int (*init)(void);
+	int (*post_init)(void);
 	void* (*of_probe)(struct device_node *dn, void *flag);
-	void* (*dev_probe)(struct pci_dev *dev, void *flag);
+	int (*dev_probe)(struct pci_dev *dev, void *flag);
 	int (*set_option)(struct eeh_pe *pe, int option);
 	int (*get_pe_addr)(struct eeh_pe *pe);
 	int (*get_state)(struct eeh_pe *pe, int *state);
@@ -141,11 +156,12 @@ struct eeh_ops {
 	int (*configure_bridge)(struct eeh_pe *pe);
 	int (*read_config)(struct device_node *dn, int where, int size, u32 *val);
 	int (*write_config)(struct device_node *dn, int where, int size, u32 val);
+	int (*next_error)(struct eeh_pe **pe);
 };
 
 extern struct eeh_ops *eeh_ops;
 extern int eeh_subsystem_enabled;
-extern struct mutex eeh_mutex;
+extern raw_spinlock_t confirm_error_lock;
 extern int eeh_probe_mode;
 
 #define EEH_PROBE_MODE_DEV	(1<<0)	/* From PCI device */
@@ -166,14 +182,14 @@ static inline int eeh_probe_mode_dev(void)
 	return (eeh_probe_mode == EEH_PROBE_MODE_DEV);
 }
 
-static inline void eeh_lock(void)
+static inline void eeh_serialize_lock(unsigned long *flags)
 {
-	mutex_lock(&eeh_mutex);
+	raw_spin_lock_irqsave(&confirm_error_lock, *flags);
 }
 
-static inline void eeh_unlock(void)
+static inline void eeh_serialize_unlock(unsigned long flags)
 {
-	mutex_unlock(&eeh_mutex);
+	raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
 }
 
 /*
@@ -184,8 +200,13 @@ static inline void eeh_unlock(void)
 
 typedef void *(*eeh_traverse_func)(void *data, void *flag);
 int eeh_phb_pe_create(struct pci_controller *phb);
+struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb);
+struct eeh_pe *eeh_pe_get(struct eeh_dev *edev);
 int eeh_add_to_parent_pe(struct eeh_dev *edev);
-int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe);
+int eeh_rmv_from_parent_pe(struct eeh_dev *edev);
+void eeh_pe_update_time_stamp(struct eeh_pe *pe);
+void *eeh_pe_traverse(struct eeh_pe *root,
+		eeh_traverse_func fn, void *flag);
 void *eeh_pe_dev_traverse(struct eeh_pe *root,
 		eeh_traverse_func fn, void *flag);
 void eeh_pe_restore_bars(struct eeh_pe *pe);
@@ -193,16 +214,19 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
 
 void *eeh_dev_init(struct device_node *dn, void *data);
 void eeh_dev_phb_init_dynamic(struct pci_controller *phb);
+int eeh_init(void);
 int __init eeh_ops_register(struct eeh_ops *ops);
 int __exit eeh_ops_unregister(const char *name);
 unsigned long eeh_check_failure(const volatile void __iomem *token,
 				unsigned long val);
 int eeh_dev_check_failure(struct eeh_dev *edev);
-void __init eeh_addr_cache_build(void);
+void eeh_addr_cache_build(void);
+void eeh_add_device_early(struct device_node *);
 void eeh_add_device_tree_early(struct device_node *);
+void eeh_add_device_late(struct pci_dev *);
 void eeh_add_device_tree_late(struct pci_bus *);
 void eeh_add_sysfs_files(struct pci_bus *);
-void eeh_remove_bus_device(struct pci_dev *, int);
+void eeh_remove_device(struct pci_dev *);
 
 /**
  * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
@@ -221,6 +245,11 @@ void eeh_remove_bus_device(struct pci_dev *, int);
 
 #else /* !CONFIG_EEH */
 
+static inline int eeh_init(void)
+{
+	return 0;
+}
+
 static inline void *eeh_dev_init(struct device_node *dn, void *data)
 {
 	return NULL;
@@ -237,16 +266,17 @@ static inline unsigned long eeh_check_failure(const volatile void __iomem *token
 
 static inline void eeh_addr_cache_build(void) { }
 
+static inline void eeh_add_device_early(struct device_node *dn) { }
+
 static inline void eeh_add_device_tree_early(struct device_node *dn) { }
 
+static inline void eeh_add_device_late(struct pci_dev *dev) { }
+
 static inline void eeh_add_device_tree_late(struct pci_bus *bus) { }
 
 static inline void eeh_add_sysfs_files(struct pci_bus *bus) { }
 
-static inline void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe) { }
-
-static inline void eeh_lock(void) { }
-static inline void eeh_unlock(void) { }
+static inline void eeh_remove_device(struct pci_dev *dev) { }
 
 #define EEH_POSSIBLE_ERROR(val, type) (0)
 #define EEH_IO_ERROR_VALUE(size) (-1UL)
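
The eeh.h hunks above replace the sleeping eeh_lock()/eeh_unlock() pair with an IRQ-safe raw spinlock, so callers must now carry the saved interrupt flags through the critical section. A minimal sketch of a hypothetical caller (illustration only, not code from this merge):

/* Hypothetical caller of the new serialization primitives (sketch). */
static void example_serialized_section(void)
{
	unsigned long flags;

	eeh_serialize_lock(&flags);	/* was: eeh_lock(); */
	/* ... inspect or update shared EEH state; safe in atomic context ... */
	eeh_serialize_unlock(flags);	/* was: eeh_unlock(); */
}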
diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h
index de67d830151b..89d5670b2eeb 100644
--- a/arch/powerpc/include/asm/eeh_event.h
+++ b/arch/powerpc/include/asm/eeh_event.h
@@ -31,7 +31,9 @@ struct eeh_event {
 	struct eeh_pe		*pe;	/* EEH PE */
 };
 
+int eeh_event_init(void);
 int eeh_send_failure_event(struct eeh_pe *pe);
+void eeh_remove_event(struct eeh_pe *pe);
 void eeh_handle_event(struct eeh_pe *pe);
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 46793b58a761..07ca627e52c0 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -358,12 +358,12 @@ label##_relon_pSeries: \
 	/* No guest interrupts come through here */	\
 	SET_SCRATCH0(r13);		/* save r13 */	\
 	EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common,	\
-				       EXC_STD, KVMTEST_PR, vec)
+				       EXC_STD, NOTEST, vec)
 
 #define STD_RELON_EXCEPTION_PSERIES_OOL(vec, label)		\
 	.globl label##_relon_pSeries;				\
 label##_relon_pSeries:						\
-	EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_PR, vec);	\
+	EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, vec);		\
 	EXCEPTION_RELON_PROLOG_PSERIES_1(label##_common, EXC_STD)
 
 #define STD_RELON_EXCEPTION_HV(loc, vec, label)		\
@@ -374,12 +374,12 @@ label##_relon_hv: \
 	/* No guest interrupts come through here */	\
 	SET_SCRATCH0(r13);	/* save r13 */		\
 	EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common,	\
-				       EXC_HV, KVMTEST, vec)
+				       EXC_HV, NOTEST, vec)
 
 #define STD_RELON_EXCEPTION_HV_OOL(vec, label)		\
 	.globl label##_relon_hv;			\
 label##_relon_hv:					\
-	EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST, vec);	\
+	EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, vec);	\
 	EXCEPTION_RELON_PROLOG_PSERIES_1(label##_common, EXC_HV)
 
 /* This associate vector numbers with bits in paca->irq_happened */
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index f2498c8e595d..d750336b171d 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -191,8 +191,14 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma,
 					 unsigned long vmaddr)
 {
 }
-#endif /* CONFIG_HUGETLB_PAGE */
 
+#define hugepd_shift(x) 0
+static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
+				    unsigned pdshift)
+{
+	return 0;
+}
+#endif /* CONFIG_HUGETLB_PAGE */
 
 /*
  * FSL Book3E platforms require special gpage handling - the gpages
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index ba713f166fa5..10be1dd01c6b 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -96,10 +96,11 @@ static inline bool arch_irqs_disabled(void)
 #endif
 
 #define hard_irq_disable()	do {			\
-	u8 _was_enabled = get_paca()->soft_enabled;	\
+	u8 _was_enabled;				\
 	__hard_irq_disable();				\
-	get_paca()->soft_enabled = 0;			\
-	get_paca()->irq_happened |= PACA_IRQ_HARD_DIS;	\
+	_was_enabled = local_paca->soft_enabled;	\
+	local_paca->soft_enabled = 0;			\
+	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;	\
 	if (_was_enabled)				\
 		trace_hardirqs_off();			\
 } while(0)
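
The hard_irq_disable() hunk above fixes an ordering bug: the old macro sampled soft_enabled before interrupts were actually hard-disabled, so an interrupt arriving in that window could mutate the PACA state behind the already-read value. A self-contained userspace analogue of the corrected order (block first, then sample), with POSIX signal masking standing in for MSR[EE]; all names here are hypothetical:

#include <signal.h>
#include <stdio.h>

static volatile sig_atomic_t soft_enabled = 1;

static void fake_irq(int sig)
{
	soft_enabled = 0;	/* the "interrupt" mutates the flag */
}

static int disable_and_sample(void)
{
	sigset_t all, old;
	int was_enabled;

	sigfillset(&all);
	sigprocmask(SIG_BLOCK, &all, &old);	/* 1: block first ("hard disable") */
	was_enabled = soft_enabled;		/* 2: only now sample the flag */
	soft_enabled = 0;
	return was_enabled;
}

int main(void)
{
	signal(SIGUSR1, fake_irq);
	printf("was_enabled=%d\n", disable_and_sample());
	return 0;
}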
diff --git a/arch/powerpc/include/asm/ibmebus.h b/arch/powerpc/include/asm/ibmebus.h
index 1a9d9aea21fa..088f95b2e14f 100644
--- a/arch/powerpc/include/asm/ibmebus.h
+++ b/arch/powerpc/include/asm/ibmebus.h
@@ -48,8 +48,8 @@
 
 extern struct bus_type ibmebus_bus_type;
 
-int ibmebus_register_driver(struct of_platform_driver *drv);
-void ibmebus_unregister_driver(struct of_platform_driver *drv);
+int ibmebus_register_driver(struct platform_driver *drv);
+void ibmebus_unregister_driver(struct platform_driver *drv);
 
 int ibmebus_request_irq(u32 ist, irq_handler_t handler,
 			unsigned long irq_flags, const char *devname,
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index cbfe678e3dbe..c34656a8925e 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -76,6 +76,9 @@ struct iommu_table {
 	struct iommu_pool large_pool;
 	struct iommu_pool pools[IOMMU_NR_POOLS];
 	unsigned long *it_map;		/* A simple allocation bitmap for now */
+#ifdef CONFIG_IOMMU_API
+	struct iommu_group *it_group;
+#endif
 };
 
 struct scatterlist;
@@ -98,6 +101,8 @@ extern void iommu_free_table(struct iommu_table *tbl, const char *node_name);
  */
 extern struct iommu_table *iommu_init_table(struct iommu_table * tbl,
 					    int nid);
+extern void iommu_register_group(struct iommu_table *tbl,
+				 int pci_domain_number, unsigned long pe_num);
 
 extern int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
 			struct scatterlist *sglist, int nelems,
@@ -125,13 +130,6 @@ extern void iommu_init_early_pSeries(void);
 extern void iommu_init_early_dart(void);
 extern void iommu_init_early_pasemi(void);
 
-#ifdef CONFIG_PCI
-extern void pci_iommu_init(void);
-extern void pci_direct_iommu_init(void);
-#else
-static inline void pci_iommu_init(void) { }
-#endif
-
 extern void alloc_dart_table(void);
 #if defined(CONFIG_PPC64) && defined(CONFIG_PM)
 static inline void iommu_save(void)
@@ -147,5 +145,26 @@ static inline void iommu_restore(void)
 }
 #endif
 
+/* The API to support IOMMU operations for VFIO */
+extern int iommu_tce_clear_param_check(struct iommu_table *tbl,
+		unsigned long ioba, unsigned long tce_value,
+		unsigned long npages);
+extern int iommu_tce_put_param_check(struct iommu_table *tbl,
+		unsigned long ioba, unsigned long tce);
+extern int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
+		unsigned long hwaddr, enum dma_data_direction direction);
+extern unsigned long iommu_clear_tce(struct iommu_table *tbl,
+		unsigned long entry);
+extern int iommu_clear_tces_and_put_pages(struct iommu_table *tbl,
+		unsigned long entry, unsigned long pages);
+extern int iommu_put_tce_user_mode(struct iommu_table *tbl,
+		unsigned long entry, unsigned long tce);
+
+extern void iommu_flush_tce(struct iommu_table *tbl);
+extern int iommu_take_ownership(struct iommu_table *tbl);
+extern void iommu_release_ownership(struct iommu_table *tbl);
+
+extern enum dma_data_direction iommu_tce_direction(unsigned long tce);
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_IOMMU_H */
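
The extern block added above is the arch-side surface that a VFIO-style consumer drives: take the TCE table away from the kernel, validate and install user-supplied mappings, then hand it back. A rough sketch of one mapping under that model (hypothetical caller; the ioba-to-entry shift of 12 is an assumption, not something this header defines):

/* Hypothetical VFIO-style consumer of the new TCE API (sketch only). */
static int example_user_map(struct iommu_table *tbl,
			    unsigned long ioba, unsigned long tce)
{
	unsigned long entry = ioba >> 12;	/* assumed 4K TCE granularity */
	int ret;

	ret = iommu_tce_put_param_check(tbl, ioba, tce);
	if (ret)
		return ret;

	ret = iommu_put_tce_user_mode(tbl, entry, tce);
	iommu_flush_tce(tbl);
	return ret;
}

iommu_take_ownership()/iommu_release_ownership() would bracket a whole session of such calls, fencing the kernel's own DMA mappings out of the table while userspace owns it.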
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 349ed85c7d61..08891d07aeb6 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -107,8 +107,9 @@ struct kvmppc_vcpu_book3s {
 #define CONTEXT_GUEST		1
 #define CONTEXT_GUEST_END	2
 
-#define VSID_REAL	0x1fffffffffc00000ULL
-#define VSID_BAT	0x1fffffffffb00000ULL
+#define VSID_REAL	0x0fffffffffc00000ULL
+#define VSID_BAT	0x0fffffffffb00000ULL
+#define VSID_1T		0x1000000000000000ULL
 #define VSID_REAL_DR	0x2000000000000000ULL
 #define VSID_REAL_IR	0x4000000000000000ULL
 #define VSID_PR		0x8000000000000000ULL
@@ -123,6 +124,7 @@ extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu);
 extern void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu);
 extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte);
 extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr);
+extern void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong eaddr, ulong seg_size);
 extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu);
 extern int kvmppc_book3s_hv_page_fault(struct kvm_run *run,
 			struct kvm_vcpu *vcpu, unsigned long addr,
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 9c1ff330c805..a1ecb14e4442 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -159,36 +159,46 @@ static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
 }
 
 /*
- * Lock and read a linux PTE.  If it's present and writable, atomically
- * set dirty and referenced bits and return the PTE, otherwise return 0.
+ * If it's present and writable, atomically set dirty and referenced bits and
+ * return the PTE, otherwise return 0. If we find a transparent hugepage
+ * and if it is marked splitting we return 0;
  */
-static inline pte_t kvmppc_read_update_linux_pte(pte_t *p, int writing)
+static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing,
+						 unsigned int hugepage)
 {
-	pte_t pte, tmp;
-
-	/* wait until _PAGE_BUSY is clear then set it atomically */
-	__asm__ __volatile__ (
-		"1:	ldarx	%0,0,%3\n"
-		"	andi.	%1,%0,%4\n"
-		"	bne-	1b\n"
-		"	ori	%1,%0,%4\n"
-		"	stdcx.	%1,0,%3\n"
-		"	bne-	1b"
-		: "=&r" (pte), "=&r" (tmp), "=m" (*p)
-		: "r" (p), "i" (_PAGE_BUSY)
-		: "cc");
-
-	if (pte_present(pte)) {
-		pte = pte_mkyoung(pte);
-		if (writing && pte_write(pte))
-			pte = pte_mkdirty(pte);
-	}
-
-	*p = pte;	/* clears _PAGE_BUSY */
-
-	return pte;
+	pte_t old_pte, new_pte = __pte(0);
+
+	while (1) {
+		old_pte = pte_val(*ptep);
+		/*
+		 * wait until _PAGE_BUSY is clear then set it atomically
+		 */
+		if (unlikely(old_pte & _PAGE_BUSY)) {
+			cpu_relax();
+			continue;
+		}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+		/* If hugepage and is trans splitting return None */
+		if (unlikely(hugepage &&
+			     pmd_trans_splitting(pte_pmd(old_pte))))
+			return __pte(0);
+#endif
+		/* If pte is not present return None */
+		if (unlikely(!(old_pte & _PAGE_PRESENT)))
+			return __pte(0);
+
+		new_pte = pte_mkyoung(old_pte);
+		if (writing && pte_write(old_pte))
+			new_pte = pte_mkdirty(new_pte);
+
+		if (old_pte == __cmpxchg_u64((unsigned long *)ptep, old_pte,
+					     new_pte))
+			break;
+	}
+	return new_pte;
 }
 
+
 /* Return HPTE cache control bits corresponding to Linux pte bits */
 static inline unsigned long hpte_cache_bits(unsigned long pte_val)
 {
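For illustration, a minimal userspace analogue of the cmpxchg-based read-update loop above. It stands in GCC's __atomic_compare_exchange_n for the kernel's __cmpxchg_u64 and a plain uint64_t for pte_t; the F_* flag bits are made up for the demo, not the kernel's _PAGE_* values.

/* Hedged sketch: userspace model of the lockless read-update pattern. */
#include <stdint.h>
#include <stdio.h>

#define F_BUSY     (1ull << 0)
#define F_PRESENT  (1ull << 1)
#define F_ACCESSED (1ull << 2)
#define F_DIRTY    (1ull << 3)

static uint64_t read_update(uint64_t *slot, int writing)
{
	uint64_t old, new;

	for (;;) {
		old = __atomic_load_n(slot, __ATOMIC_RELAXED);
		if (old & F_BUSY)
			continue;		/* another updater owns it */
		if (!(old & F_PRESENT))
			return 0;		/* nothing to update */
		new = old | F_ACCESSED;
		if (writing)
			new |= F_DIRTY;
		/* publish only if nobody raced with us, like __cmpxchg_u64 */
		if (__atomic_compare_exchange_n(slot, &old, new, 0,
						__ATOMIC_SEQ_CST,
						__ATOMIC_SEQ_CST))
			return new;
	}
}

int main(void)
{
	uint64_t pte = F_PRESENT;

	printf("updated: %#llx\n",
	       (unsigned long long)read_update(&pte, 1));
	return 0;
}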
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index b1e7f2af1016..9b12f88d4adb 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -66,7 +66,8 @@ struct lppaca {
 
 	u8	reserved6[48];
 	u8	cede_latency_hint;
-	u8	reserved7[7];
+	u8	ebb_regs_in_use;
+	u8	reserved7[6];
 	u8	dtl_enable_mask;	/* Dispatch Trace Log mask */
 	u8	donate_dedicated_cpu;	/* Donate dedicated CPU cycles */
 	u8	fpregs_in_use;
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 92386fc4e82a..8b480901165a 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -36,13 +36,13 @@ struct machdep_calls {
 #ifdef CONFIG_PPC64
 	void		(*hpte_invalidate)(unsigned long slot,
 					   unsigned long vpn,
-					   int psize, int ssize,
-					   int local);
+					   int bpsize, int apsize,
+					   int ssize, int local);
 	long		(*hpte_updatepp)(unsigned long slot,
 					 unsigned long newpp,
 					 unsigned long vpn,
-					 int psize, int ssize,
-					 int local);
+					 int bpsize, int apsize,
+					 int ssize, int local);
 	void		(*hpte_updateboltedpp)(unsigned long newpp,
 					       unsigned long ea,
 					       int psize, int ssize);
@@ -57,6 +57,9 @@ struct machdep_calls {
 	void		(*hpte_removebolted)(unsigned long ea,
 					     int psize, int ssize);
 	void		(*flush_hash_range)(unsigned long number, int local);
+	void		(*hugepage_invalidate)(struct mm_struct *mm,
+					       unsigned char *hpte_slot_array,
+					       unsigned long addr, int psize);
 
 	/* special for kexec, to be called in real mode, linear mapping is
 	 * destroyed as well */
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index 2accc9611248..c4cf01197273 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -340,6 +340,20 @@ extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 		     pte_t *ptep, unsigned long trap, int local, int ssize,
 		     unsigned int shift, unsigned int mmu_psize);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern int __hash_page_thp(unsigned long ea, unsigned long access,
+			   unsigned long vsid, pmd_t *pmdp, unsigned long trap,
+			   int local, int ssize, unsigned int psize);
+#else
+static inline int __hash_page_thp(unsigned long ea, unsigned long access,
+				  unsigned long vsid, pmd_t *pmdp,
+				  unsigned long trap, int local,
+				  int ssize, unsigned int psize)
+{
+	BUG();
+	return -1;
+}
+#endif
 extern void hash_failure_debug(unsigned long ea, unsigned long access,
 			       unsigned long vsid, unsigned long trap,
 			       int ssize, int psize, int lpsize,
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index a73668a5f30d..b467530e2485 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -38,7 +38,7 @@ extern void drop_cop(unsigned long acop, struct mm_struct *mm);
 
 /*
  * switch_mm is the entry point called from the architecture independent
- * code in kernel/sched.c
+ * code in kernel/sched/core.c
  */
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 			     struct task_struct *tsk)
diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
index c1df590ec444..49fa55bfbac4 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h
@@ -82,10 +82,9 @@ struct exception_table_entry;
 void sort_ex_table(struct exception_table_entry *start,
 		   struct exception_table_entry *finish);
 
-#ifdef CONFIG_MODVERSIONS
+#if defined(CONFIG_MODVERSIONS) && defined(CONFIG_PPC64)
 #define ARCH_RELOCATES_KCRCTAB
-
-extern const unsigned long reloc_start[];
+#define reloc_start PHYSICAL_START
 #endif
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_MODULE_H */
diff --git a/arch/powerpc/include/asm/mpc5121.h b/arch/powerpc/include/asm/mpc5121.h
index 885c040d6194..8ae133eaf9fa 100644
--- a/arch/powerpc/include/asm/mpc5121.h
+++ b/arch/powerpc/include/asm/mpc5121.h
@@ -68,6 +68,5 @@ struct mpc512x_lpc {
 };
 
 int mpc512x_cs_config(unsigned int cs, u32 val);
-int __init mpc5121_clk_init(void);
 
 #endif /* __ASM_POWERPC_MPC5121_H__ */
diff --git a/arch/powerpc/include/asm/mpc52xx_psc.h b/arch/powerpc/include/asm/mpc52xx_psc.h
index 2966df604221..d0ece257d310 100644
--- a/arch/powerpc/include/asm/mpc52xx_psc.h
+++ b/arch/powerpc/include/asm/mpc52xx_psc.h
@@ -299,4 +299,53 @@ struct mpc512x_psc_fifo {
 #define rxdata_32 rxdata.rxdata_32
 };
 
+struct mpc5125_psc {
+	u8		mr1;			/* PSC + 0x00 */
+	u8		reserved0[3];
+	u8		mr2;			/* PSC + 0x04 */
+	u8		reserved1[3];
+	struct {
+		u16	status;			/* PSC + 0x08 */
+		u8	reserved2[2];
+		u8	clock_select;		/* PSC + 0x0c */
+		u8	reserved3[3];
+	} sr_csr;
+	u8		command;		/* PSC + 0x10 */
+	u8		reserved4[3];
+	union {					/* PSC + 0x14 */
+		u8	buffer_8;
+		u16	buffer_16;
+		u32	buffer_32;
+	} buffer;
+	struct {
+		u8	ipcr;			/* PSC + 0x18 */
+		u8	reserved5[3];
+		u8	acr;			/* PSC + 0x1c */
+		u8	reserved6[3];
+	} ipcr_acr;
+	struct {
+		u16	isr;			/* PSC + 0x20 */
+		u8	reserved7[2];
+		u16	imr;			/* PSC + 0x24 */
+		u8	reserved8[2];
+	} isr_imr;
+	u8		ctur;			/* PSC + 0x28 */
+	u8		reserved9[3];
+	u8		ctlr;			/* PSC + 0x2c */
+	u8		reserved10[3];
+	u32		ccr;			/* PSC + 0x30 */
+	u32		ac97slots;		/* PSC + 0x34 */
+	u32		ac97cmd;		/* PSC + 0x38 */
+	u32		ac97data;		/* PSC + 0x3c */
+	u8		reserved11[4];
+	u8		ip;			/* PSC + 0x44 */
+	u8		reserved12[3];
+	u8		op1;			/* PSC + 0x48 */
+	u8		reserved13[3];
+	u8		op0;			/* PSC + 0x4c */
+	u8		reserved14[3];
+	u32		sicr;			/* PSC + 0x50 */
+	u8		reserved15[4];		/* make eq. sizeof(mpc52xx_psc) */
+};
+
 #endif /* __ASM_MPC52xx_PSC_H__ */
diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h
index c0f9ef90f0b8..4a1ac9fbf186 100644
--- a/arch/powerpc/include/asm/mpic.h
+++ b/arch/powerpc/include/asm/mpic.h
@@ -339,6 +339,8 @@ struct mpic
 #endif
 };
 
+extern struct bus_type mpic_subsys;
+
 /*
  * MPIC flags (passed to mpic_alloc)
  *
@@ -393,6 +395,9 @@ struct mpic
 #define	MPIC_REGSET_STANDARD	MPIC_REGSET(0)	/* Original MPIC */
 #define	MPIC_REGSET_TSI108	MPIC_REGSET(1)	/* Tsi108/109 PIC */
 
+/* Get the version of primary MPIC */
+extern u32 fsl_mpic_primary_get_version(void);
+
 /* Allocate the controller structure and setup the linux irq descs
  * for the range if interrupts passed in. No HW initialization is
  * actually performed.
diff --git a/arch/powerpc/include/asm/mpic_timer.h b/arch/powerpc/include/asm/mpic_timer.h
new file mode 100644
index 000000000000..0e23cd4ac8aa
--- /dev/null
+++ b/arch/powerpc/include/asm/mpic_timer.h
@@ -0,0 +1,46 @@
+/*
+ * arch/powerpc/include/asm/mpic_timer.h
+ *
+ * Header file for Mpic Global Timer
+ *
+ * Copyright 2013 Freescale Semiconductor, Inc.
+ *
+ * Author: Wang Dongsheng <Dongsheng.Wang@freescale.com>
+ *	   Li Yang <leoli@freescale.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#ifndef __MPIC_TIMER__
+#define __MPIC_TIMER__
+
+#include <linux/interrupt.h>
+#include <linux/time.h>
+
+struct mpic_timer {
+	void			*dev;
+	struct cascade_priv	*cascade_handle;
+	unsigned int		num;
+	unsigned int		irq;
+};
+
+#ifdef CONFIG_MPIC_TIMER
+struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev,
+		const struct timeval *time);
+void mpic_start_timer(struct mpic_timer *handle);
+void mpic_stop_timer(struct mpic_timer *handle);
+void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time);
+void mpic_free_timer(struct mpic_timer *handle);
+#else
+struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev,
+		const struct timeval *time) { return NULL; }
+void mpic_start_timer(struct mpic_timer *handle) { }
+void mpic_stop_timer(struct mpic_timer *handle) { }
+void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time) { }
+void mpic_free_timer(struct mpic_timer *handle) { }
+#endif
+
+#endif
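A hedged sketch of how a driver might consume this API, based only on the prototypes above; the my_timeout handler, my_dev cookie, and one-second period are hypothetical and not part of the patch.

/* Sketch only: kernel-context usage, assumptions as stated above. */
static irqreturn_t my_timeout(int irq, void *dev)
{
	/* timer fired; dev is the cookie passed to mpic_request_timer() */
	return IRQ_HANDLED;
}

static int my_setup(void *my_dev)
{
	struct timeval tv = { .tv_sec = 1, .tv_usec = 0 };
	struct mpic_timer *t;

	t = mpic_request_timer(my_timeout, my_dev, &tv);
	if (!t)
		return -ENODEV;	/* also what the !CONFIG_MPIC_TIMER stub yields */
	mpic_start_timer(t);
	/* ... later ... */
	mpic_stop_timer(t);
	mpic_free_timer(t);
	return 0;
}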
diff --git a/arch/powerpc/include/asm/mutex.h b/arch/powerpc/include/asm/mutex.h
index 5399f7e18102..127ab23e1f6c 100644
--- a/arch/powerpc/include/asm/mutex.h
+++ b/arch/powerpc/include/asm/mutex.h
@@ -82,17 +82,15 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
  * __mutex_fastpath_lock_retval - try to take the lock by moving the count
  *                                from 1 to a 0 value
  * @count: pointer of type atomic_t
- * @fail_fn: function to call if the original value was not 1
  *
- * Change the count from 1 to a value lower than 1, and call <fail_fn> if
- * it wasn't 1 originally. This function returns 0 if the fastpath succeeds,
- * or anything the slow path function returns.
+ * Change the count from 1 to a value lower than 1. This function returns 0
+ * if the fastpath succeeds, or -1 otherwise.
  */
 static inline int
-__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
+__mutex_fastpath_lock_retval(atomic_t *count)
 {
 	if (unlikely(__mutex_dec_return_lock(count) < 0))
-		return fail_fn(count);
+		return -1;
 	return 0;
 }
 
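For context, a hedged userspace model of the new convention: the fastpath reports -1 instead of calling fail_fn, and the caller decides to take its slow path. A plain atomic decrement stands in for __mutex_dec_return_lock(), and mutex_slowpath() is hypothetical.

/* Model of the retval convention; not the kernel's generic mutex code. */
#include <stdio.h>

static int __mutex_fastpath_lock_retval(int *count)
{
	/* atomic decrement standing in for __mutex_dec_return_lock() */
	if (__atomic_sub_fetch(count, 1, __ATOMIC_ACQUIRE) < 0)
		return -1;		/* contended: caller picks slow path */
	return 0;
}

static void mutex_slowpath(int *count)
{
	printf("contended, count=%d\n", *count);
}

int main(void)
{
	int count = 1;

	if (__mutex_fastpath_lock_retval(&count))	/* first: fastpath */
		mutex_slowpath(&count);
	if (__mutex_fastpath_lock_retval(&count))	/* second: contended */
		mutex_slowpath(&count);
	return 0;
}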
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index cbb9305ab15a..029fe85722aa 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -117,7 +117,13 @@ extern int opal_enter_rtas(struct rtas_args *args,
 #define OPAL_SET_SLOT_LED_STATUS		55
 #define OPAL_GET_EPOW_STATUS			56
 #define OPAL_SET_SYSTEM_ATTENTION_LED		57
+#define OPAL_RESERVED1				58
+#define OPAL_RESERVED2				59
+#define OPAL_PCI_NEXT_ERROR			60
+#define OPAL_PCI_EEH_FREEZE_STATUS2		61
+#define OPAL_PCI_POLL				62
 #define OPAL_PCI_MSI_EOI			63
+#define OPAL_PCI_GET_PHB_DIAG_DATA2		64
 
 #ifndef __ASSEMBLY__
 
@@ -125,6 +131,7 @@ extern int opal_enter_rtas(struct rtas_args *args,
 enum OpalVendorApiTokens {
 	OPAL_START_VENDOR_API_RANGE = 1000, OPAL_END_VENDOR_API_RANGE = 1999
 };
+
 enum OpalFreezeState {
 	OPAL_EEH_STOPPED_NOT_FROZEN = 0,
 	OPAL_EEH_STOPPED_MMIO_FREEZE = 1,
@@ -134,55 +141,69 @@ enum OpalFreezeState {
 	OPAL_EEH_STOPPED_TEMP_UNAVAIL = 5,
 	OPAL_EEH_STOPPED_PERM_UNAVAIL = 6
 };
+
 enum OpalEehFreezeActionToken {
 	OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO = 1,
 	OPAL_EEH_ACTION_CLEAR_FREEZE_DMA = 2,
 	OPAL_EEH_ACTION_CLEAR_FREEZE_ALL = 3
 };
+
 enum OpalPciStatusToken {
-	OPAL_EEH_PHB_NO_ERROR = 0,
-	OPAL_EEH_PHB_FATAL = 1,
-	OPAL_EEH_PHB_RECOVERABLE = 2,
-	OPAL_EEH_PHB_BUS_ERROR = 3,
-	OPAL_EEH_PCI_NO_DEVSEL = 4,
-	OPAL_EEH_PCI_TA = 5,
-	OPAL_EEH_PCIEX_UR = 6,
-	OPAL_EEH_PCIEX_CA = 7,
-	OPAL_EEH_PCI_MMIO_ERROR = 8,
-	OPAL_EEH_PCI_DMA_ERROR = 9
+	OPAL_EEH_NO_ERROR	= 0,
+	OPAL_EEH_IOC_ERROR	= 1,
+	OPAL_EEH_PHB_ERROR	= 2,
+	OPAL_EEH_PE_ERROR	= 3,
+	OPAL_EEH_PE_MMIO_ERROR	= 4,
+	OPAL_EEH_PE_DMA_ERROR	= 5
 };
+
+enum OpalPciErrorSeverity {
+	OPAL_EEH_SEV_NO_ERROR	= 0,
+	OPAL_EEH_SEV_IOC_DEAD	= 1,
+	OPAL_EEH_SEV_PHB_DEAD	= 2,
+	OPAL_EEH_SEV_PHB_FENCED	= 3,
+	OPAL_EEH_SEV_PE_ER	= 4,
+	OPAL_EEH_SEV_INF	= 5
+};
+
 enum OpalShpcAction {
 	OPAL_SHPC_GET_LINK_STATE = 0,
 	OPAL_SHPC_GET_SLOT_STATE = 1
 };
+
 enum OpalShpcLinkState {
 	OPAL_SHPC_LINK_DOWN = 0,
 	OPAL_SHPC_LINK_UP = 1
 };
+
 enum OpalMmioWindowType {
 	OPAL_M32_WINDOW_TYPE = 1,
 	OPAL_M64_WINDOW_TYPE = 2,
 	OPAL_IO_WINDOW_TYPE = 3
 };
+
 enum OpalShpcSlotState {
 	OPAL_SHPC_DEV_NOT_PRESENT = 0,
 	OPAL_SHPC_DEV_PRESENT = 1
 };
+
 enum OpalExceptionHandler {
 	OPAL_MACHINE_CHECK_HANDLER = 1,
 	OPAL_HYPERVISOR_MAINTENANCE_HANDLER = 2,
 	OPAL_SOFTPATCH_HANDLER = 3
 };
+
 enum OpalPendingState {
 	OPAL_EVENT_OPAL_INTERNAL = 0x1,
 	OPAL_EVENT_NVRAM = 0x2,
 	OPAL_EVENT_RTC = 0x4,
 	OPAL_EVENT_CONSOLE_OUTPUT = 0x8,
 	OPAL_EVENT_CONSOLE_INPUT = 0x10,
 	OPAL_EVENT_ERROR_LOG_AVAIL = 0x20,
 	OPAL_EVENT_ERROR_LOG = 0x40,
 	OPAL_EVENT_EPOW = 0x80,
-	OPAL_EVENT_LED_STATUS = 0x100
+	OPAL_EVENT_LED_STATUS = 0x100,
+	OPAL_EVENT_PCI_ERROR = 0x200
 };
 
 /* Machine check related definitions */
@@ -364,15 +385,80 @@ struct opal_machine_check_event {
 	} u;
 };
 
+enum {
+	OPAL_P7IOC_DIAG_TYPE_NONE	= 0,
+	OPAL_P7IOC_DIAG_TYPE_RGC	= 1,
+	OPAL_P7IOC_DIAG_TYPE_BI		= 2,
+	OPAL_P7IOC_DIAG_TYPE_CI		= 3,
+	OPAL_P7IOC_DIAG_TYPE_MISC	= 4,
+	OPAL_P7IOC_DIAG_TYPE_I2C	= 5,
+	OPAL_P7IOC_DIAG_TYPE_LAST	= 6
+};
+
+struct OpalIoP7IOCErrorData {
+	uint16_t type;
+
+	/* GEM */
+	uint64_t gemXfir;
+	uint64_t gemRfir;
+	uint64_t gemRirqfir;
+	uint64_t gemMask;
+	uint64_t gemRwof;
+
+	/* LEM */
+	uint64_t lemFir;
+	uint64_t lemErrMask;
+	uint64_t lemAction0;
+	uint64_t lemAction1;
+	uint64_t lemWof;
+
+	union {
+		struct OpalIoP7IOCRgcErrorData {
+			uint64_t rgcStatus;	/* 3E1C10 */
+			uint64_t rgcLdcp;	/* 3E1C18 */
+		}rgc;
+		struct OpalIoP7IOCBiErrorData {
+			uint64_t biLdcp0;	/* 3C0100, 3C0118 */
+			uint64_t biLdcp1;	/* 3C0108, 3C0120 */
+			uint64_t biLdcp2;	/* 3C0110, 3C0128 */
+			uint64_t biFenceStatus;	/* 3C0130, 3C0130 */
+
+			uint8_t  biDownbound;	/* BI Downbound or Upbound */
+		}bi;
+		struct OpalIoP7IOCCiErrorData {
+			uint64_t ciPortStatus;	/* 3Dn008 */
+			uint64_t ciPortLdcp;	/* 3Dn010 */
+
+			uint8_t  ciPort;	/* Index of CI port: 0/1 */
+		}ci;
+	};
+};
+
 /**
  * This structure defines the overlay which will be used to store PHB error
  * data upon request.
  */
 enum {
+	OPAL_PHB_ERROR_DATA_VERSION_1 = 1,
+};
+
+enum {
+	OPAL_PHB_ERROR_DATA_TYPE_P7IOC = 1,
+};
+
+enum {
 	OPAL_P7IOC_NUM_PEST_REGS = 128,
 };
 
+struct OpalIoPhbErrorCommon {
+	uint32_t version;
+	uint32_t ioType;
+	uint32_t len;
+};
+
 struct OpalIoP7IOCPhbErrorData {
+	struct OpalIoPhbErrorCommon common;
+
 	uint32_t brdgCtl;
 
 	// P7IOC utl regs
@@ -530,14 +616,21 @@ int64_t opal_pci_map_pe_dma_window_real(uint64_t phb_id, uint16_t pe_number,
 					    uint64_t pci_mem_size);
 int64_t opal_pci_reset(uint64_t phb_id, uint8_t reset_scope, uint8_t assert_state);
 
-int64_t opal_pci_get_hub_diag_data(uint64_t hub_id, void *diag_buffer, uint64_t diag_buffer_len);
-int64_t opal_pci_get_phb_diag_data(uint64_t phb_id, void *diag_buffer, uint64_t diag_buffer_len);
+int64_t opal_pci_get_hub_diag_data(uint64_t hub_id, void *diag_buffer,
+				   uint64_t diag_buffer_len);
+int64_t opal_pci_get_phb_diag_data(uint64_t phb_id, void *diag_buffer,
+				   uint64_t diag_buffer_len);
+int64_t opal_pci_get_phb_diag_data2(uint64_t phb_id, void *diag_buffer,
+				    uint64_t diag_buffer_len);
 int64_t opal_pci_fence_phb(uint64_t phb_id);
 int64_t opal_pci_reinit(uint64_t phb_id, uint8_t reinit_scope);
 int64_t opal_pci_mask_pe_error(uint64_t phb_id, uint16_t pe_number, uint8_t error_type, uint8_t mask_action);
 int64_t opal_set_slot_led_status(uint64_t phb_id, uint64_t slot_id, uint8_t led_type, uint8_t led_action);
 int64_t opal_get_epow_status(uint64_t *status);
 int64_t opal_set_system_attention_led(uint8_t led_action);
+int64_t opal_pci_next_error(uint64_t phb_id, uint64_t *first_frozen_pe,
+			    uint16_t *pci_error_type, uint16_t *severity);
+int64_t opal_pci_poll(uint64_t phb_id);
 
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data);
@@ -551,6 +644,11 @@ extern void hvc_opal_init_early(void);
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
 				   int depth, void *data);
 
+extern int opal_notifier_register(struct notifier_block *nb);
+extern void opal_notifier_enable(void);
+extern void opal_notifier_disable(void);
+extern void opal_notifier_update_evt(uint64_t evt_mask, uint64_t evt_val);
+
 extern int opal_get_chars(uint32_t vtermno, char *buf, int count);
 extern int opal_put_chars(uint32_t vtermno, const char *buf, int total_len);
 
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 2c1d8cb9b265..32d0d2018faf 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -209,7 +209,6 @@ static inline struct eeh_dev *of_node_to_eeh_dev(struct device_node *dn)
 extern struct pci_bus *pcibios_find_pci_bus(struct device_node *dn);
 
 /** Remove all of the PCI devices under this bus */
-extern void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe);
 extern void pcibios_remove_pci_devices(struct pci_bus *bus);
 
 /** Discover new pci devices under this bus, and add them */
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index f265049dd7d6..8b2492644754 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -12,6 +12,7 @@
 #include <linux/types.h>
 #include <asm/hw_irq.h>
 #include <linux/device.h>
+#include <uapi/asm/perf_event.h>
 
 #define MAX_HWEVENTS		8
 #define MAX_EVENT_ALTERNATIVES	8
@@ -60,6 +61,7 @@ struct power_pmu {
 #define PPMU_HAS_SSLOT		0x00000020 /* Has sampled slot in MMCRA */
 #define PPMU_HAS_SIER		0x00000040 /* Has SIER */
 #define PPMU_BHRB		0x00000080 /* has BHRB feature enabled */
+#define PPMU_EBB		0x00000100 /* supports event based branch */
 
 /*
  * Values for flags to get_alternatives()
diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h
index b66ae722a8e9..f65e27b09bd3 100644
--- a/arch/powerpc/include/asm/pgalloc-64.h
+++ b/arch/powerpc/include/asm/pgalloc-64.h
@@ -221,17 +221,17 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return kmem_cache_alloc(PGT_CACHE(PMD_INDEX_SIZE),
+	return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
 				GFP_KERNEL|__GFP_REPEAT);
 }
 
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
-	kmem_cache_free(PGT_CACHE(PMD_INDEX_SIZE), pmd);
+	kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd);
 }
 
 #define __pmd_free_tlb(tlb, pmd, addr)		      \
-	pgtable_free_tlb(tlb, pmd, PMD_INDEX_SIZE)
+	pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX)
 #ifndef CONFIG_PPC_64K_PAGES
 #define __pud_free_tlb(tlb, pud, addr)		      \
 	pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/pgtable-ppc64-64k.h b/arch/powerpc/include/asm/pgtable-ppc64-64k.h
index 45142d640720..a56b82fb0609 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64-64k.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64-64k.h
@@ -33,7 +33,8 @@
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 
 /* Bits to mask out from a PMD to get to the PTE page */
-#define PMD_MASKED_BITS		0x1ff
+/* PMDs point to PTE table fragments which are 4K aligned. */
+#define PMD_MASKED_BITS		0xfff
 /* Bits to mask out from a PGD/PUD to get to the PMD page */
 #define PUD_MASKED_BITS		0x1ff
 
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index e3d55f6f24fe..46db09414a10 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -10,6 +10,7 @@
 #else
 #include <asm/pgtable-ppc64-4k.h>
 #endif
+#include <asm/barrier.h>
 
 #define FIRST_USER_ADDRESS	0
 
@@ -20,7 +21,11 @@
 		    PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT)
 #define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE)
 
-
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define PMD_CACHE_INDEX	(PMD_INDEX_SIZE + 1)
+#else
+#define PMD_CACHE_INDEX	PMD_INDEX_SIZE
+#endif
 /*
  * Define the address range of the kernel non-linear virtual area
  */
@@ -150,7 +155,7 @@
 #define	pmd_present(pmd)	(pmd_val(pmd) != 0)
 #define	pmd_clear(pmdp)		(pmd_val(*(pmdp)) = 0)
 #define pmd_page_vaddr(pmd)	(pmd_val(pmd) & ~PMD_MASKED_BITS)
-#define pmd_page(pmd)		virt_to_page(pmd_page_vaddr(pmd))
+extern struct page *pmd_page(pmd_t pmd);
 
 #define pud_set(pudp, pudval)	(pud_val(*(pudp)) = (pudval))
 #define pud_none(pud)		(!pud_val(pud))
@@ -339,43 +344,217 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
 
 void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
 void pgtable_cache_init(void);
+#endif /* __ASSEMBLY__ */
+
+/*
+ * THP pages can't be special. So use the _PAGE_SPECIAL
+ */
+#define _PAGE_SPLITTING _PAGE_SPECIAL
+
+/*
+ * We need to differentiate between explicit huge page and THP huge
+ * page, since THP huge page also need to track real subpage details
+ */
+#define _PAGE_THP_HUGE  _PAGE_4K_PFN
 
 /*
- * find_linux_pte returns the address of a linux pte for a given
- * effective address and directory.  If not found, it returns zero.
+ * set of bits not changed in pmd_modify.
  */
-static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea)
+#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS |		\
+			 _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \
+			 _PAGE_THP_HUGE)
+
+#ifndef __ASSEMBLY__
+/*
+ * The linux hugepage PMD now include the pmd entries followed by the address
+ * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits.
+ * [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000]. We use one byte per
+ * each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and
+ * with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t.
+ *
+ * The last three bits are intentionally left to zero. This memory location
+ * are also used as normal page PTE pointers. So if we have any pointers
+ * left around while we collapse a hugepage, we need to make sure
+ * _PAGE_PRESENT and _PAGE_FILE bits of that are zero when we look at them
+ */
+static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
 {
-	pgd_t *pg;
-	pud_t *pu;
-	pmd_t *pm;
-	pte_t *pt = NULL;
-
-	pg = pgdir + pgd_index(ea);
-	if (!pgd_none(*pg)) {
-		pu = pud_offset(pg, ea);
-		if (!pud_none(*pu)) {
-			pm = pmd_offset(pu, ea);
-			if (pmd_present(*pm))
-				pt = pte_offset_kernel(pm, ea);
-		}
-	}
-	return pt;
+	return (hpte_slot_array[index] >> 3) & 0x1;
 }
 
-#ifdef CONFIG_HUGETLB_PAGE
-pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
-				 unsigned *shift);
-#else
-static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
-					       unsigned *shift)
+static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
+					   int index)
 {
-	if (shift)
-		*shift = 0;
-	return find_linux_pte(pgdir, ea);
+	return hpte_slot_array[index] >> 4;
 }
-#endif /* !CONFIG_HUGETLB_PAGE */
 
-#endif /* __ASSEMBLY__ */
+static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
+					unsigned int index, unsigned int hidx)
+{
+	hpte_slot_array[index] = hidx << 4 | 0x1 << 3;
+}
 
+static inline char *get_hpte_slot_array(pmd_t *pmdp)
+{
+	/*
+	 * The hpte hindex is stored in the pgtable whose address is in the
+	 * second half of the PMD
+	 *
+	 * Order this load with the test for pmd_trans_huge in the caller
+	 */
+	smp_rmb();
+	return *(char **)(pmdp + PTRS_PER_PMD);
+
+
+}
+
+extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
+				   pmd_t *pmdp);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
+extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
+extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
+extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		       pmd_t *pmdp, pmd_t pmd);
+extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+				 pmd_t *pmd);
+
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+	/*
+	 * leaf pte for huge page, bottom two bits != 00
+	 */
+	return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE);
+}
+
+static inline int pmd_large(pmd_t pmd)
+{
+	/*
+	 * leaf pte for huge page, bottom two bits != 00
+	 */
+	if (pmd_trans_huge(pmd))
+		return pmd_val(pmd) & _PAGE_PRESENT;
+	return 0;
+}
+
+static inline int pmd_trans_splitting(pmd_t pmd)
+{
+	if (pmd_trans_huge(pmd))
+		return pmd_val(pmd) & _PAGE_SPLITTING;
+	return 0;
+}
+
+extern int has_transparent_hugepage(void);
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+static inline pte_t pmd_pte(pmd_t pmd)
+{
+	return __pte(pmd_val(pmd));
+}
+
+static inline pmd_t pte_pmd(pte_t pte)
+{
+	return __pmd(pte_val(pte));
+}
+
+static inline pte_t *pmdp_ptep(pmd_t *pmd)
+{
+	return (pte_t *)pmd;
+}
+
+#define pmd_pfn(pmd)		pte_pfn(pmd_pte(pmd))
+#define pmd_young(pmd)		pte_young(pmd_pte(pmd))
+#define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
+#define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
+#define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
+#define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
+#define pmd_mkwrite(pmd)	pte_pmd(pte_mkwrite(pmd_pte(pmd)))
+
+#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write(pmd)		pte_write(pmd_pte(pmd))
+
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+	/* Do nothing, mk_pmd() does this part. */
+	return pmd;
+}
+
+static inline pmd_t pmd_mknotpresent(pmd_t pmd)
+{
+	pmd_val(pmd) &= ~_PAGE_PRESENT;
+	return pmd;
+}
+
+static inline pmd_t pmd_mksplitting(pmd_t pmd)
+{
+	pmd_val(pmd) |= _PAGE_SPLITTING;
+	return pmd;
+}
+
+#define __HAVE_ARCH_PMD_SAME
+static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
+{
+	return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0);
+}
+
+#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
+extern int pmdp_set_access_flags(struct vm_area_struct *vma,
+				 unsigned long address, pmd_t *pmdp,
+				 pmd_t entry, int dirty);
+
+extern unsigned long pmd_hugepage_update(struct mm_struct *mm,
+					 unsigned long addr,
+					 pmd_t *pmdp, unsigned long clr);
+
+static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
+					      unsigned long addr, pmd_t *pmdp)
+{
+	unsigned long old;
+
+	if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
+		return 0;
+	old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED);
+	return ((old & _PAGE_ACCESSED) != 0);
+}
+
+#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+				     unsigned long address, pmd_t *pmdp);
+#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
+extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
+				  unsigned long address, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
+extern pmd_t pmdp_get_and_clear(struct mm_struct *mm,
+				unsigned long addr, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMDP_CLEAR_FLUSH
+extern pmd_t pmdp_clear_flush(struct vm_area_struct *vma, unsigned long address,
+			      pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMDP_SET_WRPROTECT
+static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+				      pmd_t *pmdp)
+{
+
+	if ((pmd_val(*pmdp) & _PAGE_RW) == 0)
+		return;
+
+	pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW);
+}
+
+#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
+extern void pmdp_splitting_flush(struct vm_area_struct *vma,
+				 unsigned long address, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PGTABLE_DEPOSIT
+extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+				       pgtable_t pgtable);
+#define __HAVE_ARCH_PGTABLE_WITHDRAW
+extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMDP_INVALIDATE
+extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
+			    pmd_t *pmdp);
+#endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_PGTABLE_PPC64_H_ */
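The [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000] byte layout described above can be checked in isolation. A self-contained copy of the three helpers (userspace C, with unsigned char in place of the kernel types; the index 7 and hidx value 0xa are arbitrary demo inputs):

/* Standalone model of the slot-array byte encoding; not kernel code. */
#include <assert.h>
#include <stdio.h>

static unsigned int hpte_valid(unsigned char *a, int i)
{
	return (a[i] >> 3) & 0x1;
}

static unsigned int hpte_hash_index(unsigned char *a, int i)
{
	return a[i] >> 4;
}

static void mark_hpte_slot_valid(unsigned char *a, unsigned int i,
				 unsigned int hidx)
{
	a[i] = hidx << 4 | 0x1 << 3;
}

int main(void)
{
	unsigned char slots[256] = { 0 };	/* 16MB page with 64K HPTEs */

	assert(!hpte_valid(slots, 7));
	mark_hpte_slot_valid(slots, 7, 0xa);	/* secondary=1, hidx=2 */
	assert(hpte_valid(slots, 7));
	assert(hpte_hash_index(slots, 7) == 0xa);
	printf("slot 7 byte = %#x\n", slots[7]);
	return 0;
}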
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 7aeb9555f6ea..7d6eacf249cf 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -198,9 +198,6 @@ extern void paging_init(void);
  */
 #define kern_addr_valid(addr)	(1)
 
-#define io_remap_pfn_range(vma, vaddr, pfn, size, prot)	\
-		remap_pfn_range(vma, vaddr, pfn, size, prot)
-
 #include <asm-generic/pgtable.h>
 
 
@@ -220,6 +217,12 @@ extern int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, unsigned long addr,
 
 extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 			unsigned long end, int write, struct page **pages, int *nr);
+#ifndef CONFIG_TRANSPARENT_HUGEPAGE
+#define pmd_large(pmd)		0
+#define has_transparent_hugepage() 0
+#endif
+pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+				 unsigned *shift);
 #endif /* __ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/probes.h b/arch/powerpc/include/asm/probes.h
index 5f1e15b68704..3421637cfd7b 100644
--- a/arch/powerpc/include/asm/probes.h
+++ b/arch/powerpc/include/asm/probes.h
@@ -38,5 +38,30 @@ typedef u32 ppc_opcode_t;
 #define is_trap(instr)	(IS_TW(instr) || IS_TWI(instr))
 #endif /* CONFIG_PPC64 */
 
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+#define MSR_SINGLESTEP	(MSR_DE)
+#else
+#define MSR_SINGLESTEP	(MSR_SE)
+#endif
+
+/* Enable single stepping for the current task */
+static inline void enable_single_step(struct pt_regs *regs)
+{
+	regs->msr |= MSR_SINGLESTEP;
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+	/*
+	 * We turn off Critical Input Exception(CE) to ensure that the single
+	 * step will be for the instruction we have the probe on; if we don't,
+	 * it is possible we'd get the single step reported for CE.
+	 */
+	regs->msr &= ~MSR_CE;
+	mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
+#ifdef CONFIG_PPC_47x
+	isync();
+#endif
+#endif
+}
+
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_PROBES_H */
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 14a658363698..e378cccfca55 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -168,10 +168,10 @@ struct thread_struct {
 	 * The following help to manage the use of Debug Control Registers
 	 * om the BookE platforms.
 	 */
-	unsigned long	dbcr0;
-	unsigned long	dbcr1;
+	uint32_t	dbcr0;
+	uint32_t	dbcr1;
 #ifdef CONFIG_BOOKE
-	unsigned long	dbcr2;
+	uint32_t	dbcr2;
 #endif
 	/*
 	 * The stored value of the DBSR register will be the value at the
@@ -179,7 +179,7 @@ struct thread_struct {
 	 * user (will never be written to) and has value while helping to
 	 * describe the reason for the last debug trap.  Torez
 	 */
-	unsigned long	dbsr;
+	uint32_t	dbsr;
 	/*
 	 * The following will contain addresses used by debug applications
 	 * to help trace and trap on particular address locations.
@@ -200,7 +200,7 @@ struct thread_struct {
 #endif
 #endif
 	/* FP and VSX 0-31 register set */
-	double		fpr[32][TS_FPRWIDTH];
+	double		fpr[32][TS_FPRWIDTH] __attribute__((aligned(16)));
 	struct {
 
 		unsigned int pad;
@@ -247,6 +247,10 @@ struct thread_struct {
 	unsigned long	tm_orig_msr;	/* Thread's MSR on ctx switch */
 	struct pt_regs	ckpt_regs;	/* Checkpointed registers */
 
+	unsigned long	tm_tar;
+	unsigned long	tm_ppr;
+	unsigned long	tm_dscr;
+
 	/*
 	 * Transactional FP and VSX 0-31 register set.
 	 * NOTE: the sense of these is the opposite of the integer ckpt_regs!
@@ -287,9 +291,9 @@ struct thread_struct {
 	unsigned long	siar;
 	unsigned long	sdar;
 	unsigned long	sier;
-	unsigned long	mmcr0;
 	unsigned long	mmcr2;
-	unsigned long	mmcra;
+	unsigned	mmcr0;
+	unsigned	used_ebb;
 #endif
 };
 
@@ -404,9 +408,7 @@ static inline void prefetchw(const void *x)
 
 #define spin_lock_prefetch(x)	prefetchw(x)
 
-#ifdef CONFIG_PPC64
 #define HAVE_ARCH_PICK_MMAP_LAYOUT
-#endif
 
 #ifdef CONFIG_PPC64
 static inline unsigned long get_clean_sp(unsigned long sp, int is_32)
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 4a9e408644fe..99222e27f173 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -254,19 +254,28 @@
 #define SPRN_HRMOR	0x139	/* Real mode offset register */
 #define SPRN_HSRR0	0x13A	/* Hypervisor Save/Restore 0 */
 #define SPRN_HSRR1	0x13B	/* Hypervisor Save/Restore 1 */
+/* HFSCR and FSCR bit numbers are the same */
+#define FSCR_TAR_LG	8	/* Enable Target Address Register */
+#define FSCR_EBB_LG	7	/* Enable Event Based Branching */
+#define FSCR_TM_LG	5	/* Enable Transactional Memory */
+#define FSCR_PM_LG	4	/* Enable prob/priv access to PMU SPRs */
+#define FSCR_BHRB_LG	3	/* Enable Branch History Rolling Buffer*/
+#define FSCR_DSCR_LG	2	/* Enable Data Stream Control Register */
+#define FSCR_VECVSX_LG	1	/* Enable VMX/VSX */
+#define FSCR_FP_LG	0	/* Enable Floating Point */
 #define SPRN_FSCR	0x099	/* Facility Status & Control Register */
-#define FSCR_TAR	(1 << (63-55)) /* Enable Target Address Register */
-#define FSCR_EBB	(1 << (63-56)) /* Enable Event Based Branching */
-#define FSCR_DSCR	(1 << (63-61)) /* Enable Data Stream Control Register */
+#define FSCR_TAR	__MASK(FSCR_TAR_LG)
+#define FSCR_EBB	__MASK(FSCR_EBB_LG)
+#define FSCR_DSCR	__MASK(FSCR_DSCR_LG)
 #define SPRN_HFSCR	0xbe	/* HV=1 Facility Status & Control Register */
-#define HFSCR_TAR	(1 << (63-55)) /* Enable Target Address Register */
-#define HFSCR_EBB	(1 << (63-56)) /* Enable Event Based Branching */
-#define HFSCR_TM	(1 << (63-58)) /* Enable Transactional Memory */
-#define HFSCR_PM	(1 << (63-60)) /* Enable prob/priv access to PMU SPRs */
-#define HFSCR_BHRB	(1 << (63-59)) /* Enable Branch History Rolling Buffer*/
-#define HFSCR_DSCR	(1 << (63-61)) /* Enable Data Stream Control Register */
-#define HFSCR_VECVSX	(1 << (63-62)) /* Enable VMX/VSX */
-#define HFSCR_FP	(1 << (63-63)) /* Enable Floating Point */
+#define HFSCR_TAR	__MASK(FSCR_TAR_LG)
+#define HFSCR_EBB	__MASK(FSCR_EBB_LG)
+#define HFSCR_TM	__MASK(FSCR_TM_LG)
+#define HFSCR_PM	__MASK(FSCR_PM_LG)
+#define HFSCR_BHRB	__MASK(FSCR_BHRB_LG)
+#define HFSCR_DSCR	__MASK(FSCR_DSCR_LG)
+#define HFSCR_VECVSX	__MASK(FSCR_VECVSX_LG)
+#define HFSCR_FP	__MASK(FSCR_FP_LG)
 #define SPRN_TAR	0x32f	/* Target Address Register */
 #define SPRN_LPCR	0x13E	/* LPAR Control Register */
 #define LPCR_VPM0	(1ul << (63-0))
@@ -621,11 +630,15 @@
 #define MMCR0_PMXE	0x04000000UL /* performance monitor exception enable */
 #define MMCR0_FCECE	0x02000000UL /* freeze ctrs on enabled cond or event */
 #define MMCR0_TBEE	0x00400000UL /* time base exception enable */
+#define MMCR0_EBE	0x00100000UL /* Event based branch enable */
+#define MMCR0_PMCC	0x000c0000UL /* PMC control */
+#define MMCR0_PMCC_U6	0x00080000UL /* PMC1-6 are R/W by user (PR) */
 #define MMCR0_PMC1CE	0x00008000UL /* PMC1 count enable*/
 #define MMCR0_PMCjCE	0x00004000UL /* PMCj count enable*/
 #define MMCR0_TRIGGER	0x00002000UL /* TRIGGER enable */
 #define MMCR0_PMAO	0x00000080UL /* performance monitor alert has occurred, set to 0 after handling exception */
 #define MMCR0_SHRFC	0x00000040UL /* SHRre freeze conditions between threads */
+#define MMCR0_FC56	0x00000010UL /* freeze counters 5 and 6 */
 #define MMCR0_FCTI	0x00000008UL /* freeze counters in tags inactive mode */
 #define MMCR0_FCTA	0x00000004UL /* freeze counters in tags active mode */
 #define MMCR0_FCWAIT	0x00000002UL /* freeze counter in WAIT state */
@@ -673,6 +686,11 @@
 #define SIER_SIAR_VALID	0x0400000	/* SIAR contents valid */
 #define SIER_SDAR_VALID	0x0200000	/* SDAR contents valid */
 
+/* When EBB is enabled, some of MMCR0/MMCR2/SIER are user accessible */
+#define MMCR0_USER_MASK	(MMCR0_FC | MMCR0_PMXE | MMCR0_PMAO)
+#define MMCR2_USER_MASK	0x4020100804020000UL /* (FC1P|FC2P|FC3P|FC4P|FC5P|FC6P) */
+#define SIER_USER_MASK	0x7fffffUL
+
 #define SPRN_PA6T_MMCR0 795
 #define PA6T_MMCR0_EN0	0x0000000000000001UL
 #define PA6T_MMCR0_EN1	0x0000000000000002UL
@@ -1079,7 +1097,8 @@
 #define PVR_970MP	0x0044
 #define PVR_970GX	0x0045
 #define PVR_POWER7p	0x004A
-#define PVR_POWER8	0x004B
+#define PVR_POWER8E	0x004B
+#define PVR_POWER8	0x004D
 #define PVR_BE		0x0070
 #define PVR_PA6T	0x0090
 
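The _LG names above are IBM bit positions rewritten as conventional shift counts. Assuming __MASK(n) expands to (1UL << (n)), as it does elsewhere in the kernel, the new constants reproduce the old (1 << (63-n)) values exactly; a standalone check (with __MASK redefined locally under that assumption):

/* Standalone check of the _LG encoding against the old constants. */
#include <assert.h>

#define __MASK(n)	(1UL << (n))

#define FSCR_TAR_LG	8	/* old: 1 << (63-55) */
#define FSCR_EBB_LG	7	/* old: 1 << (63-56) */
#define FSCR_DSCR_LG	2	/* old: 1 << (63-61) */

int main(void)
{
	assert(__MASK(FSCR_TAR_LG)  == (1UL << (63 - 55)));
	assert(__MASK(FSCR_EBB_LG)  == (1UL << (63 - 56)));
	assert(__MASK(FSCR_DSCR_LG) == (1UL << (63 - 61)));
	return 0;
}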
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 34fd70488d83..c7a8bfc9f6f5 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -350,8 +350,8 @@ static inline u32 rtas_config_addr(int busno, int devfn, int reg)
 		(devfn << 8) | (reg & 0xff);
 }
 
-extern void __cpuinit rtas_give_timebase(void);
-extern void __cpuinit rtas_take_timebase(void);
+extern void rtas_give_timebase(void);
+extern void rtas_take_timebase(void);
 
 #ifdef CONFIG_PPC_RTAS
 static inline int page_is_rtas_user_buf(unsigned long pfn)
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index ffbaabebcdca..48cfc858abd6 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -145,6 +145,10 @@ extern void __cpu_die(unsigned int cpu);
 #define smp_setup_cpu_maps()
 static inline void inhibit_secondary_onlining(void) {}
 static inline void uninhibit_secondary_onlining(void) {}
+static inline const struct cpumask *cpu_sibling_mask(int cpu)
+{
+	return cpumask_of(cpu);
+}
 
 #endif /* CONFIG_SMP */
 
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 200d763a0a67..294c2cedcf7a 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -15,6 +15,15 @@ extern struct task_struct *__switch_to(struct task_struct *,
 struct thread_struct;
 extern struct task_struct *_switch(struct thread_struct *prev,
 				   struct thread_struct *next);
+#ifdef CONFIG_PPC_BOOK3S_64
+static inline void save_tar(struct thread_struct *prev)
+{
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		prev->tar = mfspr(SPRN_TAR);
+}
+#else
+static inline void save_tar(struct thread_struct *prev) {}
+#endif
 
 extern void giveup_fpu(struct task_struct *);
 extern void load_up_fpu(void);
@@ -67,4 +76,18 @@ static inline void flush_spe_to_thread(struct task_struct *t)
 }
 #endif
 
+static inline void clear_task_ebb(struct task_struct *t)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* EBB perf events are not inherited, so clear all EBB state. */
+	t->thread.bescr = 0;
+	t->thread.mmcr2 = 0;
+	t->thread.mmcr0 = 0;
+	t->thread.siar = 0;
+	t->thread.sdar = 0;
+	t->thread.sier = 0;
+	t->thread.used_ebb = 0;
+#endif
+}
+
 #endif /* _ASM_POWERPC_SWITCH_TO_H */
diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h
index 61a59271665b..2def01ed0cb2 100644
--- a/arch/powerpc/include/asm/tlbflush.h
+++ b/arch/powerpc/include/asm/tlbflush.h
@@ -165,7 +165,8 @@ static inline void flush_tlb_kernel_range(unsigned long start,
 /* Private function for use by PCI IO mapping code */
 extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
 				     unsigned long end);
-
+extern void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd,
+				unsigned long addr);
 #else
 #error Unsupported MMU type
 #endif
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 4db49590acf5..9485b43a7c00 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -178,7 +178,7 @@ do { \
 	long __pu_err;						\
 	__typeof__(*(ptr)) __user *__pu_addr = (ptr);		\
 	if (!is_kernel_addr((unsigned long)__pu_addr))		\
-		might_sleep();					\
+		might_fault();					\
 	__chk_user_ptr(ptr);					\
 	__put_user_size((x), __pu_addr, (size), __pu_err);	\
 	__pu_err;						\
@@ -188,7 +188,7 @@ do { \
 ({								\
 	long __pu_err = -EFAULT;				\
 	__typeof__(*(ptr)) __user *__pu_addr = (ptr);		\
-	might_sleep();						\
+	might_fault();						\
 	if (access_ok(VERIFY_WRITE, __pu_addr, size))		\
 		__put_user_size((x), __pu_addr, (size), __pu_err);	\
 	__pu_err;						\
@@ -268,7 +268,7 @@ do { \
 	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);	\
 	__chk_user_ptr(ptr);					\
 	if (!is_kernel_addr((unsigned long)__gu_addr))		\
-		might_sleep();					\
+		might_fault();					\
 	__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
 	(x) = (__typeof__(*(ptr)))__gu_val;			\
 	__gu_err;						\
@@ -282,7 +282,7 @@ do { \
 	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);	\
 	__chk_user_ptr(ptr);					\
 	if (!is_kernel_addr((unsigned long)__gu_addr))		\
-		might_sleep();					\
+		might_fault();					\
 	__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
 	(x) = (__typeof__(*(ptr)))__gu_val;			\
 	__gu_err;						\
@@ -294,7 +294,7 @@ do { \
 	long __gu_err = -EFAULT;				\
 	unsigned long __gu_val = 0;				\
 	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);	\
-	might_sleep();						\
+	might_fault();						\
 	if (access_ok(VERIFY_READ, __gu_addr, (size)))		\
 		__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
 	(x) = (__typeof__(*(ptr)))__gu_val;			\
@@ -419,14 +419,14 @@ static inline unsigned long __copy_to_user_inatomic(void __user *to,
 static inline unsigned long __copy_from_user(void *to,
 		const void __user *from, unsigned long size)
 {
-	might_sleep();
+	might_fault();
 	return __copy_from_user_inatomic(to, from, size);
 }
 
 static inline unsigned long __copy_to_user(void __user *to,
 		const void *from, unsigned long size)
 {
-	might_sleep();
+	might_fault();
 	return __copy_to_user_inatomic(to, from, size);
 }
 
@@ -434,7 +434,7 @@ extern unsigned long __clear_user(void __user *addr, unsigned long size);
 
 static inline unsigned long clear_user(void __user *addr, unsigned long size)
 {
-	might_sleep();
+	might_fault();
 	if (likely(access_ok(VERIFY_WRITE, addr, size)))
 		return __clear_user(addr, size);
 	if ((unsigned long)addr < TASK_SIZE) {
diff --git a/arch/powerpc/include/asm/vdso.h b/arch/powerpc/include/asm/vdso.h
index 50f261bc3e95..0d9cecddf8a4 100644
--- a/arch/powerpc/include/asm/vdso.h
+++ b/arch/powerpc/include/asm/vdso.h
@@ -22,7 +22,7 @@ extern unsigned long vdso64_rt_sigtramp;
 extern unsigned long vdso32_sigtramp;
 extern unsigned long vdso32_rt_sigtramp;
 
-int __cpuinit vdso_getcpu_init(void);
+int vdso_getcpu_init(void);
 
 #else /* __ASSEMBLY__ */
 
diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild
index 5182c8622b54..48be855ef37b 100644
--- a/arch/powerpc/include/uapi/asm/Kbuild
+++ b/arch/powerpc/include/uapi/asm/Kbuild
@@ -20,6 +20,7 @@ header-y += mman.h
 header-y += msgbuf.h
 header-y += nvram.h
 header-y += param.h
+header-y += perf_event.h
 header-y += poll.h
 header-y += posix_types.h
 header-y += ps3fb.h
diff --git a/arch/powerpc/include/uapi/asm/perf_event.h b/arch/powerpc/include/uapi/asm/perf_event.h
new file mode 100644
index 000000000000..80a4d40cf5bc
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/perf_event.h
@@ -0,0 +1,18 @@
1/*
2 * Copyright 2013 Michael Ellerman, IBM Corp.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; version 2 of the
7 * License.
8 */
9
10#ifndef _UAPI_ASM_POWERPC_PERF_EVENT_H
11#define _UAPI_ASM_POWERPC_PERF_EVENT_H
12
13/*
14 * We use bit 63 of perf_event_attr.config as a flag to request EBB.
15 */
16#define PERF_EVENT_CONFIG_EBB_SHIFT 63
17
18#endif /* _UAPI_ASM_POWERPC_PERF_EVENT_H */
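With PERF_EVENT_CONFIG_EBB_SHIFT exported to userspace, a tool can request Event-Based Branches by setting bit 63 of perf_event_attr.config. A hypothetical helper showing the intended usage; the pinned/exclusive/user-only settings are assumptions based on how POWER8 EBB events are expected to be scheduled:

    #include <string.h>
    #include <linux/perf_event.h>
    #include <asm/perf_event.h>	/* PERF_EVENT_CONFIG_EBB_SHIFT */

    static void setup_ebb_attr(struct perf_event_attr *attr,
    			   unsigned long long raw_event)
    {
    	memset(attr, 0, sizeof(*attr));
    	attr->size = sizeof(*attr);
    	attr->type = PERF_TYPE_RAW;
    	/* Bit 63 of config flags this as an EBB event. */
    	attr->config = raw_event | (1ULL << PERF_EVENT_CONFIG_EBB_SHIFT);
    	attr->pinned = 1;		/* assumed EBB scheduling constraints */
    	attr->exclusive = 1;
    	attr->exclude_kernel = 1;
    	attr->exclude_hv = 1;
    }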
diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h
index a36daf3c6f9a..a6d74467c9ed 100644
--- a/arch/powerpc/include/uapi/asm/socket.h
+++ b/arch/powerpc/include/uapi/asm/socket.h
@@ -81,4 +81,6 @@
81 81
82#define SO_SELECT_ERR_QUEUE 45 82#define SO_SELECT_ERR_QUEUE 45
83 83
84#define SO_BUSY_POLL 46
85
84#endif /* _ASM_POWERPC_SOCKET_H */ 86#endif /* _ASM_POWERPC_SOCKET_H */
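SO_BUSY_POLL is the new generic socket option for low-latency busy-polling of the receive queue; this hunk just assigns it the next free number in powerpc's (non-standard) socket option layout. Typical userspace usage, sketched:

    #include <sys/socket.h>

    #ifndef SO_BUSY_POLL
    #define SO_BUSY_POLL 46
    #endif

    /* Busy-poll the device queue for up to 50 microseconds per receive. */
    static int enable_busy_poll(int fd)
    {
    	int busy_poll_usecs = 50;

    	return setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL,
    			  &busy_poll_usecs, sizeof(busy_poll_usecs));
    }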
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index f960a7944553..a8619bfe879e 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -58,6 +58,8 @@ obj-$(CONFIG_RTAS_PROC) += rtas-proc.o
58obj-$(CONFIG_LPARCFG) += lparcfg.o 58obj-$(CONFIG_LPARCFG) += lparcfg.o
59obj-$(CONFIG_IBMVIO) += vio.o 59obj-$(CONFIG_IBMVIO) += vio.o
60obj-$(CONFIG_IBMEBUS) += ibmebus.o 60obj-$(CONFIG_IBMEBUS) += ibmebus.o
61obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \
62 eeh_driver.o eeh_event.o eeh_sysfs.o
61obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o 63obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
62obj-$(CONFIG_CRASH_DUMP) += crash_dump.o 64obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
63obj-$(CONFIG_FA_DUMP) += fadump.o 65obj-$(CONFIG_FA_DUMP) += fadump.o
@@ -100,7 +102,7 @@ obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o
100obj-$(CONFIG_STACKTRACE) += stacktrace.o 102obj-$(CONFIG_STACKTRACE) += stacktrace.o
101obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o 103obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o
102 104
103pci64-$(CONFIG_PPC64) += pci_dn.o isa-bridge.o 105pci64-$(CONFIG_PPC64) += pci_dn.o pci-hotplug.o isa-bridge.o
104obj-$(CONFIG_PCI) += pci_$(CONFIG_WORD_SIZE).o $(pci64-y) \ 106obj-$(CONFIG_PCI) += pci_$(CONFIG_WORD_SIZE).o $(pci64-y) \
105 pci-common.o pci_of_scan.o 107 pci-common.o pci_of_scan.o
106obj-$(CONFIG_PCI_MSI) += msi.o 108obj-$(CONFIG_PCI_MSI) += msi.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 6f16ffafa6f0..8207459efe56 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -105,9 +105,6 @@ int main(void)
105 DEFINE(KSP_VSID, offsetof(struct thread_struct, ksp_vsid)); 105 DEFINE(KSP_VSID, offsetof(struct thread_struct, ksp_vsid));
106#else /* CONFIG_PPC64 */ 106#else /* CONFIG_PPC64 */
107 DEFINE(PGDIR, offsetof(struct thread_struct, pgdir)); 107 DEFINE(PGDIR, offsetof(struct thread_struct, pgdir));
108#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
109 DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0));
110#endif
111#ifdef CONFIG_SPE 108#ifdef CONFIG_SPE
112 DEFINE(THREAD_EVR0, offsetof(struct thread_struct, evr[0])); 109 DEFINE(THREAD_EVR0, offsetof(struct thread_struct, evr[0]));
113 DEFINE(THREAD_ACC, offsetof(struct thread_struct, acc)); 110 DEFINE(THREAD_ACC, offsetof(struct thread_struct, acc));
@@ -115,6 +112,9 @@ int main(void)
115 DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe)); 112 DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe));
116#endif /* CONFIG_SPE */ 113#endif /* CONFIG_SPE */
117#endif /* CONFIG_PPC64 */ 114#endif /* CONFIG_PPC64 */
115#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
116 DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0));
117#endif
118#ifdef CONFIG_KVM_BOOK3S_32_HANDLER 118#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
119 DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu)); 119 DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu));
120#endif 120#endif
@@ -132,13 +132,15 @@ int main(void)
132 DEFINE(THREAD_SIER, offsetof(struct thread_struct, sier)); 132 DEFINE(THREAD_SIER, offsetof(struct thread_struct, sier));
133 DEFINE(THREAD_MMCR0, offsetof(struct thread_struct, mmcr0)); 133 DEFINE(THREAD_MMCR0, offsetof(struct thread_struct, mmcr0));
134 DEFINE(THREAD_MMCR2, offsetof(struct thread_struct, mmcr2)); 134 DEFINE(THREAD_MMCR2, offsetof(struct thread_struct, mmcr2));
135 DEFINE(THREAD_MMCRA, offsetof(struct thread_struct, mmcra));
136#endif 135#endif
137#ifdef CONFIG_PPC_TRANSACTIONAL_MEM 136#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
138 DEFINE(PACATMSCRATCH, offsetof(struct paca_struct, tm_scratch)); 137 DEFINE(PACATMSCRATCH, offsetof(struct paca_struct, tm_scratch));
139 DEFINE(THREAD_TM_TFHAR, offsetof(struct thread_struct, tm_tfhar)); 138 DEFINE(THREAD_TM_TFHAR, offsetof(struct thread_struct, tm_tfhar));
140 DEFINE(THREAD_TM_TEXASR, offsetof(struct thread_struct, tm_texasr)); 139 DEFINE(THREAD_TM_TEXASR, offsetof(struct thread_struct, tm_texasr));
141 DEFINE(THREAD_TM_TFIAR, offsetof(struct thread_struct, tm_tfiar)); 140 DEFINE(THREAD_TM_TFIAR, offsetof(struct thread_struct, tm_tfiar));
141 DEFINE(THREAD_TM_TAR, offsetof(struct thread_struct, tm_tar));
142 DEFINE(THREAD_TM_PPR, offsetof(struct thread_struct, tm_ppr));
143 DEFINE(THREAD_TM_DSCR, offsetof(struct thread_struct, tm_dscr));
142 DEFINE(PT_CKPT_REGS, offsetof(struct thread_struct, ckpt_regs)); 144 DEFINE(PT_CKPT_REGS, offsetof(struct thread_struct, ckpt_regs));
143 DEFINE(THREAD_TRANSACT_VR0, offsetof(struct thread_struct, 145 DEFINE(THREAD_TRANSACT_VR0, offsetof(struct thread_struct,
144 transact_vr[0])); 146 transact_vr[0]));
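asm-offsets.c is compiled but never linked; each DEFINE() above plants a marker in the compiler's assembly output, which kbuild post-processes into #define lines in the generated asm-offsets.h so assembly code can use the structure offsets. The mechanism, roughly (the real macro is in include/linux/kbuild.h):

    /* Emits "->SYM $value" into the asm stream; a sed script turns it
     * into "#define SYM value" in include/generated/asm-offsets.h. */
    #define DEFINE(sym, val) \
    	asm volatile("\n->" #sym " %0 " #val : : "i" (val))

    /* Entry code can then do, e.g.:  std r3, THREAD_TM_TAR(r9) */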
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index 92c6b008dd2b..9262cf2bec4b 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -131,7 +131,8 @@ static const char *cache_type_string(const struct cache *cache)
131 return cache_type_info[cache->type].name; 131 return cache_type_info[cache->type].name;
132} 132}
133 133
134static void __cpuinit cache_init(struct cache *cache, int type, int level, struct device_node *ofnode) 134static void cache_init(struct cache *cache, int type, int level,
135 struct device_node *ofnode)
135{ 136{
136 cache->type = type; 137 cache->type = type;
137 cache->level = level; 138 cache->level = level;
@@ -140,7 +141,7 @@ static void __cpuinit cache_init(struct cache *cache, int type, int level, struc
140 list_add(&cache->list, &cache_list); 141 list_add(&cache->list, &cache_list);
141} 142}
142 143
143static struct cache *__cpuinit new_cache(int type, int level, struct device_node *ofnode) 144static struct cache *new_cache(int type, int level, struct device_node *ofnode)
144{ 145{
145 struct cache *cache; 146 struct cache *cache;
146 147
@@ -324,7 +325,8 @@ static bool cache_node_is_unified(const struct device_node *np)
324 return of_get_property(np, "cache-unified", NULL); 325 return of_get_property(np, "cache-unified", NULL);
325} 326}
326 327
327static struct cache *__cpuinit cache_do_one_devnode_unified(struct device_node *node, int level) 328static struct cache *cache_do_one_devnode_unified(struct device_node *node,
329 int level)
328{ 330{
329 struct cache *cache; 331 struct cache *cache;
330 332
@@ -335,7 +337,8 @@ static struct cache *__cpuinit cache_do_one_devnode_unified(struct device_node *
335 return cache; 337 return cache;
336} 338}
337 339
338static struct cache *__cpuinit cache_do_one_devnode_split(struct device_node *node, int level) 340static struct cache *cache_do_one_devnode_split(struct device_node *node,
341 int level)
339{ 342{
340 struct cache *dcache, *icache; 343 struct cache *dcache, *icache;
341 344
@@ -357,7 +360,7 @@ err:
357 return NULL; 360 return NULL;
358} 361}
359 362
360static struct cache *__cpuinit cache_do_one_devnode(struct device_node *node, int level) 363static struct cache *cache_do_one_devnode(struct device_node *node, int level)
361{ 364{
362 struct cache *cache; 365 struct cache *cache;
363 366
@@ -369,7 +372,8 @@ static struct cache *__cpuinit cache_do_one_devnode(struct device_node *node, in
369 return cache; 372 return cache;
370} 373}
371 374
372static struct cache *__cpuinit cache_lookup_or_instantiate(struct device_node *node, int level) 375static struct cache *cache_lookup_or_instantiate(struct device_node *node,
376 int level)
373{ 377{
374 struct cache *cache; 378 struct cache *cache;
375 379
@@ -385,7 +389,7 @@ static struct cache *__cpuinit cache_lookup_or_instantiate(struct device_node *n
385 return cache; 389 return cache;
386} 390}
387 391
388static void __cpuinit link_cache_lists(struct cache *smaller, struct cache *bigger) 392static void link_cache_lists(struct cache *smaller, struct cache *bigger)
389{ 393{
390 while (smaller->next_local) { 394 while (smaller->next_local) {
391 if (smaller->next_local == bigger) 395 if (smaller->next_local == bigger)
@@ -396,13 +400,13 @@ static void __cpuinit link_cache_lists(struct cache *smaller, struct cache *bigg
396 smaller->next_local = bigger; 400 smaller->next_local = bigger;
397} 401}
398 402
399static void __cpuinit do_subsidiary_caches_debugcheck(struct cache *cache) 403static void do_subsidiary_caches_debugcheck(struct cache *cache)
400{ 404{
401 WARN_ON_ONCE(cache->level != 1); 405 WARN_ON_ONCE(cache->level != 1);
402 WARN_ON_ONCE(strcmp(cache->ofnode->type, "cpu")); 406 WARN_ON_ONCE(strcmp(cache->ofnode->type, "cpu"));
403} 407}
404 408
405static void __cpuinit do_subsidiary_caches(struct cache *cache) 409static void do_subsidiary_caches(struct cache *cache)
406{ 410{
407 struct device_node *subcache_node; 411 struct device_node *subcache_node;
408 int level = cache->level; 412 int level = cache->level;
@@ -423,7 +427,7 @@ static void __cpuinit do_subsidiary_caches(struct cache *cache)
423 } 427 }
424} 428}
425 429
426static struct cache *__cpuinit cache_chain_instantiate(unsigned int cpu_id) 430static struct cache *cache_chain_instantiate(unsigned int cpu_id)
427{ 431{
428 struct device_node *cpu_node; 432 struct device_node *cpu_node;
429 struct cache *cpu_cache = NULL; 433 struct cache *cpu_cache = NULL;
@@ -448,7 +452,7 @@ out:
448 return cpu_cache; 452 return cpu_cache;
449} 453}
450 454
451static struct cache_dir *__cpuinit cacheinfo_create_cache_dir(unsigned int cpu_id) 455static struct cache_dir *cacheinfo_create_cache_dir(unsigned int cpu_id)
452{ 456{
453 struct cache_dir *cache_dir; 457 struct cache_dir *cache_dir;
454 struct device *dev; 458 struct device *dev;
@@ -653,7 +657,7 @@ static struct kobj_type cache_index_type = {
653 .default_attrs = cache_index_default_attrs, 657 .default_attrs = cache_index_default_attrs,
654}; 658};
655 659
656static void __cpuinit cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir) 660static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
657{ 661{
658 const char *cache_name; 662 const char *cache_name;
659 const char *cache_type; 663 const char *cache_type;
@@ -696,7 +700,8 @@ static void __cpuinit cacheinfo_create_index_opt_attrs(struct cache_index_dir *d
696 kfree(buf); 700 kfree(buf);
697} 701}
698 702
699static void __cpuinit cacheinfo_create_index_dir(struct cache *cache, int index, struct cache_dir *cache_dir) 703static void cacheinfo_create_index_dir(struct cache *cache, int index,
704 struct cache_dir *cache_dir)
700{ 705{
701 struct cache_index_dir *index_dir; 706 struct cache_index_dir *index_dir;
702 int rc; 707 int rc;
@@ -722,7 +727,8 @@ err:
722 kfree(index_dir); 727 kfree(index_dir);
723} 728}
724 729
725static void __cpuinit cacheinfo_sysfs_populate(unsigned int cpu_id, struct cache *cache_list) 730static void cacheinfo_sysfs_populate(unsigned int cpu_id,
731 struct cache *cache_list)
726{ 732{
727 struct cache_dir *cache_dir; 733 struct cache_dir *cache_dir;
728 struct cache *cache; 734 struct cache *cache;
@@ -740,7 +746,7 @@ static void __cpuinit cacheinfo_sysfs_populate(unsigned int cpu_id, struct cache
740 } 746 }
741} 747}
742 748
743void __cpuinit cacheinfo_cpu_online(unsigned int cpu_id) 749void cacheinfo_cpu_online(unsigned int cpu_id)
744{ 750{
745 struct cache *cache; 751 struct cache *cache;
746 752
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 2a45d0f04385..22973a74df73 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -494,9 +494,27 @@ static struct cpu_spec __initdata cpu_specs[] = {
494 .cpu_restore = __restore_cpu_power7, 494 .cpu_restore = __restore_cpu_power7,
495 .platform = "power7+", 495 .platform = "power7+",
496 }, 496 },
497 { /* Power8 */ 497 { /* Power8E */
498 .pvr_mask = 0xffff0000, 498 .pvr_mask = 0xffff0000,
499 .pvr_value = 0x004b0000, 499 .pvr_value = 0x004b0000,
500 .cpu_name = "POWER8E (raw)",
501 .cpu_features = CPU_FTRS_POWER8,
502 .cpu_user_features = COMMON_USER_POWER8,
503 .cpu_user_features2 = COMMON_USER2_POWER8,
504 .mmu_features = MMU_FTRS_POWER8,
505 .icache_bsize = 128,
506 .dcache_bsize = 128,
507 .num_pmcs = 6,
508 .pmc_type = PPC_PMC_IBM,
509 .oprofile_cpu_type = "ppc64/power8",
510 .oprofile_type = PPC_OPROFILE_INVALID,
511 .cpu_setup = __setup_cpu_power8,
512 .cpu_restore = __restore_cpu_power8,
513 .platform = "power8",
514 },
515 { /* Power8 */
516 .pvr_mask = 0xffff0000,
517 .pvr_value = 0x004d0000,
500 .cpu_name = "POWER8 (raw)", 518 .cpu_name = "POWER8 (raw)",
501 .cpu_features = CPU_FTRS_POWER8, 519 .cpu_features = CPU_FTRS_POWER8,
502 .cpu_user_features = COMMON_USER_POWER8, 520 .cpu_user_features = COMMON_USER_POWER8,
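The split above gives POWER8E (PVR 0x004b) its own entry rather than letting it match the generic POWER8 mask, whose PVR value moves to 0x004d. Entries are matched first-hit by mask-and-compare against the Processor Version Register; schematically (the real scan lives in identify_cpu()):

    /* Schematic of the cpu_specs[] lookup, not the exact kernel code. */
    static struct cpu_spec *match_cpu_spec(struct cpu_spec *specs,
    				       int nr, unsigned int pvr)
    {
    	int i;

    	for (i = 0; i < nr; i++) {
    		/* First entry whose masked PVR matches wins, so more
    		 * specific entries must precede generic ones. */
    		if ((pvr & specs[i].pvr_mask) == specs[i].pvr_value)
    			return &specs[i];
    	}
    	return NULL;
    }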
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index 9ec3fe174cba..779a78c26435 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -69,16 +69,6 @@ void __init setup_kdump_trampoline(void)
69} 69}
70#endif /* CONFIG_NONSTATIC_KERNEL */ 70#endif /* CONFIG_NONSTATIC_KERNEL */
71 71
72static int __init parse_savemaxmem(char *p)
73{
74 if (p)
75 saved_max_pfn = (memparse(p, &p) >> PAGE_SHIFT) - 1;
76
77 return 1;
78}
79__setup("savemaxmem=", parse_savemaxmem);
80
81
82static size_t copy_oldmem_vaddr(void *vaddr, char *buf, size_t csize, 72static size_t copy_oldmem_vaddr(void *vaddr, char *buf, size_t csize,
83 unsigned long offset, int userbuf) 73 unsigned long offset, int userbuf)
84{ 74{
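The deleted parse_savemaxmem() was a boot-parameter hook, presumably dropped because nothing consumes saved_max_pfn on powerpc anymore now that /proc/vmcore is driven by the ELF core header. For reference, the __setup() pattern it used registers an early command-line handler; a minimal, hypothetical sketch:

    #include <linux/init.h>

    /* Hypothetical handler: "p" points just past "example=" on the
     * kernel command line; returning non-zero marks it consumed. */
    static int __init parse_example(char *p)
    {
    	return 1;
    }
    __setup("example=", parse_example);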
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/kernel/eeh.c
index 6b73d6c44f51..55593ee2d5aa 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -103,11 +103,8 @@ EXPORT_SYMBOL(eeh_subsystem_enabled);
103 */ 103 */
104int eeh_probe_mode; 104int eeh_probe_mode;
105 105
106/* Global EEH mutex */
107DEFINE_MUTEX(eeh_mutex);
108
109/* Lock to avoid races due to multiple reports of an error */ 106/* Lock to avoid races due to multiple reports of an error */
110static DEFINE_RAW_SPINLOCK(confirm_error_lock); 107DEFINE_RAW_SPINLOCK(confirm_error_lock);
111 108
112/* Buffer for reporting pci register dumps. It's here in BSS, and 109/* Buffer for reporting pci register dumps. It's here in BSS, and
113 * not dynamically allocated, so that it ends up in RMO where RTAS 110 * not dynamically allocated, so that it ends up in RMO where RTAS
@@ -234,17 +231,31 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)
234void eeh_slot_error_detail(struct eeh_pe *pe, int severity) 231void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
235{ 232{
236 size_t loglen = 0; 233 size_t loglen = 0;
237 struct eeh_dev *edev; 234 struct eeh_dev *edev, *tmp;
238 235 bool valid_cfg_log = true;
239 eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
240 eeh_ops->configure_bridge(pe);
241 eeh_pe_restore_bars(pe);
242 236
243 pci_regs_buf[0] = 0; 237 /*
244 eeh_pe_for_each_dev(pe, edev) { 238 * When the PHB is fenced or dead, it's pointless to collect
245 loglen += eeh_gather_pci_data(edev, pci_regs_buf, 239 * the data from PCI config space because it should return
246 EEH_PCI_REGS_LOG_LEN); 240 * 0xFF's. For ER, we still retrieve the data from the PCI
247 } 241 * config space.
242 */
243 if (eeh_probe_mode_dev() &&
244 (pe->type & EEH_PE_PHB) &&
245 (pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD)))
246 valid_cfg_log = false;
247
248 if (valid_cfg_log) {
249 eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
250 eeh_ops->configure_bridge(pe);
251 eeh_pe_restore_bars(pe);
252
253 pci_regs_buf[0] = 0;
254 eeh_pe_for_each_dev(pe, edev, tmp) {
255 loglen += eeh_gather_pci_data(edev, pci_regs_buf + loglen,
256 EEH_PCI_REGS_LOG_LEN - loglen);
257 }
258 }
248 259
249 eeh_ops->get_log(pe, severity, pci_regs_buf, loglen); 260 eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
250} 261}
@@ -260,15 +271,74 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
260{ 271{
261 pte_t *ptep; 272 pte_t *ptep;
262 unsigned long pa; 273 unsigned long pa;
274 int hugepage_shift;
263 275
264 ptep = find_linux_pte(init_mm.pgd, token); 276 /*
277 * We won't find hugepages here; this is iomem.
278 */
279 ptep = find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift);
265 if (!ptep) 280 if (!ptep)
266 return token; 281 return token;
282 WARN_ON(hugepage_shift);
267 pa = pte_pfn(*ptep) << PAGE_SHIFT; 283 pa = pte_pfn(*ptep) << PAGE_SHIFT;
268 284
269 return pa | (token & (PAGE_SIZE-1)); 285 return pa | (token & (PAGE_SIZE-1));
270} 286}
271 287
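find_linux_pte_or_hugepte() extends the old walker with an out-parameter reporting the huge-page shift when the mapping is a huge page. An EEH token is an ioremap address, so a huge-page hit here would indicate a bug, hence the WARN_ON. The contract, sketched:

    unsigned long pa = token;
    int hugepage_shift;
    pte_t *ptep;

    /* Walks init_mm's page tables; hugepage_shift is non-zero iff the
     * mapping is a huge page, in which case PAGE_SHIFT-based
     * arithmetic on the PTE would be wrong. */
    ptep = find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift);
    if (ptep && !hugepage_shift)
    	pa = (pte_pfn(*ptep) << PAGE_SHIFT) | (token & (PAGE_SIZE - 1));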
288/*
289 * On the PowerNV platform, we might already have a fenced PHB there.
290 * In that case, it's meaningless to recover the frozen PE. Instead,
291 * we have to handle the fenced PHB first.
292 */
293static int eeh_phb_check_failure(struct eeh_pe *pe)
294{
295 struct eeh_pe *phb_pe;
296 unsigned long flags;
297 int ret;
298
299 if (!eeh_probe_mode_dev())
300 return -EPERM;
301
302 /* Find the PHB PE */
303 phb_pe = eeh_phb_pe_get(pe->phb);
304 if (!phb_pe) {
305 pr_warning("%s: Can't find PE for PHB#%d\n",
306 __func__, pe->phb->global_number);
307 return -EEXIST;
308 }
309
310 /* If the PHB has been in problematic state */
311 eeh_serialize_lock(&flags);
312 if (phb_pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD)) {
313 ret = 0;
314 goto out;
315 }
316
317 /* Check PHB state */
318 ret = eeh_ops->get_state(phb_pe, NULL);
319 if ((ret < 0) ||
320 (ret == EEH_STATE_NOT_SUPPORT) ||
321 (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
322 (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
323 ret = 0;
324 goto out;
325 }
326
327 /* Isolate the PHB and send event */
328 eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
329 eeh_serialize_unlock(flags);
330 eeh_send_failure_event(phb_pe);
331
332 pr_err("EEH: PHB#%x failure detected\n",
333 phb_pe->phb->global_number);
334 dump_stack();
335
336 return 1;
337out:
338 eeh_serialize_unlock(flags);
339 return ret;
340}
341
272/** 342/**
273 * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze 343 * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
274 * @edev: eeh device 344 * @edev: eeh device
@@ -319,13 +389,21 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
319 return 0; 389 return 0;
320 } 390 }
321 391
392 /*
393 * On the PowerNV platform, we might already have a fenced PHB
394 * there, and we need to take care of that first.
395 */
396 ret = eeh_phb_check_failure(pe);
397 if (ret > 0)
398 return ret;
399
322 /* If we already have a pending isolation event for this 400 /* If we already have a pending isolation event for this
323 * slot, we know it's bad already, we don't need to check. 401 * slot, we know it's bad already, we don't need to check.
324 * Do this checking under a lock; as multiple PCI devices 402 * Do this checking under a lock; as multiple PCI devices
325 * in one slot might report errors simultaneously, and we 403 * in one slot might report errors simultaneously, and we
326 * only want one error recovery routine running. 404 * only want one error recovery routine running.
327 */ 405 */
328 raw_spin_lock_irqsave(&confirm_error_lock, flags); 406 eeh_serialize_lock(&flags);
329 rc = 1; 407 rc = 1;
330 if (pe->state & EEH_PE_ISOLATED) { 408 if (pe->state & EEH_PE_ISOLATED) {
331 pe->check_count++; 409 pe->check_count++;
@@ -368,13 +446,13 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
368 } 446 }
369 447
370 eeh_stats.slot_resets++; 448 eeh_stats.slot_resets++;
371 449
372 /* Avoid repeated reports of this failure, including problems 450 /* Avoid repeated reports of this failure, including problems
373 * with other functions on this device, and functions under 451 * with other functions on this device, and functions under
374 * bridges. 452 * bridges.
375 */ 453 */
376 eeh_pe_state_mark(pe, EEH_PE_ISOLATED); 454 eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
377 raw_spin_unlock_irqrestore(&confirm_error_lock, flags); 455 eeh_serialize_unlock(flags);
378 456
379 eeh_send_failure_event(pe); 457 eeh_send_failure_event(pe);
380 458
@@ -382,11 +460,14 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
382 * a stack trace will help the device-driver authors figure 460 * a stack trace will help the device-driver authors figure
383 * out what happened. So print that out. 461 * out what happened. So print that out.
384 */ 462 */
385 WARN(1, "EEH: failure detected\n"); 463 pr_err("EEH: Frozen PE#%x detected on PHB#%x\n",
464 pe->addr, pe->phb->global_number);
465 dump_stack();
466
386 return 1; 467 return 1;
387 468
388dn_unlock: 469dn_unlock:
389 raw_spin_unlock_irqrestore(&confirm_error_lock, flags); 470 eeh_serialize_unlock(flags);
390 return rc; 471 return rc;
391} 472}
392 473
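The locking calls above are folded behind eeh_serialize_lock()/eeh_serialize_unlock(), which is also why confirm_error_lock loses its static qualifier earlier in this patch: the wrappers are presumably thin inlines in asm/eeh.h over the same raw spinlock, along the lines of:

    extern raw_spinlock_t confirm_error_lock;

    static inline void eeh_serialize_lock(unsigned long *flags)
    {
    	raw_spin_lock_irqsave(&confirm_error_lock, *flags);
    }

    static inline void eeh_serialize_unlock(unsigned long flags)
    {
    	raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
    }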
@@ -418,8 +499,6 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon
418 } 499 }
419 500
420 eeh_dev_check_failure(edev); 501 eeh_dev_check_failure(edev);
421
422 pci_dev_put(eeh_dev_to_pci_dev(edev));
423 return val; 502 return val;
424} 503}
425 504
@@ -525,7 +604,7 @@ static void eeh_reset_pe_once(struct eeh_pe *pe)
525 * or a fundamental reset (3). 604 * or a fundamental reset (3).
526 * A fundamental reset required by any device under 605 * A fundamental reset required by any device under
527 * Partitionable Endpoint trumps hot-reset. 606 * Partitionable Endpoint trumps hot-reset.
528 */ 607 */
529 eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset); 608 eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset);
530 609
531 if (freset) 610 if (freset)
@@ -538,8 +617,8 @@ static void eeh_reset_pe_once(struct eeh_pe *pe)
538 */ 617 */
539#define PCI_BUS_RST_HOLD_TIME_MSEC 250 618#define PCI_BUS_RST_HOLD_TIME_MSEC 250
540 msleep(PCI_BUS_RST_HOLD_TIME_MSEC); 619 msleep(PCI_BUS_RST_HOLD_TIME_MSEC);
541 620
542 /* We might get hit with another EEH freeze as soon as the 621 /* We might get hit with another EEH freeze as soon as the
543 * pci slot reset line is dropped. Make sure we don't miss 622 * pci slot reset line is dropped. Make sure we don't miss
544 * these, and clear the flag now. 623 * these, and clear the flag now.
545 */ 624 */
@@ -565,6 +644,7 @@ static void eeh_reset_pe_once(struct eeh_pe *pe)
565 */ 644 */
566int eeh_reset_pe(struct eeh_pe *pe) 645int eeh_reset_pe(struct eeh_pe *pe)
567{ 646{
647 int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
568 int i, rc; 648 int i, rc;
569 649
570 /* Take three shots at resetting the bus */ 650 /* Take three shots at resetting the bus */
@@ -572,7 +652,7 @@ int eeh_reset_pe(struct eeh_pe *pe)
572 eeh_reset_pe_once(pe); 652 eeh_reset_pe_once(pe);
573 653
574 rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); 654 rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
575 if (rc == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) 655 if ((rc & flags) == flags)
576 return 0; 656 return 0;
577 657
578 if (rc < 0) { 658 if (rc < 0) {
@@ -604,7 +684,7 @@ void eeh_save_bars(struct eeh_dev *edev)
604 if (!edev) 684 if (!edev)
605 return; 685 return;
606 dn = eeh_dev_to_of_node(edev); 686 dn = eeh_dev_to_of_node(edev);
607 687
608 for (i = 0; i < 16; i++) 688 for (i = 0; i < 16; i++)
609 eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]); 689 eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]);
610} 690}
@@ -674,11 +754,21 @@ int __exit eeh_ops_unregister(const char *name)
674 * Even if force-off is set, the EEH hardware is still enabled, so that 754 * Even if force-off is set, the EEH hardware is still enabled, so that
675 * newer systems can boot. 755 * newer systems can boot.
676 */ 756 */
677static int __init eeh_init(void) 757int eeh_init(void)
678{ 758{
679 struct pci_controller *hose, *tmp; 759 struct pci_controller *hose, *tmp;
680 struct device_node *phb; 760 struct device_node *phb;
681 int ret; 761 static int cnt = 0;
762 int ret = 0;
763
764 /*
765 * On PowerNV we have to delay the initialization until
766 * the PCI hierarchy tree has been built, because the PEs
767 * are figured out based on PCI devices instead of device
768 * tree nodes.
769 */
770 if (machine_is(powernv) && cnt++ <= 0)
771 return ret;
682 772
683 /* call platform initialization function */ 773 /* call platform initialization function */
684 if (!eeh_ops) { 774 if (!eeh_ops) {
@@ -691,7 +781,10 @@ static int __init eeh_init(void)
691 return ret; 781 return ret;
692 } 782 }
693 783
694 raw_spin_lock_init(&confirm_error_lock); 784 /* Initialize EEH event */
785 ret = eeh_event_init();
786 if (ret)
787 return ret;
695 788
696 /* Enable EEH for all adapters */ 789 /* Enable EEH for all adapters */
697 if (eeh_probe_mode_devtree()) { 790 if (eeh_probe_mode_devtree()) {
@@ -700,6 +793,25 @@ static int __init eeh_init(void)
700 phb = hose->dn; 793 phb = hose->dn;
701 traverse_pci_devices(phb, eeh_ops->of_probe, NULL); 794 traverse_pci_devices(phb, eeh_ops->of_probe, NULL);
702 } 795 }
796 } else if (eeh_probe_mode_dev()) {
797 list_for_each_entry_safe(hose, tmp,
798 &hose_list, list_node)
799 pci_walk_bus(hose->bus, eeh_ops->dev_probe, NULL);
800 } else {
801 pr_warning("%s: Invalid probe mode %d\n",
802 __func__, eeh_probe_mode);
803 return -EINVAL;
804 }
805
806 /*
807 * Call the platform post-initialization. It's a good chance
808 * to inform the platform that EEH is ready to supply service
809 * once the I/O cache has been built up.
810 */
811 if (eeh_ops->post_init) {
812 ret = eeh_ops->post_init();
813 if (ret)
814 return ret;
703 } 815 }
704 816
705 if (eeh_subsystem_enabled) 817 if (eeh_subsystem_enabled)
@@ -724,10 +836,18 @@ core_initcall_sync(eeh_init);
724 * on the CEC architecture, type of the device, on earlier boot 836 * on the CEC architecture, type of the device, on earlier boot
725 * command-line arguments & etc. 837 * command-line arguments & etc.
726 */ 838 */
727static void eeh_add_device_early(struct device_node *dn) 839void eeh_add_device_early(struct device_node *dn)
728{ 840{
729 struct pci_controller *phb; 841 struct pci_controller *phb;
730 842
843 /*
844 * If we're doing the EEH probe based on PCI devices, we
845 * delay the probe until a later stage because
846 * the PCI device isn't available at this point.
847 */
848 if (!eeh_probe_mode_devtree())
849 return;
850
731 if (!of_node_to_eeh_dev(dn)) 851 if (!of_node_to_eeh_dev(dn))
732 return; 852 return;
733 phb = of_node_to_eeh_dev(dn)->phb; 853 phb = of_node_to_eeh_dev(dn)->phb;
@@ -736,7 +856,6 @@ static void eeh_add_device_early(struct device_node *dn)
736 if (NULL == phb || 0 == phb->buid) 856 if (NULL == phb || 0 == phb->buid)
737 return; 857 return;
738 858
739 /* FIXME: hotplug support on POWERNV */
740 eeh_ops->of_probe(dn, NULL); 859 eeh_ops->of_probe(dn, NULL);
741} 860}
742 861
@@ -765,7 +884,7 @@ EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
765 * This routine must be used to complete EEH initialization for PCI 884 * This routine must be used to complete EEH initialization for PCI
766 * devices that were added after system boot (e.g. hotplug, dlpar). 885 * devices that were added after system boot (e.g. hotplug, dlpar).
767 */ 886 */
768static void eeh_add_device_late(struct pci_dev *dev) 887void eeh_add_device_late(struct pci_dev *dev)
769{ 888{
770 struct device_node *dn; 889 struct device_node *dn;
771 struct eeh_dev *edev; 890 struct eeh_dev *edev;
@@ -781,12 +900,33 @@ static void eeh_add_device_late(struct pci_dev *dev)
781 pr_debug("EEH: Already referenced !\n"); 900 pr_debug("EEH: Already referenced !\n");
782 return; 901 return;
783 } 902 }
784 WARN_ON(edev->pdev);
785 903
786 pci_dev_get(dev); 904 /*
905 * The EEH cache might not be removed correctly because of
906 * an unbalanced kref to the device at unplug time, which
907 * relies on pcibios_release_device(). So we have to remove
908 * it here explicitly.
909 */
910 if (edev->pdev) {
911 eeh_rmv_from_parent_pe(edev);
912 eeh_addr_cache_rmv_dev(edev->pdev);
913 eeh_sysfs_remove_device(edev->pdev);
914 edev->mode &= ~EEH_DEV_SYSFS;
915
916 edev->pdev = NULL;
917 dev->dev.archdata.edev = NULL;
918 }
919
787 edev->pdev = dev; 920 edev->pdev = dev;
788 dev->dev.archdata.edev = edev; 921 dev->dev.archdata.edev = edev;
789 922
923 /*
924 * We have to do the EEH probe here because the PCI device
925 * hasn't been created yet in the early stage.
926 */
927 if (eeh_probe_mode_dev())
928 eeh_ops->dev_probe(dev, NULL);
929
790 eeh_addr_cache_insert_dev(dev); 930 eeh_addr_cache_insert_dev(dev);
791} 931}
792 932
@@ -803,12 +943,12 @@ void eeh_add_device_tree_late(struct pci_bus *bus)
803 struct pci_dev *dev; 943 struct pci_dev *dev;
804 944
805 list_for_each_entry(dev, &bus->devices, bus_list) { 945 list_for_each_entry(dev, &bus->devices, bus_list) {
806 eeh_add_device_late(dev); 946 eeh_add_device_late(dev);
807 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { 947 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
808 struct pci_bus *subbus = dev->subordinate; 948 struct pci_bus *subbus = dev->subordinate;
809 if (subbus) 949 if (subbus)
810 eeh_add_device_tree_late(subbus); 950 eeh_add_device_tree_late(subbus);
811 } 951 }
812 } 952 }
813} 953}
814EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); 954EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
@@ -839,7 +979,6 @@ EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
839/** 979/**
840 * eeh_remove_device - Undo EEH setup for the indicated pci device 980 * eeh_remove_device - Undo EEH setup for the indicated pci device
841 * @dev: pci device to be removed 981 * @dev: pci device to be removed
842 * @purge_pe: remove the PE or not
843 * 982 *
844 * This routine should be called when a device is removed from 983 * This routine should be called when a device is removed from
845 * a running system (e.g. by hotplug or dlpar). It unregisters 984 * a running system (e.g. by hotplug or dlpar). It unregisters
@@ -847,7 +986,7 @@ EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
847 * this device will no longer be detected after this call; thus, 986 * this device will no longer be detected after this call; thus,
848 * i/o errors affecting this slot may leave this device unusable. 987 * i/o errors affecting this slot may leave this device unusable.
849 */ 988 */
850static void eeh_remove_device(struct pci_dev *dev, int purge_pe) 989void eeh_remove_device(struct pci_dev *dev)
851{ 990{
852 struct eeh_dev *edev; 991 struct eeh_dev *edev;
853 992
@@ -858,42 +997,29 @@ static void eeh_remove_device(struct pci_dev *dev, int purge_pe)
858 /* Unregister the device with the EEH/PCI address search system */ 997 /* Unregister the device with the EEH/PCI address search system */
859 pr_debug("EEH: Removing device %s\n", pci_name(dev)); 998 pr_debug("EEH: Removing device %s\n", pci_name(dev));
860 999
861 if (!edev || !edev->pdev) { 1000 if (!edev || !edev->pdev || !edev->pe) {
862 pr_debug("EEH: Not referenced !\n"); 1001 pr_debug("EEH: Not referenced !\n");
863 return; 1002 return;
864 } 1003 }
1004
1005 /*
1006 * During hotplug for EEH error recovery, we need the EEH
1007 * device to stay attached to the parent PE so the BARs can be
1008 * restored a bit later. So we keep it for the BAR restore and
1009 * remove it from the parent PE during that restore.
1010 */
865 edev->pdev = NULL; 1011 edev->pdev = NULL;
866 dev->dev.archdata.edev = NULL; 1012 dev->dev.archdata.edev = NULL;
867 pci_dev_put(dev); 1013 if (!(edev->pe->state & EEH_PE_KEEP))
1014 eeh_rmv_from_parent_pe(edev);
1015 else
1016 edev->mode |= EEH_DEV_DISCONNECTED;
868 1017
869 eeh_rmv_from_parent_pe(edev, purge_pe);
870 eeh_addr_cache_rmv_dev(dev); 1018 eeh_addr_cache_rmv_dev(dev);
871 eeh_sysfs_remove_device(dev); 1019 eeh_sysfs_remove_device(dev);
1020 edev->mode &= ~EEH_DEV_SYSFS;
872} 1021}
873 1022
874/**
875 * eeh_remove_bus_device - Undo EEH setup for the indicated PCI device
876 * @dev: PCI device
877 * @purge_pe: remove the corresponding PE or not
878 *
879 * This routine must be called when a device is removed from the
880 * running system through hotplug or dlpar. The corresponding
881 * PCI address cache will be removed.
882 */
883void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe)
884{
885 struct pci_bus *bus = dev->subordinate;
886 struct pci_dev *child, *tmp;
887
888 eeh_remove_device(dev, purge_pe);
889
890 if (bus && dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
891 list_for_each_entry_safe(child, tmp, &bus->devices, bus_list)
892 eeh_remove_bus_device(child, purge_pe);
893 }
894}
895EXPORT_SYMBOL_GPL(eeh_remove_bus_device);
896
897static int proc_eeh_show(struct seq_file *m, void *v) 1023static int proc_eeh_show(struct seq_file *m, void *v)
898{ 1024{
899 if (0 == eeh_subsystem_enabled) { 1025 if (0 == eeh_subsystem_enabled) {
@@ -935,7 +1061,7 @@ static const struct file_operations proc_eeh_operations = {
935 1061
936static int __init eeh_init_proc(void) 1062static int __init eeh_init_proc(void)
937{ 1063{
938 if (machine_is(pseries)) 1064 if (machine_is(pseries) || machine_is(powernv))
939 proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations); 1065 proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
940 return 0; 1066 return 0;
941} 1067}
diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index 5ce3ba7ad137..e8c9fd546a5c 100644
--- a/arch/powerpc/platforms/pseries/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -68,16 +68,12 @@ static inline struct eeh_dev *__eeh_addr_cache_get_device(unsigned long addr)
68 struct pci_io_addr_range *piar; 68 struct pci_io_addr_range *piar;
69 piar = rb_entry(n, struct pci_io_addr_range, rb_node); 69 piar = rb_entry(n, struct pci_io_addr_range, rb_node);
70 70
71 if (addr < piar->addr_lo) { 71 if (addr < piar->addr_lo)
72 n = n->rb_left; 72 n = n->rb_left;
73 } else { 73 else if (addr > piar->addr_hi)
74 if (addr > piar->addr_hi) { 74 n = n->rb_right;
75 n = n->rb_right; 75 else
76 } else { 76 return piar->edev;
77 pci_dev_get(piar->pcidev);
78 return piar->edev;
79 }
80 }
81 } 77 }
82 78
83 return NULL; 79 return NULL;
@@ -156,7 +152,6 @@ eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
156 if (!piar) 152 if (!piar)
157 return NULL; 153 return NULL;
158 154
159 pci_dev_get(dev);
160 piar->addr_lo = alo; 155 piar->addr_lo = alo;
161 piar->addr_hi = ahi; 156 piar->addr_hi = ahi;
162 piar->edev = pci_dev_to_eeh_dev(dev); 157 piar->edev = pci_dev_to_eeh_dev(dev);
@@ -194,7 +189,7 @@ static void __eeh_addr_cache_insert_dev(struct pci_dev *dev)
194 } 189 }
195 190
196 /* Skip any devices for which EEH is not enabled. */ 191 /* Skip any devices for which EEH is not enabled. */
197 if (!edev->pe) { 192 if (!eeh_probe_mode_dev() && !edev->pe) {
198#ifdef DEBUG 193#ifdef DEBUG
199 pr_info("PCI: skip building address cache for=%s - %s\n", 194 pr_info("PCI: skip building address cache for=%s - %s\n",
200 pci_name(dev), dn->full_name); 195 pci_name(dev), dn->full_name);
@@ -250,7 +245,6 @@ restart:
250 245
251 if (piar->pcidev == dev) { 246 if (piar->pcidev == dev) {
252 rb_erase(n, &pci_io_addr_cache_root.rb_root); 247 rb_erase(n, &pci_io_addr_cache_root.rb_root);
253 pci_dev_put(piar->pcidev);
254 kfree(piar); 248 kfree(piar);
255 goto restart; 249 goto restart;
256 } 250 }
@@ -285,7 +279,7 @@ void eeh_addr_cache_rmv_dev(struct pci_dev *dev)
285 * Must be run late in boot process, after the pci controllers 279 * Must be run late in boot process, after the pci controllers
286 * have been scanned for devices (after all device resources are known). 280 * have been scanned for devices (after all device resources are known).
287 */ 281 */
288void __init eeh_addr_cache_build(void) 282void eeh_addr_cache_build(void)
289{ 283{
290 struct device_node *dn; 284 struct device_node *dn;
291 struct eeh_dev *edev; 285 struct eeh_dev *edev;
@@ -302,12 +296,10 @@ void __init eeh_addr_cache_build(void)
302 if (!edev) 296 if (!edev)
303 continue; 297 continue;
304 298
305 pci_dev_get(dev); /* matching put is in eeh_remove_device() */
306 dev->dev.archdata.edev = edev; 299 dev->dev.archdata.edev = edev;
307 edev->pdev = dev; 300 edev->pdev = dev;
308 301
309 eeh_addr_cache_insert_dev(dev); 302 eeh_addr_cache_insert_dev(dev);
310
311 eeh_sysfs_add_device(dev); 303 eeh_sysfs_add_device(dev);
312 } 304 }
313 305
@@ -316,4 +308,3 @@ void __init eeh_addr_cache_build(void)
316 eeh_addr_cache_print(&pci_io_addr_cache_root); 308 eeh_addr_cache_print(&pci_io_addr_cache_root);
317#endif 309#endif
318} 310}
319
diff --git a/arch/powerpc/platforms/pseries/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
index 1efa28f5fc54..1efa28f5fc54 100644
--- a/arch/powerpc/platforms/pseries/eeh_dev.c
+++ b/arch/powerpc/kernel/eeh_dev.c
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index a3fefb61097c..36bed5a12750 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -143,10 +143,14 @@ static void eeh_disable_irq(struct pci_dev *dev)
143static void eeh_enable_irq(struct pci_dev *dev) 143static void eeh_enable_irq(struct pci_dev *dev)
144{ 144{
145 struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); 145 struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
146 struct irq_desc *desc;
146 147
147 if ((edev->mode) & EEH_DEV_IRQ_DISABLED) { 148 if ((edev->mode) & EEH_DEV_IRQ_DISABLED) {
148 edev->mode &= ~EEH_DEV_IRQ_DISABLED; 149 edev->mode &= ~EEH_DEV_IRQ_DISABLED;
149 enable_irq(dev->irq); 150
151 desc = irq_to_desc(dev->irq);
152 if (desc && desc->depth > 0)
153 enable_irq(dev->irq);
150 } 154 }
151} 155}
152 156
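The new irq_to_desc() check guards against unbalanced enable_irq() calls: enabling an interrupt whose disable depth is already zero triggers a warning and can wreck the depth accounting, so the IRQ is only re-enabled when a matching disable_irq() is actually outstanding. Conceptually:

    /* desc->depth counts nested disable_irq() calls (<linux/irqdesc.h>);
     * only a depth > 0 means there is a disable left to balance. */
    struct irq_desc *desc = irq_to_desc(dev->irq);

    if (desc && desc->depth > 0)
    	enable_irq(dev->irq);	/* balances a prior disable_irq() */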
@@ -154,9 +158,9 @@ static void eeh_enable_irq(struct pci_dev *dev)
154 * eeh_report_error - Report pci error to each device driver 158 * eeh_report_error - Report pci error to each device driver
155 * @data: eeh device 159 * @data: eeh device
156 * @userdata: return value 160 * @userdata: return value
157 * 161 *
158 * Report an EEH error to each device driver, collect up and 162 * Report an EEH error to each device driver, collect up and
159 * merge the device driver responses. Cumulative response 163 * merge the device driver responses. Cumulative response
160 * passed back in "userdata". 164 * passed back in "userdata".
161 */ 165 */
162static void *eeh_report_error(void *data, void *userdata) 166static void *eeh_report_error(void *data, void *userdata)
@@ -338,6 +342,54 @@ static void *eeh_report_failure(void *data, void *userdata)
338 return NULL; 342 return NULL;
339} 343}
340 344
345static void *eeh_rmv_device(void *data, void *userdata)
346{
347 struct pci_driver *driver;
348 struct eeh_dev *edev = (struct eeh_dev *)data;
349 struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
350 int *removed = (int *)userdata;
351
352 /*
353 * Actually, we should remove the PCI bridges as well.
354 * However, that would add a lot of complexity, particularly
355 * since some of the devices under the bridge might
356 * support EEH. So we just care about PCI devices for
357 * simplicity here.
358 */
359 if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE))
360 return NULL;
361 driver = eeh_pcid_get(dev);
362 if (driver && driver->err_handler)
363 return NULL;
364
365 /* Remove it from PCI subsystem */
366 pr_debug("EEH: Removing %s without EEH sensitive driver\n",
367 pci_name(dev));
368 edev->bus = dev->bus;
369 edev->mode |= EEH_DEV_DISCONNECTED;
370 (*removed)++;
371
372 pci_stop_and_remove_bus_device(dev);
373
374 return NULL;
375}
376
377static void *eeh_pe_detach_dev(void *data, void *userdata)
378{
379 struct eeh_pe *pe = (struct eeh_pe *)data;
380 struct eeh_dev *edev, *tmp;
381
382 eeh_pe_for_each_dev(pe, edev, tmp) {
383 if (!(edev->mode & EEH_DEV_DISCONNECTED))
384 continue;
385
386 edev->mode &= ~(EEH_DEV_DISCONNECTED | EEH_DEV_IRQ_DISABLED);
387 eeh_rmv_from_parent_pe(edev);
388 }
389
390 return NULL;
391}
392
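Both new helpers follow the EEH traversal convention: a callback takes (void *data, void *userdata), where data is the current eeh_dev or eeh_pe, and a non-NULL return aborts the walk. Illustratively, this is how eeh_reset_device() below drives eeh_rmv_device() over every device in a PE:

    int removed = 0;

    /* eeh_rmv_device() is invoked once per eeh_dev in the PE; it
     * bumps "removed" for each driver-less device it hot-unplugs. */
    eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed);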
341/** 393/**
342 * eeh_reset_device - Perform actual reset of a pci slot 394 * eeh_reset_device - Perform actual reset of a pci slot
343 * @pe: EEH PE 395 * @pe: EEH PE
@@ -349,10 +401,13 @@ static void *eeh_report_failure(void *data, void *userdata)
349 */ 401 */
350static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) 402static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
351{ 403{
352 int cnt, rc; 404 struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
405 struct timeval tstamp;
406 int cnt, rc, removed = 0;
353 407
354 /* pcibios will clear the counter; save the value */ 408 /* pcibios will clear the counter; save the value */
355 cnt = pe->freeze_count; 409 cnt = pe->freeze_count;
410 tstamp = pe->tstamp;
356 411
357 /* 412 /*
358 * We don't remove the corresponding PE instances because 413 * We don't remove the corresponding PE instances because
@@ -360,8 +415,11 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
360 * devices are expected to be attached soon when calling 415 * devices are expected to be attached soon when calling
361 * into pcibios_add_pci_devices(). 416 * into pcibios_add_pci_devices().
362 */ 417 */
418 eeh_pe_state_mark(pe, EEH_PE_KEEP);
363 if (bus) 419 if (bus)
364 __pcibios_remove_pci_devices(bus, 0); 420 pcibios_remove_pci_devices(bus);
421 else if (frozen_bus)
422 eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed);
365 423
366 /* Reset the pci controller. (Asserts RST#; resets config space). 424 /* Reset the pci controller. (Asserts RST#; resets config space).
367 * Reconfigure bridges and devices. Don't try to bring the system 425 * Reconfigure bridges and devices. Don't try to bring the system
@@ -376,15 +434,32 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
376 eeh_pe_restore_bars(pe); 434 eeh_pe_restore_bars(pe);
377 435
378 /* Give the system 5 seconds to finish running the user-space 436 /* Give the system 5 seconds to finish running the user-space
379 * hotplug shutdown scripts, e.g. ifdown for ethernet. Yes, 437 * hotplug shutdown scripts, e.g. ifdown for ethernet. Yes,
380 * this is a hack, but if we don't do this, and try to bring 438 * this is a hack, but if we don't do this, and try to bring
381 * the device up before the scripts have taken it down, 439 * the device up before the scripts have taken it down,
382 * potentially weird things happen. 440 * potentially weird things happen.
383 */ 441 */
384 if (bus) { 442 if (bus) {
443 pr_info("EEH: Sleep 5s ahead of complete hotplug\n");
385 ssleep(5); 444 ssleep(5);
445
446 /*
447 * The EEH device is still connected with its parent
448 * PE. We should disconnect it so the binding can be
449 * rebuilt when adding PCI devices.
450 */
451 eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
386 pcibios_add_pci_devices(bus); 452 pcibios_add_pci_devices(bus);
453 } else if (frozen_bus && removed) {
454 pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
455 ssleep(5);
456
457 eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
458 pcibios_add_pci_devices(frozen_bus);
387 } 459 }
460 eeh_pe_state_clear(pe, EEH_PE_KEEP);
461
462 pe->tstamp = tstamp;
388 pe->freeze_count = cnt; 463 pe->freeze_count = cnt;
389 464
390 return 0; 465 return 0;
@@ -395,24 +470,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
395 */ 470 */
396#define MAX_WAIT_FOR_RECOVERY 150 471#define MAX_WAIT_FOR_RECOVERY 150
397 472
398/** 473static void eeh_handle_normal_event(struct eeh_pe *pe)
399 * eeh_handle_event - Reset a PCI device after hard lockup.
400 * @pe: EEH PE
401 *
402 * While PHB detects address or data parity errors on particular PCI
403 * slot, the associated PE will be frozen. Besides, DMA's occurring
404 * to wild addresses (which usually happen due to bugs in device
405 * drivers or in PCI adapter firmware) can cause EEH error. #SERR,
406 * #PERR or other misc PCI-related errors also can trigger EEH errors.
407 *
408 * Recovery process consists of unplugging the device driver (which
409 * generated hotplug events to userspace), then issuing a PCI #RST to
410 * the device, then reconfiguring the PCI config space for all bridges
411 * & devices under this slot, and then finally restarting the device
412 * drivers (which cause a second set of hotplug events to go out to
413 * userspace).
414 */
415void eeh_handle_event(struct eeh_pe *pe)
416{ 474{
417 struct pci_bus *frozen_bus; 475 struct pci_bus *frozen_bus;
418 int rc = 0; 476 int rc = 0;
@@ -425,6 +483,7 @@ void eeh_handle_event(struct eeh_pe *pe)
425 return; 483 return;
426 } 484 }
427 485
486 eeh_pe_update_time_stamp(pe);
428 pe->freeze_count++; 487 pe->freeze_count++;
429 if (pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) 488 if (pe->freeze_count > EEH_MAX_ALLOWED_FREEZES)
430 goto excess_failures; 489 goto excess_failures;
@@ -437,6 +496,7 @@ void eeh_handle_event(struct eeh_pe *pe)
437 * status ... if any child can't handle the reset, then the entire 496 * status ... if any child can't handle the reset, then the entire
438 * slot is dlpar removed and added. 497 * slot is dlpar removed and added.
439 */ 498 */
499 pr_info("EEH: Notify device drivers to shutdown\n");
440 eeh_pe_dev_traverse(pe, eeh_report_error, &result); 500 eeh_pe_dev_traverse(pe, eeh_report_error, &result);
441 501
442 /* Get the current PCI slot state. This can take a long time, 502 /* Get the current PCI slot state. This can take a long time,
@@ -444,7 +504,7 @@ void eeh_handle_event(struct eeh_pe *pe)
444 */ 504 */
445 rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000); 505 rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
446 if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { 506 if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
447 printk(KERN_WARNING "EEH: Permanent failure\n"); 507 pr_warning("EEH: Permanent failure\n");
448 goto hard_fail; 508 goto hard_fail;
449 } 509 }
450 510
@@ -452,6 +512,7 @@ void eeh_handle_event(struct eeh_pe *pe)
452 * don't post the error log until after all dev drivers 512 * don't post the error log until after all dev drivers
453 * have been informed. 513 * have been informed.
454 */ 514 */
515 pr_info("EEH: Collect temporary log\n");
455 eeh_slot_error_detail(pe, EEH_LOG_TEMP); 516 eeh_slot_error_detail(pe, EEH_LOG_TEMP);
456 517
457 /* If all device drivers were EEH-unaware, then shut 518 /* If all device drivers were EEH-unaware, then shut
@@ -459,15 +520,18 @@ void eeh_handle_event(struct eeh_pe *pe)
459 * go down willingly, without panicing the system. 520 * go down willingly, without panicing the system.
460 */ 521 */
461 if (result == PCI_ERS_RESULT_NONE) { 522 if (result == PCI_ERS_RESULT_NONE) {
523 pr_info("EEH: Reset with hotplug activity\n");
462 rc = eeh_reset_device(pe, frozen_bus); 524 rc = eeh_reset_device(pe, frozen_bus);
463 if (rc) { 525 if (rc) {
464 printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc); 526 pr_warning("%s: Unable to reset, err=%d\n",
527 __func__, rc);
465 goto hard_fail; 528 goto hard_fail;
466 } 529 }
467 } 530 }
468 531
469 /* If all devices reported they can proceed, then re-enable MMIO */ 532 /* If all devices reported they can proceed, then re-enable MMIO */
470 if (result == PCI_ERS_RESULT_CAN_RECOVER) { 533 if (result == PCI_ERS_RESULT_CAN_RECOVER) {
534 pr_info("EEH: Enable I/O for affected devices\n");
471 rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); 535 rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
472 536
473 if (rc < 0) 537 if (rc < 0)
@@ -475,6 +539,7 @@ void eeh_handle_event(struct eeh_pe *pe)
475 if (rc) { 539 if (rc) {
476 result = PCI_ERS_RESULT_NEED_RESET; 540 result = PCI_ERS_RESULT_NEED_RESET;
477 } else { 541 } else {
542 pr_info("EEH: Notify device drivers to resume I/O\n");
478 result = PCI_ERS_RESULT_NONE; 543 result = PCI_ERS_RESULT_NONE;
479 eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result); 544 eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result);
480 } 545 }
@@ -482,6 +547,7 @@ void eeh_handle_event(struct eeh_pe *pe)
482 547
483 /* If all devices reported they can proceed, then re-enable DMA */ 548 /* If all devices reported they can proceed, then re-enable DMA */
484 if (result == PCI_ERS_RESULT_CAN_RECOVER) { 549 if (result == PCI_ERS_RESULT_CAN_RECOVER) {
550 pr_info("EEH: Enable DMA for affected devices\n");
485 rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); 551 rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
486 552
487 if (rc < 0) 553 if (rc < 0)
@@ -494,17 +560,22 @@ void eeh_handle_event(struct eeh_pe *pe)
494 560
495 /* If any device has a hard failure, then shut off everything. */ 561 /* If any device has a hard failure, then shut off everything. */
496 if (result == PCI_ERS_RESULT_DISCONNECT) { 562 if (result == PCI_ERS_RESULT_DISCONNECT) {
497 printk(KERN_WARNING "EEH: Device driver gave up\n"); 563 pr_warning("EEH: Device driver gave up\n");
498 goto hard_fail; 564 goto hard_fail;
499 } 565 }
500 566
501 /* If any device called out for a reset, then reset the slot */ 567 /* If any device called out for a reset, then reset the slot */
502 if (result == PCI_ERS_RESULT_NEED_RESET) { 568 if (result == PCI_ERS_RESULT_NEED_RESET) {
569 pr_info("EEH: Reset without hotplug activity\n");
503 rc = eeh_reset_device(pe, NULL); 570 rc = eeh_reset_device(pe, NULL);
504 if (rc) { 571 if (rc) {
505 printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc); 572 pr_warning("%s: Cannot reset, err=%d\n",
573 __func__, rc);
506 goto hard_fail; 574 goto hard_fail;
507 } 575 }
576
577 pr_info("EEH: Notify device drivers "
578 "the completion of reset\n");
508 result = PCI_ERS_RESULT_NONE; 579 result = PCI_ERS_RESULT_NONE;
509 eeh_pe_dev_traverse(pe, eeh_report_reset, &result); 580 eeh_pe_dev_traverse(pe, eeh_report_reset, &result);
510 } 581 }
@@ -512,15 +583,16 @@ void eeh_handle_event(struct eeh_pe *pe)
512 /* All devices should claim they have recovered by now. */ 583 /* All devices should claim they have recovered by now. */
513 if ((result != PCI_ERS_RESULT_RECOVERED) && 584 if ((result != PCI_ERS_RESULT_RECOVERED) &&
514 (result != PCI_ERS_RESULT_NONE)) { 585 (result != PCI_ERS_RESULT_NONE)) {
515 printk(KERN_WARNING "EEH: Not recovered\n"); 586 pr_warning("EEH: Not recovered\n");
516 goto hard_fail; 587 goto hard_fail;
517 } 588 }
518 589
519 /* Tell all device drivers that they can resume operations */ 590 /* Tell all device drivers that they can resume operations */
591 pr_info("EEH: Notify device drivers to resume\n");
520 eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); 592 eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
521 593
522 return; 594 return;
523 595
524excess_failures: 596excess_failures:
525 /* 597 /*
526 * About 90% of all real-life EEH failures in the field 598 * About 90% of all real-life EEH failures in the field
@@ -550,3 +622,111 @@ perm_error:
550 pcibios_remove_pci_devices(frozen_bus); 622 pcibios_remove_pci_devices(frozen_bus);
551} 623}
552 624
625static void eeh_handle_special_event(void)
626{
627 struct eeh_pe *pe, *phb_pe;
628 struct pci_bus *bus;
629 struct pci_controller *hose, *tmp;
630 unsigned long flags;
631 int rc = 0;
632
633 /*
634 * The return value from next_error() has been classified as follows.
635 * It might be good to enumerate them. However, next_error() is only
636 * supported by the PowerNV platform for now, so it is fine to use
637 * the integers directly:
638 *
639 * 4 - Dead IOC 3 - Dead PHB
640 * 2 - Fenced PHB 1 - Frozen PE
641 * 0 - No error found
642 *
643 */
644 rc = eeh_ops->next_error(&pe);
645 if (rc <= 0)
646 return;
647
648 switch (rc) {
649 case 4:
650 /* Mark all PHBs in dead state */
651 eeh_serialize_lock(&flags);
652 list_for_each_entry_safe(hose, tmp,
653 &hose_list, list_node) {
654 phb_pe = eeh_phb_pe_get(hose);
655 if (!phb_pe) continue;
656
657 eeh_pe_state_mark(phb_pe,
658 EEH_PE_ISOLATED | EEH_PE_PHB_DEAD);
659 }
660 eeh_serialize_unlock(flags);
661
662 /* Purge all events */
663 eeh_remove_event(NULL);
664 break;
665 case 3:
666 case 2:
667 case 1:
668 /* Mark the PE in fenced state */
669 eeh_serialize_lock(&flags);
670 if (rc == 3)
671 eeh_pe_state_mark(pe,
672 EEH_PE_ISOLATED | EEH_PE_PHB_DEAD);
673 else
674 eeh_pe_state_mark(pe,
675 EEH_PE_ISOLATED | EEH_PE_RECOVERING);
676 eeh_serialize_unlock(flags);
677
678 /* Purge all events of the PHB */
679 eeh_remove_event(pe);
680 break;
681 default:
682 pr_err("%s: Invalid value %d from next_error()\n",
683 __func__, rc);
684 return;
685 }
686
687 /*
688 * A fenced PHB or frozen PE is handled as a normal
689 * event. For a dead PHB or IOC, we have to remove the
690 * affected PHBs.
691 */
692 if (rc == 2 || rc == 1)
693 eeh_handle_normal_event(pe);
694 else {
695 list_for_each_entry_safe(hose, tmp,
696 &hose_list, list_node) {
697 phb_pe = eeh_phb_pe_get(hose);
698 if (!phb_pe || !(phb_pe->state & EEH_PE_PHB_DEAD))
699 continue;
700
701 bus = eeh_pe_bus_get(phb_pe);
702 /* Notify all devices that they're about to go down. */
703 eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
704 pcibios_remove_pci_devices(bus);
705 }
706 }
707}
708
709/**
710 * eeh_handle_event - Reset a PCI device after hard lockup.
711 * @pe: EEH PE
712 *
713 * While PHB detects address or data parity errors on particular PCI
714 * slot, the associated PE will be frozen. Besides, DMA's occurring
715 * to wild addresses (which usually happen due to bugs in device
716 * drivers or in PCI adapter firmware) can cause EEH error. #SERR,
717 * #PERR or other misc PCI-related errors also can trigger EEH errors.
718 *
719 * Recovery process consists of unplugging the device driver (which
720 * generated hotplug events to userspace), then issuing a PCI #RST to
721 * the device, then reconfiguring the PCI config space for all bridges
722 * & devices under this slot, and then finally restarting the device
723 * drivers (which cause a second set of hotplug events to go out to
724 * userspace).
725 */
726void eeh_handle_event(struct eeh_pe *pe)
727{
728 if (pe)
729 eeh_handle_normal_event(pe);
730 else
731 eeh_handle_special_event();
732}
diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
index 185bedd926df..d27c5afc90ae 100644
--- a/arch/powerpc/platforms/pseries/eeh_event.c
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -18,11 +18,10 @@
18 18
19#include <linux/delay.h> 19#include <linux/delay.h>
20#include <linux/list.h> 20#include <linux/list.h>
21#include <linux/mutex.h>
22#include <linux/sched.h> 21#include <linux/sched.h>
22#include <linux/semaphore.h>
23#include <linux/pci.h> 23#include <linux/pci.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/workqueue.h>
26#include <linux/kthread.h> 25#include <linux/kthread.h>
27#include <asm/eeh_event.h> 26#include <asm/eeh_event.h>
28#include <asm/ppc-pci.h> 27#include <asm/ppc-pci.h>
@@ -35,14 +34,9 @@
35 * work-queue, where a worker thread can drive recovery. 34 * work-queue, where a worker thread can drive recovery.
36 */ 35 */
37 36
38/* EEH event workqueue setup. */
39static DEFINE_SPINLOCK(eeh_eventlist_lock); 37static DEFINE_SPINLOCK(eeh_eventlist_lock);
38static struct semaphore eeh_eventlist_sem;
40LIST_HEAD(eeh_eventlist); 39LIST_HEAD(eeh_eventlist);
41static void eeh_thread_launcher(struct work_struct *);
42DECLARE_WORK(eeh_event_wq, eeh_thread_launcher);
43
44/* Serialize reset sequences for a given pci device */
45DEFINE_MUTEX(eeh_event_mutex);
46 40
47/** 41/**
48 * eeh_event_handler - Dispatch EEH events. 42 * eeh_event_handler - Dispatch EEH events.
@@ -60,55 +54,63 @@ static int eeh_event_handler(void * dummy)
60 struct eeh_event *event; 54 struct eeh_event *event;
61 struct eeh_pe *pe; 55 struct eeh_pe *pe;
62 56
63 spin_lock_irqsave(&eeh_eventlist_lock, flags); 57 while (!kthread_should_stop()) {
64 event = NULL; 58 if (down_interruptible(&eeh_eventlist_sem))
65 59 break;
66 /* Unqueue the event, get ready to process. */ 60
67 if (!list_empty(&eeh_eventlist)) { 61 /* Fetch EEH event from the queue */
68 event = list_entry(eeh_eventlist.next, struct eeh_event, list); 62 spin_lock_irqsave(&eeh_eventlist_lock, flags);
69 list_del(&event->list); 63 event = NULL;
70 } 64 if (!list_empty(&eeh_eventlist)) {
71 spin_unlock_irqrestore(&eeh_eventlist_lock, flags); 65 event = list_entry(eeh_eventlist.next,
72 66 struct eeh_event, list);
73 if (event == NULL) 67 list_del(&event->list);
74 return 0; 68 }
75 69 spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
76 /* Serialize processing of EEH events */ 70 if (!event)
77 mutex_lock(&eeh_event_mutex); 71 continue;
78 pe = event->pe; 72
 79 eeh_pe_state_mark(pe, EEH_PE_RECOVERING); 73 /* We might have an event without a bound PE */
80 pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n", 74 pe = event->pe;
81 pe->phb->global_number, pe->addr); 75 if (pe) {
82 76 eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
83 set_current_state(TASK_INTERRUPTIBLE); /* Don't add to load average */ 77 pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n",
84 eeh_handle_event(pe); 78 pe->phb->global_number, pe->addr);
85 eeh_pe_state_clear(pe, EEH_PE_RECOVERING); 79 eeh_handle_event(pe);
86 80 eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
87 kfree(event); 81 } else {
88 mutex_unlock(&eeh_event_mutex); 82 eeh_handle_event(NULL);
89 83 }
90 /* If there are no new errors after an hour, clear the counter. */ 84
91 if (pe && pe->freeze_count > 0) { 85 kfree(event);
92 msleep_interruptible(3600*1000);
93 if (pe->freeze_count > 0)
94 pe->freeze_count--;
95
96 } 86 }
97 87
98 return 0; 88 return 0;
99} 89}
100 90
101/** 91/**
102 * eeh_thread_launcher - Start kernel thread to handle EEH events 92 * eeh_event_init - Start kernel thread to handle EEH events
103 * @dummy - unused
104 * 93 *
105 * This routine is called to start the kernel thread for processing 94 * This routine is called to start the kernel thread for processing
106 * EEH event. 95 * EEH event.
107 */ 96 */
108static void eeh_thread_launcher(struct work_struct *dummy) 97int eeh_event_init(void)
109{ 98{
110 if (IS_ERR(kthread_run(eeh_event_handler, NULL, "eehd"))) 99 struct task_struct *t;
111 printk(KERN_ERR "Failed to start EEH daemon\n"); 100 int ret = 0;
101
102 /* Initialize semaphore */
103 sema_init(&eeh_eventlist_sem, 0);
104
105 t = kthread_run(eeh_event_handler, NULL, "eehd");
106 if (IS_ERR(t)) {
107 ret = PTR_ERR(t);
108 pr_err("%s: Failed to start EEH daemon (%d)\n",
109 __func__, ret);
110 return ret;
111 }
112
113 return 0;
112} 114}
113 115
114/** 116/**
@@ -136,7 +138,45 @@ int eeh_send_failure_event(struct eeh_pe *pe)
136 list_add(&event->list, &eeh_eventlist); 138 list_add(&event->list, &eeh_eventlist);
137 spin_unlock_irqrestore(&eeh_eventlist_lock, flags); 139 spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
138 140
 139 schedule_work(&eeh_event_wq); 141 /* For the EEH daemon to kick in */
142 up(&eeh_eventlist_sem);
140 143
141 return 0; 144 return 0;
142} 145}
146
147/**
148 * eeh_remove_event - Remove EEH event from the queue
149 * @pe: Event binding to the PE
150 *
 151 * On the PowerNV platform, we might see subsequent events that are
 152 * really part of a former one. In that case, those subsequent
 153 * events are duplicates and unnecessary, and thus they should
 154 * be removed.
155 */
156void eeh_remove_event(struct eeh_pe *pe)
157{
158 unsigned long flags;
159 struct eeh_event *event, *tmp;
160
161 spin_lock_irqsave(&eeh_eventlist_lock, flags);
162 list_for_each_entry_safe(event, tmp, &eeh_eventlist, list) {
163 /*
 164 * If we don't have a valid PE passed in, that means
 165 * we already have an event corresponding to a dead IOC
 166 * and all events should be purged.
167 */
168 if (!pe) {
169 list_del(&event->list);
170 kfree(event);
171 } else if (pe->type & EEH_PE_PHB) {
172 if (event->pe && event->pe->phb == pe->phb) {
173 list_del(&event->list);
174 kfree(event);
175 }
176 } else if (event->pe == pe) {
177 list_del(&event->list);
178 kfree(event);
179 }
180 }
181 spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
182}
diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 9d4a9e8562b2..f9450537e335 100644
--- a/arch/powerpc/platforms/pseries/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -22,6 +22,7 @@
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */ 23 */
24 24
25#include <linux/delay.h>
25#include <linux/export.h> 26#include <linux/export.h>
26#include <linux/gfp.h> 27#include <linux/gfp.h>
27#include <linux/init.h> 28#include <linux/init.h>
@@ -78,9 +79,7 @@ int eeh_phb_pe_create(struct pci_controller *phb)
78 } 79 }
79 80
80 /* Put it into the list */ 81 /* Put it into the list */
81 eeh_lock();
82 list_add_tail(&pe->child, &eeh_phb_pe); 82 list_add_tail(&pe->child, &eeh_phb_pe);
83 eeh_unlock();
84 83
85 pr_debug("EEH: Add PE for PHB#%d\n", phb->global_number); 84 pr_debug("EEH: Add PE for PHB#%d\n", phb->global_number);
86 85
@@ -95,7 +94,7 @@ int eeh_phb_pe_create(struct pci_controller *phb)
95 * hierarchy tree is composed of PHB PEs. The function is used 94 * hierarchy tree is composed of PHB PEs. The function is used
96 * to retrieve the corresponding PHB PE according to the given PHB. 95 * to retrieve the corresponding PHB PE according to the given PHB.
97 */ 96 */
98static struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb) 97struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb)
99{ 98{
100 struct eeh_pe *pe; 99 struct eeh_pe *pe;
101 100
@@ -150,8 +149,8 @@ static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe,
150 * callback returns something other than NULL, or no more PEs 149 * callback returns something other than NULL, or no more PEs
151 * to be traversed. 150 * to be traversed.
152 */ 151 */
153static void *eeh_pe_traverse(struct eeh_pe *root, 152void *eeh_pe_traverse(struct eeh_pe *root,
154 eeh_traverse_func fn, void *flag) 153 eeh_traverse_func fn, void *flag)
155{ 154{
156 struct eeh_pe *pe; 155 struct eeh_pe *pe;
157 void *ret; 156 void *ret;
@@ -177,7 +176,7 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root,
177 eeh_traverse_func fn, void *flag) 176 eeh_traverse_func fn, void *flag)
178{ 177{
179 struct eeh_pe *pe; 178 struct eeh_pe *pe;
180 struct eeh_dev *edev; 179 struct eeh_dev *edev, *tmp;
181 void *ret; 180 void *ret;
182 181
183 if (!root) { 182 if (!root) {
@@ -185,21 +184,15 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root,
185 return NULL; 184 return NULL;
186 } 185 }
187 186
188 eeh_lock();
189
190 /* Traverse root PE */ 187 /* Traverse root PE */
191 for (pe = root; pe; pe = eeh_pe_next(pe, root)) { 188 for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
192 eeh_pe_for_each_dev(pe, edev) { 189 eeh_pe_for_each_dev(pe, edev, tmp) {
193 ret = fn(edev, flag); 190 ret = fn(edev, flag);
194 if (ret) { 191 if (ret)
195 eeh_unlock();
196 return ret; 192 return ret;
197 }
198 } 193 }
199 } 194 }
200 195
201 eeh_unlock();
202
203 return NULL; 196 return NULL;
204} 197}
205 198
@@ -228,7 +221,7 @@ static void *__eeh_pe_get(void *data, void *flag)
228 return pe; 221 return pe;
229 222
230 /* Try BDF address */ 223 /* Try BDF address */
231 if (edev->pe_config_addr && 224 if (edev->config_addr &&
232 (edev->config_addr == pe->config_addr)) 225 (edev->config_addr == pe->config_addr))
233 return pe; 226 return pe;
234 227
@@ -246,7 +239,7 @@ static void *__eeh_pe_get(void *data, void *flag)
246 * which is composed of PCI bus/device/function number, or unified 239 * which is composed of PCI bus/device/function number, or unified
247 * PE address. 240 * PE address.
248 */ 241 */
249static struct eeh_pe *eeh_pe_get(struct eeh_dev *edev) 242struct eeh_pe *eeh_pe_get(struct eeh_dev *edev)
250{ 243{
251 struct eeh_pe *root = eeh_phb_pe_get(edev->phb); 244 struct eeh_pe *root = eeh_phb_pe_get(edev->phb);
252 struct eeh_pe *pe; 245 struct eeh_pe *pe;
@@ -305,8 +298,6 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
305{ 298{
306 struct eeh_pe *pe, *parent; 299 struct eeh_pe *pe, *parent;
307 300
308 eeh_lock();
309
310 /* 301 /*
311 * Search the PE has been existing or not according 302 * Search the PE has been existing or not according
312 * to the PE address. If that has been existing, the 303 * to the PE address. If that has been existing, the
@@ -316,7 +307,6 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
316 pe = eeh_pe_get(edev); 307 pe = eeh_pe_get(edev);
317 if (pe && !(pe->type & EEH_PE_INVALID)) { 308 if (pe && !(pe->type & EEH_PE_INVALID)) {
318 if (!edev->pe_config_addr) { 309 if (!edev->pe_config_addr) {
319 eeh_unlock();
320 pr_err("%s: PE with addr 0x%x already exists\n", 310 pr_err("%s: PE with addr 0x%x already exists\n",
321 __func__, edev->config_addr); 311 __func__, edev->config_addr);
322 return -EEXIST; 312 return -EEXIST;
@@ -328,7 +318,6 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
328 318
329 /* Put the edev to PE */ 319 /* Put the edev to PE */
330 list_add_tail(&edev->list, &pe->edevs); 320 list_add_tail(&edev->list, &pe->edevs);
331 eeh_unlock();
332 pr_debug("EEH: Add %s to Bus PE#%x\n", 321 pr_debug("EEH: Add %s to Bus PE#%x\n",
333 edev->dn->full_name, pe->addr); 322 edev->dn->full_name, pe->addr);
334 323
@@ -344,10 +333,9 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
344 while (parent) { 333 while (parent) {
345 if (!(parent->type & EEH_PE_INVALID)) 334 if (!(parent->type & EEH_PE_INVALID))
346 break; 335 break;
347 parent->type &= ~EEH_PE_INVALID; 336 parent->type &= ~(EEH_PE_INVALID | EEH_PE_KEEP);
348 parent = parent->parent; 337 parent = parent->parent;
349 } 338 }
350 eeh_unlock();
351 pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n", 339 pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
352 edev->dn->full_name, pe->addr, pe->parent->addr); 340 edev->dn->full_name, pe->addr, pe->parent->addr);
353 341
@@ -357,7 +345,6 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
357 /* Create a new EEH PE */ 345 /* Create a new EEH PE */
358 pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE); 346 pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE);
359 if (!pe) { 347 if (!pe) {
360 eeh_unlock();
361 pr_err("%s: out of memory!\n", __func__); 348 pr_err("%s: out of memory!\n", __func__);
362 return -ENOMEM; 349 return -ENOMEM;
363 } 350 }
@@ -365,6 +352,17 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
365 pe->config_addr = edev->config_addr; 352 pe->config_addr = edev->config_addr;
366 353
367 /* 354 /*
 355 * While doing a PE reset, we probably hot-reset the
 356 * upstream bridge. However, the PCI devices, including
 357 * the associated EEH devices, might be removed while the
 358 * EEH core is doing recovery. So it isn't safe to retrieve
 359 * the bridge through a downstream EEH device; we have to
 360 * trace the parent PCI bus, then the upstream bridge.
361 */
362 if (eeh_probe_mode_dev())
363 pe->bus = eeh_dev_to_pci_dev(edev)->bus;
364
365 /*
368 * Put the new EEH PE into hierarchy tree. If the parent 366 * Put the new EEH PE into hierarchy tree. If the parent
369 * can't be found, the newly created PE will be attached 367 * can't be found, the newly created PE will be attached
370 * to PHB directly. Otherwise, we have to associate the 368 * to PHB directly. Otherwise, we have to associate the
@@ -374,7 +372,6 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
374 if (!parent) { 372 if (!parent) {
375 parent = eeh_phb_pe_get(edev->phb); 373 parent = eeh_phb_pe_get(edev->phb);
376 if (!parent) { 374 if (!parent) {
377 eeh_unlock();
378 pr_err("%s: No PHB PE is found (PHB Domain=%d)\n", 375 pr_err("%s: No PHB PE is found (PHB Domain=%d)\n",
379 __func__, edev->phb->global_number); 376 __func__, edev->phb->global_number);
380 edev->pe = NULL; 377 edev->pe = NULL;
@@ -391,7 +388,6 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
391 list_add_tail(&pe->child, &parent->child_list); 388 list_add_tail(&pe->child, &parent->child_list);
392 list_add_tail(&edev->list, &pe->edevs); 389 list_add_tail(&edev->list, &pe->edevs);
393 edev->pe = pe; 390 edev->pe = pe;
394 eeh_unlock();
395 pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n", 391 pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
396 edev->dn->full_name, pe->addr, pe->parent->addr); 392 edev->dn->full_name, pe->addr, pe->parent->addr);
397 393
@@ -401,26 +397,23 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
401/** 397/**
402 * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE 398 * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE
403 * @edev: EEH device 399 * @edev: EEH device
404 * @purge_pe: remove PE or not
405 * 400 *
406 * The PE hierarchy tree might be changed when doing PCI hotplug. 401 * The PE hierarchy tree might be changed when doing PCI hotplug.
407 * Also, the PCI devices or buses could be removed from the system 402 * Also, the PCI devices or buses could be removed from the system
408 * during EEH recovery. So we have to call the function remove the 403 * during EEH recovery. So we have to call the function remove the
409 * corresponding PE accordingly if necessary. 404 * corresponding PE accordingly if necessary.
410 */ 405 */
411int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe) 406int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
412{ 407{
413 struct eeh_pe *pe, *parent, *child; 408 struct eeh_pe *pe, *parent, *child;
414 int cnt; 409 int cnt;
415 410
416 if (!edev->pe) { 411 if (!edev->pe) {
417 pr_warning("%s: No PE found for EEH device %s\n", 412 pr_debug("%s: No PE found for EEH device %s\n",
418 __func__, edev->dn->full_name); 413 __func__, edev->dn->full_name);
419 return -EEXIST; 414 return -EEXIST;
420 } 415 }
421 416
422 eeh_lock();
423
424 /* Remove the EEH device */ 417 /* Remove the EEH device */
425 pe = edev->pe; 418 pe = edev->pe;
426 edev->pe = NULL; 419 edev->pe = NULL;
@@ -437,7 +430,7 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe)
437 if (pe->type & EEH_PE_PHB) 430 if (pe->type & EEH_PE_PHB)
438 break; 431 break;
439 432
440 if (purge_pe) { 433 if (!(pe->state & EEH_PE_KEEP)) {
441 if (list_empty(&pe->edevs) && 434 if (list_empty(&pe->edevs) &&
442 list_empty(&pe->child_list)) { 435 list_empty(&pe->child_list)) {
443 list_del(&pe->child); 436 list_del(&pe->child);
@@ -465,12 +458,37 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe)
465 pe = parent; 458 pe = parent;
466 } 459 }
467 460
468 eeh_unlock();
469
470 return 0; 461 return 0;
471} 462}
472 463
473/** 464/**
465 * eeh_pe_update_time_stamp - Update PE's frozen time stamp
466 * @pe: EEH PE
467 *
 468 * We keep a time stamp for each PE to trace when it got
 469 * frozen within the last hour. The function should be called to
 470 * update the time stamp on the first error of a specific PE. On
 471 * the other hand, errors older than an hour needn't be counted.
472 */
473void eeh_pe_update_time_stamp(struct eeh_pe *pe)
474{
475 struct timeval tstamp;
476
477 if (!pe) return;
478
479 if (pe->freeze_count <= 0) {
480 pe->freeze_count = 0;
481 do_gettimeofday(&pe->tstamp);
482 } else {
483 do_gettimeofday(&tstamp);
484 if (tstamp.tv_sec - pe->tstamp.tv_sec > 3600) {
485 pe->tstamp = tstamp;
486 pe->freeze_count = 0;
487 }
488 }
489}
490
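
eeh_pe_update_time_stamp() implements a sliding one-hour window: the stamp is (re)taken on the first error of a PE, and once an hour has elapsed the freeze count starts over. In the recovery path (not part of this hunk) the counter is then consumed roughly as in this hedged sketch; the constant is the existing EEH limit, and the placement is illustrative:

	/* Illustrative caller fragment, not code from this hunk. */
	eeh_pe_update_time_stamp(pe);	/* restart the 1h window if it is stale */
	pe->freeze_count++;
	if (pe->freeze_count > EEH_MAX_ALLOWED_FREEZES)
		goto excess_failures;	/* too many freezes within one hour */
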
491/**
474 * __eeh_pe_state_mark - Mark the state for the PE 492 * __eeh_pe_state_mark - Mark the state for the PE
475 * @data: EEH PE 493 * @data: EEH PE
476 * @flag: state 494 * @flag: state
@@ -483,7 +501,7 @@ static void *__eeh_pe_state_mark(void *data, void *flag)
483{ 501{
484 struct eeh_pe *pe = (struct eeh_pe *)data; 502 struct eeh_pe *pe = (struct eeh_pe *)data;
485 int state = *((int *)flag); 503 int state = *((int *)flag);
486 struct eeh_dev *tmp; 504 struct eeh_dev *edev, *tmp;
487 struct pci_dev *pdev; 505 struct pci_dev *pdev;
488 506
489 /* 507 /*
@@ -493,8 +511,8 @@ static void *__eeh_pe_state_mark(void *data, void *flag)
493 * the PCI device driver. 511 * the PCI device driver.
494 */ 512 */
495 pe->state |= state; 513 pe->state |= state;
496 eeh_pe_for_each_dev(pe, tmp) { 514 eeh_pe_for_each_dev(pe, edev, tmp) {
497 pdev = eeh_dev_to_pci_dev(tmp); 515 pdev = eeh_dev_to_pci_dev(edev);
498 if (pdev) 516 if (pdev)
499 pdev->error_state = pci_channel_io_frozen; 517 pdev->error_state = pci_channel_io_frozen;
500 } 518 }
@@ -512,9 +530,7 @@ static void *__eeh_pe_state_mark(void *data, void *flag)
512 */ 530 */
513void eeh_pe_state_mark(struct eeh_pe *pe, int state) 531void eeh_pe_state_mark(struct eeh_pe *pe, int state)
514{ 532{
515 eeh_lock();
516 eeh_pe_traverse(pe, __eeh_pe_state_mark, &state); 533 eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
517 eeh_unlock();
518} 534}
519 535
520/** 536/**
@@ -548,35 +564,135 @@ static void *__eeh_pe_state_clear(void *data, void *flag)
548 */ 564 */
549void eeh_pe_state_clear(struct eeh_pe *pe, int state) 565void eeh_pe_state_clear(struct eeh_pe *pe, int state)
550{ 566{
551 eeh_lock();
552 eeh_pe_traverse(pe, __eeh_pe_state_clear, &state); 567 eeh_pe_traverse(pe, __eeh_pe_state_clear, &state);
553 eeh_unlock();
554} 568}
555 569
556/** 570/*
557 * eeh_restore_one_device_bars - Restore the Base Address Registers for one device 571 * Some PCI bridges (e.g. PLX bridges) have primary/secondary
558 * @data: EEH device 572 * buses assigned explicitly by firmware, and we probably have
559 * @flag: Unused 573 * lost that after reset. So we have to delay the check until
574 * the PCI-CFG registers have been restored for the parent
575 * bridge.
560 * 576 *
 561 * Loads the PCI configuration space base address registers, 577 * Don't use the normal PCI-CFG accessors, which have probably
 562 * the expansion ROM base address, the latency timer, and etc. 578 * been blocked on the normal path during this stage. Instead, use
 563 * from the saved values in the device node. 579 * the EEH operations, which are always permitted.
564 */ 580 */
565static void *eeh_restore_one_device_bars(void *data, void *flag) 581static void eeh_bridge_check_link(struct eeh_dev *edev,
582 struct device_node *dn)
583{
584 int cap;
585 uint32_t val;
586 int timeout = 0;
587
588 /*
589 * We only check root port and downstream ports of
590 * PCIe switches
591 */
592 if (!(edev->mode & (EEH_DEV_ROOT_PORT | EEH_DEV_DS_PORT)))
593 return;
594
595 pr_debug("%s: Check PCIe link for %04x:%02x:%02x.%01x ...\n",
596 __func__, edev->phb->global_number,
597 edev->config_addr >> 8,
598 PCI_SLOT(edev->config_addr & 0xFF),
599 PCI_FUNC(edev->config_addr & 0xFF));
600
601 /* Check slot status */
602 cap = edev->pcie_cap;
603 eeh_ops->read_config(dn, cap + PCI_EXP_SLTSTA, 2, &val);
604 if (!(val & PCI_EXP_SLTSTA_PDS)) {
605 pr_debug(" No card in the slot (0x%04x) !\n", val);
606 return;
607 }
608
609 /* Check power status if we have the capability */
610 eeh_ops->read_config(dn, cap + PCI_EXP_SLTCAP, 2, &val);
611 if (val & PCI_EXP_SLTCAP_PCP) {
612 eeh_ops->read_config(dn, cap + PCI_EXP_SLTCTL, 2, &val);
613 if (val & PCI_EXP_SLTCTL_PCC) {
614 pr_debug(" In power-off state, power it on ...\n");
615 val &= ~(PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PIC);
616 val |= (0x0100 & PCI_EXP_SLTCTL_PIC);
617 eeh_ops->write_config(dn, cap + PCI_EXP_SLTCTL, 2, val);
618 msleep(2 * 1000);
619 }
620 }
621
622 /* Enable link */
623 eeh_ops->read_config(dn, cap + PCI_EXP_LNKCTL, 2, &val);
624 val &= ~PCI_EXP_LNKCTL_LD;
625 eeh_ops->write_config(dn, cap + PCI_EXP_LNKCTL, 2, val);
626
627 /* Check link */
628 eeh_ops->read_config(dn, cap + PCI_EXP_LNKCAP, 4, &val);
629 if (!(val & PCI_EXP_LNKCAP_DLLLARC)) {
630 pr_debug(" No link reporting capability (0x%08x) \n", val);
631 msleep(1000);
632 return;
633 }
634
 635 /* Wait for the link to come up, with a timeout of 5s */
636 timeout = 0;
637 while (timeout < 5000) {
638 msleep(20);
639 timeout += 20;
640
641 eeh_ops->read_config(dn, cap + PCI_EXP_LNKSTA, 2, &val);
642 if (val & PCI_EXP_LNKSTA_DLLLA)
643 break;
644 }
645
646 if (val & PCI_EXP_LNKSTA_DLLLA)
647 pr_debug(" Link up (%s)\n",
648 (val & PCI_EXP_LNKSTA_CLS_2_5GB) ? "2.5GB" : "5GB");
649 else
650 pr_debug(" Link not ready (0x%04x)\n", val);
651}
652
653#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
654#define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)])
655
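
BYTE_SWAP() compensates for the saved config space being held as an array of u32 words: on a big-endian CPU, extracting one byte of little-endian PCI config space from a stored word means reversing the byte index within that word. A standalone illustration of the index math (plain userspace C, not kernel code):

	#include <stdio.h>

	#define BYTE_SWAP(OFF) (8 * ((OFF) / 4) + 3 - (OFF))

	int main(void)
	{
		/* PCI_CACHE_LINE_SIZE (0x0C) maps to byte index 15 and
		 * PCI_LATENCY_TIMER (0x0D) to index 14: the low config-space
		 * byte of each dword lands at the highest byte address of
		 * the stored word.
		 */
		printf("0x0C -> %d\n", BYTE_SWAP(0x0C));	/* prints 15 */
		printf("0x0D -> %d\n", BYTE_SWAP(0x0D));	/* prints 14 */
		return 0;
	}
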
656static void eeh_restore_bridge_bars(struct eeh_dev *edev,
657 struct device_node *dn)
658{
659 int i;
660
661 /*
 662 * Device BARs: 0x10 - 0x17
 663 * Bus numbers and windows: 0x18 - 0x33
664 */
665 for (i = 4; i < 13; i++)
666 eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]);
667 /* Rom: 0x38 */
668 eeh_ops->write_config(dn, 14*4, 4, edev->config_space[14]);
669
670 /* Cache line & Latency timer: 0xC 0xD */
671 eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1,
672 SAVED_BYTE(PCI_CACHE_LINE_SIZE));
673 eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1,
674 SAVED_BYTE(PCI_LATENCY_TIMER));
 675 /* Max latency, min grant, interrupt pin and line: 0x3C */
676 eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]);
677
678 /* PCI Command: 0x4 */
679 eeh_ops->write_config(dn, PCI_COMMAND, 4, edev->config_space[1]);
680
681 /* Check the PCIe link is ready */
682 eeh_bridge_check_link(edev, dn);
683}
684
685static void eeh_restore_device_bars(struct eeh_dev *edev,
686 struct device_node *dn)
566{ 687{
567 int i; 688 int i;
568 u32 cmd; 689 u32 cmd;
569 struct eeh_dev *edev = (struct eeh_dev *)data;
570 struct device_node *dn = eeh_dev_to_of_node(edev);
571 690
572 for (i = 4; i < 10; i++) 691 for (i = 4; i < 10; i++)
573 eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]); 692 eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]);
574 /* 12 == Expansion ROM Address */ 693 /* 12 == Expansion ROM Address */
575 eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]); 694 eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]);
576 695
577#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
578#define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)])
579
580 eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1, 696 eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1,
581 SAVED_BYTE(PCI_CACHE_LINE_SIZE)); 697 SAVED_BYTE(PCI_CACHE_LINE_SIZE));
582 eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1, 698 eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1,
@@ -599,6 +715,27 @@ static void *eeh_restore_one_device_bars(void *data, void *flag)
599 else 715 else
600 cmd &= ~PCI_COMMAND_SERR; 716 cmd &= ~PCI_COMMAND_SERR;
601 eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd); 717 eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd);
718}
719
720/**
721 * eeh_restore_one_device_bars - Restore the Base Address Registers for one device
722 * @data: EEH device
723 * @flag: Unused
724 *
725 * Loads the PCI configuration space base address registers,
 726 * the expansion ROM base address, the latency timer, etc.
727 * from the saved values in the device node.
728 */
729static void *eeh_restore_one_device_bars(void *data, void *flag)
730{
731 struct eeh_dev *edev = (struct eeh_dev *)data;
732 struct device_node *dn = eeh_dev_to_of_node(edev);
733
734 /* Do special restore for bridges */
735 if (edev->mode & EEH_DEV_BRIDGE)
736 eeh_restore_bridge_bars(edev, dn);
737 else
738 eeh_restore_device_bars(edev, dn);
602 739
603 return NULL; 740 return NULL;
604} 741}
@@ -635,19 +772,21 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
635 struct eeh_dev *edev; 772 struct eeh_dev *edev;
636 struct pci_dev *pdev; 773 struct pci_dev *pdev;
637 774
638 eeh_lock();
639
640 if (pe->type & EEH_PE_PHB) { 775 if (pe->type & EEH_PE_PHB) {
641 bus = pe->phb->bus; 776 bus = pe->phb->bus;
642 } else if (pe->type & EEH_PE_BUS || 777 } else if (pe->type & EEH_PE_BUS ||
643 pe->type & EEH_PE_DEVICE) { 778 pe->type & EEH_PE_DEVICE) {
779 if (pe->bus) {
780 bus = pe->bus;
781 goto out;
782 }
783
644 edev = list_first_entry(&pe->edevs, struct eeh_dev, list); 784 edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
645 pdev = eeh_dev_to_pci_dev(edev); 785 pdev = eeh_dev_to_pci_dev(edev);
646 if (pdev) 786 if (pdev)
647 bus = pdev->bus; 787 bus = pdev->bus;
648 } 788 }
649 789
650 eeh_unlock(); 790out:
651
652 return bus; 791 return bus;
653} 792}
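
Caching pe->bus matters because pe->edevs can drain during recovery: once pcibios_remove_pci_devices() has unplugged the devices, the list_first_entry() fallback has nothing left to inspect. A hedged sketch of the sequence (the wrapper function is hypothetical; the called helpers appear in this series):

	/* Hypothetical recovery fragment showing why the cache helps. */
	static void example_reset_sequence(struct eeh_pe *pe)
	{
		struct pci_bus *bus = eeh_pe_bus_get(pe);

		if (!bus)
			return;

		pcibios_remove_pci_devices(bus);	/* pe->edevs drains here */
		/* ... PE reset and config-space restore happen here ... */
		bus = eeh_pe_bus_get(pe);		/* still resolvable via pe->bus */
	}
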
diff --git a/arch/powerpc/platforms/pseries/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
index d37708360f2e..5d753d4f2c75 100644
--- a/arch/powerpc/platforms/pseries/eeh_sysfs.c
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -56,20 +56,40 @@ EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x");
56 56
57void eeh_sysfs_add_device(struct pci_dev *pdev) 57void eeh_sysfs_add_device(struct pci_dev *pdev)
58{ 58{
59 struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
59 int rc=0; 60 int rc=0;
60 61
62 if (edev && (edev->mode & EEH_DEV_SYSFS))
63 return;
64
61 rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode); 65 rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode);
62 rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr); 66 rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr);
63 rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); 67 rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
64 68
65 if (rc) 69 if (rc)
66 printk(KERN_WARNING "EEH: Unable to create sysfs entries\n"); 70 printk(KERN_WARNING "EEH: Unable to create sysfs entries\n");
71 else if (edev)
72 edev->mode |= EEH_DEV_SYSFS;
67} 73}
68 74
69void eeh_sysfs_remove_device(struct pci_dev *pdev) 75void eeh_sysfs_remove_device(struct pci_dev *pdev)
70{ 76{
77 struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
78
79 /*
80 * The parent directory might have been removed. We needn't
 81 * continue in that case.
82 */
83 if (!pdev->dev.kobj.sd) {
84 if (edev)
85 edev->mode &= ~EEH_DEV_SYSFS;
86 return;
87 }
88
71 device_remove_file(&pdev->dev, &dev_attr_eeh_mode); 89 device_remove_file(&pdev->dev, &dev_attr_eeh_mode);
72 device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr); 90 device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr);
73 device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); 91 device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
74}
75 92
93 if (edev)
94 edev->mode &= ~EEH_DEV_SYSFS;
95}
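
The EEH_DEV_SYSFS flag makes the add/remove pair idempotent across repeated hotplug passes. A hedged distillation of the guard pattern (generic names, not the kernel code):

	/* Create-once / remove-once guard via a mode flag. */
	static void example_add(struct eeh_dev *edev)
	{
		if (edev->mode & EEH_DEV_SYSFS)
			return;			/* already created */
		/* ... create the sysfs attributes ... */
		edev->mode |= EEH_DEV_SYSFS;
	}

	static void example_remove(struct eeh_dev *edev)
	{
		/* ... remove attributes only if the parent kobject survives ... */
		edev->mode &= ~EEH_DEV_SYSFS;
	}
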
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 8741c854e03d..2bd0b885b0fe 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -449,15 +449,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_DSCR)
449 449
450#ifdef CONFIG_PPC_BOOK3S_64 450#ifdef CONFIG_PPC_BOOK3S_64
451BEGIN_FTR_SECTION 451BEGIN_FTR_SECTION
452 /*
453 * Back up the TAR across context switches. Note that the TAR is not
454 * available for use in the kernel. (To provide this, the TAR should
455 * be backed up/restored on exception entry/exit instead, and be in
456 * pt_regs. FIXME, this should be in pt_regs anyway (for debug).)
457 */
458 mfspr r0,SPRN_TAR
459 std r0,THREAD_TAR(r3)
460
461 /* Event based branch registers */ 452 /* Event based branch registers */
462 mfspr r0, SPRN_BESCR 453 mfspr r0, SPRN_BESCR
463 std r0, THREAD_BESCR(r3) 454 std r0, THREAD_BESCR(r3)
@@ -584,9 +575,34 @@ BEGIN_FTR_SECTION
584 ld r7,DSCR_DEFAULT@toc(2) 575 ld r7,DSCR_DEFAULT@toc(2)
585 ld r0,THREAD_DSCR(r4) 576 ld r0,THREAD_DSCR(r4)
586 cmpwi r6,0 577 cmpwi r6,0
578 li r8, FSCR_DSCR
587 bne 1f 579 bne 1f
588 ld r0,0(r7) 580 ld r0,0(r7)
5891: cmpd r0,r25 581 b 3f
5821:
583 BEGIN_FTR_SECTION_NESTED(70)
584 mfspr r6, SPRN_FSCR
585 or r6, r6, r8
586 mtspr SPRN_FSCR, r6
587 BEGIN_FTR_SECTION_NESTED(69)
588 mfspr r6, SPRN_HFSCR
589 or r6, r6, r8
590 mtspr SPRN_HFSCR, r6
591 END_FTR_SECTION_NESTED(CPU_FTR_HVMODE, CPU_FTR_HVMODE, 69)
592 b 4f
593 END_FTR_SECTION_NESTED(CPU_FTR_ARCH_207S, CPU_FTR_ARCH_207S, 70)
5943:
595 BEGIN_FTR_SECTION_NESTED(70)
596 mfspr r6, SPRN_FSCR
597 andc r6, r6, r8
598 mtspr SPRN_FSCR, r6
599 BEGIN_FTR_SECTION_NESTED(69)
600 mfspr r6, SPRN_HFSCR
601 andc r6, r6, r8
602 mtspr SPRN_HFSCR, r6
603 END_FTR_SECTION_NESTED(CPU_FTR_HVMODE, CPU_FTR_HVMODE, 69)
604 END_FTR_SECTION_NESTED(CPU_FTR_ARCH_207S, CPU_FTR_ARCH_207S, 70)
6054: cmpd r0,r25
590 beq 2f 606 beq 2f
591 mtspr SPRN_DSCR,r0 607 mtspr SPRN_DSCR,r0
5922: 6082:
@@ -629,21 +645,43 @@ _GLOBAL(ret_from_except_lite)
629 645
630 CURRENT_THREAD_INFO(r9, r1) 646 CURRENT_THREAD_INFO(r9, r1)
631 ld r3,_MSR(r1) 647 ld r3,_MSR(r1)
648#ifdef CONFIG_PPC_BOOK3E
649 ld r10,PACACURRENT(r13)
650#endif /* CONFIG_PPC_BOOK3E */
632 ld r4,TI_FLAGS(r9) 651 ld r4,TI_FLAGS(r9)
633 andi. r3,r3,MSR_PR 652 andi. r3,r3,MSR_PR
634 beq resume_kernel 653 beq resume_kernel
654#ifdef CONFIG_PPC_BOOK3E
655 lwz r3,(THREAD+THREAD_DBCR0)(r10)
656#endif /* CONFIG_PPC_BOOK3E */
635 657
636 /* Check current_thread_info()->flags */ 658 /* Check current_thread_info()->flags */
637 andi. r0,r4,_TIF_USER_WORK_MASK 659 andi. r0,r4,_TIF_USER_WORK_MASK
660#ifdef CONFIG_PPC_BOOK3E
661 bne 1f
662 /*
663 * Check to see if the dbcr0 register is set up to debug.
664 * Use the internal debug mode bit to do this.
665 */
666 andis. r0,r3,DBCR0_IDM@h
638 beq restore 667 beq restore
639 668 mfmsr r0
640 andi. r0,r4,_TIF_NEED_RESCHED 669 rlwinm r0,r0,0,~MSR_DE /* Clear MSR.DE */
641 beq 1f 670 mtmsr r0
671 mtspr SPRN_DBCR0,r3
672 li r10, -1
673 mtspr SPRN_DBSR,r10
674 b restore
675#else
676 beq restore
677#endif
6781: andi. r0,r4,_TIF_NEED_RESCHED
679 beq 2f
642 bl .restore_interrupts 680 bl .restore_interrupts
643 SCHEDULE_USER 681 SCHEDULE_USER
644 b .ret_from_except_lite 682 b .ret_from_except_lite
645 683
6461: bl .save_nvgprs 6842: bl .save_nvgprs
647 bl .restore_interrupts 685 bl .restore_interrupts
648 addi r3,r1,STACK_FRAME_OVERHEAD 686 addi r3,r1,STACK_FRAME_OVERHEAD
649 bl .do_notify_resume 687 bl .do_notify_resume
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 40e4a17c8ba0..902ca3c6b4b6 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -341,10 +341,17 @@ vsx_unavailable_pSeries_1:
341 EXCEPTION_PROLOG_0(PACA_EXGEN) 341 EXCEPTION_PROLOG_0(PACA_EXGEN)
342 b vsx_unavailable_pSeries 342 b vsx_unavailable_pSeries
343 343
344facility_unavailable_trampoline:
344 . = 0xf60 345 . = 0xf60
345 SET_SCRATCH0(r13) 346 SET_SCRATCH0(r13)
346 EXCEPTION_PROLOG_0(PACA_EXGEN) 347 EXCEPTION_PROLOG_0(PACA_EXGEN)
347 b tm_unavailable_pSeries 348 b facility_unavailable_pSeries
349
350hv_facility_unavailable_trampoline:
351 . = 0xf80
352 SET_SCRATCH0(r13)
353 EXCEPTION_PROLOG_0(PACA_EXGEN)
354 b facility_unavailable_hv
348 355
349#ifdef CONFIG_CBE_RAS 356#ifdef CONFIG_CBE_RAS
350 STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error) 357 STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error)
@@ -522,8 +529,10 @@ denorm_done:
522 KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf20) 529 KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf20)
523 STD_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) 530 STD_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable)
524 KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40) 531 KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40)
525 STD_EXCEPTION_PSERIES_OOL(0xf60, tm_unavailable) 532 STD_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable)
526 KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf60) 533 KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf60)
534 STD_EXCEPTION_HV_OOL(0xf82, facility_unavailable)
535 KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xf82)
527 536
528/* 537/*
529 * An interrupt came in while soft-disabled. We set paca->irq_happened, then: 538 * An interrupt came in while soft-disabled. We set paca->irq_happened, then:
@@ -793,14 +802,10 @@ system_call_relon_pSeries:
793 STD_RELON_EXCEPTION_PSERIES(0x4d00, 0xd00, single_step) 802 STD_RELON_EXCEPTION_PSERIES(0x4d00, 0xd00, single_step)
794 803
795 . = 0x4e00 804 . = 0x4e00
796 SET_SCRATCH0(r13) 805 b . /* Can't happen, see v2.07 Book III-S section 6.5 */
797 EXCEPTION_PROLOG_0(PACA_EXGEN)
798 b h_data_storage_relon_hv
799 806
800 . = 0x4e20 807 . = 0x4e20
801 SET_SCRATCH0(r13) 808 b . /* Can't happen, see v2.07 Book III-S section 6.5 */
802 EXCEPTION_PROLOG_0(PACA_EXGEN)
803 b h_instr_storage_relon_hv
804 809
805 . = 0x4e40 810 . = 0x4e40
806 SET_SCRATCH0(r13) 811 SET_SCRATCH0(r13)
@@ -808,9 +813,7 @@ system_call_relon_pSeries:
808 b emulation_assist_relon_hv 813 b emulation_assist_relon_hv
809 814
810 . = 0x4e60 815 . = 0x4e60
811 SET_SCRATCH0(r13) 816 b . /* Can't happen, see v2.07 Book III-S section 6.5 */
812 EXCEPTION_PROLOG_0(PACA_EXGEN)
813 b hmi_exception_relon_hv
814 817
815 . = 0x4e80 818 . = 0x4e80
816 SET_SCRATCH0(r13) 819 SET_SCRATCH0(r13)
@@ -835,11 +838,17 @@ vsx_unavailable_relon_pSeries_1:
835 EXCEPTION_PROLOG_0(PACA_EXGEN) 838 EXCEPTION_PROLOG_0(PACA_EXGEN)
836 b vsx_unavailable_relon_pSeries 839 b vsx_unavailable_relon_pSeries
837 840
838tm_unavailable_relon_pSeries_1: 841facility_unavailable_relon_trampoline:
839 . = 0x4f60 842 . = 0x4f60
840 SET_SCRATCH0(r13) 843 SET_SCRATCH0(r13)
841 EXCEPTION_PROLOG_0(PACA_EXGEN) 844 EXCEPTION_PROLOG_0(PACA_EXGEN)
842 b tm_unavailable_relon_pSeries 845 b facility_unavailable_relon_pSeries
846
847hv_facility_unavailable_relon_trampoline:
848 . = 0x4f80
849 SET_SCRATCH0(r13)
850 EXCEPTION_PROLOG_0(PACA_EXGEN)
851 b hv_facility_unavailable_relon_hv
843 852
844 STD_RELON_EXCEPTION_PSERIES(0x5300, 0x1300, instruction_breakpoint) 853 STD_RELON_EXCEPTION_PSERIES(0x5300, 0x1300, instruction_breakpoint)
845#ifdef CONFIG_PPC_DENORMALISATION 854#ifdef CONFIG_PPC_DENORMALISATION
@@ -1165,36 +1174,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
1165 bl .vsx_unavailable_exception 1174 bl .vsx_unavailable_exception
1166 b .ret_from_except 1175 b .ret_from_except
1167 1176
1168 .align 7 1177 STD_EXCEPTION_COMMON(0xf60, facility_unavailable, .facility_unavailable_exception)
1169 .globl tm_unavailable_common 1178 STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, .facility_unavailable_exception)
1170tm_unavailable_common:
1171 EXCEPTION_PROLOG_COMMON(0xf60, PACA_EXGEN)
1172 bl .save_nvgprs
1173 DISABLE_INTS
1174 addi r3,r1,STACK_FRAME_OVERHEAD
1175 bl .tm_unavailable_exception
1176 b .ret_from_except
1177 1179
1178 .align 7 1180 .align 7
1179 .globl __end_handlers 1181 .globl __end_handlers
1180__end_handlers: 1182__end_handlers:
1181 1183
1182 /* Equivalents to the above handlers for relocation-on interrupt vectors */ 1184 /* Equivalents to the above handlers for relocation-on interrupt vectors */
1183 STD_RELON_EXCEPTION_HV_OOL(0xe00, h_data_storage)
1184 KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe00)
1185 STD_RELON_EXCEPTION_HV_OOL(0xe20, h_instr_storage)
1186 KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe20)
1187 STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist) 1185 STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist)
1188 KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe40)
1189 STD_RELON_EXCEPTION_HV_OOL(0xe60, hmi_exception)
1190 KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe60)
1191 MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell) 1186 MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell)
1192 KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe80)
1193 1187
1194 STD_RELON_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor) 1188 STD_RELON_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor)
1195 STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable) 1189 STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable)
1196 STD_RELON_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) 1190 STD_RELON_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable)
1197 STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, tm_unavailable) 1191 STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable)
1192 STD_RELON_EXCEPTION_HV_OOL(0xf80, hv_facility_unavailable)
1198 1193
1199#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) 1194#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
1200/* 1195/*
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index a949bdfc9623..f0b47d1a6b0e 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -176,7 +176,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
176 length_max = 512 ; /* 64 doublewords */ 176 length_max = 512 ; /* 64 doublewords */
177 /* DAWR region can't cross 512 boundary */ 177 /* DAWR region can't cross 512 boundary */
178 if ((bp->attr.bp_addr >> 10) != 178 if ((bp->attr.bp_addr >> 10) !=
179 ((bp->attr.bp_addr + bp->attr.bp_len) >> 10)) 179 ((bp->attr.bp_addr + bp->attr.bp_len - 1) >> 10))
180 return -EINVAL; 180 return -EINVAL;
181 } 181 }
182 if (info->len > 182 if (info->len >
@@ -250,6 +250,7 @@ int __kprobes hw_breakpoint_handler(struct die_args *args)
250 * we still need to single-step the instruction, but we don't 250 * we still need to single-step the instruction, but we don't
251 * generate an event. 251 * generate an event.
252 */ 252 */
253 info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;
253 if (!((bp->attr.bp_addr <= dar) && 254 if (!((bp->attr.bp_addr <= dar) &&
254 (dar - bp->attr.bp_addr < bp->attr.bp_len))) 255 (dar - bp->attr.bp_addr < bp->attr.bp_len)))
255 info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; 256 info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
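
The "- 1" matters because the last byte covered by a watchpoint is bp_addr + bp_len - 1; without it, a region ending exactly on a boundary was wrongly rejected as crossing it. A standalone check of the fixed predicate (plain C, not kernel code):

	#include <assert.h>
	#include <stdint.h>

	/* Mirrors the corrected boundary test above (>> 10, i.e. 1KB blocks). */
	static int crosses_block(uint64_t addr, uint64_t len)
	{
		return (addr >> 10) != ((addr + len - 1) >> 10);
	}

	int main(void)
	{
		assert(!crosses_block(0x000, 0x400));	/* [0x000, 0x3ff]: inside one block */
		assert(crosses_block(0x3ff, 0x002));	/* [0x3ff, 0x400]: crosses */
		return 0;
	}
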
diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c
index 8220baa46faf..16a7c2326d48 100644
--- a/arch/powerpc/kernel/ibmebus.c
+++ b/arch/powerpc/kernel/ibmebus.c
@@ -205,7 +205,7 @@ static int ibmebus_create_devices(const struct of_device_id *matches)
205 return ret; 205 return ret;
206} 206}
207 207
208int ibmebus_register_driver(struct of_platform_driver *drv) 208int ibmebus_register_driver(struct platform_driver *drv)
209{ 209{
210 /* If the driver uses devices that ibmebus doesn't know, add them */ 210 /* If the driver uses devices that ibmebus doesn't know, add them */
211 ibmebus_create_devices(drv->driver.of_match_table); 211 ibmebus_create_devices(drv->driver.of_match_table);
@@ -215,7 +215,7 @@ int ibmebus_register_driver(struct of_platform_driver *drv)
215} 215}
216EXPORT_SYMBOL(ibmebus_register_driver); 216EXPORT_SYMBOL(ibmebus_register_driver);
217 217
218void ibmebus_unregister_driver(struct of_platform_driver *drv) 218void ibmebus_unregister_driver(struct platform_driver *drv)
219{ 219{
220 driver_unregister(&drv->driver); 220 driver_unregister(&drv->driver);
221} 221}
@@ -338,11 +338,10 @@ static int ibmebus_bus_bus_match(struct device *dev, struct device_driver *drv)
338static int ibmebus_bus_device_probe(struct device *dev) 338static int ibmebus_bus_device_probe(struct device *dev)
339{ 339{
340 int error = -ENODEV; 340 int error = -ENODEV;
341 struct of_platform_driver *drv; 341 struct platform_driver *drv;
342 struct platform_device *of_dev; 342 struct platform_device *of_dev;
343 const struct of_device_id *match;
344 343
345 drv = to_of_platform_driver(dev->driver); 344 drv = to_platform_driver(dev->driver);
346 of_dev = to_platform_device(dev); 345 of_dev = to_platform_device(dev);
347 346
348 if (!drv->probe) 347 if (!drv->probe)
@@ -350,9 +349,8 @@ static int ibmebus_bus_device_probe(struct device *dev)
350 349
351 of_dev_get(of_dev); 350 of_dev_get(of_dev);
352 351
353 match = of_match_device(drv->driver.of_match_table, dev); 352 if (of_driver_match_device(dev, dev->driver))
354 if (match) 353 error = drv->probe(of_dev);
355 error = drv->probe(of_dev, match);
356 if (error) 354 if (error)
357 of_dev_put(of_dev); 355 of_dev_put(of_dev);
358 356
@@ -362,7 +360,7 @@ static int ibmebus_bus_device_probe(struct device *dev)
362static int ibmebus_bus_device_remove(struct device *dev) 360static int ibmebus_bus_device_remove(struct device *dev)
363{ 361{
364 struct platform_device *of_dev = to_platform_device(dev); 362 struct platform_device *of_dev = to_platform_device(dev);
365 struct of_platform_driver *drv = to_of_platform_driver(dev->driver); 363 struct platform_driver *drv = to_platform_driver(dev->driver);
366 364
367 if (dev->driver && drv->remove) 365 if (dev->driver && drv->remove)
368 drv->remove(of_dev); 366 drv->remove(of_dev);
@@ -372,7 +370,7 @@ static int ibmebus_bus_device_remove(struct device *dev)
372static void ibmebus_bus_device_shutdown(struct device *dev) 370static void ibmebus_bus_device_shutdown(struct device *dev)
373{ 371{
374 struct platform_device *of_dev = to_platform_device(dev); 372 struct platform_device *of_dev = to_platform_device(dev);
375 struct of_platform_driver *drv = to_of_platform_driver(dev->driver); 373 struct platform_driver *drv = to_platform_driver(dev->driver);
376 374
377 if (dev->driver && drv->shutdown) 375 if (dev->driver && drv->shutdown)
378 drv->shutdown(of_dev); 376 drv->shutdown(of_dev);
@@ -419,7 +417,7 @@ struct device_attribute ibmebus_bus_device_attrs[] = {
419static int ibmebus_bus_legacy_suspend(struct device *dev, pm_message_t mesg) 417static int ibmebus_bus_legacy_suspend(struct device *dev, pm_message_t mesg)
420{ 418{
421 struct platform_device *of_dev = to_platform_device(dev); 419 struct platform_device *of_dev = to_platform_device(dev);
422 struct of_platform_driver *drv = to_of_platform_driver(dev->driver); 420 struct platform_driver *drv = to_platform_driver(dev->driver);
423 int ret = 0; 421 int ret = 0;
424 422
425 if (dev->driver && drv->suspend) 423 if (dev->driver && drv->suspend)
@@ -430,7 +428,7 @@ static int ibmebus_bus_legacy_suspend(struct device *dev, pm_message_t mesg)
430static int ibmebus_bus_legacy_resume(struct device *dev) 428static int ibmebus_bus_legacy_resume(struct device *dev)
431{ 429{
432 struct platform_device *of_dev = to_platform_device(dev); 430 struct platform_device *of_dev = to_platform_device(dev);
433 struct of_platform_driver *drv = to_of_platform_driver(dev->driver); 431 struct platform_driver *drv = to_platform_driver(dev->driver);
434 int ret = 0; 432 int ret = 0;
435 433
436 if (dev->driver && drv->resume) 434 if (dev->driver && drv->resume)
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index 939ea7ef0dc8..d7216c9abda1 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -85,7 +85,7 @@ int powersave_nap;
85/* 85/*
86 * Register the sysctl to set/clear powersave_nap. 86 * Register the sysctl to set/clear powersave_nap.
87 */ 87 */
88static ctl_table powersave_nap_ctl_table[]={ 88static struct ctl_table powersave_nap_ctl_table[] = {
89 { 89 {
90 .procname = "powersave-nap", 90 .procname = "powersave-nap",
91 .data = &powersave_nap, 91 .data = &powersave_nap,
@@ -95,7 +95,7 @@ static ctl_table powersave_nap_ctl_table[]={
95 }, 95 },
96 {} 96 {}
97}; 97};
98static ctl_table powersave_nap_sysctl_root[] = { 98static struct ctl_table powersave_nap_sysctl_root[] = {
99 { 99 {
100 .procname = "kernel", 100 .procname = "kernel",
101 .mode = 0555, 101 .mode = 0555,
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index 50e90b7e7139..fa0b54b2a362 100644
--- a/arch/powerpc/kernel/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -55,6 +55,7 @@ static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr)
55 55
56struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) 56struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
57{ 57{
58 unsigned hugepage_shift;
58 struct iowa_bus *bus; 59 struct iowa_bus *bus;
59 int token; 60 int token;
60 61
@@ -70,11 +71,17 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
70 if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END) 71 if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END)
71 return NULL; 72 return NULL;
72 73
73 ptep = find_linux_pte(init_mm.pgd, vaddr); 74 ptep = find_linux_pte_or_hugepte(init_mm.pgd, vaddr,
75 &hugepage_shift);
74 if (ptep == NULL) 76 if (ptep == NULL)
75 paddr = 0; 77 paddr = 0;
76 else 78 else {
79 /*
80 * we don't have hugepages backing iomem
81 */
82 WARN_ON(hugepage_shift);
77 paddr = pte_pfn(*ptep) << PAGE_SHIFT; 83 paddr = pte_pfn(*ptep) << PAGE_SHIFT;
84 }
78 bus = iowa_pci_find(vaddr, paddr); 85 bus = iowa_pci_find(vaddr, paddr);
79 86
80 if (bus == NULL) 87 if (bus == NULL)
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index c0d0dbddfba1..b20ff173a671 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -36,6 +36,8 @@
36#include <linux/hash.h> 36#include <linux/hash.h>
37#include <linux/fault-inject.h> 37#include <linux/fault-inject.h>
38#include <linux/pci.h> 38#include <linux/pci.h>
39#include <linux/iommu.h>
40#include <linux/sched.h>
39#include <asm/io.h> 41#include <asm/io.h>
40#include <asm/prom.h> 42#include <asm/prom.h>
41#include <asm/iommu.h> 43#include <asm/iommu.h>
@@ -44,6 +46,7 @@
44#include <asm/kdump.h> 46#include <asm/kdump.h>
45#include <asm/fadump.h> 47#include <asm/fadump.h>
46#include <asm/vio.h> 48#include <asm/vio.h>
49#include <asm/tce.h>
47 50
48#define DBG(...) 51#define DBG(...)
49 52
@@ -724,6 +727,13 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
724 if (tbl->it_offset == 0) 727 if (tbl->it_offset == 0)
725 clear_bit(0, tbl->it_map); 728 clear_bit(0, tbl->it_map);
726 729
730#ifdef CONFIG_IOMMU_API
731 if (tbl->it_group) {
732 iommu_group_put(tbl->it_group);
733 BUG_ON(tbl->it_group);
734 }
735#endif
736
727 /* verify that table contains no entries */ 737 /* verify that table contains no entries */
728 if (!bitmap_empty(tbl->it_map, tbl->it_size)) 738 if (!bitmap_empty(tbl->it_map, tbl->it_size))
729 pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name); 739 pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name);
@@ -860,3 +870,316 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size,
860 free_pages((unsigned long)vaddr, get_order(size)); 870 free_pages((unsigned long)vaddr, get_order(size));
861 } 871 }
862} 872}
873
874#ifdef CONFIG_IOMMU_API
875/*
876 * SPAPR TCE API
877 */
878static void group_release(void *iommu_data)
879{
880 struct iommu_table *tbl = iommu_data;
881 tbl->it_group = NULL;
882}
883
884void iommu_register_group(struct iommu_table *tbl,
885 int pci_domain_number, unsigned long pe_num)
886{
887 struct iommu_group *grp;
888 char *name;
889
890 grp = iommu_group_alloc();
891 if (IS_ERR(grp)) {
892 pr_warn("powerpc iommu api: cannot create new group, err=%ld\n",
893 PTR_ERR(grp));
894 return;
895 }
896 tbl->it_group = grp;
897 iommu_group_set_iommudata(grp, tbl, group_release);
898 name = kasprintf(GFP_KERNEL, "domain%d-pe%lx",
899 pci_domain_number, pe_num);
900 if (!name)
901 return;
902 iommu_group_set_name(grp, name);
903 kfree(name);
904}
905
906enum dma_data_direction iommu_tce_direction(unsigned long tce)
907{
908 if ((tce & TCE_PCI_READ) && (tce & TCE_PCI_WRITE))
909 return DMA_BIDIRECTIONAL;
910 else if (tce & TCE_PCI_READ)
911 return DMA_TO_DEVICE;
912 else if (tce & TCE_PCI_WRITE)
913 return DMA_FROM_DEVICE;
914 else
915 return DMA_NONE;
916}
917EXPORT_SYMBOL_GPL(iommu_tce_direction);
918
919void iommu_flush_tce(struct iommu_table *tbl)
920{
921 /* Flush/invalidate TLB caches if necessary */
922 if (ppc_md.tce_flush)
923 ppc_md.tce_flush(tbl);
924
925 /* Make sure updates are seen by hardware */
926 mb();
927}
928EXPORT_SYMBOL_GPL(iommu_flush_tce);
929
930int iommu_tce_clear_param_check(struct iommu_table *tbl,
931 unsigned long ioba, unsigned long tce_value,
932 unsigned long npages)
933{
934 /* ppc_md.tce_free() does not support any value but 0 */
935 if (tce_value)
936 return -EINVAL;
937
938 if (ioba & ~IOMMU_PAGE_MASK)
939 return -EINVAL;
940
941 ioba >>= IOMMU_PAGE_SHIFT;
942 if (ioba < tbl->it_offset)
943 return -EINVAL;
944
945 if ((ioba + npages) > (tbl->it_offset + tbl->it_size))
946 return -EINVAL;
947
948 return 0;
949}
950EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check);
951
952int iommu_tce_put_param_check(struct iommu_table *tbl,
953 unsigned long ioba, unsigned long tce)
954{
955 if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ)))
956 return -EINVAL;
957
958 if (tce & ~(IOMMU_PAGE_MASK | TCE_PCI_WRITE | TCE_PCI_READ))
959 return -EINVAL;
960
961 if (ioba & ~IOMMU_PAGE_MASK)
962 return -EINVAL;
963
964 ioba >>= IOMMU_PAGE_SHIFT;
965 if (ioba < tbl->it_offset)
966 return -EINVAL;
967
968 if ((ioba + 1) > (tbl->it_offset + tbl->it_size))
969 return -EINVAL;
970
971 return 0;
972}
973EXPORT_SYMBOL_GPL(iommu_tce_put_param_check);
974
975unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry)
976{
977 unsigned long oldtce;
978 struct iommu_pool *pool = get_pool(tbl, entry);
979
980 spin_lock(&(pool->lock));
981
982 oldtce = ppc_md.tce_get(tbl, entry);
983 if (oldtce & (TCE_PCI_WRITE | TCE_PCI_READ))
984 ppc_md.tce_free(tbl, entry, 1);
985 else
986 oldtce = 0;
987
988 spin_unlock(&(pool->lock));
989
990 return oldtce;
991}
992EXPORT_SYMBOL_GPL(iommu_clear_tce);
993
994int iommu_clear_tces_and_put_pages(struct iommu_table *tbl,
995 unsigned long entry, unsigned long pages)
996{
997 unsigned long oldtce;
998 struct page *page;
999
1000 for ( ; pages; --pages, ++entry) {
1001 oldtce = iommu_clear_tce(tbl, entry);
1002 if (!oldtce)
1003 continue;
1004
1005 page = pfn_to_page(oldtce >> PAGE_SHIFT);
1006 WARN_ON(!page);
1007 if (page) {
1008 if (oldtce & TCE_PCI_WRITE)
1009 SetPageDirty(page);
1010 put_page(page);
1011 }
1012 }
1013
1014 return 0;
1015}
1016EXPORT_SYMBOL_GPL(iommu_clear_tces_and_put_pages);
1017
1018/*
1019 * hwaddr is a kernel virtual address here (0xc... bazillion),
1020 * tce_build converts it to a physical address.
1021 */
1022int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
1023 unsigned long hwaddr, enum dma_data_direction direction)
1024{
1025 int ret = -EBUSY;
1026 unsigned long oldtce;
1027 struct iommu_pool *pool = get_pool(tbl, entry);
1028
1029 spin_lock(&(pool->lock));
1030
1031 oldtce = ppc_md.tce_get(tbl, entry);
1032 /* Add new entry if it is not busy */
1033 if (!(oldtce & (TCE_PCI_WRITE | TCE_PCI_READ)))
1034 ret = ppc_md.tce_build(tbl, entry, 1, hwaddr, direction, NULL);
1035
1036 spin_unlock(&(pool->lock));
1037
1038 /* if (unlikely(ret))
1039 pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
1040 __func__, hwaddr, entry << IOMMU_PAGE_SHIFT,
1041 hwaddr, ret); */
1042
1043 return ret;
1044}
1045EXPORT_SYMBOL_GPL(iommu_tce_build);
1046
1047int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry,
1048 unsigned long tce)
1049{
1050 int ret;
1051 struct page *page = NULL;
1052 unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK & ~PAGE_MASK;
1053 enum dma_data_direction direction = iommu_tce_direction(tce);
1054
1055 ret = get_user_pages_fast(tce & PAGE_MASK, 1,
1056 direction != DMA_TO_DEVICE, &page);
1057 if (unlikely(ret != 1)) {
1058 /* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n",
1059 tce, entry << IOMMU_PAGE_SHIFT, ret); */
1060 return -EFAULT;
1061 }
1062 hwaddr = (unsigned long) page_address(page) + offset;
1063
1064 ret = iommu_tce_build(tbl, entry, hwaddr, direction);
1065 if (ret)
1066 put_page(page);
1067
1068 if (ret < 0)
1069 pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n",
1070 __func__, entry << IOMMU_PAGE_SHIFT, tce, ret);
1071
1072 return ret;
1073}
1074EXPORT_SYMBOL_GPL(iommu_put_tce_user_mode);
1075
1076int iommu_take_ownership(struct iommu_table *tbl)
1077{
1078 unsigned long sz = (tbl->it_size + 7) >> 3;
1079
1080 if (tbl->it_offset == 0)
1081 clear_bit(0, tbl->it_map);
1082
1083 if (!bitmap_empty(tbl->it_map, tbl->it_size)) {
1084 pr_err("iommu_tce: it_map is not empty");
1085 return -EBUSY;
1086 }
1087
1088 memset(tbl->it_map, 0xff, sz);
1089 iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
1090
1091 return 0;
1092}
1093EXPORT_SYMBOL_GPL(iommu_take_ownership);
1094
1095void iommu_release_ownership(struct iommu_table *tbl)
1096{
1097 unsigned long sz = (tbl->it_size + 7) >> 3;
1098
1099 iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
1100 memset(tbl->it_map, 0, sz);
1101
1102 /* Restore bit#0 set by iommu_init_table() */
1103 if (tbl->it_offset == 0)
1104 set_bit(0, tbl->it_map);
1105}
1106EXPORT_SYMBOL_GPL(iommu_release_ownership);
1107
1108static int iommu_add_device(struct device *dev)
1109{
1110 struct iommu_table *tbl;
1111 int ret = 0;
1112
1113 if (WARN_ON(dev->iommu_group)) {
1114 pr_warn("iommu_tce: device %s is already in iommu group %d, skipping\n",
1115 dev_name(dev),
1116 iommu_group_id(dev->iommu_group));
1117 return -EBUSY;
1118 }
1119
1120 tbl = get_iommu_table_base(dev);
1121 if (!tbl || !tbl->it_group) {
1122 pr_debug("iommu_tce: skipping device %s with no tbl\n",
1123 dev_name(dev));
1124 return 0;
1125 }
1126
1127 pr_debug("iommu_tce: adding %s to iommu group %d\n",
1128 dev_name(dev), iommu_group_id(tbl->it_group));
1129
1130 ret = iommu_group_add_device(tbl->it_group, dev);
1131 if (ret < 0)
1132 pr_err("iommu_tce: %s has not been added, ret=%d\n",
1133 dev_name(dev), ret);
1134
1135 return ret;
1136}
1137
1138static void iommu_del_device(struct device *dev)
1139{
1140 iommu_group_remove_device(dev);
1141}
1142
1143static int iommu_bus_notifier(struct notifier_block *nb,
1144 unsigned long action, void *data)
1145{
1146 struct device *dev = data;
1147
1148 switch (action) {
1149 case BUS_NOTIFY_ADD_DEVICE:
1150 return iommu_add_device(dev);
1151 case BUS_NOTIFY_DEL_DEVICE:
1152 iommu_del_device(dev);
1153 return 0;
1154 default:
1155 return 0;
1156 }
1157}
1158
1159static struct notifier_block tce_iommu_bus_nb = {
1160 .notifier_call = iommu_bus_notifier,
1161};
1162
1163static int __init tce_iommu_init(void)
1164{
1165 struct pci_dev *pdev = NULL;
1166
1167 BUILD_BUG_ON(PAGE_SIZE < IOMMU_PAGE_SIZE);
1168
1169 for_each_pci_dev(pdev)
1170 iommu_add_device(&pdev->dev);
1171
1172 bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
1173 return 0;
1174}
1175
1176subsys_initcall_sync(tce_iommu_init);
1177
1178#else
1179
1180void iommu_register_group(struct iommu_table *tbl,
1181 int pci_domain_number, unsigned long pe_num)
1182{
1183}
1184
1185#endif /* CONFIG_IOMMU_API */
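
Taken together, the exported helpers form a small map/unmap API for an external IOMMU consumer. A hedged sketch of the expected calling sequence (the wrapper is hypothetical, e.g. what a VFIO-style driver might do; the iommu_* calls are the ones added above):

	/* Hypothetical consumer of the SPAPR TCE API added in this hunk. */
	static int example_map_one(struct iommu_table *tbl,
				   unsigned long ioba, unsigned long tce)
	{
		int ret;

		ret = iommu_tce_put_param_check(tbl, ioba, tce);
		if (ret)
			return ret;

		ret = iommu_put_tce_user_mode(tbl, ioba >> IOMMU_PAGE_SHIFT, tce);
		if (ret)
			return ret;

		iommu_flush_tce(tbl);	/* make the update visible to hardware */
		return 0;
	}
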
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index ea185e0b3cae..c69440cef7af 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -116,8 +116,6 @@ static inline notrace int decrementer_check_overflow(void)
116 u64 now = get_tb_or_rtc(); 116 u64 now = get_tb_or_rtc();
117 u64 *next_tb = &__get_cpu_var(decrementers_next_tb); 117 u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
118 118
119 if (now >= *next_tb)
120 set_dec(1);
121 return now >= *next_tb; 119 return now >= *next_tb;
122} 120}
123 121
@@ -364,7 +362,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
364 seq_printf(p, "%10u ", per_cpu(irq_stat, j).spurious_irqs); 362 seq_printf(p, "%10u ", per_cpu(irq_stat, j).spurious_irqs);
365 seq_printf(p, " Spurious interrupts\n"); 363 seq_printf(p, " Spurious interrupts\n");
366 364
367 seq_printf(p, "%*s: ", prec, "CNT"); 365 seq_printf(p, "%*s: ", prec, "PMI");
368 for_each_online_cpu(j) 366 for_each_online_cpu(j)
369 seq_printf(p, "%10u ", per_cpu(irq_stat, j).pmu_irqs); 367 seq_printf(p, "%10u ", per_cpu(irq_stat, j).pmu_irqs);
370 seq_printf(p, " Performance monitoring interrupts\n"); 368 seq_printf(p, " Performance monitoring interrupts\n");
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 11f5b03a0b06..2156ea90eb54 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -36,12 +36,6 @@
36#include <asm/sstep.h> 36#include <asm/sstep.h>
37#include <asm/uaccess.h> 37#include <asm/uaccess.h>
38 38
39#ifdef CONFIG_PPC_ADV_DEBUG_REGS
40#define MSR_SINGLESTEP (MSR_DE)
41#else
42#define MSR_SINGLESTEP (MSR_SE)
43#endif
44
45DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; 39DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
46DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); 40DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
47 41
@@ -104,19 +98,7 @@ void __kprobes arch_remove_kprobe(struct kprobe *p)
104 98
105static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) 99static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
106{ 100{
107 /* We turn off async exceptions to ensure that the single step will 101 enable_single_step(regs);
108 * be for the instruction we have the kprobe on, if we dont its
109 * possible we'd get the single step reported for an exception handler
110 * like Decrementer or External Interrupt */
111 regs->msr &= ~MSR_EE;
112 regs->msr |= MSR_SINGLESTEP;
113#ifdef CONFIG_PPC_ADV_DEBUG_REGS
114 regs->msr &= ~MSR_CE;
115 mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
116#ifdef CONFIG_PPC_47x
117 isync();
118#endif
119#endif
120 102
121 /* 103 /*
122 * On powerpc we should single step on the original 104 * On powerpc we should single step on the original
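
prepare_singlestep() now delegates to a shared helper, and the deleted lines spell out what that helper must cover. A hedged reconstruction for reference (the real enable_single_step() is introduced elsewhere in this series; this simply mirrors the removed logic):

	/* Mirrors the logic deleted above; not the actual shared helper. */
	static void enable_single_step_sketch(struct pt_regs *regs)
	{
		/* Mask async exceptions so the single step is reported for
		 * the probed instruction, not for e.g. a decrementer handler.
		 */
		regs->msr &= ~MSR_EE;
		regs->msr |= MSR_SINGLESTEP;
	#ifdef CONFIG_PPC_ADV_DEBUG_REGS
		regs->msr &= ~MSR_CE;
		mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
	#endif
	}
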
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 6782221d49bd..db28032e320e 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -750,13 +750,8 @@ EXPORT_SYMBOL_GPL(kvm_hypercall);
750 750
751static __init void kvm_free_tmp(void) 751static __init void kvm_free_tmp(void)
752{ 752{
753 unsigned long start, end; 753 free_reserved_area(&kvm_tmp[kvm_tmp_index],
754 754 &kvm_tmp[ARRAY_SIZE(kvm_tmp)], -1, NULL);
755 start = (ulong)&kvm_tmp[kvm_tmp_index + (PAGE_SIZE - 1)] & PAGE_MASK;
756 end = (ulong)&kvm_tmp[ARRAY_SIZE(kvm_tmp)] & PAGE_MASK;
757
758 /* Free the tmp space we don't need */
759 free_reserved_area(start, end, 0, NULL);
760} 755}
761 756
762static int __init kvm_guest_init(void) 757static int __init kvm_guest_init(void)
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 48fbc2b97e95..8213ee1eb05a 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -84,22 +84,30 @@ static ssize_t dev_nvram_read(struct file *file, char __user *buf,
 	char *tmp = NULL;
 	ssize_t size;
 
-	ret = -ENODEV;
-	if (!ppc_md.nvram_size)
+	if (!ppc_md.nvram_size) {
+		ret = -ENODEV;
 		goto out;
+	}
 
-	ret = 0;
 	size = ppc_md.nvram_size();
-	if (*ppos >= size || size < 0)
+	if (size < 0) {
+		ret = size;
+		goto out;
+	}
+
+	if (*ppos >= size) {
+		ret = 0;
 		goto out;
+	}
 
 	count = min_t(size_t, count, size - *ppos);
 	count = min(count, PAGE_SIZE);
 
-	ret = -ENOMEM;
 	tmp = kmalloc(count, GFP_KERNEL);
-	if (!tmp)
+	if (!tmp) {
+		ret = -ENOMEM;
 		goto out;
+	}
 
 	ret = ppc_md.nvram_read(tmp, count, ppos);
 	if (ret <= 0)
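
The dev_nvram_read() rework follows the common kernel pattern of assigning the error code at the point of failure instead of speculatively before each test, and it lets EOF (*ppos >= size) return 0 explicitly. A minimal user-space sketch of the same structure; read_device(), its fake payload and its limits are invented:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <errno.h>
    #include <sys/types.h>

    static ssize_t read_device(char *buf, size_t count, long *ppos, long size)
    {
    	char *tmp = NULL;
    	ssize_t ret;

    	if (*ppos >= size)
    		return 0;			/* EOF, not an error */

    	if (count > (size_t)(size - *ppos))
    		count = size - *ppos;

    	tmp = malloc(count);
    	if (!tmp) {
    		ret = -ENOMEM;			/* error set where it occurs */
    		goto out;
    	}

    	memset(tmp, 0xa5, count);		/* stands in for the real read */
    	memcpy(buf, tmp, count);
    	*ppos += count;
    	ret = count;
    out:
    	free(tmp);
    	return ret;
    }

    int main(void)
    {
    	char buf[16];
    	long pos = 0;
    	ssize_t n = read_device(buf, sizeof(buf), &pos, 64);

    	printf("read %zd bytes, pos now %ld\n", n, pos);
    	return 0;
    }
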
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index f46914a0f33e..7d22a675fe1a 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1462,6 +1462,8 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus)
 	/* Allocate bus and devices resources */
 	pcibios_allocate_bus_resources(bus);
 	pcibios_claim_one_bus(bus);
+	if (!pci_has_flag(PCI_PROBE_ONLY))
+		pci_assign_unassigned_bus_resources(bus);
 
 	/* Fixup EEH */
 	eeh_add_device_tree_late(bus);
diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
new file mode 100644
index 000000000000..c1e17ae68a08
--- /dev/null
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -0,0 +1,110 @@
+/*
+ * Derived from "arch/powerpc/platforms/pseries/pci_dlpar.c"
+ *
+ * Copyright (C) 2003 Linda Xie <lxie@us.ibm.com>
+ * Copyright (C) 2005 International Business Machines
+ *
+ * Updates, 2005, John Rose <johnrose@austin.ibm.com>
+ * Updates, 2005, Linas Vepstas <linas@austin.ibm.com>
+ * Updates, 2013, Gavin Shan <shangw@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/pci.h>
+#include <linux/export.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <asm/firmware.h>
+#include <asm/eeh.h>
+
+/**
+ * pcibios_release_device - release PCI device
+ * @dev: PCI device
+ *
+ * The function is called before releasing the indicated PCI device.
+ */
+void pcibios_release_device(struct pci_dev *dev)
+{
+	eeh_remove_device(dev);
+}
+
+/**
+ * pcibios_remove_pci_devices - remove all devices under this bus
+ * @bus: the indicated PCI bus
+ *
+ * Remove all of the PCI devices under this bus both from the
+ * linux pci device tree, and from the powerpc EEH address cache.
+ */
+void pcibios_remove_pci_devices(struct pci_bus *bus)
+{
+	struct pci_dev *dev, *tmp;
+	struct pci_bus *child_bus;
+
+	/* First go down child busses */
+	list_for_each_entry(child_bus, &bus->children, node)
+		pcibios_remove_pci_devices(child_bus);
+
+	pr_debug("PCI: Removing devices on bus %04x:%02x\n",
+		 pci_domain_nr(bus), bus->number);
+	list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) {
+		pr_debug("   Removing %s...\n", pci_name(dev));
+		pci_stop_and_remove_bus_device(dev);
+	}
+}
+
+EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices);
+
+/**
+ * pcibios_add_pci_devices - adds new pci devices to bus
+ * @bus: the indicated PCI bus
+ *
+ * This routine will find and fixup new pci devices under
+ * the indicated bus. This routine presumes that there
+ * might already be some devices under this bridge, so
+ * it carefully tries to add only new devices.  (And that
+ * is how this routine differs from other, similar pcibios
+ * routines.)
+ */
+void pcibios_add_pci_devices(struct pci_bus * bus)
+{
+	int slotno, mode, pass, max;
+	struct pci_dev *dev;
+	struct device_node *dn = pci_bus_to_OF_node(bus);
+
+	eeh_add_device_tree_early(dn);
+
+	mode = PCI_PROBE_NORMAL;
+	if (ppc_md.pci_probe_mode)
+		mode = ppc_md.pci_probe_mode(bus);
+
+	if (mode == PCI_PROBE_DEVTREE) {
+		/* use ofdt-based probe */
+		of_rescan_bus(dn, bus);
+	} else if (mode == PCI_PROBE_NORMAL) {
+		/*
+		 * Use legacy probe.  In the partial hotplug case, we
+		 * probably have grandchildren devices unplugged.  So
+		 * we don't check the return value from pci_scan_slot(),
+		 * in order to fully rescan all the way down and pick up
+		 * devices that were removed during the partial hotplug.
+		 */
+		slotno = PCI_SLOT(PCI_DN(dn->child)->devfn);
+		pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
+		pcibios_setup_bus_devices(bus);
+		max = bus->busn_res.start;
+		for (pass = 0; pass < 2; pass++) {
+			list_for_each_entry(dev, &bus->devices, bus_list) {
+				if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
+				    dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
+					max = pci_scan_bridge(bus, dev,
+							      max, pass);
+			}
+		}
+	}
+	pcibios_finish_adding_to_bus(bus);
+}
+EXPORT_SYMBOL_GPL(pcibios_add_pci_devices);
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 2a67e9baa59f..15d9105323bf 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -128,7 +128,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
 	const char *type;
 	struct pci_slot *slot;
 
-	dev = alloc_pci_dev();
+	dev = pci_alloc_dev(bus);
 	if (!dev)
 		return NULL;
 	type = of_get_property(node, "device_type", NULL);
@@ -137,7 +137,6 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
 
 	pr_debug("    create device, devfn: %x, type: %s\n", devfn, type);
 
-	dev->bus = bus;
 	dev->dev.of_node = of_node_get(node);
 	dev->dev.parent = bus->bridge;
 	dev->dev.bus = &pci_bus_type;
@@ -165,7 +164,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
 	pr_debug("    class: 0x%x\n", dev->class);
 	pr_debug("    revision: 0x%x\n", dev->revision);
 
-	dev->current_state = 4;		/* unknown power state */
+	dev->current_state = PCI_UNKNOWN;	/* unknown power state */
 	dev->error_state = pci_channel_io_normal;
 	dev->dma_mask = 0xffffffff;
 
@@ -231,11 +230,14 @@ void of_scan_pci_bridge(struct pci_dev *dev)
 		return;
 	}
 
-	bus = pci_add_new_bus(dev->bus, dev, busrange[0]);
+	bus = pci_find_bus(pci_domain_nr(dev->bus), busrange[0]);
 	if (!bus) {
-		printk(KERN_ERR "Failed to create pci bus for %s\n",
-		       node->full_name);
-		return;
+		bus = pci_add_new_bus(dev->bus, dev, busrange[0]);
+		if (!bus) {
+			printk(KERN_ERR "Failed to create pci bus for %s\n",
+			       node->full_name);
+			return;
+		}
 	}
 
 	bus->primary = dev->bus->number;
@@ -293,6 +295,38 @@ void of_scan_pci_bridge(struct pci_dev *dev)
 }
 EXPORT_SYMBOL(of_scan_pci_bridge);
 
+static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,
+			    struct device_node *dn)
+{
+	struct pci_dev *dev = NULL;
+	const u32 *reg;
+	int reglen, devfn;
+
+	pr_debug("  * %s\n", dn->full_name);
+	if (!of_device_is_available(dn))
+		return NULL;
+
+	reg = of_get_property(dn, "reg", &reglen);
+	if (reg == NULL || reglen < 20)
+		return NULL;
+	devfn = (reg[0] >> 8) & 0xff;
+
+	/* Check if the PCI device is already there */
+	dev = pci_get_slot(bus, devfn);
+	if (dev) {
+		pci_dev_put(dev);
+		return dev;
+	}
+
+	/* create a new pci_dev for this device */
+	dev = of_create_pci_dev(dn, bus, devfn);
+	if (!dev)
+		return NULL;
+
+	pr_debug("  dev header type: %x\n", dev->hdr_type);
+	return dev;
+}
+
 /**
  * __of_scan_bus - given a PCI bus node, setup bus and scan for child devices
  * @node: device tree node for the PCI bus
@@ -303,8 +337,6 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus,
 			  int rescan_existing)
 {
 	struct device_node *child;
-	const u32 *reg;
-	int reglen, devfn;
 	struct pci_dev *dev;
 
 	pr_debug("of_scan_bus(%s) bus no %d...\n",
@@ -312,16 +344,7 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus,
 
 	/* Scan direct children */
 	for_each_child_of_node(node, child) {
-		pr_debug("  * %s\n", child->full_name);
-		if (!of_device_is_available(child))
-			continue;
-		reg = of_get_property(child, "reg", &reglen);
-		if (reg == NULL || reglen < 20)
-			continue;
-		devfn = (reg[0] >> 8) & 0xff;
-
-		/* create a new pci_dev for this device */
-		dev = of_create_pci_dev(child, bus, devfn);
+		dev = of_scan_pci_dev(bus, child);
 		if (!dev)
 			continue;
 		pr_debug("    dev header type: %x\n", dev->hdr_type);
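
of_scan_pci_dev() makes the rescan idempotent for partial hotplug: a device already present at the devfn is returned rather than recreated (the kernel variant uses pci_get_slot() and drops the reference it takes). A toy sketch of the same look-up-before-create pattern, with an invented slot table standing in for the bus:

    #include <stdio.h>
    #include <stdlib.h>

    #define MAX_DEVFN 256

    static struct dev { int devfn; } *slots[MAX_DEVFN];

    static struct dev *scan_dev(int devfn)
    {
    	struct dev *d = slots[devfn];

    	if (d)				/* partial hotplug: device survived, reuse it */
    		return d;

    	d = calloc(1, sizeof(*d));	/* stands in for of_create_pci_dev() */
    	if (!d)
    		return NULL;
    	d->devfn = devfn;
    	slots[devfn] = d;
    	return d;
    }

    int main(void)
    {
    	struct dev *a = scan_dev(8);
    	struct dev *b = scan_dev(8);	/* second scan finds the same device */

    	printf("same device: %s\n", a == b ? "yes" : "no");
    	return 0;
    }
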
diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c
index feb8580fdc84..c30612aad68e 100644
--- a/arch/powerpc/kernel/proc_powerpc.c
+++ b/arch/powerpc/kernel/proc_powerpc.c
@@ -29,25 +29,9 @@
 
 #ifdef CONFIG_PPC64
 
-static loff_t page_map_seek( struct file *file, loff_t off, int whence)
+static loff_t page_map_seek(struct file *file, loff_t off, int whence)
 {
-	loff_t new;
-	switch(whence) {
-	case 0:
-		new = off;
-		break;
-	case 1:
-		new = file->f_pos + off;
-		break;
-	case 2:
-		new = PAGE_SIZE + off;
-		break;
-	default:
-		return -EINVAL;
-	}
-	if ( new < 0 || new > PAGE_SIZE )
-		return -EINVAL;
-	return (file->f_pos = new);
+	return fixed_size_llseek(file, off, whence, PAGE_SIZE);
 }
 
 static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes,
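
fixed_size_llseek() encapsulates exactly the switch the old page_map_seek() open-coded: resolve SEEK_SET/SEEK_CUR/SEEK_END against a fixed size and reject out-of-range positions. A user-space sketch of that arithmetic; the bounds mirror the removed code and the sample calls are made up:

    #include <stdio.h>

    #define FILE_SIZE 4096L

    static long seek_fixed(long pos, long off, int whence)
    {
    	long new;

    	switch (whence) {
    	case SEEK_SET: new = off; break;
    	case SEEK_CUR: new = pos + off; break;
    	case SEEK_END: new = FILE_SIZE + off; break;
    	default: return -1;
    	}
    	if (new < 0 || new > FILE_SIZE)
    		return -1;		/* -EINVAL in the kernel version */
    	return new;
    }

    int main(void)
    {
    	printf("%ld\n", seek_fixed(0, 100, SEEK_SET));	/* 100 */
    	printf("%ld\n", seek_fixed(100, -8, SEEK_END));	/* 4088 */
    	printf("%ld\n", seek_fixed(0, -1, SEEK_CUR));	/* rejected */
    	return 0;
    }
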
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 076d1242507a..8083be20fe5e 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -600,6 +600,16 @@ struct task_struct *__switch_to(struct task_struct *prev,
 	struct ppc64_tlb_batch *batch;
 #endif
 
+	/* Back up the TAR across context switches.
+	 * Note that the TAR is not available for use in the kernel.  (To
+	 * provide this, the TAR should be backed up/restored on exception
+	 * entry/exit instead, and be in pt_regs.  FIXME, this should be in
+	 * pt_regs anyway (for debug).)
+	 * Save the TAR here before we do treclaim/trecheckpoint as these
+	 * will change the TAR.
+	 */
+	save_tar(&prev->thread);
+
 	__switch_to_tm(prev);
 
 #ifdef CONFIG_SMP
@@ -916,7 +926,11 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 	flush_altivec_to_thread(src);
 	flush_vsx_to_thread(src);
 	flush_spe_to_thread(src);
+
 	*dst = *src;
+
+	clear_task_ebb(dst);
+
 	return 0;
 }
 
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 8b6f7a99cce2..eb23ac92abb9 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -559,6 +559,35 @@ void __init early_init_dt_setup_initrd_arch(unsigned long start,
 }
 #endif
 
+static void __init early_reserve_mem_dt(void)
+{
+	unsigned long i, len, dt_root;
+	const __be32 *prop;
+
+	dt_root = of_get_flat_dt_root();
+
+	prop = of_get_flat_dt_prop(dt_root, "reserved-ranges", &len);
+
+	if (!prop)
+		return;
+
+	DBG("Found new-style reserved-ranges\n");
+
+	/* Each reserved range is an (address,size) pair, 2 cells each,
+	 * totalling 4 cells per range. */
+	for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
+		u64 base, size;
+
+		base = of_read_number(prop + (i * 4) + 0, 2);
+		size = of_read_number(prop + (i * 4) + 2, 2);
+
+		if (size) {
+			DBG("reserving: %llx -> %llx\n", base, size);
+			memblock_reserve(base, size);
+		}
+	}
+}
+
 static void __init early_reserve_mem(void)
 {
 	u64 base, size;
@@ -574,12 +603,16 @@ static void __init early_reserve_mem(void)
 	self_size = initial_boot_params->totalsize;
 	memblock_reserve(self_base, self_size);
 
+	/* Look for the new "reserved-ranges" property in the DT */
+	early_reserve_mem_dt();
+
 #ifdef CONFIG_BLK_DEV_INITRD
-	/* then reserve the initrd, if any */
-	if (initrd_start && (initrd_end > initrd_start))
+	/* Then reserve the initrd, if any */
+	if (initrd_start && (initrd_end > initrd_start)) {
 		memblock_reserve(_ALIGN_DOWN(__pa(initrd_start), PAGE_SIZE),
 			 _ALIGN_UP(initrd_end, PAGE_SIZE) -
 			 _ALIGN_DOWN(initrd_start, PAGE_SIZE));
+	}
 #endif /* CONFIG_BLK_DEV_INITRD */
 
 #ifdef CONFIG_PPC32
@@ -591,6 +624,8 @@ static void __init early_reserve_mem(void)
 		u32 base_32, size_32;
 		u32 *reserve_map_32 = (u32 *)reserve_map;
 
+		DBG("Found old 32-bit reserve map\n");
+
 		while (1) {
 			base_32 = *(reserve_map_32++);
 			size_32 = *(reserve_map_32++);
@@ -605,6 +640,9 @@ static void __init early_reserve_mem(void)
 		return;
 	}
 #endif
+	DBG("Processing reserve map\n");
+
+	/* Handle the reserve map in the fdt blob if it exists */
 	while (1) {
 		base = *(reserve_map++);
 		size = *(reserve_map++);
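
The new early_reserve_mem_dt() above treats each reserved range as two 64-bit values stored as four big-endian 32-bit cells, which is what of_read_number(prop, 2) decodes. A user-space sketch of that decoding; the sample property and the read_number() helper are invented:

    #include <stdio.h>
    #include <stdint.h>
    #include <arpa/inet.h>		/* ntohl()/htonl(), assuming a POSIX host */

    static uint64_t read_number(const uint32_t *cells, int n)
    {
    	uint64_t v = 0;

    	while (n--)
    		v = (v << 32) | ntohl(*cells++);
    	return v;
    }

    int main(void)
    {
    	/* one range: base 0x1_0000_0000, size 0x20000, stored big-endian */
    	const uint32_t prop[] = { htonl(0x1), htonl(0x0),
    				  htonl(0x0), htonl(0x20000) };
    	int i, nranges = sizeof(prop) / (sizeof(prop[0]) * 4);

    	for (i = 0; i < nranges; i++) {
    		uint64_t base = read_number(prop + i * 4, 2);
    		uint64_t size = read_number(prop + i * 4 + 2, 2);

    		printf("reserve %#llx + %#llx\n",
    		       (unsigned long long)base, (unsigned long long)size);
    	}
    	return 0;
    }
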
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 5eccda9fd33f..607902424e73 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -644,7 +644,8 @@ unsigned char ibm_architecture_vec[] = {
 	W(0xfffe0000), W(0x003a0000),	/* POWER5/POWER5+ */
 	W(0xffff0000), W(0x003e0000),	/* POWER6 */
 	W(0xffff0000), W(0x003f0000),	/* POWER7 */
-	W(0xffff0000), W(0x004b0000),	/* POWER8 */
+	W(0xffff0000), W(0x004b0000),	/* POWER8E */
+	W(0xffff0000), W(0x004d0000),	/* POWER8 */
 	W(0xffffffff), W(0x0f000004),	/* all 2.07-compliant */
 	W(0xffffffff), W(0x0f000003),	/* all 2.06-compliant */
 	W(0xffffffff), W(0x0f000002),	/* all 2.05-compliant */
@@ -706,7 +707,7 @@ unsigned char ibm_architecture_vec[] = {
  * must match by the macro below. Update the definition if
  * the structure layout changes.
  */
-#define IBM_ARCH_VEC_NRCORES_OFFSET	117
+#define IBM_ARCH_VEC_NRCORES_OFFSET	125
 	W(NR_CPUS),			/* number of cores supported */
 	0,
 	0,
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 98c2fc198712..9a0d24c390a3 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -975,16 +975,12 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 	hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
 	hw_brk.len = 8;
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
-	if (ptrace_get_breakpoints(task) < 0)
-		return -ESRCH;
-
 	bp = thread->ptrace_bps[0];
 	if ((!data) || !(hw_brk.type & HW_BRK_TYPE_RDWR)) {
 		if (bp) {
 			unregister_hw_breakpoint(bp);
 			thread->ptrace_bps[0] = NULL;
 		}
-		ptrace_put_breakpoints(task);
 		return 0;
 	}
 	if (bp) {
@@ -997,11 +993,9 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 
 		ret = modify_user_hw_breakpoint(bp, &attr);
 		if (ret) {
-			ptrace_put_breakpoints(task);
 			return ret;
 		}
 		thread->ptrace_bps[0] = bp;
-		ptrace_put_breakpoints(task);
 		thread->hw_brk = hw_brk;
 		return 0;
 	}
@@ -1016,12 +1010,9 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 					       ptrace_triggered, NULL, task);
 	if (IS_ERR(bp)) {
 		thread->ptrace_bps[0] = NULL;
-		ptrace_put_breakpoints(task);
 		return PTR_ERR(bp);
 	}
 
-	ptrace_put_breakpoints(task);
-
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
 	task->thread.hw_brk = hw_brk;
 #else /* CONFIG_PPC_ADV_DEBUG_REGS */
@@ -1440,24 +1431,19 @@ static long ppc_set_hwdebug(struct task_struct *child,
 	if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
 		brk.type |= HW_BRK_TYPE_WRITE;
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
-	if (ptrace_get_breakpoints(child) < 0)
-		return -ESRCH;
-
 	/*
 	 * Check if the request is for 'range' breakpoints. We can
 	 * support it if range < 8 bytes.
 	 */
-	if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) {
+	if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
 		len = bp_info->addr2 - bp_info->addr;
-	} else if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) {
-		ptrace_put_breakpoints(child);
+	else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
+		len = 1;
+	else
 		return -EINVAL;
-	}
 	bp = thread->ptrace_bps[0];
-	if (bp) {
-		ptrace_put_breakpoints(child);
+	if (bp)
 		return -ENOSPC;
-	}
 
 	/* Create a new breakpoint request if one doesn't exist already */
 	hw_breakpoint_init(&attr);
@@ -1469,11 +1455,9 @@ static long ppc_set_hwdebug(struct task_struct *child,
 					       ptrace_triggered, NULL, child);
 	if (IS_ERR(bp)) {
 		thread->ptrace_bps[0] = NULL;
-		ptrace_put_breakpoints(child);
 		return PTR_ERR(bp);
 	}
 
-	ptrace_put_breakpoints(child);
 	return 1;
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
 
@@ -1517,16 +1501,12 @@ static long ppc_del_hwdebug(struct task_struct *child, long data)
 		return -EINVAL;
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
-	if (ptrace_get_breakpoints(child) < 0)
-		return -ESRCH;
-
 	bp = thread->ptrace_bps[0];
 	if (bp) {
 		unregister_hw_breakpoint(bp);
 		thread->ptrace_bps[0] = NULL;
 	} else
 		ret = -ENOENT;
-	ptrace_put_breakpoints(child);
 	return ret;
 #else /* CONFIG_HAVE_HW_BREAKPOINT */
 	if (child->thread.hw_brk.address == 0)
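
Besides dropping the ptrace_get/put_breakpoints() pairs, the ppc_set_hwdebug() hunk restructures the address-mode check so exact-mode requests get an explicit one-byte length instead of falling through. A small sketch of the resulting three-way selection; the mode constants are illustrative placeholders:

    #include <stdio.h>
    #include <errno.h>

    enum { MODE_EXACT, MODE_RANGE_INCLUSIVE, MODE_OTHER };

    static long breakpoint_len(int mode, unsigned long addr, unsigned long addr2)
    {
    	if (mode == MODE_RANGE_INCLUSIVE)
    		return addr2 - addr;	/* range breakpoint, length in bytes */
    	else if (mode == MODE_EXACT)
    		return 1;		/* exact match: one byte */
    	else
    		return -EINVAL;
    }

    int main(void)
    {
    	printf("%ld\n", breakpoint_len(MODE_RANGE_INCLUSIVE, 0x1000, 0x1004)); /* 4 */
    	printf("%ld\n", breakpoint_len(MODE_EXACT, 0x1000, 0));	/* 1 */
    	printf("%ld\n", breakpoint_len(MODE_OTHER, 0, 0));	/* -EINVAL */
    	return 0;
    }
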
diff --git a/arch/powerpc/kernel/reloc_32.S b/arch/powerpc/kernel/reloc_32.S
index ef46ba6e094f..f366fedb0872 100644
--- a/arch/powerpc/kernel/reloc_32.S
+++ b/arch/powerpc/kernel/reloc_32.S
@@ -166,7 +166,7 @@ ha16:
 	/* R_PPC_ADDR16_LO */
 lo16:
 	cmpwi	r4, R_PPC_ADDR16_LO
-	bne	nxtrela
+	bne	unknown_type
 	lwz	r4, 0(r9)	/* r_offset */
 	lwz	r0, 8(r9)	/* r_addend */
 	add	r0, r0, r3
@@ -191,6 +191,7 @@ nxtrela:
 	dcbst	r4,r7
 	sync			/* Ensure the data is flushed before icbi */
 	icbi	r4,r7
+unknown_type:
 	cmpwi	r8, 0		/* relasz = 0 ? */
 	ble	done
 	add	r9, r9, r6	/* move to next entry in the .rela table */
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 52add6f3e201..80b5ef403f68 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1172,7 +1172,7 @@ int __init early_init_dt_scan_rtas(unsigned long node,
 static arch_spinlock_t timebase_lock;
 static u64 timebase = 0;
 
-void __cpuinit rtas_give_timebase(void)
+void rtas_give_timebase(void)
 {
 	unsigned long flags;
 
@@ -1189,7 +1189,7 @@ void __cpuinit rtas_give_timebase(void)
 	local_irq_restore(flags);
 }
 
-void __cpuinit rtas_take_timebase(void)
+void rtas_take_timebase(void)
 {
 	while (!timebase)
 		barrier();
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index e379d3fd1694..389fb8077cc9 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -76,7 +76,7 @@
 #endif
 
 int boot_cpuid = 0;
-int __initdata spinning_secondaries;
+int spinning_secondaries;
 u64 ppc64_pft_size;
 
 /* Pick defaults since we might want to patch instructions
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 201385c3a1ae..0f83122e6676 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -407,7 +407,8 @@ inline unsigned long copy_transact_fpr_from_user(struct task_struct *task,
  * altivec/spe instructions at some point.
  */
 static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
-		int sigret, int ctx_has_vsx_region)
+			  struct mcontext __user *tm_frame, int sigret,
+			  int ctx_has_vsx_region)
 {
 	unsigned long msr = regs->msr;
 
@@ -475,6 +476,12 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
 
 	if (__put_user(msr, &frame->mc_gregs[PT_MSR]))
 		return 1;
+	/* We need to write 0 to the MSR top 32 bits in the tm frame so that
+	 * we can check it on restore to see if TM is active.
+	 */
+	if (tm_frame && __put_user(0, &tm_frame->mc_gregs[PT_MSR]))
+		return 1;
+
 	if (sigret) {
 		/* Set up the sigreturn trampoline: li r0,sigret; sc */
 		if (__put_user(0x38000000UL + sigret, &frame->tramp[0])
@@ -747,7 +754,7 @@ static long restore_tm_user_regs(struct pt_regs *regs,
 			      struct mcontext __user *tm_sr)
 {
 	long err;
-	unsigned long msr;
+	unsigned long msr, msr_hi;
 #ifdef CONFIG_VSX
 	int i;
 #endif
@@ -852,8 +859,11 @@ static long restore_tm_user_regs(struct pt_regs *regs,
 	tm_enable();
 	/* This loads the checkpointed FP/VEC state, if used */
 	tm_recheckpoint(&current->thread, msr);
-	/* The task has moved into TM state S, so ensure MSR reflects this */
-	regs->msr = (regs->msr & ~MSR_TS_MASK) | MSR_TS_S;
+	/* Get the top half of the MSR */
+	if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR]))
+		return 1;
+	/* Pull in MSR TM from user context */
+	regs->msr = (regs->msr & ~MSR_TS_MASK) | ((msr_hi<<32) & MSR_TS_MASK);
 
 	/* This loads the speculative FP/VEC state, if used */
 	if (msr & MSR_FP) {
@@ -952,6 +962,7 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
 {
 	struct rt_sigframe __user *rt_sf;
 	struct mcontext __user *frame;
+	struct mcontext __user *tm_frame = NULL;
 	void __user *addr;
 	unsigned long newsp = 0;
 	int sigret;
@@ -985,23 +996,24 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
 	}
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	tm_frame = &rt_sf->uc_transact.uc_mcontext;
 	if (MSR_TM_ACTIVE(regs->msr)) {
-		if (save_tm_user_regs(regs, &rt_sf->uc.uc_mcontext,
-				      &rt_sf->uc_transact.uc_mcontext, sigret))
+		if (save_tm_user_regs(regs, frame, tm_frame, sigret))
 			goto badframe;
 	}
 	else
 #endif
-	if (save_user_regs(regs, frame, sigret, 1))
+	{
+		if (save_user_regs(regs, frame, tm_frame, sigret, 1))
 			goto badframe;
+	}
 	regs->link = tramp;
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	if (MSR_TM_ACTIVE(regs->msr)) {
 		if (__put_user((unsigned long)&rt_sf->uc_transact,
 			       &rt_sf->uc.uc_link)
-		    || __put_user(to_user_ptr(&rt_sf->uc_transact.uc_mcontext),
-				  &rt_sf->uc_transact.uc_regs))
+		    || __put_user((unsigned long)tm_frame, &rt_sf->uc_transact.uc_regs))
 			goto badframe;
 	}
 	else
@@ -1170,7 +1182,7 @@ long sys_swapcontext(struct ucontext __user *old_ctx,
 	mctx = (struct mcontext __user *)
 		((unsigned long) &old_ctx->uc_mcontext & ~0xfUL);
 	if (!access_ok(VERIFY_WRITE, old_ctx, ctx_size)
-	    || save_user_regs(regs, mctx, 0, ctx_has_vsx_region)
+	    || save_user_regs(regs, mctx, NULL, 0, ctx_has_vsx_region)
 	    || put_sigset_t(&old_ctx->uc_sigmask, &current->blocked)
 	    || __put_user(to_user_ptr(mctx), &old_ctx->uc_regs))
 		return -EFAULT;
@@ -1233,7 +1245,7 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
 		if (__get_user(msr_hi, &mcp->mc_gregs[PT_MSR]))
 			goto bad;
 
-		if (MSR_TM_SUSPENDED(msr_hi<<32)) {
+		if (MSR_TM_ACTIVE(msr_hi<<32)) {
 			/* We only recheckpoint on return if we're in a
 			 * transaction.
 			 */
@@ -1392,6 +1404,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka,
 {
 	struct sigcontext __user *sc;
 	struct sigframe __user *frame;
+	struct mcontext __user *tm_mctx = NULL;
 	unsigned long newsp = 0;
 	int sigret;
 	unsigned long tramp;
@@ -1425,6 +1438,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka,
 	}
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	tm_mctx = &frame->mctx_transact;
 	if (MSR_TM_ACTIVE(regs->msr)) {
 		if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact,
 				      sigret))
@@ -1432,8 +1446,10 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka,
 	}
 	else
 #endif
-	if (save_user_regs(regs, &frame->mctx, sigret, 1))
+	{
+		if (save_user_regs(regs, &frame->mctx, tm_mctx, sigret, 1))
 			goto badframe;
+	}
 
 	regs->link = tramp;
 
@@ -1481,16 +1497,22 @@ badframe:
 long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
 		       struct pt_regs *regs)
 {
+	struct sigframe __user *sf;
 	struct sigcontext __user *sc;
 	struct sigcontext sigctx;
 	struct mcontext __user *sr;
 	void __user *addr;
 	sigset_t set;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	struct mcontext __user *mcp, *tm_mcp;
+	unsigned long msr_hi;
+#endif
 
 	/* Always make any pending restarted system calls return -EINTR */
 	current_thread_info()->restart_block.fn = do_no_restart_syscall;
 
-	sc = (struct sigcontext __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE);
+	sf = (struct sigframe __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE);
+	sc = &sf->sctx;
 	addr = sc;
 	if (copy_from_user(&sigctx, sc, sizeof(sigctx)))
 		goto badframe;
@@ -1507,11 +1529,25 @@ long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
 #endif
 	set_current_blocked(&set);
 
-	sr = (struct mcontext __user *)from_user_ptr(sigctx.regs);
-	addr = sr;
-	if (!access_ok(VERIFY_READ, sr, sizeof(*sr))
-	    || restore_user_regs(regs, sr, 1))
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	mcp = (struct mcontext __user *)&sf->mctx;
+	tm_mcp = (struct mcontext __user *)&sf->mctx_transact;
+	if (__get_user(msr_hi, &tm_mcp->mc_gregs[PT_MSR]))
 		goto badframe;
+	if (MSR_TM_ACTIVE(msr_hi<<32)) {
+		if (!cpu_has_feature(CPU_FTR_TM))
+			goto badframe;
+		if (restore_tm_user_regs(regs, mcp, tm_mcp))
+			goto badframe;
+	} else
+#endif
+	{
+		sr = (struct mcontext __user *)from_user_ptr(sigctx.regs);
+		addr = sr;
+		if (!access_ok(VERIFY_READ, sr, sizeof(*sr))
+		    || restore_user_regs(regs, sr, 1))
+			goto badframe;
+	}
 
 	set_thread_flag(TIF_RESTOREALL);
 	return 0;
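
The 32-bit signal frame stores only register-sized MSR words, so the transactional-state (TS) bits have to be rebuilt by shifting the saved top word back into a 64-bit MSR, exactly as the restore_tm_user_regs() hunk above does. A sketch of that arithmetic; TS sits at bits 33-34 in the kernel's numbering, and the sample values are invented:

    #include <stdio.h>
    #include <stdint.h>

    #define MSR_TS_MASK (3ULL << 33)	/* transaction state field */

    int main(void)
    {
    	uint64_t regs_msr = 0x800000000280f032ULL;	/* made-up live MSR */
    	uint32_t msr_hi   = 0x00000004;	/* top MSR word saved in the frame */
    	uint64_t restored;

    	/* same arithmetic as the patch: clear TS, then pull it from msr_hi */
    	restored = (regs_msr & ~MSR_TS_MASK) |
    		   (((uint64_t)msr_hi << 32) & MSR_TS_MASK);
    	printf("TS field: %llu\n",
    	       (unsigned long long)((restored & MSR_TS_MASK) >> 33));
    	return 0;
    }
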
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 345947367ec0..887e99d85bc2 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -410,6 +410,10 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
 
 	/* get MSR separately, transfer the LE bit if doing signal return */
 	err |= __get_user(msr, &sc->gp_regs[PT_MSR]);
+	/* pull in MSR TM from user context */
+	regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK);
+
+	/* pull in MSR LE from user context */
 	regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
 
 	/* The following non-GPR non-FPR non-VR state is also checkpointed: */
@@ -505,8 +509,6 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
 	tm_enable();
 	/* This loads the checkpointed FP/VEC state, if used */
 	tm_recheckpoint(&current->thread, msr);
-	/* The task has moved into TM state S, so ensure MSR reflects this: */
-	regs->msr = (regs->msr & ~MSR_TS_MASK) | __MASK(33);
 
 	/* This loads the speculative FP/VEC state, if used */
 	if (msr & MSR_FP) {
@@ -654,7 +656,7 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5,
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
 		goto badframe;
-	if (MSR_TM_SUSPENDED(msr)) {
+	if (MSR_TM_ACTIVE(msr)) {
 		/* We recheckpoint on return. */
 		struct ucontext __user *uc_transact;
 		if (__get_user(uc_transact, &uc->uc_link))
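
Both sigreturn paths now test MSR_TM_ACTIVE() rather than MSR_TM_SUSPENDED(), because the TS field has two active states (suspended and fully transactional) and either one needs the recheckpoint. A sketch of the two predicates over the three TS states; the macros are meant to mirror reg.h:

    #include <stdio.h>
    #include <stdint.h>

    #define MSR_TS_S	(1ULL << 33)	/* suspended */
    #define MSR_TS_T	(1ULL << 34)	/* transactional */
    #define MSR_TS_MASK	(MSR_TS_S | MSR_TS_T)

    #define MSR_TM_SUSPENDED(x)	(((x) & MSR_TS_MASK) == MSR_TS_S)
    #define MSR_TM_ACTIVE(x)	(((x) & MSR_TS_MASK) != 0)

    int main(void)
    {
    	const char *names[] = { "none", "suspended", "transactional" };
    	uint64_t states[] = { 0, MSR_TS_S, MSR_TS_T };
    	int i;

    	/* the old test misses the "transactional" row; the new one does not */
    	for (i = 0; i < 3; i++)
    		printf("%-13s suspended=%d active=%d\n", names[i],
    		       (int)MSR_TM_SUSPENDED(states[i]),
    		       (int)MSR_TM_ACTIVE(states[i]));
    	return 0;
    }
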
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index ee7ac5e6e28a..38b0ba65a735 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -480,7 +480,7 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
 	secondary_ti = current_set[cpu] = ti;
 }
 
-int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle)
+int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
 	int rc, c;
 
@@ -610,7 +610,7 @@ static struct device_node *cpu_to_l2cache(int cpu)
 }
 
 /* Activate a secondary processor. */
-__cpuinit void start_secondary(void *unused)
+void start_secondary(void *unused)
 {
 	unsigned int cpu = smp_processor_id();
 	struct device_node *l2_cache;
@@ -637,12 +637,10 @@ __cpuinit void start_secondary(void *unused)
 
 	vdso_getcpu_init();
 #endif
-	notify_cpu_starting(cpu);
-	set_cpu_online(cpu, true);
 	/* Update sibling maps */
 	base = cpu_first_thread_sibling(cpu);
 	for (i = 0; i < threads_per_core; i++) {
-		if (cpu_is_offline(base + i))
+		if (cpu_is_offline(base + i) && (cpu != base + i))
 			continue;
 		cpumask_set_cpu(cpu, cpu_sibling_mask(base + i));
 		cpumask_set_cpu(base + i, cpu_sibling_mask(cpu));
@@ -667,6 +665,10 @@ __cpuinit void start_secondary(void *unused)
 	}
 	of_node_put(l2_cache);
 
+	smp_wmb();
+	notify_cpu_starting(cpu);
+	set_cpu_online(cpu, true);
+
 	local_irq_enable();
 
 	cpu_startup_entry(CPUHP_ONLINE);
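
The start_secondary() reordering is a publish-after-initialise fix: the sibling masks must be fully written before set_cpu_online() makes the CPU visible, with smp_wmb() ordering the two. A stand-alone analogue using a C11 release fence in place of smp_wmb(); the structure is invented:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct cpu_state {
    	int sibling_mask;	/* stands in for the real per-CPU setup */
    	atomic_bool online;
    };

    static void bring_up(struct cpu_state *cpu)
    {
    	cpu->sibling_mask = 0x3;	/* initialise everything first */
    	atomic_thread_fence(memory_order_release);	/* smp_wmb() analogue */
    	atomic_store_explicit(&cpu->online, true, memory_order_relaxed);
    }

    int main(void)
    {
    	struct cpu_state cpu = { 0, false };

    	bring_up(&cpu);
    	printf("online=%d mask=%#x\n", (int)cpu.online, cpu.sibling_mask);
    	return 0;
    }
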
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index e68a84568b8b..27a90b99ef67 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -341,7 +341,7 @@ static struct device_attribute pa6t_attrs[] = {
 #endif /* HAS_PPC_PMC_PA6T */
 #endif /* HAS_PPC_PMC_CLASSIC */
 
-static void __cpuinit register_cpu_online(unsigned int cpu)
+static void register_cpu_online(unsigned int cpu)
 {
 	struct cpu *c = &per_cpu(cpu_devices, cpu);
 	struct device *s = &c->dev;
@@ -502,7 +502,7 @@ ssize_t arch_cpu_release(const char *buf, size_t count)
 
 #endif /* CONFIG_HOTPLUG_CPU */
 
-static int __cpuinit sysfs_cpu_notify(struct notifier_block *self,
+static int sysfs_cpu_notify(struct notifier_block *self,
 				      unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (unsigned int)(long)hcpu;
@@ -522,7 +522,7 @@ static int __cpuinit sysfs_cpu_notify(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __cpuinitdata sysfs_cpu_nb = {
+static struct notifier_block sysfs_cpu_nb = {
 	.notifier_call = sysfs_cpu_notify,
 };
 
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 5fc29ad7e26f..65ab9e909377 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -631,7 +631,6 @@ static int __init get_freq(char *name, int cells, unsigned long *val)
 	return found;
 }
 
-/* should become __cpuinit when secondary_cpu_time_init also is */
 void start_cpu_decrementer(void)
 {
 #if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 2da67e7a16d5..0554d1f6d70d 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -112,9 +112,18 @@ _GLOBAL(tm_reclaim)
 	std	r3, STACK_PARAM(0)(r1)
 	SAVE_NVGPRS(r1)
 
+	/* We need to setup MSR for VSX register save instructions.  Here we
+	 * also clear the MSR RI since when we do the treclaim, we won't have a
+	 * valid kernel pointer for a while.  We clear RI here as it avoids
+	 * adding another mtmsr closer to the treclaim.  This makes the region
+	 * marked as non-recoverable wider than it needs to be but it saves on
+	 * inserting another mtmsrd later.
+	 */
 	mfmsr	r14
 	mr	r15, r14
 	ori	r15, r15, MSR_FP
+	li	r16, MSR_RI
+	andc	r15, r15, r16
 	oris	r15, r15, MSR_VEC@h
 #ifdef CONFIG_VSX
 	BEGIN_FTR_SECTION
@@ -224,6 +233,16 @@ dont_backup_fp:
 	std	r5, _CCR(r7)
 	std	r6, _XER(r7)
 
+
+	/* ******************** TAR, PPR, DSCR ********** */
+	mfspr	r3, SPRN_TAR
+	mfspr	r4, SPRN_PPR
+	mfspr	r5, SPRN_DSCR
+
+	std	r3, THREAD_TM_TAR(r12)
+	std	r4, THREAD_TM_PPR(r12)
+	std	r5, THREAD_TM_DSCR(r12)
+
 	/* MSR and flags:  We don't change CRs, and we don't need to alter
 	 * MSR.
 	 */
@@ -338,6 +357,16 @@ dont_restore_fp:
 	mtmsr	r6				/* FP/Vec off again! */
 
 restore_gprs:
+
+	/* ******************** TAR, PPR, DSCR ********** */
+	ld	r4, THREAD_TM_TAR(r3)
+	ld	r5, THREAD_TM_PPR(r3)
+	ld	r6, THREAD_TM_DSCR(r3)
+
+	mtspr	SPRN_TAR, r4
+	mtspr	SPRN_PPR, r5
+	mtspr	SPRN_DSCR, r6
+
 	/* ******************** CR,LR,CCR,MSR ********** */
 	ld	r3, _CTR(r7)
 	ld	r4, _LINK(r7)
@@ -349,9 +378,10 @@ restore_gprs:
 	mtcr	r5
 	mtxer	r6
 
-	/* MSR and flags:  We don't change CRs, and we don't need to alter
-	 * MSR.
+	/* Clear the MSR RI since we are about to change R1.  EE is already off
 	 */
+	li	r4, 0
+	mtmsrd	r4, 1
 
 	REST_4GPRS(0, r7)			/* GPR0-3 */
 	REST_GPR(4, r7)				/* GPR4-6 */
@@ -377,6 +407,10 @@ restore_gprs:
 	GET_PACA(r13)
 	GET_SCRATCH0(r1)
 
+	/* R1 is restored, so we are recoverable again.  EE is still off */
+	li	r4, MSR_RI
+	mtmsrd	r4, 1
+
 	REST_NVGPRS(r1)
 
 	addi	r1, r1, TM_FRAME_SIZE
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index c0e5caf8ccc7..e435bc089ea3 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -44,9 +44,7 @@
 #include <asm/machdep.h>
 #include <asm/rtas.h>
 #include <asm/pmc.h>
-#ifdef CONFIG_PPC32
 #include <asm/reg.h>
-#endif
 #ifdef CONFIG_PMAC_BACKLIGHT
 #include <asm/backlight.h>
 #endif
@@ -866,6 +864,10 @@ static int emulate_string_inst(struct pt_regs *regs, u32 instword)
 		u8 val;
 		u32 shift = 8 * (3 - (pos & 0x3));
 
+		/* if process is 32-bit, clear upper 32 bits of EA */
+		if ((regs->msr & MSR_64BIT) == 0)
+			EA &= 0xFFFFFFFF;
+
 		switch ((instword & PPC_INST_STRING_MASK)) {
 		case PPC_INST_LSWX:
 		case PPC_INST_LSWI:
@@ -1125,7 +1127,17 @@ void __kprobes program_check_exception(struct pt_regs *regs)
 	 * ESR_DST (!?) or 0.  In the process of chasing this with the
 	 * hardware people - not sure if it can happen on any illegal
 	 * instruction or only on FP instructions, whether there is a
-	 * pattern to occurrences etc. -dgibson 31/Mar/2003 */
+	 * pattern to occurrences etc. -dgibson 31/Mar/2003
+	 */
+
+	/*
+	 * If we support a HW FPU, we need to ensure the FP state
+	 * is flushed into the thread_struct before attempting
+	 * emulation
+	 */
+#ifdef CONFIG_PPC_FPU
+	flush_fp_to_thread(current);
+#endif
 	switch (do_mathemu(regs)) {
 	case 0:
 		emulate_single_step(regs);
@@ -1282,26 +1294,63 @@ void vsx_unavailable_exception(struct pt_regs *regs)
1282 die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT); 1294 die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT);
1283} 1295}
1284 1296
1285void tm_unavailable_exception(struct pt_regs *regs) 1297#ifdef CONFIG_PPC64
1298void facility_unavailable_exception(struct pt_regs *regs)
1286{ 1299{
1300 static char *facility_strings[] = {
1301 [FSCR_FP_LG] = "FPU",
1302 [FSCR_VECVSX_LG] = "VMX/VSX",
1303 [FSCR_DSCR_LG] = "DSCR",
1304 [FSCR_PM_LG] = "PMU SPRs",
1305 [FSCR_BHRB_LG] = "BHRB",
1306 [FSCR_TM_LG] = "TM",
1307 [FSCR_EBB_LG] = "EBB",
1308 [FSCR_TAR_LG] = "TAR",
1309 };
1310 char *facility = "unknown";
1311 u64 value;
1312 u8 status;
1313 bool hv;
1314
1315 hv = (regs->trap == 0xf80);
1316 if (hv)
1317 value = mfspr(SPRN_HFSCR);
1318 else
1319 value = mfspr(SPRN_FSCR);
1320
1321 status = value >> 56;
1322 if (status == FSCR_DSCR_LG) {
1323 /* User is acessing the DSCR. Set the inherit bit and allow
1324 * the user to set it directly in future by setting via the
1325 * H/FSCR DSCR bit.
1326 */
1327 current->thread.dscr_inherit = 1;
1328 if (hv)
1329 mtspr(SPRN_HFSCR, value | HFSCR_DSCR);
1330 else
1331 mtspr(SPRN_FSCR, value | FSCR_DSCR);
1332 return;
1333 }
1334
1335 if ((status < ARRAY_SIZE(facility_strings)) &&
1336 facility_strings[status])
1337 facility = facility_strings[status];
1338
1287 /* We restore the interrupt state now */ 1339 /* We restore the interrupt state now */
1288 if (!arch_irq_disabled_regs(regs)) 1340 if (!arch_irq_disabled_regs(regs))
1289 local_irq_enable(); 1341 local_irq_enable();
1290 1342
1291 /* Currently we never expect a TMU exception. Catch 1343 pr_err("%sFacility '%s' unavailable, exception at 0x%lx, MSR=%lx\n",
1292 * this and kill the process! 1344 hv ? "Hypervisor " : "", facility, regs->nip, regs->msr);
1293 */
1294 printk(KERN_EMERG "Unexpected TM unavailable exception at %lx "
1295 "(msr %lx)\n",
1296 regs->nip, regs->msr);
1297 1345
1298 if (user_mode(regs)) { 1346 if (user_mode(regs)) {
1299 _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); 1347 _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
1300 return; 1348 return;
1301 } 1349 }
1302 1350
1303 die("Unexpected TM unavailable exception", regs, SIGABRT); 1351 die("Unexpected facility unavailable exception", regs, SIGABRT);
1304} 1352}
1353#endif
1305 1354
1306#ifdef CONFIG_PPC_TRANSACTIONAL_MEM 1355#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1307 1356
@@ -1396,8 +1445,7 @@ void performance_monitor_exception(struct pt_regs *regs)
 void SoftwareEmulation(struct pt_regs *regs)
 {
 	extern int do_mathemu(struct pt_regs *);
-	extern int Soft_emulate_8xx(struct pt_regs *);
-#if defined(CONFIG_MATH_EMULATION) || defined(CONFIG_8XX_MINIMAL_FPEMU)
+#if defined(CONFIG_MATH_EMULATION)
 	int errcode;
 #endif
 
@@ -1430,23 +1478,6 @@ void SoftwareEmulation(struct pt_regs *regs)
 		_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
 		return;
 	}
-
-#elif defined(CONFIG_8XX_MINIMAL_FPEMU)
-	errcode = Soft_emulate_8xx(regs);
-	if (errcode >= 0)
-		PPC_WARN_EMULATED(8xx, regs);
-
-	switch (errcode) {
-	case 0:
-		emulate_single_step(regs);
-		return;
-	case 1:
-		_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
-		return;
-	case -EFAULT:
-		_exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
-		return;
-	}
 #else
 	_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
 #endif
@@ -1796,8 +1827,6 @@ struct ppc_emulated ppc_emulated = {
 	WARN_EMULATED_SETUP(unaligned),
 #ifdef CONFIG_MATH_EMULATION
 	WARN_EMULATED_SETUP(math),
-#elif defined(CONFIG_8XX_MINIMAL_FPEMU)
-	WARN_EMULATED_SETUP(8xx),
 #endif
 #ifdef CONFIG_VSX
 	WARN_EMULATED_SETUP(vsx),
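
In the facility_unavailable_exception() hunk above, the top byte of FSCR/HFSCR identifies the facility that trapped and a sparse string table maps that status to a name. A user-space sketch of the decode; the status values are placeholders for the kernel's FSCR_*_LG constants and the register value is invented:

    #include <stdio.h>
    #include <stdint.h>

    #define STATUS_DSCR	2	/* placeholder facility numbers */
    #define STATUS_TAR	8
    #define ARRAY_SIZE(a)	(sizeof(a) / sizeof((a)[0]))

    static const char *facility_strings[16] = {
    	[STATUS_DSCR] = "DSCR",
    	[STATUS_TAR]  = "TAR",
    };

    int main(void)
    {
    	uint64_t fscr = (uint64_t)STATUS_TAR << 56;	/* made-up trap value */
    	uint8_t status = fscr >> 56;	/* top byte names the facility */
    	const char *facility = "unknown";

    	if (status < ARRAY_SIZE(facility_strings) && facility_strings[status])
    		facility = facility_strings[status];
    	printf("Facility '%s' unavailable (status %u)\n", facility, status);
    	return 0;
    }
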
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index 9d3fdcd66290..a15837519dca 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -50,7 +50,7 @@ void __init udbg_early_init(void)
 	udbg_init_debug_beat();
 #elif defined(CONFIG_PPC_EARLY_DEBUG_PAS_REALMODE)
 	udbg_init_pas_realmode();
-#elif defined(CONFIG_BOOTX_TEXT)
+#elif defined(CONFIG_PPC_EARLY_DEBUG_BOOTX)
 	udbg_init_btext();
 #elif defined(CONFIG_PPC_EARLY_DEBUG_44x)
 	/* PPC44x debug */
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index d4f463ac65b1..1d9c92621b36 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -711,7 +711,7 @@ static void __init vdso_setup_syscall_map(void)
 }
 
 #ifdef CONFIG_PPC64
-int __cpuinit vdso_getcpu_init(void)
+int vdso_getcpu_init(void)
 {
 	unsigned long cpu, node, val;
 
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 654e479802f2..f096e72262f4 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -38,9 +38,6 @@ jiffies = jiffies_64 + 4;
 #endif
 SECTIONS
 {
-	. = 0;
-	reloc_start = .;
-
 	. = KERNELBASE;
 
 /*
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 422de3f4d46c..008cd856c5b5 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -5,9 +5,10 @@
 subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 
 ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
+KVM := ../../../virt/kvm
 
-common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o \
-		eventfd.o)
+common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
+		$(KVM)/eventfd.o
 
 CFLAGS_44x_tlb.o  := -I.
 CFLAGS_e500_mmu.o := -I.
@@ -53,7 +54,7 @@ kvm-e500mc-objs := \
 kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
 
 kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
-	../../../virt/kvm/coalesced_mmio.o \
+	$(KVM)/coalesced_mmio.o \
 	fpu.o \
 	book3s_paired_singles.o \
 	book3s_pr.o \
@@ -86,8 +87,8 @@ kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
 	book3s_xics.o
 
 kvm-book3s_64-module-objs := \
-	../../../virt/kvm/kvm_main.o \
-	../../../virt/kvm/eventfd.o \
+	$(KVM)/kvm_main.o \
+	$(KVM)/eventfd.o \
 	powerpc.o \
 	emulate.o \
 	book3s.o \
@@ -111,7 +112,7 @@ kvm-book3s_32-objs := \
 kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
 
 kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o
-kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(addprefix ../../../virt/kvm/, irqchip.o)
+kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o
 
 kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
 
117 118
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index b871721c0050..739bfbadb85e 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -26,6 +26,7 @@
 #include <asm/tlbflush.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
+#include <asm/mmu-hash64.h>
 
 /* #define DEBUG_MMU */
 
@@ -76,6 +77,24 @@ static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
 	return NULL;
 }
 
+static int kvmppc_slb_sid_shift(struct kvmppc_slb *slbe)
+{
+	return slbe->tb ? SID_SHIFT_1T : SID_SHIFT;
+}
+
+static u64 kvmppc_slb_offset_mask(struct kvmppc_slb *slbe)
+{
+	return (1ul << kvmppc_slb_sid_shift(slbe)) - 1;
+}
+
+static u64 kvmppc_slb_calc_vpn(struct kvmppc_slb *slb, gva_t eaddr)
+{
+	eaddr &= kvmppc_slb_offset_mask(slb);
+
+	return (eaddr >> VPN_SHIFT) |
+		((slb->vsid) << (kvmppc_slb_sid_shift(slb) - VPN_SHIFT));
+}
+
 static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
 					 bool data)
 {
@@ -85,11 +104,7 @@ static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
 	if (!slb)
 		return 0;
 
-	if (slb->tb)
-		return (((u64)eaddr >> 12) & 0xfffffff) |
-		    (((u64)slb->vsid) << 28);
-
-	return (((u64)eaddr >> 12) & 0xffff) | (((u64)slb->vsid) << 16);
+	return kvmppc_slb_calc_vpn(slb, eaddr);
 }
 
 static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe)
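
kvmppc_slb_calc_vpn() generalises the old hard-coded 256M-segment arithmetic: the segment offset and the VSID are combined with shifts that depend on the segment size. A sketch of the computation using SID_SHIFT 28, SID_SHIFT_1T 40 and VPN_SHIFT 12 as in mmu-hash64.h; the sample EA and VSID are invented:

    #include <stdio.h>
    #include <stdint.h>

    #define SID_SHIFT	28	/* 256M segment */
    #define SID_SHIFT_1T	40	/* 1T segment */
    #define VPN_SHIFT	12

    static uint64_t calc_vpn(uint64_t vsid, uint64_t ea, int one_t)
    {
    	int sid_shift = one_t ? SID_SHIFT_1T : SID_SHIFT;
    	uint64_t offset = ea & ((1ULL << sid_shift) - 1);	/* offset mask */

    	return (offset >> VPN_SHIFT) | (vsid << (sid_shift - VPN_SHIFT));
    }

    int main(void)
    {
    	uint64_t ea = 0x123456789aULL, vsid = 0x42;

    	printf("256M vpn: %#llx\n", (unsigned long long)calc_vpn(vsid, ea, 0));
    	printf("1T   vpn: %#llx\n", (unsigned long long)calc_vpn(vsid, ea, 1));
    	return 0;
    }
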
@@ -100,7 +115,8 @@ static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe)
 static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr)
 {
 	int p = kvmppc_mmu_book3s_64_get_pagesize(slbe);
-	return ((eaddr & 0xfffffff) >> p);
+
+	return ((eaddr & kvmppc_slb_offset_mask(slbe)) >> p);
 }
 
 static hva_t kvmppc_mmu_book3s_64_get_pteg(
@@ -109,13 +125,15 @@ static hva_t kvmppc_mmu_book3s_64_get_pteg(
 				bool second)
 {
 	u64 hash, pteg, htabsize;
-	u32 page;
+	u32 ssize;
 	hva_t r;
+	u64 vpn;
 
-	page = kvmppc_mmu_book3s_64_get_page(slbe, eaddr);
 	htabsize = ((1 << ((vcpu_book3s->sdr1 & 0x1f) + 11)) - 1);
 
-	hash = slbe->vsid ^ page;
+	vpn = kvmppc_slb_calc_vpn(slbe, eaddr);
+	ssize = slbe->tb ? MMU_SEGSIZE_1T : MMU_SEGSIZE_256M;
+	hash = hpt_hash(vpn, kvmppc_mmu_book3s_64_get_pagesize(slbe), ssize);
 	if (second)
 		hash = ~hash;
 	hash &= ((1ULL << 39ULL) - 1ULL);
@@ -146,7 +164,7 @@ static u64 kvmppc_mmu_book3s_64_get_avpn(struct kvmppc_slb *slbe, gva_t eaddr)
 	u64 avpn;
 
 	avpn = kvmppc_mmu_book3s_64_get_page(slbe, eaddr);
-	avpn |= slbe->vsid << (28 - p);
+	avpn |= slbe->vsid << (kvmppc_slb_sid_shift(slbe) - p);
 
 	if (p < 24)
 		avpn >>= ((80 - p) - 56) - 8;
@@ -167,7 +185,6 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 	int i;
 	u8 key = 0;
 	bool found = false;
-	bool perm_err = false;
 	int second = 0;
 	ulong mp_ea = vcpu->arch.magic_page_ea;
 
@@ -190,13 +207,15 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
190 if (!slbe) 207 if (!slbe)
191 goto no_seg_found; 208 goto no_seg_found;
192 209
210 avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr);
211 if (slbe->tb)
212 avpn |= SLB_VSID_B_1T;
213
193do_second: 214do_second:
194 ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second); 215 ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second);
195 if (kvm_is_error_hva(ptegp)) 216 if (kvm_is_error_hva(ptegp))
196 goto no_page_found; 217 goto no_page_found;
197 218
198 avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr);
199
200 if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) { 219 if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) {
201 printk(KERN_ERR "KVM can't copy data from 0x%lx!\n", ptegp); 220 printk(KERN_ERR "KVM can't copy data from 0x%lx!\n", ptegp);
202 goto no_page_found; 221 goto no_page_found;
@@ -219,7 +238,7 @@ do_second:
219 continue; 238 continue;
220 239
221 /* AVPN compare */ 240 /* AVPN compare */
222 if (HPTE_V_AVPN_VAL(avpn) == HPTE_V_AVPN_VAL(v)) { 241 if (HPTE_V_COMPARE(avpn, v)) {
223 u8 pp = (r & HPTE_R_PP) | key; 242 u8 pp = (r & HPTE_R_PP) | key;
224 int eaddr_mask = 0xFFF; 243 int eaddr_mask = 0xFFF;
225 244
@@ -248,11 +267,6 @@ do_second:
248 break; 267 break;
249 } 268 }
250 269
251 if (!gpte->may_read) {
252 perm_err = true;
253 continue;
254 }
255
256 dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx " 270 dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx "
257 "-> 0x%lx\n", 271 "-> 0x%lx\n",
258 eaddr, avpn, gpte->vpage, gpte->raddr); 272 eaddr, avpn, gpte->vpage, gpte->raddr);
@@ -281,6 +295,8 @@ do_second:
281 if (pteg[i+1] != oldr) 295 if (pteg[i+1] != oldr)
282 copy_to_user((void __user *)ptegp, pteg, sizeof(pteg)); 296 copy_to_user((void __user *)ptegp, pteg, sizeof(pteg));
283 297
298 if (!gpte->may_read)
299 return -EPERM;
284 return 0; 300 return 0;
285 } else { 301 } else {
286 dprintk("KVM MMU: No PTE found (ea=0x%lx sdr1=0x%llx " 302 dprintk("KVM MMU: No PTE found (ea=0x%lx sdr1=0x%llx "
@@ -296,13 +312,7 @@ do_second:
296 } 312 }
297 } 313 }
298 314
299
300no_page_found: 315no_page_found:
301
302
303 if (perm_err)
304 return -EPERM;
305
306 return -ENOENT; 316 return -ENOENT;
307 317
308no_seg_found: 318no_seg_found:
@@ -334,7 +344,7 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
334 slbe->large = (rs & SLB_VSID_L) ? 1 : 0; 344 slbe->large = (rs & SLB_VSID_L) ? 1 : 0;
335 slbe->tb = (rs & SLB_VSID_B_1T) ? 1 : 0; 345 slbe->tb = (rs & SLB_VSID_B_1T) ? 1 : 0;
336 slbe->esid = slbe->tb ? esid_1t : esid; 346 slbe->esid = slbe->tb ? esid_1t : esid;
337 slbe->vsid = rs >> 12; 347 slbe->vsid = (rs & ~SLB_VSID_B) >> (kvmppc_slb_sid_shift(slbe) - 16);
338 slbe->valid = (rb & SLB_ESID_V) ? 1 : 0; 348 slbe->valid = (rb & SLB_ESID_V) ? 1 : 0;
339 slbe->Ks = (rs & SLB_VSID_KS) ? 1 : 0; 349 slbe->Ks = (rs & SLB_VSID_KS) ? 1 : 0;
340 slbe->Kp = (rs & SLB_VSID_KP) ? 1 : 0; 350 slbe->Kp = (rs & SLB_VSID_KP) ? 1 : 0;
@@ -375,6 +385,7 @@ static u64 kvmppc_mmu_book3s_64_slbmfev(struct kvm_vcpu *vcpu, u64 slb_nr)
375static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea) 385static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea)
376{ 386{
377 struct kvmppc_slb *slbe; 387 struct kvmppc_slb *slbe;
388 u64 seg_size;
378 389
379 dprintk("KVM MMU: slbie(0x%llx)\n", ea); 390 dprintk("KVM MMU: slbie(0x%llx)\n", ea);
380 391
@@ -386,8 +397,11 @@ static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea)
386 dprintk("KVM MMU: slbie(0x%llx, 0x%llx)\n", ea, slbe->esid); 397 dprintk("KVM MMU: slbie(0x%llx, 0x%llx)\n", ea, slbe->esid);
387 398
388 slbe->valid = false; 399 slbe->valid = false;
400 slbe->orige = 0;
401 slbe->origv = 0;
389 402
390 kvmppc_mmu_map_segment(vcpu, ea); 403 seg_size = 1ull << kvmppc_slb_sid_shift(slbe);
404 kvmppc_mmu_flush_segment(vcpu, ea & ~(seg_size - 1), seg_size);
391} 405}
392 406
393static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu) 407static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu)
@@ -396,8 +410,11 @@ static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu)
396 410
397 dprintk("KVM MMU: slbia()\n"); 411 dprintk("KVM MMU: slbia()\n");
398 412
399 for (i = 1; i < vcpu->arch.slb_nr; i++) 413 for (i = 1; i < vcpu->arch.slb_nr; i++) {
400 vcpu->arch.slb[i].valid = false; 414 vcpu->arch.slb[i].valid = false;
415 vcpu->arch.slb[i].orige = 0;
416 vcpu->arch.slb[i].origv = 0;
417 }
401 418
402 if (vcpu->arch.shared->msr & MSR_IR) { 419 if (vcpu->arch.shared->msr & MSR_IR) {
403 kvmppc_mmu_flush_segments(vcpu); 420 kvmppc_mmu_flush_segments(vcpu);
@@ -467,8 +484,14 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
467 484
468 if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { 485 if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
469 slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea); 486 slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea);
470 if (slb) 487 if (slb) {
471 gvsid = slb->vsid; 488 gvsid = slb->vsid;
489 if (slb->tb) {
490 gvsid <<= SID_SHIFT_1T - SID_SHIFT;
491 gvsid |= esid & ((1ul << (SID_SHIFT_1T - SID_SHIFT)) - 1);
492 gvsid |= VSID_1T;
493 }
494 }
472 } 495 }
473 496
474 switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { 497 switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
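
The new kvmppc_slb_sid_shift()/kvmppc_slb_offset_mask()/kvmppc_slb_calc_vpn() helpers above collapse the old open-coded 256M-only shifts into segment-size-aware arithmetic, so ea_to_vp and the PTEG hash now handle 1T segments as well. As a cross-check, a minimal standalone sketch of the same VPN arithmetic; the constants match this tree's mmu-hash64.h (SID_SHIFT 28, SID_SHIFT_1T 40, VPN_SHIFT 12) and the struct is a pared-down stand-in for kvmppc_slb:

    #include <stdint.h>
    #include <stdio.h>

    #define SID_SHIFT     28        /* 256MB segments */
    #define SID_SHIFT_1T  40        /* 1TB segments */
    #define VPN_SHIFT     12

    struct slb { uint64_t vsid; int tb; };  /* tb: 1TB-segment flag */

    static int sid_shift(const struct slb *s)
    {
            return s->tb ? SID_SHIFT_1T : SID_SHIFT;
    }

    static uint64_t calc_vpn(const struct slb *s, uint64_t eaddr)
    {
            eaddr &= (1ull << sid_shift(s)) - 1;    /* offset within the segment */
            return (eaddr >> VPN_SHIFT) |
                   (s->vsid << (sid_shift(s) - VPN_SHIFT));
    }

    int main(void)
    {
            struct slb s256 = { .vsid = 0x123, .tb = 0 };
            struct slb s1t  = { .vsid = 0x123, .tb = 1 };
            uint64_t ea = 0x1234567000ull;

            /* same EA and VSID: the segment size decides where the split falls */
            printf("256M vpn=0x%llx\n", (unsigned long long)calc_vpn(&s256, ea));
            printf("1T   vpn=0x%llx\n", (unsigned long long)calc_vpn(&s1t, ea));
            return 0;
    }
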
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index 3a9a1aceb14f..e5240524bf6c 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -34,7 +34,7 @@
34void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) 34void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
35{ 35{
36 ppc_md.hpte_invalidate(pte->slot, pte->host_vpn, 36 ppc_md.hpte_invalidate(pte->slot, pte->host_vpn,
37 MMU_PAGE_4K, MMU_SEGSIZE_256M, 37 MMU_PAGE_4K, MMU_PAGE_4K, MMU_SEGSIZE_256M,
38 false); 38 false);
39} 39}
40 40
@@ -301,6 +301,23 @@ out:
301 return r; 301 return r;
302} 302}
303 303
304void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong ea, ulong seg_size)
305{
306 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
307 ulong seg_mask = -seg_size;
308 int i;
309
310 for (i = 1; i < svcpu->slb_max; i++) {
311 if ((svcpu->slb[i].esid & SLB_ESID_V) &&
312 (svcpu->slb[i].esid & seg_mask) == ea) {
313 /* Invalidate this entry */
314 svcpu->slb[i].esid = 0;
315 }
316 }
317
318 svcpu_put(svcpu);
319}
320
304void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) 321void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
305{ 322{
306 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 323 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
@@ -325,9 +342,9 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
325 return -1; 342 return -1;
326 vcpu3s->context_id[0] = err; 343 vcpu3s->context_id[0] = err;
327 344
328 vcpu3s->proto_vsid_max = ((vcpu3s->context_id[0] + 1) 345 vcpu3s->proto_vsid_max = ((u64)(vcpu3s->context_id[0] + 1)
329 << ESID_BITS) - 1; 346 << ESID_BITS) - 1;
330 vcpu3s->proto_vsid_first = vcpu3s->context_id[0] << ESID_BITS; 347 vcpu3s->proto_vsid_first = (u64)vcpu3s->context_id[0] << ESID_BITS;
331 vcpu3s->proto_vsid_next = vcpu3s->proto_vsid_first; 348 vcpu3s->proto_vsid_next = vcpu3s->proto_vsid_first;
332 349
333 kvmppc_mmu_hpte_init(vcpu); 350 kvmppc_mmu_hpte_init(vcpu);
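
The (u64) casts in kvmppc_mmu_init() above are an overflow fix: context_id is a 32-bit quantity, so shifting it left by ESID_BITS (18 in this tree) in 32-bit arithmetic silently drops the high bits of the proto-VSID once context IDs grow large. A minimal demonstration, using an unsigned 32-bit stand-in so the wraparound is well-defined C:

    #include <stdint.h>
    #include <stdio.h>

    #define ESID_BITS 18                    /* as in this tree's mmu-hash64.h */

    int main(void)
    {
            uint32_t context_id = 0x8000;   /* stands in for the context number */

            uint64_t bad  = context_id << ESID_BITS;            /* 32-bit shift wraps to 0 */
            uint64_t good = (uint64_t)context_id << ESID_BITS;  /* the patched form */

            printf("bad=0x%llx good=0x%llx\n",
                   (unsigned long long)bad, (unsigned long long)good);
            return 0;
    }
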
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 5880dfb31074..710d31317d81 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -675,6 +675,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
675 } 675 }
676 /* if the guest wants write access, see if that is OK */ 676 /* if the guest wants write access, see if that is OK */
677 if (!writing && hpte_is_writable(r)) { 677 if (!writing && hpte_is_writable(r)) {
678 unsigned int hugepage_shift;
678 pte_t *ptep, pte; 679 pte_t *ptep, pte;
679 680
680 /* 681 /*
@@ -683,9 +684,10 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
683 */ 684 */
684 rcu_read_lock_sched(); 685 rcu_read_lock_sched();
685 ptep = find_linux_pte_or_hugepte(current->mm->pgd, 686 ptep = find_linux_pte_or_hugepte(current->mm->pgd,
686 hva, NULL); 687 hva, &hugepage_shift);
687 if (ptep && pte_present(*ptep)) { 688 if (ptep) {
688 pte = kvmppc_read_update_linux_pte(ptep, 1); 689 pte = kvmppc_read_update_linux_pte(ptep, 1,
690 hugepage_shift);
689 if (pte_write(pte)) 691 if (pte_write(pte))
690 write_ok = 1; 692 write_ok = 1;
691 } 693 }
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S
index 56b983e7b738..4f0caecc0f9d 100644
--- a/arch/powerpc/kvm/book3s_64_slb.S
+++ b/arch/powerpc/kvm/book3s_64_slb.S
@@ -66,10 +66,6 @@ slb_exit_skip_ ## num:
66 66
67 ld r12, PACA_SLBSHADOWPTR(r13) 67 ld r12, PACA_SLBSHADOWPTR(r13)
68 68
69 /* Save off the first entry so we can slbie it later */
70 ld r10, SHADOW_SLB_ESID(0)(r12)
71 ld r11, SHADOW_SLB_VSID(0)(r12)
72
73 /* Remove bolted entries */ 69 /* Remove bolted entries */
74 UNBOLT_SLB_ENTRY(0) 70 UNBOLT_SLB_ENTRY(0)
75 UNBOLT_SLB_ENTRY(1) 71 UNBOLT_SLB_ENTRY(1)
@@ -81,15 +77,10 @@ slb_exit_skip_ ## num:
81 77
82 /* Flush SLB */ 78 /* Flush SLB */
83 79
80 li r10, 0
81 slbmte r10, r10
84 slbia 82 slbia
85 83
86 /* r0 = esid & ESID_MASK */
87 rldicr r10, r10, 0, 35
88 /* r0 |= CLASS_BIT(VSID) */
89 rldic r12, r11, 56 - 36, 36
90 or r10, r10, r12
91 slbie r10
92
93 /* Fill SLB with our shadow */ 84 /* Fill SLB with our shadow */
94 85
95 lbz r12, SVCPU_SLB_MAX(r3) 86 lbz r12, SVCPU_SLB_MAX(r3)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 550f5928b394..7629cd3eb91a 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1809,7 +1809,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
1809 rma_size <<= PAGE_SHIFT; 1809 rma_size <<= PAGE_SHIFT;
1810 rmls = lpcr_rmls(rma_size); 1810 rmls = lpcr_rmls(rma_size);
1811 err = -EINVAL; 1811 err = -EINVAL;
1812 if (rmls < 0) { 1812 if ((long)rmls < 0) {
1813 pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size); 1813 pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
1814 goto out_srcu; 1814 goto out_srcu;
1815 } 1815 }
@@ -1864,7 +1864,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
1864 1864
1865 up_out: 1865 up_out:
1866 up_read(&current->mm->mmap_sem); 1866 up_read(&current->mm->mmap_sem);
1867 goto out; 1867 goto out_srcu;
1868} 1868}
1869 1869
1870int kvmppc_core_init_vm(struct kvm *kvm) 1870int kvmppc_core_init_vm(struct kvm *kvm)
@@ -1874,7 +1874,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
1874 /* Allocate the guest's logical partition ID */ 1874 /* Allocate the guest's logical partition ID */
1875 1875
1876 lpid = kvmppc_alloc_lpid(); 1876 lpid = kvmppc_alloc_lpid();
1877 if (lpid < 0) 1877 if ((long)lpid < 0)
1878 return -ENOMEM; 1878 return -ENOMEM;
1879 kvm->arch.lpid = lpid; 1879 kvm->arch.lpid = lpid;
1880 1880
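
Both (long) casts above fix the same C pitfall: rmls and lpid are unsigned, so a plain < 0 test is always false and the error paths were unreachable. In miniature (might_fail() is a hypothetical stand-in for lpcr_rmls() or kvmppc_alloc_lpid()):

    #include <stdio.h>

    static long might_fail(void) { return -1; }   /* hypothetical failing helper */

    int main(void)
    {
            unsigned long rmls = might_fail();    /* -1 becomes ULONG_MAX */

            if (rmls < 0)                         /* always false: rmls is unsigned */
                    printf("never printed\n");
            if ((long)rmls < 0)                   /* the fix: reinterpret as signed */
                    printf("error detected\n");
            return 0;
    }
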
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 6dcbb49105a4..fc25689a9f35 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -27,7 +27,7 @@ static void *real_vmalloc_addr(void *x)
27 unsigned long addr = (unsigned long) x; 27 unsigned long addr = (unsigned long) x;
28 pte_t *p; 28 pte_t *p;
29 29
30 p = find_linux_pte(swapper_pg_dir, addr); 30 p = find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL);
31 if (!p || !pte_present(*p)) 31 if (!p || !pte_present(*p))
32 return NULL; 32 return NULL;
33 /* assume we don't have huge pages in vmalloc space... */ 33 /* assume we don't have huge pages in vmalloc space... */
@@ -139,20 +139,18 @@ static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
139{ 139{
140 pte_t *ptep; 140 pte_t *ptep;
141 unsigned long ps = *pte_sizep; 141 unsigned long ps = *pte_sizep;
142 unsigned int shift; 142 unsigned int hugepage_shift;
143 143
144 ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift); 144 ptep = find_linux_pte_or_hugepte(pgdir, hva, &hugepage_shift);
145 if (!ptep) 145 if (!ptep)
146 return __pte(0); 146 return __pte(0);
147 if (shift) 147 if (hugepage_shift)
148 *pte_sizep = 1ul << shift; 148 *pte_sizep = 1ul << hugepage_shift;
149 else 149 else
150 *pte_sizep = PAGE_SIZE; 150 *pte_sizep = PAGE_SIZE;
151 if (ps > *pte_sizep) 151 if (ps > *pte_sizep)
152 return __pte(0); 152 return __pte(0);
153 if (!pte_present(*ptep)) 153 return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift);
154 return __pte(0);
155 return kvmppc_read_update_linux_pte(ptep, writing);
156} 154}
157 155
158static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v) 156static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
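
The reworked lookup_linux_pte() above derives the mapping size from the hugepage shift reported by find_linux_pte_or_hugepte(), with 0 meaning an ordinary PAGE_SIZE pte, and leaves the pte_present check to kvmppc_read_update_linux_pte(). A standalone sketch of just the size arithmetic, assuming 4K base pages:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12                   /* 4K base pages assumed */

    /* mapping size for a pte whose hugepage shift was reported (0 = normal pte) */
    static uint64_t pte_size(unsigned int hugepage_shift)
    {
            return 1ull << (hugepage_shift ? hugepage_shift : PAGE_SHIFT);
    }

    int main(void)
    {
            unsigned int shifts[] = { 0, 24 };      /* normal pte, 16M hugepage */

            for (int i = 0; i < 2; i++)
                    printf("shift=%2u size=%llu\n", shifts[i],
                           (unsigned long long)pte_size(shifts[i]));
            return 0;
    }
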
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index bdc40b8e77d9..c6e13d9a9e15 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1047,11 +1047,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
1047 if (err) 1047 if (err)
1048 goto free_shadow_vcpu; 1048 goto free_shadow_vcpu;
1049 1049
1050 err = -ENOMEM;
1050 p = __get_free_page(GFP_KERNEL|__GFP_ZERO); 1051 p = __get_free_page(GFP_KERNEL|__GFP_ZERO);
1051 /* the real shared page fills the last 4k of our page */
1052 vcpu->arch.shared = (void*)(p + PAGE_SIZE - 4096);
1053 if (!p) 1052 if (!p)
1054 goto uninit_vcpu; 1053 goto uninit_vcpu;
1054 /* the real shared page fills the last 4k of our page */
1055 vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096);
1055 1056
1056#ifdef CONFIG_PPC_BOOK3S_64 1057#ifdef CONFIG_PPC_BOOK3S_64
1057 /* default to book3s_64 (970fx) */ 1058 /* default to book3s_64 (970fx) */
@@ -1239,8 +1240,7 @@ out:
1239#ifdef CONFIG_PPC64 1240#ifdef CONFIG_PPC64
1240int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info) 1241int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
1241{ 1242{
1242 /* No flags */ 1243 info->flags = KVM_PPC_1T_SEGMENTS;
1243 info->flags = 0;
1244 1244
1245 /* SLB is always 64 entries */ 1245 /* SLB is always 64 entries */
1246 info->slb_size = 64; 1246 info->slb_size = 64;
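
The vcpu_create hunk above is a use-before-check fix: the old code derived vcpu->arch.shared from p before testing whether __get_free_page() had succeeded, and err was never set to -ENOMEM on that path. The corrected ordering, reduced to a userspace pattern with malloc() standing in for __get_free_page():

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define PAGE_SIZE 4096

    int main(void)
    {
            int err = -12;                   /* -ENOMEM, set before the attempt */
            char *p, *shared;

            p = malloc(PAGE_SIZE);           /* stands in for __get_free_page() */
            if (!p) {                        /* check before deriving pointers */
                    printf("alloc failed: %d\n", err);
                    return 1;
            }
            /* only now is it safe to point at the last 4k of the page */
            shared = p + PAGE_SIZE - 4096;
            memset(shared, 0, 4096);

            free(p);
            return 0;
    }
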
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 1a1b51189773..dcc94f016007 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -796,7 +796,7 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
796 kvmppc_fill_pt_regs(&regs); 796 kvmppc_fill_pt_regs(&regs);
797 timer_interrupt(&regs); 797 timer_interrupt(&regs);
798 break; 798 break;
799#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3E_64) 799#if defined(CONFIG_PPC_DOORBELL)
800 case BOOKE_INTERRUPT_DOORBELL: 800 case BOOKE_INTERRUPT_DOORBELL:
801 kvmppc_fill_pt_regs(&regs); 801 kvmppc_fill_pt_regs(&regs);
802 doorbell_exception(&regs); 802 doorbell_exception(&regs);
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 631a2650e4e4..2c52ada30775 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -169,6 +169,9 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
169 vcpu->arch.shared->sprg3 = spr_val; 169 vcpu->arch.shared->sprg3 = spr_val;
170 break; 170 break;
171 171
172 /* PIR can legally be written, but we ignore it */
173 case SPRN_PIR: break;
174
172 default: 175 default:
173 emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, 176 emulated = kvmppc_core_emulate_mtspr(vcpu, sprn,
174 spr_val); 177 spr_val);
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index e15c521846ca..99c7fc16dc0d 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -580,7 +580,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
580 if (instr & 1) 580 if (instr & 1)
581 regs->link = regs->nip; 581 regs->link = regs->nip;
582 if (branch_taken(instr, regs)) 582 if (branch_taken(instr, regs))
583 regs->nip = imm; 583 regs->nip = truncate_if_32bit(regs->msr, imm);
584 return 1; 584 return 1;
585#ifdef CONFIG_PPC64 585#ifdef CONFIG_PPC64
586 case 17: /* sc */ 586 case 17: /* sc */
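
truncate_if_32bit(), already defined earlier in sstep.c, clamps a computed next-instruction address to 32 bits when the MSR says the processor is running in 32-bit mode; before this fix an emulated absolute branch could leave a 64-bit NIP in a 32-bit context. A standalone model, assuming the usual MSR SF (64-bit mode) bit at position 63:

    #include <stdint.h>
    #include <stdio.h>

    #define MSR_SF (1ull << 63)     /* 64-bit mode when set */

    static uint64_t truncate_if_32bit(uint64_t msr, uint64_t val)
    {
            if (!(msr & MSR_SF))
                    val &= 0xffffffffull;
            return val;
    }

    int main(void)
    {
            uint64_t target = 0xc000000000001234ull;

            printf("64-bit: %llx\n",
                   (unsigned long long)truncate_if_32bit(MSR_SF, target));
            printf("32-bit: %llx\n",
                   (unsigned long long)truncate_if_32bit(0, target));
            return 0;
    }
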
diff --git a/arch/powerpc/math-emu/Makefile b/arch/powerpc/math-emu/Makefile
index 7d1dba0d57f9..8d035d2d42a6 100644
--- a/arch/powerpc/math-emu/Makefile
+++ b/arch/powerpc/math-emu/Makefile
@@ -4,7 +4,8 @@ obj-$(CONFIG_MATH_EMULATION) += fabs.o fadd.o fadds.o fcmpo.o fcmpu.o \
4 fmadd.o fmadds.o fmsub.o fmsubs.o \ 4 fmadd.o fmadds.o fmsub.o fmsubs.o \
5 fmul.o fmuls.o fnabs.o fneg.o \ 5 fmul.o fmuls.o fnabs.o fneg.o \
6 fnmadd.o fnmadds.o fnmsub.o fnmsubs.o \ 6 fnmadd.o fnmadds.o fnmsub.o fnmsubs.o \
7 fres.o frsp.o frsqrte.o fsel.o lfs.o \ 7 fres.o fre.o frsp.o fsel.o lfs.o \
8 frsqrte.o frsqrtes.o \
8 fsqrt.o fsqrts.o fsub.o fsubs.o \ 9 fsqrt.o fsqrts.o fsub.o fsubs.o \
9 mcrfs.o mffs.o mtfsb0.o mtfsb1.o \ 10 mcrfs.o mffs.o mtfsb0.o mtfsb1.o \
10 mtfsf.o mtfsfi.o stfiwx.o stfs.o \ 11 mtfsf.o mtfsfi.o stfiwx.o stfs.o \
diff --git a/arch/powerpc/math-emu/fre.c b/arch/powerpc/math-emu/fre.c
new file mode 100644
index 000000000000..49ccf2cc6a5a
--- /dev/null
+++ b/arch/powerpc/math-emu/fre.c
@@ -0,0 +1,11 @@
1#include <linux/types.h>
2#include <linux/errno.h>
3#include <asm/uaccess.h>
4
5int fre(void *frD, void *frB)
6{
7#ifdef DEBUG
8 printk("%s: %p %p\n", __func__, frD, frB);
9#endif
10 return -ENOSYS;
11}
diff --git a/arch/powerpc/math-emu/frsqrtes.c b/arch/powerpc/math-emu/frsqrtes.c
new file mode 100644
index 000000000000..7e838e380314
--- /dev/null
+++ b/arch/powerpc/math-emu/frsqrtes.c
@@ -0,0 +1,11 @@
1#include <linux/types.h>
2#include <linux/errno.h>
3#include <asm/uaccess.h>
4
5int frsqrtes(void *frD, void *frB)
6{
7#ifdef DEBUG
8 printk("%s: %p %p\n", __func__, frD, frB);
9#endif
10 return 0;
11}
diff --git a/arch/powerpc/math-emu/math.c b/arch/powerpc/math-emu/math.c
index 164d55935bd8..0328e66e0799 100644
--- a/arch/powerpc/math-emu/math.c
+++ b/arch/powerpc/math-emu/math.c
@@ -58,8 +58,10 @@ FLOATFUNC(fnabs);
58FLOATFUNC(fneg); 58FLOATFUNC(fneg);
59 59
60/* Optional */ 60/* Optional */
61FLOATFUNC(fre);
61FLOATFUNC(fres); 62FLOATFUNC(fres);
62FLOATFUNC(frsqrte); 63FLOATFUNC(frsqrte);
64FLOATFUNC(frsqrtes);
63FLOATFUNC(fsel); 65FLOATFUNC(fsel);
64FLOATFUNC(fsqrt); 66FLOATFUNC(fsqrt);
65FLOATFUNC(fsqrts); 67FLOATFUNC(fsqrts);
@@ -97,6 +99,7 @@ FLOATFUNC(fsqrts);
97#define FSQRTS 0x016 /* 22 */ 99#define FSQRTS 0x016 /* 22 */
98#define FRES 0x018 /* 24 */ 100#define FRES 0x018 /* 24 */
99#define FMULS 0x019 /* 25 */ 101#define FMULS 0x019 /* 25 */
102#define FRSQRTES 0x01a /* 26 */
100#define FMSUBS 0x01c /* 28 */ 103#define FMSUBS 0x01c /* 28 */
101#define FMADDS 0x01d /* 29 */ 104#define FMADDS 0x01d /* 29 */
102#define FNMSUBS 0x01e /* 30 */ 105#define FNMSUBS 0x01e /* 30 */
@@ -109,6 +112,7 @@ FLOATFUNC(fsqrts);
109#define FADD 0x015 /* 21 */ 112#define FADD 0x015 /* 21 */
110#define FSQRT 0x016 /* 22 */ 113#define FSQRT 0x016 /* 22 */
111#define FSEL 0x017 /* 23 */ 114#define FSEL 0x017 /* 23 */
115#define FRE 0x018 /* 24 */
112#define FMUL 0x019 /* 25 */ 116#define FMUL 0x019 /* 25 */
113#define FRSQRTE 0x01a /* 26 */ 117#define FRSQRTE 0x01a /* 26 */
114#define FMSUB 0x01c /* 28 */ 118#define FMSUB 0x01c /* 28 */
@@ -299,9 +303,10 @@ do_mathemu(struct pt_regs *regs)
299 case FDIVS: func = fdivs; type = AB; break; 303 case FDIVS: func = fdivs; type = AB; break;
300 case FSUBS: func = fsubs; type = AB; break; 304 case FSUBS: func = fsubs; type = AB; break;
301 case FADDS: func = fadds; type = AB; break; 305 case FADDS: func = fadds; type = AB; break;
302 case FSQRTS: func = fsqrts; type = AB; break; 306 case FSQRTS: func = fsqrts; type = XB; break;
303 case FRES: func = fres; type = AB; break; 307 case FRES: func = fres; type = XB; break;
304 case FMULS: func = fmuls; type = AC; break; 308 case FMULS: func = fmuls; type = AC; break;
309 case FRSQRTES: func = frsqrtes;type = XB; break;
305 case FMSUBS: func = fmsubs; type = ABC; break; 310 case FMSUBS: func = fmsubs; type = ABC; break;
306 case FMADDS: func = fmadds; type = ABC; break; 311 case FMADDS: func = fmadds; type = ABC; break;
307 case FNMSUBS: func = fnmsubs; type = ABC; break; 312 case FNMSUBS: func = fnmsubs; type = ABC; break;
@@ -317,10 +322,11 @@ do_mathemu(struct pt_regs *regs)
317 case FDIV: func = fdiv; type = AB; break; 322 case FDIV: func = fdiv; type = AB; break;
318 case FSUB: func = fsub; type = AB; break; 323 case FSUB: func = fsub; type = AB; break;
319 case FADD: func = fadd; type = AB; break; 324 case FADD: func = fadd; type = AB; break;
320 case FSQRT: func = fsqrt; type = AB; break; 325 case FSQRT: func = fsqrt; type = XB; break;
326 case FRE: func = fre; type = XB; break;
321 case FSEL: func = fsel; type = ABC; break; 327 case FSEL: func = fsel; type = ABC; break;
322 case FMUL: func = fmul; type = AC; break; 328 case FMUL: func = fmul; type = AC; break;
323 case FRSQRTE: func = frsqrte; type = AB; break; 329 case FRSQRTE: func = frsqrte; type = XB; break;
324 case FMSUB: func = fmsub; type = ABC; break; 330 case FMSUB: func = fmsub; type = ABC; break;
325 case FMADD: func = fmadd; type = ABC; break; 331 case FMADD: func = fmadd; type = ABC; break;
326 case FNMSUB: func = fnmsub; type = ABC; break; 332 case FNMSUB: func = fnmsub; type = ABC; break;
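
The type changes in do_mathemu() above matter for operand decoding: AB fetches both frA and frB for the handler, while the estimate and square-root instructions (fsqrt, fres, frsqrte, fre and their single-precision variants) only take frB as an input, so the XB typing makes the emulator fetch just that register; with the old AB typing it also read frA, which for these encodings is not a real operand. A reduced model of that dispatch; the two-entry register file and values are illustrative only (link with -lm):

    #include <math.h>
    #include <stdio.h>

    enum optype { AB, XB };         /* AB: frA,frB operands; XB: frB only */

    static double regs[32] = { [1] = 9.0, [2] = 4.0 };

    static double dispatch(enum optype type, int frA, int frB)
    {
            switch (type) {
            case AB:                /* two-operand arithmetic, e.g. fadd */
                    return regs[frA] + regs[frB];
            case XB:                /* one operand: frA is never fetched */
                    return sqrt(regs[frB]);
            }
            return 0.0;
    }

    int main(void)
    {
            printf("fadd  f1,f2 = %g\n", dispatch(AB, 1, 2));   /* 13 */
            printf("fsqrt f2    = %g\n", dispatch(XB, 0, 2));   /* 2  */
            return 0;
    }
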
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c
index 2c9441ee6bb8..82b1ff759e26 100644
--- a/arch/powerpc/mm/44x_mmu.c
+++ b/arch/powerpc/mm/44x_mmu.c
@@ -41,7 +41,7 @@ int icache_44x_need_flush;
41 41
42unsigned long tlb_47x_boltmap[1024/8]; 42unsigned long tlb_47x_boltmap[1024/8];
43 43
44static void __cpuinit ppc44x_update_tlb_hwater(void) 44static void ppc44x_update_tlb_hwater(void)
45{ 45{
46 extern unsigned int tlb_44x_patch_hwater_D[]; 46 extern unsigned int tlb_44x_patch_hwater_D[];
47 extern unsigned int tlb_44x_patch_hwater_I[]; 47 extern unsigned int tlb_44x_patch_hwater_I[];
@@ -134,7 +134,7 @@ static void __init ppc47x_update_boltmap(void)
134/* 134/*
135 * "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 47x type MMU 135 * "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 47x type MMU
136 */ 136 */
137static void __cpuinit ppc47x_pin_tlb(unsigned int virt, unsigned int phys) 137static void ppc47x_pin_tlb(unsigned int virt, unsigned int phys)
138{ 138{
139 unsigned int rA; 139 unsigned int rA;
140 int bolted; 140 int bolted;
@@ -229,7 +229,7 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
229} 229}
230 230
231#ifdef CONFIG_SMP 231#ifdef CONFIG_SMP
232void __cpuinit mmu_init_secondary(int cpu) 232void mmu_init_secondary(int cpu)
233{ 233{
234 unsigned long addr; 234 unsigned long addr;
235 unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1); 235 unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1);
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index cf16b5733eaa..51230ee6a407 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -6,17 +6,16 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
6 6
7ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) 7ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
8 8
9obj-y := fault.o mem.o pgtable.o gup.o \ 9obj-y := fault.o mem.o pgtable.o gup.o mmap.o \
10 init_$(CONFIG_WORD_SIZE).o \ 10 init_$(CONFIG_WORD_SIZE).o \
11 pgtable_$(CONFIG_WORD_SIZE).o 11 pgtable_$(CONFIG_WORD_SIZE).o
12obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ 12obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
13 tlb_nohash_low.o 13 tlb_nohash_low.o
14obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(CONFIG_WORD_SIZE)e.o 14obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(CONFIG_WORD_SIZE)e.o
15obj-$(CONFIG_PPC64) += mmap_64.o
16hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o 15hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o
17obj-$(CONFIG_PPC_STD_MMU_64) += hash_utils_64.o \ 16obj-$(CONFIG_PPC_STD_MMU_64) += hash_utils_64.o \
18 slb_low.o slb.o stab.o \ 17 slb_low.o slb.o stab.o \
19 mmap_64.o $(hash64-y) 18 $(hash64-y)
20obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o 19obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o
21obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \ 20obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \
22 tlb_hash$(CONFIG_WORD_SIZE).o \ 21 tlb_hash$(CONFIG_WORD_SIZE).o \
@@ -28,11 +27,12 @@ obj-$(CONFIG_44x) += 44x_mmu.o
28obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke_mmu.o 27obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke_mmu.o
29obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o 28obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
30obj-$(CONFIG_PPC_MM_SLICES) += slice.o 29obj-$(CONFIG_PPC_MM_SLICES) += slice.o
31ifeq ($(CONFIG_HUGETLB_PAGE),y)
32obj-y += hugetlbpage.o 30obj-y += hugetlbpage.o
31ifeq ($(CONFIG_HUGETLB_PAGE),y)
33obj-$(CONFIG_PPC_STD_MMU_64) += hugetlbpage-hash64.o 32obj-$(CONFIG_PPC_STD_MMU_64) += hugetlbpage-hash64.o
34obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o 33obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o
35endif 34endif
35obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hugepage-hash64.o
36obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o 36obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o
37obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o 37obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
38obj-$(CONFIG_HIGHMEM) += highmem.o 38obj-$(CONFIG_HIGHMEM) += highmem.o
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c
index 4b921affa495..49822d90ea96 100644
--- a/arch/powerpc/mm/gup.c
+++ b/arch/powerpc/mm/gup.c
@@ -34,7 +34,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
34 34
35 ptep = pte_offset_kernel(&pmd, addr); 35 ptep = pte_offset_kernel(&pmd, addr);
36 do { 36 do {
37 pte_t pte = *ptep; 37 pte_t pte = ACCESS_ONCE(*ptep);
38 struct page *page; 38 struct page *page;
39 39
40 if ((pte_val(pte) & mask) != result) 40 if ((pte_val(pte) & mask) != result)
@@ -63,12 +63,18 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
63 63
64 pmdp = pmd_offset(&pud, addr); 64 pmdp = pmd_offset(&pud, addr);
65 do { 65 do {
66 pmd_t pmd = *pmdp; 66 pmd_t pmd = ACCESS_ONCE(*pmdp);
67 67
68 next = pmd_addr_end(addr, end); 68 next = pmd_addr_end(addr, end);
69 if (pmd_none(pmd)) 69 /*
70 * If we find a splitting transparent hugepage we
71 * return zero. That will result in taking the slow
72 * path which will call wait_split_huge_page()
73 * if the pmd is still in splitting state
74 */
75 if (pmd_none(pmd) || pmd_trans_splitting(pmd))
70 return 0; 76 return 0;
71 if (pmd_huge(pmd)) { 77 if (pmd_huge(pmd) || pmd_large(pmd)) {
72 if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next, 78 if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next,
73 write, pages, nr)) 79 write, pages, nr))
74 return 0; 80 return 0;
@@ -91,7 +97,7 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
91 97
92 pudp = pud_offset(&pgd, addr); 98 pudp = pud_offset(&pgd, addr);
93 do { 99 do {
94 pud_t pud = *pudp; 100 pud_t pud = ACCESS_ONCE(*pudp);
95 101
96 next = pud_addr_end(addr, end); 102 next = pud_addr_end(addr, end);
97 if (pud_none(pud)) 103 if (pud_none(pud))
@@ -154,7 +160,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
154 160
155 pgdp = pgd_offset(mm, addr); 161 pgdp = pgd_offset(mm, addr);
156 do { 162 do {
157 pgd_t pgd = *pgdp; 163 pgd_t pgd = ACCESS_ONCE(*pgdp);
158 164
159 pr_devel(" %016lx: normal pgd %p\n", addr, 165 pr_devel(" %016lx: normal pgd %p\n", addr,
160 (void *)pgd_val(pgd)); 166 (void *)pgd_val(pgd));
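
The ACCESS_ONCE() conversions above force a single load of each page-table entry, so the pmd_none()/pmd_trans_splitting() checks and the subsequent use all operate on one consistent snapshot instead of re-reading a location that a concurrent THP split may be rewriting under the lockless walker. The idiom in miniature, with ACCESS_ONCE spelled as the kernel defines it:

    #include <stdint.h>
    #include <stdio.h>

    /* as in the kernel: force exactly one volatile load of x */
    #define ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))

    static uint64_t pmd_entry;      /* concurrently rewritten in the real code */

    int main(void)
    {
            pmd_entry = 0xabcd001;

            uint64_t pmd = ACCESS_ONCE(pmd_entry);  /* snapshot once */

            /* every later test uses the snapshot, not the live entry */
            if (pmd == 0)
                    printf("pmd_none: bail to the slow path\n");
            else
                    printf("working on snapshot 0x%llx\n",
                           (unsigned long long)pmd);
            return 0;
    }
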
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index 0e980acae67c..d3cbda62857b 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -289,9 +289,10 @@ htab_modify_pte:
289 289
290 /* Call ppc_md.hpte_updatepp */ 290 /* Call ppc_md.hpte_updatepp */
291 mr r5,r29 /* vpn */ 291 mr r5,r29 /* vpn */
292 li r6,MMU_PAGE_4K /* page size */ 292 li r6,MMU_PAGE_4K /* base page size */
293 ld r7,STK_PARAM(R9)(r1) /* segment size */ 293 li r7,MMU_PAGE_4K /* actual page size */
294 ld r8,STK_PARAM(R8)(r1) /* get "local" param */ 294 ld r8,STK_PARAM(R9)(r1) /* segment size */
295 ld r9,STK_PARAM(R8)(r1) /* get "local" param */
295_GLOBAL(htab_call_hpte_updatepp) 296_GLOBAL(htab_call_hpte_updatepp)
296 bl . /* Patched by htab_finish_init() */ 297 bl . /* Patched by htab_finish_init() */
297 298
@@ -649,9 +650,10 @@ htab_modify_pte:
649 650
650 /* Call ppc_md.hpte_updatepp */ 651 /* Call ppc_md.hpte_updatepp */
651 mr r5,r29 /* vpn */ 652 mr r5,r29 /* vpn */
652 li r6,MMU_PAGE_4K /* page size */ 653 li r6,MMU_PAGE_4K /* base page size */
653 ld r7,STK_PARAM(R9)(r1) /* segment size */ 654 li r7,MMU_PAGE_4K /* actual page size */
654 ld r8,STK_PARAM(R8)(r1) /* get "local" param */ 655 ld r8,STK_PARAM(R9)(r1) /* segment size */
656 ld r9,STK_PARAM(R8)(r1) /* get "local" param */
655_GLOBAL(htab_call_hpte_updatepp) 657_GLOBAL(htab_call_hpte_updatepp)
656 bl . /* patched by htab_finish_init() */ 658 bl . /* patched by htab_finish_init() */
657 659
@@ -937,9 +939,10 @@ ht64_modify_pte:
937 939
938 /* Call ppc_md.hpte_updatepp */ 940 /* Call ppc_md.hpte_updatepp */
939 mr r5,r29 /* vpn */ 941 mr r5,r29 /* vpn */
940 li r6,MMU_PAGE_64K 942 li r6,MMU_PAGE_64K /* base page size */
941 ld r7,STK_PARAM(R9)(r1) /* segment size */ 943 li r7,MMU_PAGE_64K /* actual page size */
942 ld r8,STK_PARAM(R8)(r1) /* get "local" param */ 944 ld r8,STK_PARAM(R9)(r1) /* segment size */
945 ld r9,STK_PARAM(R8)(r1) /* get "local" param */
943_GLOBAL(ht64_call_hpte_updatepp) 946_GLOBAL(ht64_call_hpte_updatepp)
944 bl . /* patched by htab_finish_init() */ 947 bl . /* patched by htab_finish_init() */
945 948
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 4c122c3f1623..c33d939120c9 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -43,6 +43,7 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
43{ 43{
44 unsigned long va; 44 unsigned long va;
45 unsigned int penc; 45 unsigned int penc;
46 unsigned long sllp;
46 47
47 /* 48 /*
48 * We need 14 to 65 bits of va for a tlbie of 4K page 49 * We need 14 to 65 bits of va for a tlbie of 4K page
@@ -64,7 +65,9 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
64 /* clear out bits after (52) [0....52.....63] */ 65 /* clear out bits after (52) [0....52.....63] */
65 va &= ~((1ul << (64 - 52)) - 1); 66 va &= ~((1ul << (64 - 52)) - 1);
66 va |= ssize << 8; 67 va |= ssize << 8;
67 va |= mmu_psize_defs[apsize].sllp << 6; 68 sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
69 ((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
70 va |= sllp << 5;
68 asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2) 71 asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
69 : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206) 72 : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
70 : "memory"); 73 : "memory");
@@ -98,6 +101,7 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
98{ 101{
99 unsigned long va; 102 unsigned long va;
100 unsigned int penc; 103 unsigned int penc;
104 unsigned long sllp;
101 105
102 /* VPN_SHIFT can be at most 12 */ 106 /* VPN_SHIFT can be at most 12 */
103 va = vpn << VPN_SHIFT; 107 va = vpn << VPN_SHIFT;
@@ -113,7 +117,9 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
113 /* clear out bits after(52) [0....52.....63] */ 117 /* clear out bits after(52) [0....52.....63] */
114 va &= ~((1ul << (64 - 52)) - 1); 118 va &= ~((1ul << (64 - 52)) - 1);
115 va |= ssize << 8; 119 va |= ssize << 8;
116 va |= mmu_psize_defs[apsize].sllp << 6; 120 sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
121 ((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
122 va |= sllp << 5;
117 asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)" 123 asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
118 : : "r"(va) : "memory"); 124 : : "r"(va) : "memory");
119 break; 125 break;
@@ -273,61 +279,15 @@ static long native_hpte_remove(unsigned long hpte_group)
273 return i; 279 return i;
274} 280}
275 281
276static inline int __hpte_actual_psize(unsigned int lp, int psize)
277{
278 int i, shift;
279 unsigned int mask;
280
281 /* start from 1 ignoring MMU_PAGE_4K */
282 for (i = 1; i < MMU_PAGE_COUNT; i++) {
283
284 /* invalid penc */
285 if (mmu_psize_defs[psize].penc[i] == -1)
286 continue;
287 /*
288 * encoding bits per actual page size
289 * PTE LP actual page size
290 * rrrr rrrz >=8KB
291 * rrrr rrzz >=16KB
292 * rrrr rzzz >=32KB
293 * rrrr zzzz >=64KB
294 * .......
295 */
296 shift = mmu_psize_defs[i].shift - LP_SHIFT;
297 if (shift > LP_BITS)
298 shift = LP_BITS;
299 mask = (1 << shift) - 1;
300 if ((lp & mask) == mmu_psize_defs[psize].penc[i])
301 return i;
302 }
303 return -1;
304}
305
306static inline int hpte_actual_psize(struct hash_pte *hptep, int psize)
307{
308 /* Look at the 8 bit LP value */
309 unsigned int lp = (hptep->r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
310
311 if (!(hptep->v & HPTE_V_VALID))
312 return -1;
313
314 /* First check if it is large page */
315 if (!(hptep->v & HPTE_V_LARGE))
316 return MMU_PAGE_4K;
317
318 return __hpte_actual_psize(lp, psize);
319}
320
321static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, 282static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
322 unsigned long vpn, int psize, int ssize, 283 unsigned long vpn, int bpsize,
323 int local) 284 int apsize, int ssize, int local)
324{ 285{
325 struct hash_pte *hptep = htab_address + slot; 286 struct hash_pte *hptep = htab_address + slot;
326 unsigned long hpte_v, want_v; 287 unsigned long hpte_v, want_v;
327 int ret = 0; 288 int ret = 0;
328 int actual_psize;
329 289
330 want_v = hpte_encode_avpn(vpn, psize, ssize); 290 want_v = hpte_encode_avpn(vpn, bpsize, ssize);
331 291
332 DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)", 292 DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
333 vpn, want_v & HPTE_V_AVPN, slot, newpp); 293 vpn, want_v & HPTE_V_AVPN, slot, newpp);
@@ -335,7 +295,6 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
335 native_lock_hpte(hptep); 295 native_lock_hpte(hptep);
336 296
337 hpte_v = hptep->v; 297 hpte_v = hptep->v;
338 actual_psize = hpte_actual_psize(hptep, psize);
339 /* 298 /*
340 * We need to invalidate the TLB always because hpte_remove doesn't do 299 * We need to invalidate the TLB always because hpte_remove doesn't do
341 * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less 300 * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
@@ -343,12 +302,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
343 * (hpte_remove) because we assume the old translation is still 302 * (hpte_remove) because we assume the old translation is still
344 * technically "valid". 303 * technically "valid".
345 */ 304 */
346 if (actual_psize < 0) { 305 if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
347 actual_psize = psize;
348 ret = -1;
349 goto err_out;
350 }
351 if (!HPTE_V_COMPARE(hpte_v, want_v)) {
352 DBG_LOW(" -> miss\n"); 306 DBG_LOW(" -> miss\n");
353 ret = -1; 307 ret = -1;
354 } else { 308 } else {
@@ -357,11 +311,10 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
357 hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) | 311 hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
358 (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C)); 312 (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C));
359 } 313 }
360err_out:
361 native_unlock_hpte(hptep); 314 native_unlock_hpte(hptep);
362 315
363 /* Ensure it is out of the tlb too. */ 316 /* Ensure it is out of the tlb too. */
364 tlbie(vpn, psize, actual_psize, ssize, local); 317 tlbie(vpn, bpsize, apsize, ssize, local);
365 318
366 return ret; 319 return ret;
367} 320}
@@ -402,7 +355,6 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize)
402static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, 355static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
403 int psize, int ssize) 356 int psize, int ssize)
404{ 357{
405 int actual_psize;
406 unsigned long vpn; 358 unsigned long vpn;
407 unsigned long vsid; 359 unsigned long vsid;
408 long slot; 360 long slot;
@@ -415,36 +367,33 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
415 if (slot == -1) 367 if (slot == -1)
416 panic("could not find page to bolt\n"); 368 panic("could not find page to bolt\n");
417 hptep = htab_address + slot; 369 hptep = htab_address + slot;
418 actual_psize = hpte_actual_psize(hptep, psize);
419 if (actual_psize < 0)
420 actual_psize = psize;
421 370
422 /* Update the HPTE */ 371 /* Update the HPTE */
423 hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) | 372 hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
424 (newpp & (HPTE_R_PP | HPTE_R_N)); 373 (newpp & (HPTE_R_PP | HPTE_R_N));
425 374 /*
426 /* Ensure it is out of the tlb too. */ 375 * Ensure it is out of the tlb too. Bolted entries base and
427 tlbie(vpn, psize, actual_psize, ssize, 0); 376 * actual page size will be same.
377 */
378 tlbie(vpn, psize, psize, ssize, 0);
428} 379}
429 380
430static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, 381static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
431 int psize, int ssize, int local) 382 int bpsize, int apsize, int ssize, int local)
432{ 383{
433 struct hash_pte *hptep = htab_address + slot; 384 struct hash_pte *hptep = htab_address + slot;
434 unsigned long hpte_v; 385 unsigned long hpte_v;
435 unsigned long want_v; 386 unsigned long want_v;
436 unsigned long flags; 387 unsigned long flags;
437 int actual_psize;
438 388
439 local_irq_save(flags); 389 local_irq_save(flags);
440 390
441 DBG_LOW(" invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot); 391 DBG_LOW(" invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot);
442 392
443 want_v = hpte_encode_avpn(vpn, psize, ssize); 393 want_v = hpte_encode_avpn(vpn, bpsize, ssize);
444 native_lock_hpte(hptep); 394 native_lock_hpte(hptep);
445 hpte_v = hptep->v; 395 hpte_v = hptep->v;
446 396
447 actual_psize = hpte_actual_psize(hptep, psize);
448 /* 397 /*
449 * We need to invalidate the TLB always because hpte_remove doesn't do 398 * We need to invalidate the TLB always because hpte_remove doesn't do
450 * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less 399 * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
@@ -452,23 +401,120 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
452 * (hpte_remove) because we assume the old translation is still 401 * (hpte_remove) because we assume the old translation is still
453 * technically "valid". 402 * technically "valid".
454 */ 403 */
455 if (actual_psize < 0) { 404 if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
456 actual_psize = psize;
457 native_unlock_hpte(hptep);
458 goto err_out;
459 }
460 if (!HPTE_V_COMPARE(hpte_v, want_v))
461 native_unlock_hpte(hptep); 405 native_unlock_hpte(hptep);
462 else 406 else
463 /* Invalidate the hpte. NOTE: this also unlocks it */ 407 /* Invalidate the hpte. NOTE: this also unlocks it */
464 hptep->v = 0; 408 hptep->v = 0;
465 409
466err_out:
467 /* Invalidate the TLB */ 410 /* Invalidate the TLB */
468 tlbie(vpn, psize, actual_psize, ssize, local); 411 tlbie(vpn, bpsize, apsize, ssize, local);
412
413 local_irq_restore(flags);
414}
415
416static void native_hugepage_invalidate(struct mm_struct *mm,
417 unsigned char *hpte_slot_array,
418 unsigned long addr, int psize)
419{
420 int ssize = 0, i;
421 int lock_tlbie;
422 struct hash_pte *hptep;
423 int actual_psize = MMU_PAGE_16M;
424 unsigned int max_hpte_count, valid;
425 unsigned long flags, s_addr = addr;
426 unsigned long hpte_v, want_v, shift;
427 unsigned long hidx, vpn = 0, vsid, hash, slot;
428
429 shift = mmu_psize_defs[psize].shift;
430 max_hpte_count = 1U << (PMD_SHIFT - shift);
431
432 local_irq_save(flags);
433 for (i = 0; i < max_hpte_count; i++) {
434 valid = hpte_valid(hpte_slot_array, i);
435 if (!valid)
436 continue;
437 hidx = hpte_hash_index(hpte_slot_array, i);
438
439 /* get the vpn */
440 addr = s_addr + (i * (1ul << shift));
441 if (!is_kernel_addr(addr)) {
442 ssize = user_segment_size(addr);
443 vsid = get_vsid(mm->context.id, addr, ssize);
444 WARN_ON(vsid == 0);
445 } else {
446 vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
447 ssize = mmu_kernel_ssize;
448 }
449
450 vpn = hpt_vpn(addr, vsid, ssize);
451 hash = hpt_hash(vpn, shift, ssize);
452 if (hidx & _PTEIDX_SECONDARY)
453 hash = ~hash;
454
455 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
456 slot += hidx & _PTEIDX_GROUP_IX;
457
458 hptep = htab_address + slot;
459 want_v = hpte_encode_avpn(vpn, psize, ssize);
460 native_lock_hpte(hptep);
461 hpte_v = hptep->v;
462
463 /* Even if we miss, we need to invalidate the TLB */
464 if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
465 native_unlock_hpte(hptep);
466 else
467 /* Invalidate the hpte. NOTE: this also unlocks it */
468 hptep->v = 0;
469 }
470 /*
471 * Since this is a hugepage, we just need a single tlbie.
472 * use the last vpn.
473 */
474 lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
475 if (lock_tlbie)
476 raw_spin_lock(&native_tlbie_lock);
477
478 asm volatile("ptesync":::"memory");
479 __tlbie(vpn, psize, actual_psize, ssize);
480 asm volatile("eieio; tlbsync; ptesync":::"memory");
481
482 if (lock_tlbie)
483 raw_spin_unlock(&native_tlbie_lock);
484
469 local_irq_restore(flags); 485 local_irq_restore(flags);
470} 486}
471 487
488static inline int __hpte_actual_psize(unsigned int lp, int psize)
489{
490 int i, shift;
491 unsigned int mask;
492
493 /* start from 1 ignoring MMU_PAGE_4K */
494 for (i = 1; i < MMU_PAGE_COUNT; i++) {
495
496 /* invalid penc */
497 if (mmu_psize_defs[psize].penc[i] == -1)
498 continue;
499 /*
500 * encoding bits per actual page size
501 * PTE LP actual page size
502 * rrrr rrrz >=8KB
503 * rrrr rrzz >=16KB
504 * rrrr rzzz >=32KB
505 * rrrr zzzz >=64KB
506 * .......
507 */
508 shift = mmu_psize_defs[i].shift - LP_SHIFT;
509 if (shift > LP_BITS)
510 shift = LP_BITS;
511 mask = (1 << shift) - 1;
512 if ((lp & mask) == mmu_psize_defs[psize].penc[i])
513 return i;
514 }
515 return -1;
516}
517
472static void hpte_decode(struct hash_pte *hpte, unsigned long slot, 518static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
473 int *psize, int *apsize, int *ssize, unsigned long *vpn) 519 int *psize, int *apsize, int *ssize, unsigned long *vpn)
474{ 520{
@@ -514,6 +560,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
514 seg_off |= vpi << shift; 560 seg_off |= vpi << shift;
515 } 561 }
516 *vpn = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT; 562 *vpn = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT;
563 break;
517 case MMU_SEGSIZE_1T: 564 case MMU_SEGSIZE_1T:
518 /* We only have 40 - 23 bits of seg_off in avpn */ 565 /* We only have 40 - 23 bits of seg_off in avpn */
519 seg_off = (avpn & 0x1ffff) << 23; 566 seg_off = (avpn & 0x1ffff) << 23;
@@ -523,6 +570,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
523 seg_off |= vpi << shift; 570 seg_off |= vpi << shift;
524 } 571 }
525 *vpn = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT; 572 *vpn = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT;
573 break;
526 default: 574 default:
527 *vpn = size = 0; 575 *vpn = size = 0;
528 } 576 }
@@ -672,4 +720,5 @@ void __init hpte_init_native(void)
672 ppc_md.hpte_remove = native_hpte_remove; 720 ppc_md.hpte_remove = native_hpte_remove;
673 ppc_md.hpte_clear_all = native_hpte_clear; 721 ppc_md.hpte_clear_all = native_hpte_clear;
674 ppc_md.flush_hash_range = native_flush_hash_range; 722 ppc_md.flush_hash_range = native_flush_hash_range;
723 ppc_md.hugepage_invalidate = native_hugepage_invalidate;
675} 724}
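
__hpte_actual_psize() (moved below the new native_hugepage_invalidate(), not changed) recovers the actual page size by matching the low bits of the HPTE's LP field against each candidate size's penc value, consuming more LP bits for larger sizes, exactly as the rrrr...z comment table describes. A standalone rendering with a toy two-entry table; the penc values here are invented, and the real kernel keeps one penc per (base, actual) size pair rather than one per size:

    #include <stdio.h>

    #define LP_SHIFT 12
    #define LP_BITS   8

    struct psize_def { int shift; int penc; };

    /* toy table: [1] = 64K, [2] = 16M; the penc values are made up */
    static struct psize_def defs[] = { { 0, -1 }, { 16, 0x01 }, { 24, 0x38 } };

    static int actual_psize(unsigned int lp)
    {
            for (int i = 1; i < 3; i++) {           /* skip 4K, as the kernel does */
                    int shift = defs[i].shift - LP_SHIFT;
                    if (shift > LP_BITS)
                            shift = LP_BITS;
                    unsigned int mask = (1u << shift) - 1;
                    if ((lp & mask) == (unsigned int)defs[i].penc)
                            return i;               /* larger size, more LP bits */
            }
            return -1;
    }

    int main(void)
    {
            printf("lp=0x01 -> size index %d\n", actual_psize(0x01));
            printf("lp=0x38 -> size index %d\n", actual_psize(0x38));
            return 0;
    }
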
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index e303a6d74e3a..6ecc38bd5b24 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -807,7 +807,7 @@ void __init early_init_mmu(void)
807} 807}
808 808
809#ifdef CONFIG_SMP 809#ifdef CONFIG_SMP
810void __cpuinit early_init_mmu_secondary(void) 810void early_init_mmu_secondary(void)
811{ 811{
812 /* Initialize hash table for that CPU */ 812 /* Initialize hash table for that CPU */
813 if (!firmware_has_feature(FW_FEATURE_LPAR)) 813 if (!firmware_has_feature(FW_FEATURE_LPAR))
@@ -1050,13 +1050,26 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
1050 goto bail; 1050 goto bail;
1051 } 1051 }
1052 1052
1053#ifdef CONFIG_HUGETLB_PAGE
1054 if (hugeshift) { 1053 if (hugeshift) {
1055 rc = __hash_page_huge(ea, access, vsid, ptep, trap, local, 1054 if (pmd_trans_huge(*(pmd_t *)ptep))
1056 ssize, hugeshift, psize); 1055 rc = __hash_page_thp(ea, access, vsid, (pmd_t *)ptep,
1056 trap, local, ssize, psize);
1057#ifdef CONFIG_HUGETLB_PAGE
1058 else
1059 rc = __hash_page_huge(ea, access, vsid, ptep, trap,
1060 local, ssize, hugeshift, psize);
1061#else
1062 else {
1063 /*
1064 * if we have hugeshift and it is not transhuge, with
1065 * hugetlb disabled, something is really wrong.
1066 */
1067 rc = 1;
1068 WARN_ON(1);
1069 }
1070#endif
1057 goto bail; 1071 goto bail;
1058 } 1072 }
1059#endif /* CONFIG_HUGETLB_PAGE */
1060 1073
1061#ifndef CONFIG_PPC_64K_PAGES 1074#ifndef CONFIG_PPC_64K_PAGES
1062 DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep)); 1075 DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep));
@@ -1145,6 +1158,7 @@ EXPORT_SYMBOL_GPL(hash_page);
1145void hash_preload(struct mm_struct *mm, unsigned long ea, 1158void hash_preload(struct mm_struct *mm, unsigned long ea,
1146 unsigned long access, unsigned long trap) 1159 unsigned long access, unsigned long trap)
1147{ 1160{
1161 int hugepage_shift;
1148 unsigned long vsid; 1162 unsigned long vsid;
1149 pgd_t *pgdir; 1163 pgd_t *pgdir;
1150 pte_t *ptep; 1164 pte_t *ptep;
@@ -1166,10 +1180,27 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
1166 pgdir = mm->pgd; 1180 pgdir = mm->pgd;
1167 if (pgdir == NULL) 1181 if (pgdir == NULL)
1168 return; 1182 return;
1169 ptep = find_linux_pte(pgdir, ea); 1183
1170 if (!ptep) 1184 /* Get VSID */
1185 ssize = user_segment_size(ea);
1186 vsid = get_vsid(mm->context.id, ea, ssize);
1187 if (!vsid)
1171 return; 1188 return;
1189 /*
1190 * Hash doesn't like irqs. Walking linux page table with irq disabled
1191 * saves us from holding multiple locks.
1192 */
1193 local_irq_save(flags);
1194
1195 /*
1196 * THP pages use update_mmu_cache_pmd. We don't do
1197 * hash preload there. Hence can ignore THP here
1198 */
1199 ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugepage_shift);
1200 if (!ptep)
1201 goto out_exit;
1172 1202
1203 WARN_ON(hugepage_shift);
1173#ifdef CONFIG_PPC_64K_PAGES 1204#ifdef CONFIG_PPC_64K_PAGES
1174 /* If either _PAGE_4K_PFN or _PAGE_NO_CACHE is set (and we are on 1205 /* If either _PAGE_4K_PFN or _PAGE_NO_CACHE is set (and we are on
1175 * a 64K kernel), then we don't preload, hash_page() will take 1206 * a 64K kernel), then we don't preload, hash_page() will take
@@ -1178,18 +1209,9 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
1178 * page size demotion here 1209 * page size demotion here
1179 */ 1210 */
1180 if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE)) 1211 if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE))
1181 return; 1212 goto out_exit;
1182#endif /* CONFIG_PPC_64K_PAGES */ 1213#endif /* CONFIG_PPC_64K_PAGES */
1183 1214
1184 /* Get VSID */
1185 ssize = user_segment_size(ea);
1186 vsid = get_vsid(mm->context.id, ea, ssize);
1187 if (!vsid)
1188 return;
1189
1190 /* Hash doesn't like irqs */
1191 local_irq_save(flags);
1192
1193 /* Is that local to this CPU ? */ 1215 /* Is that local to this CPU ? */
1194 if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) 1216 if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
1195 local = 1; 1217 local = 1;
@@ -1211,7 +1233,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
1211 mm->context.user_psize, 1233 mm->context.user_psize,
1212 mm->context.user_psize, 1234 mm->context.user_psize,
1213 pte_val(*ptep)); 1235 pte_val(*ptep));
1214 1236out_exit:
1215 local_irq_restore(flags); 1237 local_irq_restore(flags);
1216} 1238}
1217 1239
@@ -1232,7 +1254,11 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
1232 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 1254 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
1233 slot += hidx & _PTEIDX_GROUP_IX; 1255 slot += hidx & _PTEIDX_GROUP_IX;
1234 DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx); 1256 DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx);
1235 ppc_md.hpte_invalidate(slot, vpn, psize, ssize, local); 1257 /*
1258 * We use same base page size and actual psize, because we don't
1259 * use these functions for hugepage
1260 */
1261 ppc_md.hpte_invalidate(slot, vpn, psize, psize, ssize, local);
1236 } pte_iterate_hashed_end(); 1262 } pte_iterate_hashed_end();
1237 1263
1238#ifdef CONFIG_PPC_TRANSACTIONAL_MEM 1264#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -1365,7 +1391,8 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
1365 hash = ~hash; 1391 hash = ~hash;
1366 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 1392 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
1367 slot += hidx & _PTEIDX_GROUP_IX; 1393 slot += hidx & _PTEIDX_GROUP_IX;
1368 ppc_md.hpte_invalidate(slot, vpn, mmu_linear_psize, mmu_kernel_ssize, 0); 1394 ppc_md.hpte_invalidate(slot, vpn, mmu_linear_psize, mmu_linear_psize,
1395 mmu_kernel_ssize, 0);
1369} 1396}
1370 1397
1371void kernel_map_pages(struct page *page, int numpages, int enable) 1398void kernel_map_pages(struct page *page, int numpages, int enable)
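
The hash_preload() rework above moves the VSID lookup ahead of local_irq_save() and funnels every failure through a single out_exit label, because walking the Linux page tables is only safe against a concurrent THP collapse while interrupts are off on this architecture. The resulting control shape, reduced to a runnable skeleton with trivial stand-ins for the kernel calls:

    #include <stdio.h>

    static int irqs_disabled_count;

    static void local_irq_save(void)    { irqs_disabled_count++; }
    static void local_irq_restore(void) { irqs_disabled_count--; }

    /* stand-ins for get_vsid() and find_linux_pte_or_hugepte() */
    static unsigned long get_vsid(void) { return 0x123; }
    static void *walk_page_table(void)  { return NULL; } /* may legitimately fail */

    static void hash_preload_shape(void)
    {
            if (!get_vsid())        /* cheap checks first, IRQs still on */
                    return;

            local_irq_save();       /* walk + hash insert in one region */
            if (!walk_page_table())
                    goto out_exit;
            /* ... page-size demotion and hash insertion would go here ... */
    out_exit:
            local_irq_restore();
    }

    int main(void)
    {
            hash_preload_shape();
            printf("save/restore balanced: %d\n", irqs_disabled_count); /* 0 */
            return 0;
    }
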
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
new file mode 100644
index 000000000000..34de9e0cdc34
--- /dev/null
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -0,0 +1,175 @@
1/*
2 * Copyright IBM Corporation, 2013
3 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of version 2.1 of the GNU Lesser General Public License
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 *
13 */
14
15/*
16 * PPC64 THP Support for hash based MMUs
17 */
18#include <linux/mm.h>
19#include <asm/machdep.h>
20
21int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
22 pmd_t *pmdp, unsigned long trap, int local, int ssize,
23 unsigned int psize)
24{
25 unsigned int index, valid;
26 unsigned char *hpte_slot_array;
27 unsigned long rflags, pa, hidx;
28 unsigned long old_pmd, new_pmd;
29 int ret, lpsize = MMU_PAGE_16M;
30 unsigned long vpn, hash, shift, slot;
31
32 /*
33 * atomically mark the linux large page PMD busy and dirty
34 */
35 do {
36 old_pmd = pmd_val(*pmdp);
37 /* If PMD busy, retry the access */
38 if (unlikely(old_pmd & _PAGE_BUSY))
39 return 0;
40 /* If PMD is trans splitting retry the access */
41 if (unlikely(old_pmd & _PAGE_SPLITTING))
42 return 0;
43 /* If PMD permissions don't match, take page fault */
44 if (unlikely(access & ~old_pmd))
45 return 1;
46 /*
47 * Try to lock the PTE, add ACCESSED and DIRTY if it was
48 * a write access
49 */
50 new_pmd = old_pmd | _PAGE_BUSY | _PAGE_ACCESSED;
51 if (access & _PAGE_RW)
52 new_pmd |= _PAGE_DIRTY;
53 } while (old_pmd != __cmpxchg_u64((unsigned long *)pmdp,
54 old_pmd, new_pmd));
55 /*
56 * PP bits. _PAGE_USER is already PP bit 0x2, so we only
57 * need to add in 0x1 if it's a read-only user page
58 */
59 rflags = new_pmd & _PAGE_USER;
60 if ((new_pmd & _PAGE_USER) && !((new_pmd & _PAGE_RW) &&
61 (new_pmd & _PAGE_DIRTY)))
62 rflags |= 0x1;
63 /*
64 * _PAGE_EXEC -> HW_NO_EXEC since it's inverted
65 */
66 rflags |= ((new_pmd & _PAGE_EXEC) ? 0 : HPTE_R_N);
67
68#if 0
69 if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
70
71 /*
72 * No CPU has hugepages but lacks no execute, so we
73 * don't need to worry about that case
74 */
75 rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
76 }
77#endif
78 /*
79 * Find the slot index details for this ea, using base page size.
80 */
81 shift = mmu_psize_defs[psize].shift;
82 index = (ea & ~HPAGE_PMD_MASK) >> shift;
83 BUG_ON(index >= 4096);
84
85 vpn = hpt_vpn(ea, vsid, ssize);
86 hash = hpt_hash(vpn, shift, ssize);
87 hpte_slot_array = get_hpte_slot_array(pmdp);
88
89 valid = hpte_valid(hpte_slot_array, index);
90 if (valid) {
91 /* update the hpte bits */
92 hidx = hpte_hash_index(hpte_slot_array, index);
93 if (hidx & _PTEIDX_SECONDARY)
94 hash = ~hash;
95 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
96 slot += hidx & _PTEIDX_GROUP_IX;
97
98 ret = ppc_md.hpte_updatepp(slot, rflags, vpn,
99 psize, lpsize, ssize, local);
100 /*
101 * We failed to update, try to insert a new entry.
102 */
103 if (ret == -1) {
104 /*
105 * large pte is marked busy, so we can be sure
106 * nobody is looking at hpte_slot_array. hence we can
107 * safely update this here.
108 */
109 valid = 0;
110 new_pmd &= ~_PAGE_HPTEFLAGS;
111 hpte_slot_array[index] = 0;
112 } else
113 /* clear the busy bits and set the hash pte bits */
114 new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
115 }
116
117 if (!valid) {
118 unsigned long hpte_group;
119
120 /* insert new entry */
121 pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT;
122repeat:
123 hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
124
125 /* clear the busy bits and set the hash pte bits */
126 new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
127
128 /* Add in WIMG bits */
129 rflags |= (new_pmd & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
130 _PAGE_COHERENT | _PAGE_GUARDED));
131
132 /* Insert into the hash table, primary slot */
133 slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
134 psize, lpsize, ssize);
135 /*
136 * Primary is full, try the secondary
137 */
138 if (unlikely(slot == -1)) {
139 hpte_group = ((~hash & htab_hash_mask) *
140 HPTES_PER_GROUP) & ~0x7UL;
141 slot = ppc_md.hpte_insert(hpte_group, vpn, pa,
142 rflags, HPTE_V_SECONDARY,
143 psize, lpsize, ssize);
144 if (slot == -1) {
145 if (mftb() & 0x1)
146 hpte_group = ((hash & htab_hash_mask) *
147 HPTES_PER_GROUP) & ~0x7UL;
148
149 ppc_md.hpte_remove(hpte_group);
150 goto repeat;
151 }
152 }
153 /*
154 * Hypervisor failure. Restore old pmd and return -1
155 * similar to __hash_page_*
156 */
157 if (unlikely(slot == -2)) {
158 *pmdp = __pmd(old_pmd);
159 hash_failure_debug(ea, access, vsid, trap, ssize,
160 psize, lpsize, old_pmd);
161 return -1;
162 }
163 /*
164 * large pte is marked busy, so we can be sure
165 * nobody is looking at hpte_slot_array. hence we can
166 * safely update this here.
167 */
168 mark_hpte_slot_valid(hpte_slot_array, index, slot);
169 }
170 /*
171 * No need to use ldarx/stdcx here
172 */
173 *pmdp = __pmd(new_pmd & ~_PAGE_BUSY);
174 return 0;
175}
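
The hpte_slot_array that __hash_page_thp() consults keeps one byte per base page of the 16M transparent hugepage (4096 entries at a 4K base page size): the low bit records that an HPTE exists for that index, and the upper bits record which hash slot it landed in. A sketch of that packing; the helper names mirror the kernel's and the bit layout is as this patch series defines it, so treat it as illustrative:

    #include <stdio.h>

    /* one byte per 4K base page of a 16M THP: 16M/4K = 4096 entries */
    static unsigned char hpte_slot_array[4096];

    static int hpte_valid(const unsigned char *a, int i)      { return a[i] & 0x1; }
    static int hpte_hash_index(const unsigned char *a, int i) { return a[i] >> 1; }

    static void mark_hpte_slot_valid(unsigned char *a, int i, int hidx)
    {
            a[i] = (hidx << 1) | 0x1;
    }

    int main(void)
    {
            mark_hpte_slot_valid(hpte_slot_array, 42, 5);

            if (hpte_valid(hpte_slot_array, 42))
                    printf("index 42 -> hash slot info %d\n",
                           hpte_hash_index(hpte_slot_array, 42));
            printf("index 43 valid? %d\n", hpte_valid(hpte_slot_array, 43));
            return 0;
    }
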
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 0f1d94a1fb82..0b7fb6761015 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -81,7 +81,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
81 slot += (old_pte & _PAGE_F_GIX) >> 12; 81 slot += (old_pte & _PAGE_F_GIX) >> 12;
82 82
83 if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize, 83 if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize,
84 ssize, local) == -1) 84 mmu_psize, ssize, local) == -1)
85 old_pte &= ~_PAGE_HPTEFLAGS; 85 old_pte &= ~_PAGE_HPTEFLAGS;
86 } 86 }
87 87
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 77fdd2cef33b..834ca8eb38f2 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -21,6 +21,9 @@
21#include <asm/pgalloc.h> 21#include <asm/pgalloc.h>
22#include <asm/tlb.h> 22#include <asm/tlb.h>
23#include <asm/setup.h> 23#include <asm/setup.h>
24#include <asm/hugetlb.h>
25
26#ifdef CONFIG_HUGETLB_PAGE
24 27
25#define PAGE_SHIFT_64K 16 28#define PAGE_SHIFT_64K 16
26#define PAGE_SHIFT_16M 24 29#define PAGE_SHIFT_16M 24
@@ -100,68 +103,9 @@ int pgd_huge(pgd_t pgd)
100} 103}
101#endif 104#endif
102 105
103/*
104 * We have 4 cases for pgds and pmds:
105 * (1) invalid (all zeroes)
106 * (2) pointer to next table, as normal; bottom 6 bits == 0
107 * (3) leaf pte for huge page, bottom two bits != 00
108 * (4) hugepd pointer, bottom two bits == 00, next 4 bits indicate size of table
109 */
110pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
111{
112 pgd_t *pg;
113 pud_t *pu;
114 pmd_t *pm;
115 pte_t *ret_pte;
116 hugepd_t *hpdp = NULL;
117 unsigned pdshift = PGDIR_SHIFT;
118
119 if (shift)
120 *shift = 0;
121
122 pg = pgdir + pgd_index(ea);
123
124 if (pgd_huge(*pg)) {
125 ret_pte = (pte_t *) pg;
126 goto out;
127 } else if (is_hugepd(pg))
128 hpdp = (hugepd_t *)pg;
129 else if (!pgd_none(*pg)) {
130 pdshift = PUD_SHIFT;
131 pu = pud_offset(pg, ea);
132
133 if (pud_huge(*pu)) {
134 ret_pte = (pte_t *) pu;
135 goto out;
136 } else if (is_hugepd(pu))
137 hpdp = (hugepd_t *)pu;
138 else if (!pud_none(*pu)) {
139 pdshift = PMD_SHIFT;
140 pm = pmd_offset(pu, ea);
141
142 if (pmd_huge(*pm)) {
143 ret_pte = (pte_t *) pm;
144 goto out;
145 } else if (is_hugepd(pm))
146 hpdp = (hugepd_t *)pm;
147 else if (!pmd_none(*pm))
148 return pte_offset_kernel(pm, ea);
149 }
150 }
151 if (!hpdp)
152 return NULL;
153
154 ret_pte = hugepte_offset(hpdp, ea, pdshift);
155 pdshift = hugepd_shift(*hpdp);
156out:
157 if (shift)
158 *shift = pdshift;
159 return ret_pte;
160}
161EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte);
162
163pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) 106pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
164{ 107{
108 /* Only called for hugetlbfs pages, hence can ignore THP */
165 return find_linux_pte_or_hugepte(mm->pgd, addr, NULL); 109 return find_linux_pte_or_hugepte(mm->pgd, addr, NULL);
166} 110}
167 111
@@ -357,7 +301,7 @@ void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
357int alloc_bootmem_huge_page(struct hstate *hstate) 301int alloc_bootmem_huge_page(struct hstate *hstate)
358{ 302{
359 struct huge_bootmem_page *m; 303 struct huge_bootmem_page *m;
360 int idx = shift_to_mmu_psize(hstate->order + PAGE_SHIFT); 304 int idx = shift_to_mmu_psize(huge_page_shift(hstate));
361 int nr_gpages = gpage_freearray[idx].nr_gpages; 305 int nr_gpages = gpage_freearray[idx].nr_gpages;
362 306
363 if (nr_gpages == 0) 307 if (nr_gpages == 0)
@@ -736,11 +680,14 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
736 struct page *page; 680 struct page *page;
737 unsigned shift; 681 unsigned shift;
738 unsigned long mask; 682 unsigned long mask;
739 683 /*
684 * Transparent hugepages are handled by generic code. We can skip them
685 * here.
686 */
740 ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift); 687 ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);
741 688
742 /* Verify it is a huge page else bail. */ 689 /* Verify it is a huge page else bail. */
743 if (!ptep || !shift) 690 if (!ptep || !shift || pmd_trans_huge(*(pmd_t *)ptep))
744 return ERR_PTR(-EINVAL); 691 return ERR_PTR(-EINVAL);
745 692
746 mask = (1UL << shift) - 1; 693 mask = (1UL << shift) - 1;
@@ -759,69 +706,6 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
759 return NULL; 706 return NULL;
760} 707}
761 708
762int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
763 unsigned long end, int write, struct page **pages, int *nr)
764{
765 unsigned long mask;
766 unsigned long pte_end;
767 struct page *head, *page, *tail;
768 pte_t pte;
769 int refs;
770
771 pte_end = (addr + sz) & ~(sz-1);
772 if (pte_end < end)
773 end = pte_end;
774
775 pte = *ptep;
776 mask = _PAGE_PRESENT | _PAGE_USER;
777 if (write)
778 mask |= _PAGE_RW;
779
780 if ((pte_val(pte) & mask) != mask)
781 return 0;
782
783 /* hugepages are never "special" */
784 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
785
786 refs = 0;
787 head = pte_page(pte);
788
789 page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
790 tail = page;
791 do {
792 VM_BUG_ON(compound_head(page) != head);
793 pages[*nr] = page;
794 (*nr)++;
795 page++;
796 refs++;
797 } while (addr += PAGE_SIZE, addr != end);
798
799 if (!page_cache_add_speculative(head, refs)) {
800 *nr -= refs;
801 return 0;
802 }
803
804 if (unlikely(pte_val(pte) != pte_val(*ptep))) {
805 /* Could be optimized better */
806 *nr -= refs;
807 while (refs--)
808 put_page(head);
809 return 0;
810 }
811
812 /*
813 * Any tail page need their mapcount reference taken before we
814 * return.
815 */
816 while (refs--) {
817 if (PageTail(tail))
818 get_huge_page_tail(tail);
819 tail++;
820 }
821
822 return 1;
823}
824
825static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, 709static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
826 unsigned long sz) 710 unsigned long sz)
827{ 711{
@@ -1038,3 +922,168 @@ void flush_dcache_icache_hugepage(struct page *page)
1038 } 922 }
1039 } 923 }
1040} 924}
925
926#endif /* CONFIG_HUGETLB_PAGE */
927
928/*
929 * We have 4 cases for pgds and pmds:
930 * (1) invalid (all zeroes)
931 * (2) pointer to next table, as normal; bottom 6 bits == 0
932 * (3) leaf pte for huge page, bottom two bits != 00
933 * (4) hugepd pointer, bottom two bits == 00, next 4 bits indicate size of table
934 *
 935 * So long as we atomically load page table pointers we are safe against teardown,
 936 * and can follow the address down to the page and take a ref on it.
937 */
938
939pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
940{
941 pgd_t pgd, *pgdp;
942 pud_t pud, *pudp;
943 pmd_t pmd, *pmdp;
944 pte_t *ret_pte;
945 hugepd_t *hpdp = NULL;
946 unsigned pdshift = PGDIR_SHIFT;
947
948 if (shift)
949 *shift = 0;
950
951 pgdp = pgdir + pgd_index(ea);
952 pgd = ACCESS_ONCE(*pgdp);
953 /*
 954	 * Always operate on the local stack value. This makes sure the
 955	 * value doesn't get updated by a parallel THP split/collapse,
 956	 * page fault or page unmap. The returned pte_t * is still not
 957	 * stable, so the caller must recheck it for the above conditions.
958 */
959 if (pgd_none(pgd))
960 return NULL;
961 else if (pgd_huge(pgd)) {
962 ret_pte = (pte_t *) pgdp;
963 goto out;
964 } else if (is_hugepd(&pgd))
965 hpdp = (hugepd_t *)&pgd;
966 else {
967 /*
 968		 * Even if we end up with an unmap, the pgtable will not
 969		 * be freed, because we do an RCU free and we run here
 970		 * with IRQs disabled.
971 */
972 pdshift = PUD_SHIFT;
973 pudp = pud_offset(&pgd, ea);
974 pud = ACCESS_ONCE(*pudp);
975
976 if (pud_none(pud))
977 return NULL;
978 else if (pud_huge(pud)) {
979 ret_pte = (pte_t *) pudp;
980 goto out;
981 } else if (is_hugepd(&pud))
982 hpdp = (hugepd_t *)&pud;
983 else {
984 pdshift = PMD_SHIFT;
985 pmdp = pmd_offset(&pud, ea);
986 pmd = ACCESS_ONCE(*pmdp);
987 /*
 988			 * A hugepage collapse is captured by pmd_none, because
 989			 * it marks the pmd none and does an hpte invalidate.
 990			 *
 991			 * A hugepage split is captured by pmd_trans_splitting,
 992			 * because we mark the pmd trans splitting and do an
 993			 * hpte invalidate.
994 *
995 */
996 if (pmd_none(pmd) || pmd_trans_splitting(pmd))
997 return NULL;
998
999 if (pmd_huge(pmd) || pmd_large(pmd)) {
1000 ret_pte = (pte_t *) pmdp;
1001 goto out;
1002 } else if (is_hugepd(&pmd))
1003 hpdp = (hugepd_t *)&pmd;
1004 else
1005 return pte_offset_kernel(&pmd, ea);
1006 }
1007 }
1008 if (!hpdp)
1009 return NULL;
1010
1011 ret_pte = hugepte_offset(hpdp, ea, pdshift);
1012 pdshift = hugepd_shift(*hpdp);
1013out:
1014 if (shift)
1015 *shift = pdshift;
1016 return ret_pte;
1017}
1018EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte);
1019
1020int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
1021 unsigned long end, int write, struct page **pages, int *nr)
1022{
1023 unsigned long mask;
1024 unsigned long pte_end;
1025 struct page *head, *page, *tail;
1026 pte_t pte;
1027 int refs;
1028
1029 pte_end = (addr + sz) & ~(sz-1);
1030 if (pte_end < end)
1031 end = pte_end;
1032
1033 pte = ACCESS_ONCE(*ptep);
1034 mask = _PAGE_PRESENT | _PAGE_USER;
1035 if (write)
1036 mask |= _PAGE_RW;
1037
1038 if ((pte_val(pte) & mask) != mask)
1039 return 0;
1040
1041#ifdef CONFIG_TRANSPARENT_HUGEPAGE
1042 /*
1043 * check for splitting here
1044 */
1045 if (pmd_trans_splitting(pte_pmd(pte)))
1046 return 0;
1047#endif
1048
1049 /* hugepages are never "special" */
1050 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
1051
1052 refs = 0;
1053 head = pte_page(pte);
1054
1055 page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
1056 tail = page;
1057 do {
1058 VM_BUG_ON(compound_head(page) != head);
1059 pages[*nr] = page;
1060 (*nr)++;
1061 page++;
1062 refs++;
1063 } while (addr += PAGE_SIZE, addr != end);
1064
1065 if (!page_cache_add_speculative(head, refs)) {
1066 *nr -= refs;
1067 return 0;
1068 }
1069
1070 if (unlikely(pte_val(pte) != pte_val(*ptep))) {
1071 /* Could be optimized better */
1072 *nr -= refs;
1073 while (refs--)
1074 put_page(head);
1075 return 0;
1076 }
1077
1078 /*
1079	 * Any tail pages need their mapcount reference taken before we
1080 * return.
1081 */
1082 while (refs--) {
1083 if (PageTail(tail))
1084 get_huge_page_tail(tail);
1085 tail++;
1086 }
1087
1088 return 1;
1089}
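
gup_hugepte() above is the lockless fast-GUP pattern: snapshot the pte once, take a speculative reference on the head page, then re-read the pte and back out if it changed under us. A sketch of the same pattern with C11 atomics; struct page, get_page_unless_zero() and gup_one() are simplified stand-ins for the kernel's page_cache_add_speculative() dance:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    struct page { atomic_int refcount; };

    /* Take `refs` references unless the count already hit zero (the page
     * is being freed); models page_cache_add_speculative(). */
    static bool get_page_unless_zero(struct page *p, int refs)
    {
            int old = atomic_load(&p->refcount);
            do {
                    if (old == 0)
                            return false;
            } while (!atomic_compare_exchange_weak(&p->refcount, &old, old + refs));
            return true;
    }

    static struct page *gup_one(_Atomic uint64_t *ptep,
                                struct page *(*pte_page)(uint64_t))
    {
            uint64_t pte = atomic_load(ptep);       /* snapshot, cf. ACCESS_ONCE() */
            struct page *head = pte_page(pte);

            if (!get_page_unless_zero(head, 1))
                    return NULL;
            if (atomic_load(ptep) != pte) {         /* raced with unmap/split: back out */
                    atomic_fetch_sub(&head->refcount, 1);
                    return NULL;
            }
            return head;                            /* reference is now stable */
    }
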
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index a90b9c458990..d0cd9e4c6837 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -88,7 +88,11 @@ static void pgd_ctor(void *addr)
88 88
89static void pmd_ctor(void *addr) 89static void pmd_ctor(void *addr)
90{ 90{
91#ifdef CONFIG_TRANSPARENT_HUGEPAGE
92 memset(addr, 0, PMD_TABLE_SIZE * 2);
93#else
91 memset(addr, 0, PMD_TABLE_SIZE); 94 memset(addr, 0, PMD_TABLE_SIZE);
95#endif
92} 96}
93 97
94struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE]; 98struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
@@ -137,10 +141,9 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
137void pgtable_cache_init(void) 141void pgtable_cache_init(void)
138{ 142{
139 pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor); 143 pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
140 pgtable_cache_add(PMD_INDEX_SIZE, pmd_ctor); 144 pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor);
141 if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_INDEX_SIZE)) 145 if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_CACHE_INDEX))
142 panic("Couldn't allocate pgtable caches"); 146 panic("Couldn't allocate pgtable caches");
143
144 /* In all current configs, when the PUD index exists it's the 147 /* In all current configs, when the PUD index exists it's the
145 * same size as either the pgd or pmd index. Verify that the 148 * same size as either the pgd or pmd index. Verify that the
146 * initialization above has also created a PUD cache. This 149 * initialization above has also created a PUD cache. This
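
The pmd_ctor() change above doubles the constructed area under CONFIG_TRANSPARENT_HUGEPAGE because the second half of the PMD page stores one deposited pgtable pointer per pmd entry; pgtable_trans_huge_deposit() later in this patch finds that slot at a fixed offset. A sketch of the layout, with an illustrative PTRS_PER_PMD value:

    #define PTRS_PER_PMD 2048                       /* illustrative, not the real value */

    typedef struct { unsigned long pmd; } pmd_t;
    typedef unsigned long *pgtable_t;

    struct pmd_page_model {
            pmd_t     entries[PTRS_PER_PMD];        /* first half: the pmd table proper */
            pgtable_t deposits[PTRS_PER_PMD];       /* second half: deposited pgtables */
    };

    /* Same arithmetic as pgtable_trans_huge_deposit()/..._withdraw(): the
     * deposit slot for a pmd entry sits PTRS_PER_PMD entries past it. */
    static pgtable_t *deposit_slot(pmd_t *pmdp)
    {
            _Static_assert(sizeof(pmd_t) == sizeof(pgtable_t),
                           "offset arithmetic relies on equal entry sizes");
            return (pgtable_t *)pmdp + PTRS_PER_PMD;
    }
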
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 0988a26e0413..7f4bea162026 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -299,47 +299,13 @@ void __init paging_init(void)
299 299
300void __init mem_init(void) 300void __init mem_init(void)
301{ 301{
302#ifdef CONFIG_NEED_MULTIPLE_NODES
303 int nid;
304#endif
305 pg_data_t *pgdat;
306 unsigned long i;
307 struct page *page;
308 unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize;
309
310#ifdef CONFIG_SWIOTLB 302#ifdef CONFIG_SWIOTLB
311 swiotlb_init(0); 303 swiotlb_init(0);
312#endif 304#endif
313 305
314 num_physpages = memblock_phys_mem_size() >> PAGE_SHIFT;
315 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); 306 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
316 307 set_max_mapnr(max_pfn);
317#ifdef CONFIG_NEED_MULTIPLE_NODES 308 free_all_bootmem();
318 for_each_online_node(nid) {
319 if (NODE_DATA(nid)->node_spanned_pages != 0) {
320 printk("freeing bootmem node %d\n", nid);
321 totalram_pages +=
322 free_all_bootmem_node(NODE_DATA(nid));
323 }
324 }
325#else
326 max_mapnr = max_pfn;
327 totalram_pages += free_all_bootmem();
328#endif
329 for_each_online_pgdat(pgdat) {
330 for (i = 0; i < pgdat->node_spanned_pages; i++) {
331 if (!pfn_valid(pgdat->node_start_pfn + i))
332 continue;
333 page = pgdat_page_nr(pgdat, i);
334 if (PageReserved(page))
335 reservedpages++;
336 }
337 }
338
339 codesize = (unsigned long)&_sdata - (unsigned long)&_stext;
340 datasize = (unsigned long)&_edata - (unsigned long)&_sdata;
341 initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin;
342 bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start;
343 309
344#ifdef CONFIG_HIGHMEM 310#ifdef CONFIG_HIGHMEM
345 { 311 {
@@ -349,13 +315,9 @@ void __init mem_init(void)
349 for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) { 315 for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) {
350 phys_addr_t paddr = (phys_addr_t)pfn << PAGE_SHIFT; 316 phys_addr_t paddr = (phys_addr_t)pfn << PAGE_SHIFT;
351 struct page *page = pfn_to_page(pfn); 317 struct page *page = pfn_to_page(pfn);
352 if (memblock_is_reserved(paddr)) 318 if (!memblock_is_reserved(paddr))
353 continue; 319 free_highmem_page(page);
354 free_highmem_page(page);
355 reservedpages--;
356 } 320 }
357 printk(KERN_DEBUG "High memory: %luk\n",
358 totalhigh_pages << (PAGE_SHIFT-10));
359 } 321 }
360#endif /* CONFIG_HIGHMEM */ 322#endif /* CONFIG_HIGHMEM */
361 323
@@ -368,16 +330,7 @@ void __init mem_init(void)
368 (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1; 330 (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1;
369#endif 331#endif
370 332
371 printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, " 333 mem_init_print_info(NULL);
372 "%luk reserved, %luk data, %luk bss, %luk init)\n",
373 nr_free_pages() << (PAGE_SHIFT-10),
374 num_physpages << (PAGE_SHIFT-10),
375 codesize >> 10,
376 reservedpages << (PAGE_SHIFT-10),
377 datasize >> 10,
378 bsssize >> 10,
379 initsize >> 10);
380
381#ifdef CONFIG_PPC32 334#ifdef CONFIG_PPC32
382 pr_info("Kernel virtual memory layout:\n"); 335 pr_info("Kernel virtual memory layout:\n");
383 pr_info(" * 0x%08lx..0x%08lx : fixmap\n", FIXADDR_START, FIXADDR_TOP); 336 pr_info(" * 0x%08lx..0x%08lx : fixmap\n", FIXADDR_START, FIXADDR_TOP);
@@ -407,7 +360,7 @@ void free_initmem(void)
407#ifdef CONFIG_BLK_DEV_INITRD 360#ifdef CONFIG_BLK_DEV_INITRD
408void __init free_initrd_mem(unsigned long start, unsigned long end) 361void __init free_initrd_mem(unsigned long start, unsigned long end)
409{ 362{
410 free_reserved_area(start, end, 0, "initrd"); 363 free_reserved_area((void *)start, (void *)end, -1, "initrd");
411} 364}
412#endif 365#endif
413 366
@@ -508,6 +461,10 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
508 pte_t *ptep) 461 pte_t *ptep)
509{ 462{
510#ifdef CONFIG_PPC_STD_MMU 463#ifdef CONFIG_PPC_STD_MMU
464 /*
465 * We don't need to worry about _PAGE_PRESENT here because we are
466 * called with either mm->page_table_lock held or ptl lock held
467 */
511 unsigned long access = 0, trap; 468 unsigned long access = 0, trap;
512 469
513 /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ 470 /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
diff --git a/arch/powerpc/mm/mmap_64.c b/arch/powerpc/mm/mmap.c
index 67a42ed0d2fc..cb8bdbe4972f 100644
--- a/arch/powerpc/mm/mmap_64.c
+++ b/arch/powerpc/mm/mmap.c
@@ -92,10 +92,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
92 if (mmap_is_legacy()) { 92 if (mmap_is_legacy()) {
93 mm->mmap_base = TASK_UNMAPPED_BASE; 93 mm->mmap_base = TASK_UNMAPPED_BASE;
94 mm->get_unmapped_area = arch_get_unmapped_area; 94 mm->get_unmapped_area = arch_get_unmapped_area;
95 mm->unmap_area = arch_unmap_area;
96 } else { 95 } else {
97 mm->mmap_base = mmap_base(); 96 mm->mmap_base = mmap_base();
98 mm->get_unmapped_area = arch_get_unmapped_area_topdown; 97 mm->get_unmapped_area = arch_get_unmapped_area_topdown;
99 mm->unmap_area = arch_unmap_area_topdown;
100 } 98 }
101} 99}
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index e779642c25e5..af3d78e19302 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -112,8 +112,10 @@ static unsigned int steal_context_smp(unsigned int id)
112 */ 112 */
113 for_each_cpu(cpu, mm_cpumask(mm)) { 113 for_each_cpu(cpu, mm_cpumask(mm)) {
114 for (i = cpu_first_thread_sibling(cpu); 114 for (i = cpu_first_thread_sibling(cpu);
115 i <= cpu_last_thread_sibling(cpu); i++) 115 i <= cpu_last_thread_sibling(cpu); i++) {
116 __set_bit(id, stale_map[i]); 116 if (stale_map[i])
117 __set_bit(id, stale_map[i]);
118 }
117 cpu = i - 1; 119 cpu = i - 1;
118 } 120 }
119 return id; 121 return id;
@@ -272,7 +274,8 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
272 /* XXX This clear should ultimately be part of local_flush_tlb_mm */ 274 /* XXX This clear should ultimately be part of local_flush_tlb_mm */
273 for (i = cpu_first_thread_sibling(cpu); 275 for (i = cpu_first_thread_sibling(cpu);
274 i <= cpu_last_thread_sibling(cpu); i++) { 276 i <= cpu_last_thread_sibling(cpu); i++) {
275 __clear_bit(id, stale_map[i]); 277 if (stale_map[i])
278 __clear_bit(id, stale_map[i]);
276 } 279 }
277 } 280 }
278 281
@@ -329,8 +332,8 @@ void destroy_context(struct mm_struct *mm)
329 332
330#ifdef CONFIG_SMP 333#ifdef CONFIG_SMP
331 334
332static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self, 335static int mmu_context_cpu_notify(struct notifier_block *self,
333 unsigned long action, void *hcpu) 336 unsigned long action, void *hcpu)
334{ 337{
335 unsigned int cpu = (unsigned int)(long)hcpu; 338 unsigned int cpu = (unsigned int)(long)hcpu;
336 339
@@ -363,7 +366,7 @@ static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self,
363 return NOTIFY_OK; 366 return NOTIFY_OK;
364} 367}
365 368
366static struct notifier_block __cpuinitdata mmu_context_cpu_nb = { 369static struct notifier_block mmu_context_cpu_nb = {
367 .notifier_call = mmu_context_cpu_notify, 370 .notifier_call = mmu_context_cpu_notify,
368}; 371};
369 372
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 88c0425dc0a8..5850798826cd 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -27,6 +27,7 @@
27#include <linux/seq_file.h> 27#include <linux/seq_file.h>
28#include <linux/uaccess.h> 28#include <linux/uaccess.h>
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include <asm/cputhreads.h>
30#include <asm/sparsemem.h> 31#include <asm/sparsemem.h>
31#include <asm/prom.h> 32#include <asm/prom.h>
32#include <asm/smp.h> 33#include <asm/smp.h>
@@ -516,7 +517,7 @@ static int of_drconf_to_nid_single(struct of_drconf_cell *drmem,
516 * Figure out to which domain a cpu belongs and stick it there. 517 * Figure out to which domain a cpu belongs and stick it there.
517 * Return the id of the domain used. 518 * Return the id of the domain used.
518 */ 519 */
519static int __cpuinit numa_setup_cpu(unsigned long lcpu) 520static int numa_setup_cpu(unsigned long lcpu)
520{ 521{
521 int nid = 0; 522 int nid = 0;
522 struct device_node *cpu = of_get_cpu_node(lcpu, NULL); 523 struct device_node *cpu = of_get_cpu_node(lcpu, NULL);
@@ -538,8 +539,7 @@ out:
538 return nid; 539 return nid;
539} 540}
540 541
541static int __cpuinit cpu_numa_callback(struct notifier_block *nfb, 542static int cpu_numa_callback(struct notifier_block *nfb, unsigned long action,
542 unsigned long action,
543 void *hcpu) 543 void *hcpu)
544{ 544{
545 unsigned long lcpu = (unsigned long)hcpu; 545 unsigned long lcpu = (unsigned long)hcpu;
@@ -919,7 +919,7 @@ static void __init *careful_zallocation(int nid, unsigned long size,
919 return ret; 919 return ret;
920} 920}
921 921
922static struct notifier_block __cpuinitdata ppc64_numa_nb = { 922static struct notifier_block ppc64_numa_nb = {
923 .notifier_call = cpu_numa_callback, 923 .notifier_call = cpu_numa_callback,
924 .priority = 1 /* Must run before sched domains notifier. */ 924 .priority = 1 /* Must run before sched domains notifier. */
925}; 925};
@@ -1319,7 +1319,8 @@ static int update_cpu_associativity_changes_mask(void)
1319 } 1319 }
1320 } 1320 }
1321 if (changed) { 1321 if (changed) {
1322 cpumask_set_cpu(cpu, changes); 1322 cpumask_or(changes, changes, cpu_sibling_mask(cpu));
1323 cpu = cpu_last_thread_sibling(cpu);
1323 } 1324 }
1324 } 1325 }
1325 1326
@@ -1427,17 +1428,15 @@ static int update_cpu_topology(void *data)
1427 if (!data) 1428 if (!data)
1428 return -EINVAL; 1429 return -EINVAL;
1429 1430
1430 cpu = get_cpu(); 1431 cpu = smp_processor_id();
1431 1432
1432 for (update = data; update; update = update->next) { 1433 for (update = data; update; update = update->next) {
1433 if (cpu != update->cpu) 1434 if (cpu != update->cpu)
1434 continue; 1435 continue;
1435 1436
1436 unregister_cpu_under_node(update->cpu, update->old_nid);
1437 unmap_cpu_from_node(update->cpu); 1437 unmap_cpu_from_node(update->cpu);
1438 map_cpu_to_node(update->cpu, update->new_nid); 1438 map_cpu_to_node(update->cpu, update->new_nid);
1439 vdso_getcpu_init(); 1439 vdso_getcpu_init();
1440 register_cpu_under_node(update->cpu, update->new_nid);
1441 } 1440 }
1442 1441
1443 return 0; 1442 return 0;
@@ -1449,12 +1448,12 @@ static int update_cpu_topology(void *data)
1449 */ 1448 */
1450int arch_update_cpu_topology(void) 1449int arch_update_cpu_topology(void)
1451{ 1450{
1452 unsigned int cpu, changed = 0; 1451 unsigned int cpu, sibling, changed = 0;
1453 struct topology_update_data *updates, *ud; 1452 struct topology_update_data *updates, *ud;
1454 unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0}; 1453 unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0};
1455 cpumask_t updated_cpus; 1454 cpumask_t updated_cpus;
1456 struct device *dev; 1455 struct device *dev;
1457 int weight, i = 0; 1456 int weight, new_nid, i = 0;
1458 1457
1459 weight = cpumask_weight(&cpu_associativity_changes_mask); 1458 weight = cpumask_weight(&cpu_associativity_changes_mask);
1460 if (!weight) 1459 if (!weight)
@@ -1467,24 +1466,54 @@ int arch_update_cpu_topology(void)
1467 cpumask_clear(&updated_cpus); 1466 cpumask_clear(&updated_cpus);
1468 1467
1469 for_each_cpu(cpu, &cpu_associativity_changes_mask) { 1468 for_each_cpu(cpu, &cpu_associativity_changes_mask) {
1470 ud = &updates[i++]; 1469 /*
1471 ud->cpu = cpu; 1470 * If siblings aren't flagged for changes, updates list
1472 vphn_get_associativity(cpu, associativity); 1471 * will be too short. Skip on this update and set for next
1473 ud->new_nid = associativity_to_nid(associativity); 1472 * update.
1474 1473 */
1475 if (ud->new_nid < 0 || !node_online(ud->new_nid)) 1474 if (!cpumask_subset(cpu_sibling_mask(cpu),
1476 ud->new_nid = first_online_node; 1475 &cpu_associativity_changes_mask)) {
1476 pr_info("Sibling bits not set for associativity "
1477 "change, cpu%d\n", cpu);
1478 cpumask_or(&cpu_associativity_changes_mask,
1479 &cpu_associativity_changes_mask,
1480 cpu_sibling_mask(cpu));
1481 cpu = cpu_last_thread_sibling(cpu);
1482 continue;
1483 }
1477 1484
1478 ud->old_nid = numa_cpu_lookup_table[cpu]; 1485 /* Use associativity from first thread for all siblings */
1479 cpumask_set_cpu(cpu, &updated_cpus); 1486 vphn_get_associativity(cpu, associativity);
1487 new_nid = associativity_to_nid(associativity);
1488 if (new_nid < 0 || !node_online(new_nid))
1489 new_nid = first_online_node;
1490
1491 if (new_nid == numa_cpu_lookup_table[cpu]) {
1492 cpumask_andnot(&cpu_associativity_changes_mask,
1493 &cpu_associativity_changes_mask,
1494 cpu_sibling_mask(cpu));
1495 cpu = cpu_last_thread_sibling(cpu);
1496 continue;
1497 }
1480 1498
1481 if (i < weight) 1499 for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
1482 ud->next = &updates[i]; 1500 ud = &updates[i++];
1501 ud->cpu = sibling;
1502 ud->new_nid = new_nid;
1503 ud->old_nid = numa_cpu_lookup_table[sibling];
1504 cpumask_set_cpu(sibling, &updated_cpus);
1505 if (i < weight)
1506 ud->next = &updates[i];
1507 }
1508 cpu = cpu_last_thread_sibling(cpu);
1483 } 1509 }
1484 1510
1485 stop_machine(update_cpu_topology, &updates[0], &updated_cpus); 1511 stop_machine(update_cpu_topology, &updates[0], &updated_cpus);
1486 1512
1487 for (ud = &updates[0]; ud; ud = ud->next) { 1513 for (ud = &updates[0]; ud; ud = ud->next) {
1514 unregister_cpu_under_node(ud->cpu, ud->old_nid);
1515 register_cpu_under_node(ud->cpu, ud->new_nid);
1516
1488 dev = get_cpu_device(ud->cpu); 1517 dev = get_cpu_device(ud->cpu);
1489 if (dev) 1518 if (dev)
1490 kobject_uevent(&dev->kobj, KOBJ_CHANGE); 1519 kobject_uevent(&dev->kobj, KOBJ_CHANGE);
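
The reworked update loop above always migrates a whole core at once: one VPHN query for the first thread, the same node applied to every sibling, then a jump to cpu_last_thread_sibling() so the outer loop skips the rest of the core. A small model of that bookkeeping, assuming threads of a core are numbered contiguously and a hypothetical SMT width of 4:

    #include <stdio.h>

    #define NR_CPUS 32
    #define THREADS_PER_CORE 4      /* hypothetical SMT width */

    static int numa_cpu_lookup_table[NR_CPUS];

    /* Models cpu_first/last_thread_sibling(): threads of a core are adjacent. */
    static int first_sibling(int cpu) { return cpu & ~(THREADS_PER_CORE - 1); }
    static int last_sibling(int cpu)  { return first_sibling(cpu) + THREADS_PER_CORE - 1; }

    /* Mirrors the loop above: apply one node to every thread of the core. */
    static void update_core_node(int cpu, int new_nid)
    {
            for (int t = first_sibling(cpu); t <= last_sibling(cpu); t++)
                    numa_cpu_lookup_table[t] = new_nid;
    }

    int main(void)
    {
            /* Advance core by core, exactly like cpu = cpu_last_thread_sibling(cpu). */
            for (int cpu = 0; cpu < NR_CPUS; cpu = last_sibling(cpu) + 1)
                    update_core_node(cpu, cpu / THREADS_PER_CORE % 2);  /* fake nids */
            printf("cpu12 -> node %d\n", numa_cpu_lookup_table[12]);
            return 0;
    }
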
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 214130a4edc6..edda589795c3 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -235,6 +235,14 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
235 pud = pud_offset(pgd, addr); 235 pud = pud_offset(pgd, addr);
236 BUG_ON(pud_none(*pud)); 236 BUG_ON(pud_none(*pud));
237 pmd = pmd_offset(pud, addr); 237 pmd = pmd_offset(pud, addr);
238 /*
239 * khugepaged to collapse normal pages to hugepage, first set
240 * pmd to none to force page fault/gup to take mmap_sem. After
241 * pmd is set to none, we do a pte_clear which does this assertion
242 * so if we find pmd none, return.
243 */
244 if (pmd_none(*pmd))
245 return;
238 BUG_ON(!pmd_present(*pmd)); 246 BUG_ON(!pmd_present(*pmd));
239 assert_spin_locked(pte_lockptr(mm, pmd)); 247 assert_spin_locked(pte_lockptr(mm, pmd));
240} 248}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index a854096e1023..536eec72c0f7 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -338,6 +338,19 @@ EXPORT_SYMBOL(iounmap);
338EXPORT_SYMBOL(__iounmap); 338EXPORT_SYMBOL(__iounmap);
339EXPORT_SYMBOL(__iounmap_at); 339EXPORT_SYMBOL(__iounmap_at);
340 340
341/*
342 * For hugepage we have pfn in the pmd, we use PTE_RPN_SHIFT bits for flags
343 * For PTE page, we have a PTE_FRAG_SIZE (4K) aligned virtual address.
344 */
345struct page *pmd_page(pmd_t pmd)
346{
347#ifdef CONFIG_TRANSPARENT_HUGEPAGE
348 if (pmd_trans_huge(pmd))
349 return pfn_to_page(pmd_pfn(pmd));
350#endif
351 return virt_to_page(pmd_page_vaddr(pmd));
352}
353
341#ifdef CONFIG_PPC_64K_PAGES 354#ifdef CONFIG_PPC_64K_PAGES
342static pte_t *get_from_cache(struct mm_struct *mm) 355static pte_t *get_from_cache(struct mm_struct *mm)
343{ 356{
@@ -455,3 +468,404 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
455} 468}
456#endif 469#endif
457#endif /* CONFIG_PPC_64K_PAGES */ 470#endif /* CONFIG_PPC_64K_PAGES */
471
472#ifdef CONFIG_TRANSPARENT_HUGEPAGE
473
474/*
475 * This is called when relaxing access to a hugepage. It's also called in the page
476 * fault path when we don't hit any of the major fault cases, ie, a minor
477 * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
478	 * handled those two for us; we additionally deal with missing execute
479	 * permission here on some processors.
480 */
481int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
482 pmd_t *pmdp, pmd_t entry, int dirty)
483{
484 int changed;
485#ifdef CONFIG_DEBUG_VM
486 WARN_ON(!pmd_trans_huge(*pmdp));
487 assert_spin_locked(&vma->vm_mm->page_table_lock);
488#endif
489 changed = !pmd_same(*(pmdp), entry);
490 if (changed) {
491 __ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry));
492 /*
493 * Since we are not supporting SW TLB systems, we don't
494		 * have anything similar to flush_tlb_page_nohash()
495 */
496 }
497 return changed;
498}
499
500unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
501 pmd_t *pmdp, unsigned long clr)
502{
503
504 unsigned long old, tmp;
505
506#ifdef CONFIG_DEBUG_VM
507 WARN_ON(!pmd_trans_huge(*pmdp));
508 assert_spin_locked(&mm->page_table_lock);
509#endif
510
511#ifdef PTE_ATOMIC_UPDATES
512 __asm__ __volatile__(
513 "1: ldarx %0,0,%3\n\
514 andi. %1,%0,%6\n\
515 bne- 1b \n\
516 andc %1,%0,%4 \n\
517 stdcx. %1,0,%3 \n\
518 bne- 1b"
519 : "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
520 : "r" (pmdp), "r" (clr), "m" (*pmdp), "i" (_PAGE_BUSY)
521 : "cc" );
522#else
523 old = pmd_val(*pmdp);
524 *pmdp = __pmd(old & ~clr);
525#endif
526 if (old & _PAGE_HASHPTE)
527 hpte_do_hugepage_flush(mm, addr, pmdp);
528 return old;
529}
530
531pmd_t pmdp_clear_flush(struct vm_area_struct *vma, unsigned long address,
532 pmd_t *pmdp)
533{
534 pmd_t pmd;
535
536 VM_BUG_ON(address & ~HPAGE_PMD_MASK);
537 if (pmd_trans_huge(*pmdp)) {
538 pmd = pmdp_get_and_clear(vma->vm_mm, address, pmdp);
539 } else {
540 /*
541 * khugepaged calls this for normal pmd
542 */
543 pmd = *pmdp;
544 pmd_clear(pmdp);
545 /*
546 * Wait for all pending hash_page to finish. This is needed
547 * in case of subpage collapse. When we collapse normal pages
548 * to hugepage, we first clear the pmd, then invalidate all
549 * the PTE entries. The assumption here is that any low level
550 * page fault will see a none pmd and take the slow path that
551 * will wait on mmap_sem. But we could very well be in a
552		 * hash_page with a local ptep pointer value. Such a hash_page
553 * can result in adding new HPTE entries for normal subpages.
554 * That means we could be modifying the page content as we
555 * copy them to a huge page. So wait for parallel hash_page
556 * to finish before invalidating HPTE entries. We can do this
557 * by sending an IPI to all the cpus and executing a dummy
558 * function there.
559 */
560 kick_all_cpus_sync();
561 /*
562 * Now invalidate the hpte entries in the range
563		 * covered by the pmd. This makes sure we take a
564		 * fault and find the pmd as none, which will
565		 * result in a major fault that takes mmap_sem and
566		 * hence waits for the collapse to complete. Without this,
567 * the __collapse_huge_page_copy can result in copying
568 * the old content.
569 */
570 flush_tlb_pmd_range(vma->vm_mm, &pmd, address);
571 }
572 return pmd;
573}
574
575int pmdp_test_and_clear_young(struct vm_area_struct *vma,
576 unsigned long address, pmd_t *pmdp)
577{
578 return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
579}
580
581/*
582 * We currently remove entries from the hashtable regardless of whether
583 * the entry was young or dirty. The generic routines only flush if the
584 * entry was young or dirty, which is not good enough.
585 *
586 * We should be more intelligent about this but for the moment we override
587 * these functions and force a tlb flush unconditionally
588 */
589int pmdp_clear_flush_young(struct vm_area_struct *vma,
590 unsigned long address, pmd_t *pmdp)
591{
592 return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
593}
594
595/*
596 * We mark the pmd splitting and invalidate all the hpte
597 * entries for this hugepage.
598 */
599void pmdp_splitting_flush(struct vm_area_struct *vma,
600 unsigned long address, pmd_t *pmdp)
601{
602 unsigned long old, tmp;
603
604 VM_BUG_ON(address & ~HPAGE_PMD_MASK);
605
606#ifdef CONFIG_DEBUG_VM
607 WARN_ON(!pmd_trans_huge(*pmdp));
608 assert_spin_locked(&vma->vm_mm->page_table_lock);
609#endif
610
611#ifdef PTE_ATOMIC_UPDATES
612
613 __asm__ __volatile__(
614 "1: ldarx %0,0,%3\n\
615 andi. %1,%0,%6\n\
616 bne- 1b \n\
617 ori %1,%0,%4 \n\
618 stdcx. %1,0,%3 \n\
619 bne- 1b"
620 : "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
621 : "r" (pmdp), "i" (_PAGE_SPLITTING), "m" (*pmdp), "i" (_PAGE_BUSY)
622 : "cc" );
623#else
624 old = pmd_val(*pmdp);
625 *pmdp = __pmd(old | _PAGE_SPLITTING);
626#endif
627 /*
628	 * If we didn't have the splitting flag set, go and flush the
629 * HPTE entries.
630 */
631 if (!(old & _PAGE_SPLITTING)) {
632 /* We need to flush the hpte */
633 if (old & _PAGE_HASHPTE)
634 hpte_do_hugepage_flush(vma->vm_mm, address, pmdp);
635 }
636}
637
638/*
639 * We want to put the pgtable in the pmd and use the pgtable for
640 * tracking the base page size hptes.
641 */
642void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
643 pgtable_t pgtable)
644{
645 pgtable_t *pgtable_slot;
646 assert_spin_locked(&mm->page_table_lock);
647 /*
648	 * We store the pgtable in the second half of the PMD page.
649 */
650 pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
651 *pgtable_slot = pgtable;
652 /*
653	 * Expose the deposited pgtable to the other cpus
654	 * before we set the hugepage PTE at the pmd level;
655	 * the hash fault code looks at the deposited pgtable
656	 * to store hash index values.
657 */
658 smp_wmb();
659}
660
661pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
662{
663 pgtable_t pgtable;
664 pgtable_t *pgtable_slot;
665
666 assert_spin_locked(&mm->page_table_lock);
667 pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
668 pgtable = *pgtable_slot;
669 /*
670 * Once we withdraw, mark the entry NULL.
671 */
672 *pgtable_slot = NULL;
673 /*
674 * We store HPTE information in the deposited PTE fragment.
675	 * Zero out the content on withdraw.
676 */
677 memset(pgtable, 0, PTE_FRAG_SIZE);
678 return pgtable;
679}
680
681/*
682 * Set a new huge pmd. We should not be called for updating
683 * an existing pmd entry. That should go via pmd_hugepage_update.
684 */
685void set_pmd_at(struct mm_struct *mm, unsigned long addr,
686 pmd_t *pmdp, pmd_t pmd)
687{
688#ifdef CONFIG_DEBUG_VM
689 WARN_ON(!pmd_none(*pmdp));
690 assert_spin_locked(&mm->page_table_lock);
691 WARN_ON(!pmd_trans_huge(pmd));
692#endif
693 return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
694}
695
696void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
697 pmd_t *pmdp)
698{
699 pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT);
700}
701
702/*
703 * A linux hugepage PMD was changed and the corresponding hash table entries
704 * need to be flushed.
705 */
706void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
707 pmd_t *pmdp)
708{
709 int ssize, i;
710 unsigned long s_addr;
711 int max_hpte_count;
712 unsigned int psize, valid;
713 unsigned char *hpte_slot_array;
714 unsigned long hidx, vpn, vsid, hash, shift, slot;
715
716 /*
717 * Flush all the hptes mapping this hugepage
718 */
719 s_addr = addr & HPAGE_PMD_MASK;
720 hpte_slot_array = get_hpte_slot_array(pmdp);
721 /*
722	 * If we try to do a huge PTE update after a withdraw is done,
723	 * we will find the array below NULL. This happens when we do
724	 * split_huge_page_pmd.
725 */
726 if (!hpte_slot_array)
727 return;
728
729 /* get the base page size */
730 psize = get_slice_psize(mm, s_addr);
731
732 if (ppc_md.hugepage_invalidate)
733 return ppc_md.hugepage_invalidate(mm, hpte_slot_array,
734 s_addr, psize);
735 /*
736	 * No bulk hpte removal support; invalidate each entry.
737 */
738 shift = mmu_psize_defs[psize].shift;
739 max_hpte_count = HPAGE_PMD_SIZE >> shift;
740 for (i = 0; i < max_hpte_count; i++) {
741 /*
742	 * 8 bits per hpte entry:
743 * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
744 */
745 valid = hpte_valid(hpte_slot_array, i);
746 if (!valid)
747 continue;
748 hidx = hpte_hash_index(hpte_slot_array, i);
749
750 /* get the vpn */
751 addr = s_addr + (i * (1ul << shift));
752 if (!is_kernel_addr(addr)) {
753 ssize = user_segment_size(addr);
754 vsid = get_vsid(mm->context.id, addr, ssize);
755 WARN_ON(vsid == 0);
756 } else {
757 vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
758 ssize = mmu_kernel_ssize;
759 }
760
761 vpn = hpt_vpn(addr, vsid, ssize);
762 hash = hpt_hash(vpn, shift, ssize);
763 if (hidx & _PTEIDX_SECONDARY)
764 hash = ~hash;
765
766 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
767 slot += hidx & _PTEIDX_GROUP_IX;
768 ppc_md.hpte_invalidate(slot, vpn, psize,
769 MMU_PAGE_16M, ssize, 0);
770 }
771}
772
773static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
774{
775 pmd_val(pmd) |= pgprot_val(pgprot);
776 return pmd;
777}
778
779pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
780{
781 pmd_t pmd;
782 /*
783 * For a valid pte, we would have _PAGE_PRESENT or _PAGE_FILE always
784	 * set. We use this to check for a THP page at the pmd level:
785	 * a leaf pte for a huge page has its bottom two bits != 00.
786 */
787 pmd_val(pmd) = pfn << PTE_RPN_SHIFT;
788 pmd_val(pmd) |= _PAGE_THP_HUGE;
789 pmd = pmd_set_protbits(pmd, pgprot);
790 return pmd;
791}
792
793pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
794{
795 return pfn_pmd(page_to_pfn(page), pgprot);
796}
797
798pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
799{
800
801 pmd_val(pmd) &= _HPAGE_CHG_MASK;
802 pmd = pmd_set_protbits(pmd, newprot);
803 return pmd;
804}
805
806/*
807 * This is called at the end of handling a user page fault, when the
808 * fault has been handled by updating a HUGE PMD entry in the linux page tables.
809 * We use it to preload an HPTE into the hash table corresponding to
810 * the updated linux HUGE PMD entry.
811 */
812void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
813 pmd_t *pmd)
814{
815 return;
816}
817
818pmd_t pmdp_get_and_clear(struct mm_struct *mm,
819 unsigned long addr, pmd_t *pmdp)
820{
821 pmd_t old_pmd;
822 pgtable_t pgtable;
823 unsigned long old;
824 pgtable_t *pgtable_slot;
825
826 old = pmd_hugepage_update(mm, addr, pmdp, ~0UL);
827 old_pmd = __pmd(old);
828 /*
829 * We have pmd == none and we are holding page_table_lock.
830 * So we can safely go and clear the pgtable hash
831 * index info.
832 */
833 pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
834 pgtable = *pgtable_slot;
835 /*
836	 * Let's zero out the old valid and hash index details;
837	 * the hash fault code looks at them.
838 */
839 memset(pgtable, 0, PTE_FRAG_SIZE);
840 return old_pmd;
841}
842
843int has_transparent_hugepage(void)
844{
845 if (!mmu_has_feature(MMU_FTR_16M_PAGE))
846 return 0;
847 /*
848 * We support THP only if PMD_SIZE is 16MB.
849 */
850 if (mmu_psize_defs[MMU_PAGE_16M].shift != PMD_SHIFT)
851 return 0;
852 /*
853	 * We need to make sure that we support a 16MB hugepage in a segment
854 * with base page size 64K or 4K. We only enable THP with a PAGE_SIZE
855 * of 64K.
856 */
857 /*
858 * If we have 64K HPTE, we will be using that by default
859 */
860 if (mmu_psize_defs[MMU_PAGE_64K].shift &&
861 (mmu_psize_defs[MMU_PAGE_64K].penc[MMU_PAGE_16M] == -1))
862 return 0;
863 /*
864	 * OK, we only have 4K HPTEs.
865 */
866 if (mmu_psize_defs[MMU_PAGE_4K].penc[MMU_PAGE_16M] == -1)
867 return 0;
868
869 return 1;
870}
871#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
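
hpte_do_hugepage_flush() above decodes one byte per subpage from the slot array deposited behind the pmd; the byte packs [secondary group | hidx | valid] as the in-code comment says. A sketch of that encoding; hpte_pack() and the model_* helpers are stand-ins for the kernel's hpte_valid()/hpte_hash_index():

    #include <stdbool.h>

    #define _PTEIDX_SECONDARY 0x8                   /* values as used above */
    #define _PTEIDX_GROUP_IX  0x7
    #define HPTES_PER_GROUP   8

    /* Byte layout: 000 | secondary (1 bit) | hidx (3 bits) | valid (1 bit). */
    static unsigned char hpte_pack(unsigned int hidx)   /* hidx: secondary<<3 | slot */
    {
            return (unsigned char)((hidx << 1) | 1);     /* low bit == valid */
    }

    static bool model_hpte_valid(const unsigned char *arr, int i)
    {
            return arr[i] & 0x1;
    }

    static unsigned int model_hpte_hash_index(const unsigned char *arr, int i)
    {
            return arr[i] >> 1;                          /* 4 bits: secondary + slot */
    }

    /* Decode one entry into a global hash-table slot, mirroring the loop
     * body in hpte_do_hugepage_flush() above. */
    static unsigned long model_slot(const unsigned char *arr, int i,
                                    unsigned long hash, unsigned long mask)
    {
            unsigned int hidx = model_hpte_hash_index(arr, i);

            if (hidx & _PTEIDX_SECONDARY)
                    hash = ~hash;                        /* entry lives in the secondary group */
            return (hash & mask) * HPTES_PER_GROUP + (hidx & _PTEIDX_GROUP_IX);
    }
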
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c
index 7c415ddde948..aa74acb0fdfc 100644
--- a/arch/powerpc/mm/subpage-prot.c
+++ b/arch/powerpc/mm/subpage-prot.c
@@ -130,6 +130,53 @@ static void subpage_prot_clear(unsigned long addr, unsigned long len)
130 up_write(&mm->mmap_sem); 130 up_write(&mm->mmap_sem);
131} 131}
132 132
133#ifdef CONFIG_TRANSPARENT_HUGEPAGE
134static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
135 unsigned long end, struct mm_walk *walk)
136{
137 struct vm_area_struct *vma = walk->private;
138 split_huge_page_pmd(vma, addr, pmd);
139 return 0;
140}
141
142static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
143 unsigned long len)
144{
145 struct vm_area_struct *vma;
146 struct mm_walk subpage_proto_walk = {
147 .mm = mm,
148 .pmd_entry = subpage_walk_pmd_entry,
149 };
150
151 /*
152	 * We don't try too hard, we just mark all the vmas in that range
153 * VM_NOHUGEPAGE and split them.
154 */
155 vma = find_vma(mm, addr);
156 /*
157	 * If the range lies in an unmapped area, just return.
158 */
159 if (vma && ((addr + len) <= vma->vm_start))
160 return;
161
162 while (vma) {
163 if (vma->vm_start >= (addr + len))
164 break;
165 vma->vm_flags |= VM_NOHUGEPAGE;
166 subpage_proto_walk.private = vma;
167 walk_page_range(vma->vm_start, vma->vm_end,
168 &subpage_proto_walk);
169 vma = vma->vm_next;
170 }
171}
172#else
173static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
174 unsigned long len)
175{
176 return;
177}
178#endif
179
133/* 180/*
134 * Copy in a subpage protection map for an address range. 181 * Copy in a subpage protection map for an address range.
135 * The map has 2 bits per 4k subpage, so 32 bits per 64k page. 182 * The map has 2 bits per 4k subpage, so 32 bits per 64k page.
@@ -168,6 +215,7 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map)
168 return -EFAULT; 215 return -EFAULT;
169 216
170 down_write(&mm->mmap_sem); 217 down_write(&mm->mmap_sem);
218 subpage_mark_vma_nohuge(mm, addr, len);
171 for (limit = addr + len; addr < limit; addr = next) { 219 for (limit = addr + len; addr < limit; addr = next) {
172 next = pmd_addr_end(addr, limit); 220 next = pmd_addr_end(addr, limit);
173 err = -ENOMEM; 221 err = -ENOMEM;
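
subpage_mark_vma_nohuge() above simply walks every VMA that overlaps [addr, addr + len), flags it VM_NOHUGEPAGE and splits its huge pmds. The overlap walk itself reduces to the following, sketched over a toy sorted VMA list (struct vma and the flag value are illustrative, not the kernel's definitions):

    #define VM_NOHUGEPAGE 0x1UL                     /* illustrative flag value */

    struct vma {
            unsigned long start, end;               /* maps [start, end) */
            unsigned long flags;
            struct vma *next;                       /* list sorted by start */
    };

    static void mark_range_nohuge(struct vma *vma, unsigned long addr,
                                  unsigned long len)
    {
            /* vma is the first mapping ending above addr, cf. find_vma(). */
            for (; vma; vma = vma->next) {
                    if (vma->start >= addr + len)
                            break;                  /* past the range: done */
                    vma->flags |= VM_NOHUGEPAGE;
                    /* ...the kernel version also split_huge_page_pmd()s here. */
            }
    }
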
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 023ec8a13f38..36e44b4260eb 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -183,12 +183,13 @@ void tlb_flush(struct mmu_gather *tlb)
183 * since 64K pages may overlap with other bridges when using 64K pages 183 * since 64K pages may overlap with other bridges when using 64K pages
184 * with 4K HW pages on IO space. 184 * with 4K HW pages on IO space.
185 * 185 *
186 * Because of that usage pattern, it's only available with CONFIG_HOTPLUG 186 * Because of that usage pattern, it is implemented for small size rather
187 * and is implemented for small size rather than speed. 187 * than speed.
188 */ 188 */
189void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, 189void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
190 unsigned long end) 190 unsigned long end)
191{ 191{
192 int hugepage_shift;
192 unsigned long flags; 193 unsigned long flags;
193 194
194 start = _ALIGN_DOWN(start, PAGE_SIZE); 195 start = _ALIGN_DOWN(start, PAGE_SIZE);
@@ -206,7 +207,8 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
206 local_irq_save(flags); 207 local_irq_save(flags);
207 arch_enter_lazy_mmu_mode(); 208 arch_enter_lazy_mmu_mode();
208 for (; start < end; start += PAGE_SIZE) { 209 for (; start < end; start += PAGE_SIZE) {
209 pte_t *ptep = find_linux_pte(mm->pgd, start); 210 pte_t *ptep = find_linux_pte_or_hugepte(mm->pgd, start,
211 &hugepage_shift);
210 unsigned long pte; 212 unsigned long pte;
211 213
212 if (ptep == NULL) 214 if (ptep == NULL)
@@ -214,7 +216,37 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
214 pte = pte_val(*ptep); 216 pte = pte_val(*ptep);
215 if (!(pte & _PAGE_HASHPTE)) 217 if (!(pte & _PAGE_HASHPTE))
216 continue; 218 continue;
217		hpte_need_flush(mm, start, ptep, pte, 0);	219		if (unlikely(hugepage_shift && pmd_trans_huge(*(pmd_t *)ptep)))
	220			hpte_do_hugepage_flush(mm, start, (pmd_t *)ptep);
221 else
222 hpte_need_flush(mm, start, ptep, pte, 0);
223 }
224 arch_leave_lazy_mmu_mode();
225 local_irq_restore(flags);
226}
227
228void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr)
229{
230 pte_t *pte;
231 pte_t *start_pte;
232 unsigned long flags;
233
234 addr = _ALIGN_DOWN(addr, PMD_SIZE);
235 /* Note: Normally, we should only ever use a batch within a
236 * PTE locked section. This violates the rule, but will work
237 * since we don't actually modify the PTEs, we just flush the
238 * hash while leaving the PTEs intact (including their reference
239 * to being hashed). This is not the most performance oriented
240 * way to do things but is fine for our needs here.
241 */
242 local_irq_save(flags);
243 arch_enter_lazy_mmu_mode();
244 start_pte = pte_offset_map(pmd, addr);
245 for (pte = start_pte; pte < start_pte + PTRS_PER_PTE; pte++) {
246 unsigned long pteval = pte_val(*pte);
247 if (pteval & _PAGE_HASHPTE)
248 hpte_need_flush(mm, addr, pte, pteval, 0);
249 addr += PAGE_SIZE;
218 } 250 }
219 arch_leave_lazy_mmu_mode(); 251 arch_leave_lazy_mmu_mode();
220 local_irq_restore(flags); 252 local_irq_restore(flags);
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 6888cad5103d..41cd68dee681 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -648,7 +648,7 @@ void __init early_init_mmu(void)
648 __early_init_mmu(1); 648 __early_init_mmu(1);
649} 649}
650 650
651void __cpuinit early_init_mmu_secondary(void) 651void early_init_mmu_secondary(void)
652{ 652{
653 __early_init_mmu(0); 653 __early_init_mmu(0);
654} 654}
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index c427ae36374a..bf56e33f8257 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -650,8 +650,7 @@ void bpf_jit_compile(struct sk_filter *fp)
650 650
651 proglen = cgctx.idx * 4; 651 proglen = cgctx.idx * 4;
652 alloclen = proglen + FUNCTION_DESCR_SIZE; 652 alloclen = proglen + FUNCTION_DESCR_SIZE;
653 image = module_alloc(max_t(unsigned int, alloclen, 653 image = module_alloc(alloclen);
654 sizeof(struct work_struct)));
655 if (!image) 654 if (!image)
656 goto out; 655 goto out;
657 656
@@ -688,20 +687,8 @@ out:
688 return; 687 return;
689} 688}
690 689
691static void jit_free_defer(struct work_struct *arg)
692{
693 module_free(NULL, arg);
694}
695
696/* run from softirq, we must use a work_struct to call
697 * module_free() from process context
698 */
699void bpf_jit_free(struct sk_filter *fp) 690void bpf_jit_free(struct sk_filter *fp)
700{ 691{
701 if (fp->bpf_func != sk_run_filter) { 692 if (fp->bpf_func != sk_run_filter)
702 struct work_struct *work = (struct work_struct *)fp->bpf_func; 693 module_free(NULL, fp->bpf_func);
703
704 INIT_WORK(work, jit_free_defer);
705 schedule_work(work);
706 }
707} 694}
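
The deferral deleted above used a well-known trick: the JIT image being freed is dead memory, so its first bytes are reused as the work item that frees it, avoiding any allocation in atomic context. A user-space caricature of the idiom, with a hypothetical defer() standing in for schedule_work() (here it just runs the callback immediately so the sketch is self-contained):

    #include <stdlib.h>

    struct work { void (*fn)(struct work *); };

    /* Hypothetical stand-in for schedule_work(). */
    static void defer(struct work *w)
    {
            w->fn(w);
    }

    static void free_work(struct work *w)
    {
            free(w);                                /* w is the start of the image */
    }

    static void jit_free_deferred(void *image)
    {
            struct work *w = image;                 /* overlay the work item on the dead image */

            w->fn = free_work;
            defer(w);
    }
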
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 29c6482890c8..eeae308cf982 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -75,6 +75,11 @@ static unsigned int freeze_events_kernel = MMCR0_FCS;
75 75
76#define MMCR0_FCHV 0 76#define MMCR0_FCHV 0
77#define MMCR0_PMCjCE MMCR0_PMCnCE 77#define MMCR0_PMCjCE MMCR0_PMCnCE
78#define MMCR0_FC56 0
79#define MMCR0_PMAO 0
80#define MMCR0_EBE 0
81#define MMCR0_PMCC 0
82#define MMCR0_PMCC_U6 0
78 83
79#define SPRN_MMCRA SPRN_MMCR2 84#define SPRN_MMCRA SPRN_MMCR2
80#define MMCRA_SAMPLE_ENABLE 0 85#define MMCRA_SAMPLE_ENABLE 0
@@ -102,6 +107,15 @@ static inline int siar_valid(struct pt_regs *regs)
102 return 1; 107 return 1;
103} 108}
104 109
110static bool is_ebb_event(struct perf_event *event) { return false; }
111static int ebb_event_check(struct perf_event *event) { return 0; }
112static void ebb_event_add(struct perf_event *event) { }
113static void ebb_switch_out(unsigned long mmcr0) { }
114static unsigned long ebb_switch_in(bool ebb, unsigned long mmcr0)
115{
116 return mmcr0;
117}
118
105static inline void power_pmu_bhrb_enable(struct perf_event *event) {} 119static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
106static inline void power_pmu_bhrb_disable(struct perf_event *event) {} 120static inline void power_pmu_bhrb_disable(struct perf_event *event) {}
107void power_pmu_flush_branch_stack(void) {} 121void power_pmu_flush_branch_stack(void) {}
@@ -462,6 +476,89 @@ void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
462 return; 476 return;
463} 477}
464 478
479static bool is_ebb_event(struct perf_event *event)
480{
481 /*
482 * This could be a per-PMU callback, but we'd rather avoid the cost. We
483 * check that the PMU supports EBB, meaning those that don't can still
484 * use bit 63 of the event code for something else if they wish.
485 */
486 return (ppmu->flags & PPMU_EBB) &&
487 ((event->attr.config >> PERF_EVENT_CONFIG_EBB_SHIFT) & 1);
488}
489
490static int ebb_event_check(struct perf_event *event)
491{
492 struct perf_event *leader = event->group_leader;
493
494 /* Event and group leader must agree on EBB */
495 if (is_ebb_event(leader) != is_ebb_event(event))
496 return -EINVAL;
497
498 if (is_ebb_event(event)) {
499 if (!(event->attach_state & PERF_ATTACH_TASK))
500 return -EINVAL;
501
502 if (!leader->attr.pinned || !leader->attr.exclusive)
503 return -EINVAL;
504
505 if (event->attr.inherit || event->attr.sample_period ||
506 event->attr.enable_on_exec || event->attr.freq)
507 return -EINVAL;
508 }
509
510 return 0;
511}
512
513static void ebb_event_add(struct perf_event *event)
514{
515 if (!is_ebb_event(event) || current->thread.used_ebb)
516 return;
517
518 /*
519 * IFF this is the first time we've added an EBB event, set
520 * PMXE in the user MMCR0 so we can detect when it's cleared by
521 * userspace. We need this so that we can context switch while
522 * userspace is in the EBB handler (where PMXE is 0).
523 */
524 current->thread.used_ebb = 1;
525 current->thread.mmcr0 |= MMCR0_PMXE;
526}
527
528static void ebb_switch_out(unsigned long mmcr0)
529{
530 if (!(mmcr0 & MMCR0_EBE))
531 return;
532
533 current->thread.siar = mfspr(SPRN_SIAR);
534 current->thread.sier = mfspr(SPRN_SIER);
535 current->thread.sdar = mfspr(SPRN_SDAR);
536 current->thread.mmcr0 = mmcr0 & MMCR0_USER_MASK;
537 current->thread.mmcr2 = mfspr(SPRN_MMCR2) & MMCR2_USER_MASK;
538}
539
540static unsigned long ebb_switch_in(bool ebb, unsigned long mmcr0)
541{
542 if (!ebb)
543 goto out;
544
545 /* Enable EBB and read/write to all 6 PMCs for userspace */
546 mmcr0 |= MMCR0_EBE | MMCR0_PMCC_U6;
547
548 /* Add any bits from the user reg, FC or PMAO */
549 mmcr0 |= current->thread.mmcr0;
550
551 /* Be careful not to set PMXE if userspace had it cleared */
552 if (!(current->thread.mmcr0 & MMCR0_PMXE))
553 mmcr0 &= ~MMCR0_PMXE;
554
555 mtspr(SPRN_SIAR, current->thread.siar);
556 mtspr(SPRN_SIER, current->thread.sier);
557 mtspr(SPRN_SDAR, current->thread.sdar);
558 mtspr(SPRN_MMCR2, current->thread.mmcr2);
559out:
560 return mmcr0;
561}
465#endif /* CONFIG_PPC64 */ 562#endif /* CONFIG_PPC64 */
466 563
467static void perf_event_interrupt(struct pt_regs *regs); 564static void perf_event_interrupt(struct pt_regs *regs);
@@ -732,6 +829,13 @@ static void power_pmu_read(struct perf_event *event)
732 829
733 if (!event->hw.idx) 830 if (!event->hw.idx)
734 return; 831 return;
832
833 if (is_ebb_event(event)) {
834 val = read_pmc(event->hw.idx);
835 local64_set(&event->hw.prev_count, val);
836 return;
837 }
838
735 /* 839 /*
736 * Performance monitor interrupts come even when interrupts 840 * Performance monitor interrupts come even when interrupts
737 * are soft-disabled, as long as interrupts are hard-enabled. 841 * are soft-disabled, as long as interrupts are hard-enabled.
@@ -852,7 +956,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0)
852static void power_pmu_disable(struct pmu *pmu) 956static void power_pmu_disable(struct pmu *pmu)
853{ 957{
854 struct cpu_hw_events *cpuhw; 958 struct cpu_hw_events *cpuhw;
855 unsigned long flags; 959 unsigned long flags, mmcr0, val;
856 960
857 if (!ppmu) 961 if (!ppmu)
858 return; 962 return;
@@ -860,9 +964,6 @@ static void power_pmu_disable(struct pmu *pmu)
860 cpuhw = &__get_cpu_var(cpu_hw_events); 964 cpuhw = &__get_cpu_var(cpu_hw_events);
861 965
862 if (!cpuhw->disabled) { 966 if (!cpuhw->disabled) {
863 cpuhw->disabled = 1;
864 cpuhw->n_added = 0;
865
866 /* 967 /*
867 * Check if we ever enabled the PMU on this cpu. 968 * Check if we ever enabled the PMU on this cpu.
868 */ 969 */
@@ -872,6 +973,21 @@ static void power_pmu_disable(struct pmu *pmu)
872 } 973 }
873 974
874 /* 975 /*
976 * Set the 'freeze counters' bit, clear EBE/PMCC/PMAO/FC56.
977 */
978 val = mmcr0 = mfspr(SPRN_MMCR0);
979 val |= MMCR0_FC;
980 val &= ~(MMCR0_EBE | MMCR0_PMCC | MMCR0_PMAO | MMCR0_FC56);
981
982 /*
983 * The barrier is to make sure the mtspr has been
984 * executed and the PMU has frozen the events etc.
985 * before we return.
986 */
987 write_mmcr0(cpuhw, val);
988 mb();
989
990 /*
875 * Disable instruction sampling if it was enabled 991 * Disable instruction sampling if it was enabled
876 */ 992 */
877 if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { 993 if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
@@ -880,15 +996,12 @@ static void power_pmu_disable(struct pmu *pmu)
880 mb(); 996 mb();
881 } 997 }
882 998
883 /* 999 cpuhw->disabled = 1;
884 * Set the 'freeze counters' bit. 1000 cpuhw->n_added = 0;
885 * The barrier is to make sure the mtspr has been 1001
886 * executed and the PMU has frozen the events 1002 ebb_switch_out(mmcr0);
887 * before we return.
888 */
889 write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC);
890 mb();
891 } 1003 }
1004
892 local_irq_restore(flags); 1005 local_irq_restore(flags);
893} 1006}
894 1007
@@ -903,23 +1016,36 @@ static void power_pmu_enable(struct pmu *pmu)
903 struct cpu_hw_events *cpuhw; 1016 struct cpu_hw_events *cpuhw;
904 unsigned long flags; 1017 unsigned long flags;
905 long i; 1018 long i;
906 unsigned long val; 1019 unsigned long val, mmcr0;
907 s64 left; 1020 s64 left;
908 unsigned int hwc_index[MAX_HWEVENTS]; 1021 unsigned int hwc_index[MAX_HWEVENTS];
909 int n_lim; 1022 int n_lim;
910 int idx; 1023 int idx;
1024 bool ebb;
911 1025
912 if (!ppmu) 1026 if (!ppmu)
913 return; 1027 return;
914 local_irq_save(flags); 1028 local_irq_save(flags);
1029
915 cpuhw = &__get_cpu_var(cpu_hw_events); 1030 cpuhw = &__get_cpu_var(cpu_hw_events);
916 if (!cpuhw->disabled) { 1031 if (!cpuhw->disabled)
917 local_irq_restore(flags); 1032 goto out;
918 return; 1033
1034 if (cpuhw->n_events == 0) {
1035 ppc_set_pmu_inuse(0);
1036 goto out;
919 } 1037 }
1038
920 cpuhw->disabled = 0; 1039 cpuhw->disabled = 0;
921 1040
922 /* 1041 /*
1042 * EBB requires an exclusive group and all events must have the EBB
1043 * flag set, or not set, so we can just check a single event. Also we
1044 * know we have at least one event.
1045 */
1046 ebb = is_ebb_event(cpuhw->event[0]);
1047
1048 /*
923 * If we didn't change anything, or only removed events, 1049 * If we didn't change anything, or only removed events,
924 * no need to recalculate MMCR* settings and reset the PMCs. 1050 * no need to recalculate MMCR* settings and reset the PMCs.
925 * Just reenable the PMU with the current MMCR* settings 1051 * Just reenable the PMU with the current MMCR* settings
@@ -928,8 +1054,6 @@ static void power_pmu_enable(struct pmu *pmu)
928 if (!cpuhw->n_added) { 1054 if (!cpuhw->n_added) {
929 mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); 1055 mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
930 mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); 1056 mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
931 if (cpuhw->n_events == 0)
932 ppc_set_pmu_inuse(0);
933 goto out_enable; 1057 goto out_enable;
934 } 1058 }
935 1059
@@ -996,25 +1120,34 @@ static void power_pmu_enable(struct pmu *pmu)
996 ++n_lim; 1120 ++n_lim;
997 continue; 1121 continue;
998 } 1122 }
999 val = 0; 1123
1000 if (event->hw.sample_period) { 1124 if (ebb)
1001 left = local64_read(&event->hw.period_left); 1125 val = local64_read(&event->hw.prev_count);
1002 if (left < 0x80000000L) 1126 else {
1003 val = 0x80000000L - left; 1127 val = 0;
1128 if (event->hw.sample_period) {
1129 left = local64_read(&event->hw.period_left);
1130 if (left < 0x80000000L)
1131 val = 0x80000000L - left;
1132 }
1133 local64_set(&event->hw.prev_count, val);
1004 } 1134 }
1005 local64_set(&event->hw.prev_count, val); 1135
1006 event->hw.idx = idx; 1136 event->hw.idx = idx;
1007 if (event->hw.state & PERF_HES_STOPPED) 1137 if (event->hw.state & PERF_HES_STOPPED)
1008 val = 0; 1138 val = 0;
1009 write_pmc(idx, val); 1139 write_pmc(idx, val);
1140
1010 perf_event_update_userpage(event); 1141 perf_event_update_userpage(event);
1011 } 1142 }
1012 cpuhw->n_limited = n_lim; 1143 cpuhw->n_limited = n_lim;
1013 cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; 1144 cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
1014 1145
1015 out_enable: 1146 out_enable:
1147 mmcr0 = ebb_switch_in(ebb, cpuhw->mmcr[0]);
1148
1016 mb(); 1149 mb();
1017 write_mmcr0(cpuhw, cpuhw->mmcr[0]); 1150 write_mmcr0(cpuhw, mmcr0);
1018 1151
1019 /* 1152 /*
1020 * Enable instruction sampling if necessary 1153 * Enable instruction sampling if necessary
@@ -1112,13 +1245,18 @@ static int power_pmu_add(struct perf_event *event, int ef_flags)
1112 event->hw.config = cpuhw->events[n0]; 1245 event->hw.config = cpuhw->events[n0];
1113 1246
1114nocheck: 1247nocheck:
1248 ebb_event_add(event);
1249
1115 ++cpuhw->n_events; 1250 ++cpuhw->n_events;
1116 ++cpuhw->n_added; 1251 ++cpuhw->n_added;
1117 1252
1118 ret = 0; 1253 ret = 0;
1119 out: 1254 out:
1120 if (has_branch_stack(event)) 1255 if (has_branch_stack(event)) {
1121 power_pmu_bhrb_enable(event); 1256 power_pmu_bhrb_enable(event);
1257 cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
1258 event->attr.branch_sample_type);
1259 }
1122 1260
1123 perf_pmu_enable(event->pmu); 1261 perf_pmu_enable(event->pmu);
1124 local_irq_restore(flags); 1262 local_irq_restore(flags);
@@ -1472,6 +1610,11 @@ static int power_pmu_event_init(struct perf_event *event)
1472 } 1610 }
1473 } 1611 }
1474 1612
1613 /* Extra checks for EBB */
1614 err = ebb_event_check(event);
1615 if (err)
1616 return err;
1617
1475 /* 1618 /*
1476 * If this is in a group, check if it can go on with all the 1619 * If this is in a group, check if it can go on with all the
1477 * other hardware events in the group. We assume the event 1620 * other hardware events in the group. We assume the event
@@ -1511,6 +1654,13 @@ static int power_pmu_event_init(struct perf_event *event)
1511 local64_set(&event->hw.period_left, event->hw.last_period); 1654 local64_set(&event->hw.period_left, event->hw.last_period);
1512 1655
1513 /* 1656 /*
1657 * For EBB events we just context switch the PMC value, we don't do any
1658 * of the sample_period logic. We use hw.prev_count for this.
1659 */
1660 if (is_ebb_event(event))
1661 local64_set(&event->hw.prev_count, 0);
1662
1663 /*
1514 * See if we need to reserve the PMU. 1664 * See if we need to reserve the PMU.
1515 * If no events are currently in use, then we have to take a 1665 * If no events are currently in use, then we have to take a
1516 * mutex to ensure that we don't race with another task doing 1666 * mutex to ensure that we don't race with another task doing
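For EBB events the kernel now context-switches the raw PMC value through hw.prev_count instead of running the sample_period machinery. A hedged sketch of how userspace would request such an event (the EBB bit position is our assumption based on the uapi perf_event.h header added by this same series; the raw event code is caller-supplied):

    #include <linux/perf_event.h>
    #include <string.h>

    /* Hypothetical sketch: an EBB event is a pinned, exclusive, per-task
     * raw event with the EBB flag set in attr.config (assumed to be bit
     * 63, i.e. PERF_EVENT_CONFIG_EBB_SHIFT). */
    static void init_ebb_attr(struct perf_event_attr *attr,
                              unsigned long long raw_event)
    {
            memset(attr, 0, sizeof(*attr));
            attr->size = sizeof(*attr);
            attr->type = PERF_TYPE_RAW;
            attr->config = raw_event | (1ULL << 63);  /* assumed EBB bit */
            attr->pinned = 1;
            attr->exclusive = 1;
    }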
@@ -1786,7 +1936,7 @@ static void power_pmu_setup(int cpu)
1786 cpuhw->mmcr[0] = MMCR0_FC; 1936 cpuhw->mmcr[0] = MMCR0_FC;
1787} 1937}
1788 1938
1789static int __cpuinit 1939static int
1790power_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) 1940power_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
1791{ 1941{
1792 unsigned int cpu = (long)hcpu; 1942 unsigned int cpu = (long)hcpu;
@@ -1803,7 +1953,7 @@ power_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu
1803 return NOTIFY_OK; 1953 return NOTIFY_OK;
1804} 1954}
1805 1955
1806int __cpuinit register_power_pmu(struct power_pmu *pmu) 1956int register_power_pmu(struct power_pmu *pmu)
1807{ 1957{
1808 if (ppmu) 1958 if (ppmu)
1809 return -EBUSY; /* something's already registered */ 1959 return -EBUSY; /* something's already registered */
diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c
index 3c475d6267c7..d1821b8bbc4c 100644
--- a/arch/powerpc/perf/power7-pmu.c
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -60,7 +60,30 @@
60#define PME_PM_LD_REF_L1 0xc880 60#define PME_PM_LD_REF_L1 0xc880
61#define PME_PM_LD_MISS_L1 0x400f0 61#define PME_PM_LD_MISS_L1 0x400f0
62#define PME_PM_BRU_FIN 0x10068 62#define PME_PM_BRU_FIN 0x10068
63#define PME_PM_BRU_MPRED 0x400f6 63#define PME_PM_BR_MPRED 0x400f6
64
65#define PME_PM_CMPLU_STALL_FXU 0x20014
66#define PME_PM_CMPLU_STALL_DIV 0x40014
67#define PME_PM_CMPLU_STALL_SCALAR 0x40012
68#define PME_PM_CMPLU_STALL_SCALAR_LONG 0x20018
69#define PME_PM_CMPLU_STALL_VECTOR 0x2001c
70#define PME_PM_CMPLU_STALL_VECTOR_LONG 0x4004a
71#define PME_PM_CMPLU_STALL_LSU 0x20012
72#define PME_PM_CMPLU_STALL_REJECT 0x40016
73#define PME_PM_CMPLU_STALL_ERAT_MISS 0x40018
74#define PME_PM_CMPLU_STALL_DCACHE_MISS 0x20016
75#define PME_PM_CMPLU_STALL_STORE 0x2004a
76#define PME_PM_CMPLU_STALL_THRD 0x1001c
77#define PME_PM_CMPLU_STALL_IFU 0x4004c
78#define PME_PM_CMPLU_STALL_BRU 0x4004e
79#define PME_PM_GCT_NOSLOT_IC_MISS 0x2001a
80#define PME_PM_GCT_NOSLOT_BR_MPRED 0x4001a
81#define PME_PM_GCT_NOSLOT_BR_MPRED_IC_MISS 0x4001c
82#define PME_PM_GRP_CMPL 0x30004
83#define PME_PM_1PLUS_PPC_CMPL 0x100f2
84#define PME_PM_CMPLU_STALL_DFU 0x2003c
85#define PME_PM_RUN_CYC 0x200f4
86#define PME_PM_RUN_INST_CMPL 0x400fa
64 87
65/* 88/*
66 * Layout of constraint bits: 89 * Layout of constraint bits:
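The PME_PM_* codes added above follow the POWER7 raw event layout documented in this file: the (1-based) PMC number sits in bits 19:16 and the PMCxSEL value in the low byte. A small standalone decoder, as a sketch (the shift/mask names mirror the PM_* defines at the top of power7-pmu.c):

    #include <stdio.h>

    #define PM_PMC_SH       16      /* PMC number (1-based) */
    #define PM_PMC_MSK      0xf
    #define PM_PMCSEL_MSK   0xff

    /* e.g. PME_PM_CMPLU_STALL_FXU (0x20014) -> PMC2, PMCxSEL 0x14 */
    static void decode_power7_event(unsigned int code)
    {
            unsigned int pmc  = (code >> PM_PMC_SH) & PM_PMC_MSK;
            unsigned int psel = code & PM_PMCSEL_MSK;

            printf("0x%x -> PMC%u, PMCxSEL 0x%x\n", code, pmc, psel);
    }

    int main(void)
    {
            decode_power7_event(0x20014);
            return 0;
    }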
@@ -326,7 +349,7 @@ static int power7_generic_events[] = {
326 [PERF_COUNT_HW_CACHE_REFERENCES] = PME_PM_LD_REF_L1, 349 [PERF_COUNT_HW_CACHE_REFERENCES] = PME_PM_LD_REF_L1,
327 [PERF_COUNT_HW_CACHE_MISSES] = PME_PM_LD_MISS_L1, 350 [PERF_COUNT_HW_CACHE_MISSES] = PME_PM_LD_MISS_L1,
328 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PME_PM_BRU_FIN, 351 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PME_PM_BRU_FIN,
329 [PERF_COUNT_HW_BRANCH_MISSES] = PME_PM_BRU_MPRED, 352 [PERF_COUNT_HW_BRANCH_MISSES] = PME_PM_BR_MPRED,
330}; 353};
331 354
332#define C(x) PERF_COUNT_HW_CACHE_##x 355#define C(x) PERF_COUNT_HW_CACHE_##x
@@ -382,7 +405,7 @@ GENERIC_EVENT_ATTR(instructions, INST_CMPL);
382GENERIC_EVENT_ATTR(cache-references, LD_REF_L1); 405GENERIC_EVENT_ATTR(cache-references, LD_REF_L1);
383GENERIC_EVENT_ATTR(cache-misses, LD_MISS_L1); 406GENERIC_EVENT_ATTR(cache-misses, LD_MISS_L1);
384GENERIC_EVENT_ATTR(branch-instructions, BRU_FIN); 407GENERIC_EVENT_ATTR(branch-instructions, BRU_FIN);
385GENERIC_EVENT_ATTR(branch-misses, BRU_MPRED); 408GENERIC_EVENT_ATTR(branch-misses, BR_MPRED);
386 409
387POWER_EVENT_ATTR(CYC, CYC); 410POWER_EVENT_ATTR(CYC, CYC);
388POWER_EVENT_ATTR(GCT_NOSLOT_CYC, GCT_NOSLOT_CYC); 411POWER_EVENT_ATTR(GCT_NOSLOT_CYC, GCT_NOSLOT_CYC);
@@ -391,7 +414,32 @@ POWER_EVENT_ATTR(INST_CMPL, INST_CMPL);
391POWER_EVENT_ATTR(LD_REF_L1, LD_REF_L1); 414POWER_EVENT_ATTR(LD_REF_L1, LD_REF_L1);
392POWER_EVENT_ATTR(LD_MISS_L1, LD_MISS_L1); 415POWER_EVENT_ATTR(LD_MISS_L1, LD_MISS_L1);
393POWER_EVENT_ATTR(BRU_FIN, BRU_FIN) 416POWER_EVENT_ATTR(BRU_FIN, BRU_FIN)
394POWER_EVENT_ATTR(BRU_MPRED, BRU_MPRED); 417POWER_EVENT_ATTR(BR_MPRED, BR_MPRED);
418
419POWER_EVENT_ATTR(CMPLU_STALL_FXU, CMPLU_STALL_FXU);
420POWER_EVENT_ATTR(CMPLU_STALL_DIV, CMPLU_STALL_DIV);
421POWER_EVENT_ATTR(CMPLU_STALL_SCALAR, CMPLU_STALL_SCALAR);
422POWER_EVENT_ATTR(CMPLU_STALL_SCALAR_LONG, CMPLU_STALL_SCALAR_LONG);
423POWER_EVENT_ATTR(CMPLU_STALL_VECTOR, CMPLU_STALL_VECTOR);
424POWER_EVENT_ATTR(CMPLU_STALL_VECTOR_LONG, CMPLU_STALL_VECTOR_LONG);
425POWER_EVENT_ATTR(CMPLU_STALL_LSU, CMPLU_STALL_LSU);
426POWER_EVENT_ATTR(CMPLU_STALL_REJECT, CMPLU_STALL_REJECT);
427
428POWER_EVENT_ATTR(CMPLU_STALL_ERAT_MISS, CMPLU_STALL_ERAT_MISS);
429POWER_EVENT_ATTR(CMPLU_STALL_DCACHE_MISS, CMPLU_STALL_DCACHE_MISS);
430POWER_EVENT_ATTR(CMPLU_STALL_STORE, CMPLU_STALL_STORE);
431POWER_EVENT_ATTR(CMPLU_STALL_THRD, CMPLU_STALL_THRD);
432POWER_EVENT_ATTR(CMPLU_STALL_IFU, CMPLU_STALL_IFU);
433POWER_EVENT_ATTR(CMPLU_STALL_BRU, CMPLU_STALL_BRU);
434POWER_EVENT_ATTR(GCT_NOSLOT_IC_MISS, GCT_NOSLOT_IC_MISS);
435
436POWER_EVENT_ATTR(GCT_NOSLOT_BR_MPRED, GCT_NOSLOT_BR_MPRED);
437POWER_EVENT_ATTR(GCT_NOSLOT_BR_MPRED_IC_MISS, GCT_NOSLOT_BR_MPRED_IC_MISS);
438POWER_EVENT_ATTR(GRP_CMPL, GRP_CMPL);
439POWER_EVENT_ATTR(1PLUS_PPC_CMPL, 1PLUS_PPC_CMPL);
440POWER_EVENT_ATTR(CMPLU_STALL_DFU, CMPLU_STALL_DFU);
441POWER_EVENT_ATTR(RUN_CYC, RUN_CYC);
442POWER_EVENT_ATTR(RUN_INST_CMPL, RUN_INST_CMPL);
395 443
396static struct attribute *power7_events_attr[] = { 444static struct attribute *power7_events_attr[] = {
397 GENERIC_EVENT_PTR(CYC), 445 GENERIC_EVENT_PTR(CYC),
@@ -401,7 +449,7 @@ static struct attribute *power7_events_attr[] = {
401 GENERIC_EVENT_PTR(LD_REF_L1), 449 GENERIC_EVENT_PTR(LD_REF_L1),
402 GENERIC_EVENT_PTR(LD_MISS_L1), 450 GENERIC_EVENT_PTR(LD_MISS_L1),
403 GENERIC_EVENT_PTR(BRU_FIN), 451 GENERIC_EVENT_PTR(BRU_FIN),
404 GENERIC_EVENT_PTR(BRU_MPRED), 452 GENERIC_EVENT_PTR(BR_MPRED),
405 453
406 POWER_EVENT_PTR(CYC), 454 POWER_EVENT_PTR(CYC),
407 POWER_EVENT_PTR(GCT_NOSLOT_CYC), 455 POWER_EVENT_PTR(GCT_NOSLOT_CYC),
@@ -410,7 +458,32 @@ static struct attribute *power7_events_attr[] = {
410 POWER_EVENT_PTR(LD_REF_L1), 458 POWER_EVENT_PTR(LD_REF_L1),
411 POWER_EVENT_PTR(LD_MISS_L1), 459 POWER_EVENT_PTR(LD_MISS_L1),
412 POWER_EVENT_PTR(BRU_FIN), 460 POWER_EVENT_PTR(BRU_FIN),
413 POWER_EVENT_PTR(BRU_MPRED), 461 POWER_EVENT_PTR(BR_MPRED),
462
463 POWER_EVENT_PTR(CMPLU_STALL_FXU),
464 POWER_EVENT_PTR(CMPLU_STALL_DIV),
465 POWER_EVENT_PTR(CMPLU_STALL_SCALAR),
466 POWER_EVENT_PTR(CMPLU_STALL_SCALAR_LONG),
467 POWER_EVENT_PTR(CMPLU_STALL_VECTOR),
468 POWER_EVENT_PTR(CMPLU_STALL_VECTOR_LONG),
469 POWER_EVENT_PTR(CMPLU_STALL_LSU),
470 POWER_EVENT_PTR(CMPLU_STALL_REJECT),
471
472 POWER_EVENT_PTR(CMPLU_STALL_ERAT_MISS),
473 POWER_EVENT_PTR(CMPLU_STALL_DCACHE_MISS),
474 POWER_EVENT_PTR(CMPLU_STALL_STORE),
475 POWER_EVENT_PTR(CMPLU_STALL_THRD),
476 POWER_EVENT_PTR(CMPLU_STALL_IFU),
477 POWER_EVENT_PTR(CMPLU_STALL_BRU),
478 POWER_EVENT_PTR(GCT_NOSLOT_IC_MISS),
479 POWER_EVENT_PTR(GCT_NOSLOT_BR_MPRED),
480
481 POWER_EVENT_PTR(GCT_NOSLOT_BR_MPRED_IC_MISS),
482 POWER_EVENT_PTR(GRP_CMPL),
483 POWER_EVENT_PTR(1PLUS_PPC_CMPL),
484 POWER_EVENT_PTR(CMPLU_STALL_DFU),
485 POWER_EVENT_PTR(RUN_CYC),
486 POWER_EVENT_PTR(RUN_INST_CMPL),
414 NULL 487 NULL
415}; 488};
416 489
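Each POWER_EVENT_ATTR()/POWER_EVENT_PTR() pair registered above publishes the event through the PMU's sysfs events directory (normally /sys/bus/event_source/devices/cpu/events/), so tooling can use the symbolic name directly, e.g. perf stat -e cpu/PM_CMPLU_STALL_LSU/ -- <cmd>. That path and syntax are the standard perf conventions rather than anything specific to this patch.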
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index f7d1c4fff303..2ee4a707f0df 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -31,9 +31,9 @@
31 * 31 *
32 * 60 56 52 48 44 40 36 32 32 * 60 56 52 48 44 40 36 32
33 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | 33 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
34 * [ thresh_cmp ] [ thresh_ctl ] 34 * | [ thresh_cmp ] [ thresh_ctl ]
35 * | 35 * | |
36 * thresh start/stop OR FAB match -* 36 * *- EBB (Linux) thresh start/stop OR FAB match -*
37 * 37 *
38 * 28 24 20 16 12 8 4 0 38 * 28 24 20 16 12 8 4 0
39 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | 39 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
@@ -85,6 +85,7 @@
85 * 85 *
86 */ 86 */
87 87
88#define EVENT_EBB_MASK 1ull
88#define EVENT_THR_CMP_SHIFT 40 /* Threshold CMP value */ 89#define EVENT_THR_CMP_SHIFT 40 /* Threshold CMP value */
89#define EVENT_THR_CMP_MASK 0x3ff 90#define EVENT_THR_CMP_MASK 0x3ff
90#define EVENT_THR_CTL_SHIFT 32 /* Threshold control value (start/stop) */ 91#define EVENT_THR_CTL_SHIFT 32 /* Threshold control value (start/stop) */
@@ -109,6 +110,17 @@
109#define EVENT_IS_MARKED (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) 110#define EVENT_IS_MARKED (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT)
110#define EVENT_PSEL_MASK 0xff /* PMCxSEL value */ 111#define EVENT_PSEL_MASK 0xff /* PMCxSEL value */
111 112
113#define EVENT_VALID_MASK \
114 ((EVENT_THRESH_MASK << EVENT_THRESH_SHIFT) | \
115 (EVENT_SAMPLE_MASK << EVENT_SAMPLE_SHIFT) | \
116 (EVENT_CACHE_SEL_MASK << EVENT_CACHE_SEL_SHIFT) | \
117 (EVENT_PMC_MASK << EVENT_PMC_SHIFT) | \
118 (EVENT_UNIT_MASK << EVENT_UNIT_SHIFT) | \
119 (EVENT_COMBINE_MASK << EVENT_COMBINE_SHIFT) | \
120 (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \
121 (EVENT_EBB_MASK << PERF_EVENT_CONFIG_EBB_SHIFT) | \
122 EVENT_PSEL_MASK)
123
112/* MMCRA IFM bits - POWER8 */ 124/* MMCRA IFM bits - POWER8 */
113#define POWER8_MMCRA_IFM1 0x0000000040000000UL 125#define POWER8_MMCRA_IFM1 0x0000000040000000UL
114#define POWER8_MMCRA_IFM2 0x0000000080000000UL 126#define POWER8_MMCRA_IFM2 0x0000000080000000UL
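EVENT_VALID_MASK simply ORs together every documented config field, which turns "is this raw event well-formed?" into a one-line test, used at the top of power8_get_constraint() below. The equivalent standalone check, as a sketch:

    /* Sketch: an event is well-formed only if every set bit falls inside
     * a field covered by the valid mask; any stray bit rejects it. */
    static int power8_event_is_valid(unsigned long long event,
                                     unsigned long long valid_mask)
    {
            return (event & ~valid_mask) == 0;
    }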
@@ -130,10 +142,10 @@
130 * 142 *
131 * 28 24 20 16 12 8 4 0 143 * 28 24 20 16 12 8 4 0
132 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | 144 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
133 * [ ] [ sample ] [ ] [6] [5] [4] [3] [2] [1] 145 * | [ ] [ sample ] [ ] [6] [5] [4] [3] [2] [1]
134 * | | 146 * EBB -* | |
135 * L1 I/D qualifier -* | Count of events for each PMC. 147 * | | Count of events for each PMC.
136 * | p1, p2, p3, p4, p5, p6. 148 * L1 I/D qualifier -* | p1, p2, p3, p4, p5, p6.
137 * nc - number of counters -* 149 * nc - number of counters -*
138 * 150 *
139 * The PMC fields P1..P6, and NC, are adder fields. As we accumulate constraints 151 * The PMC fields P1..P6, and NC, are adder fields. As we accumulate constraints
@@ -149,6 +161,9 @@
149#define CNST_THRESH_VAL(v) (((v) & EVENT_THRESH_MASK) << 32) 161#define CNST_THRESH_VAL(v) (((v) & EVENT_THRESH_MASK) << 32)
150#define CNST_THRESH_MASK CNST_THRESH_VAL(EVENT_THRESH_MASK) 162#define CNST_THRESH_MASK CNST_THRESH_VAL(EVENT_THRESH_MASK)
151 163
164#define CNST_EBB_VAL(v) (((v) & EVENT_EBB_MASK) << 24)
165#define CNST_EBB_MASK CNST_EBB_VAL(EVENT_EBB_MASK)
166
152#define CNST_L1_QUAL_VAL(v) (((v) & 3) << 22) 167#define CNST_L1_QUAL_VAL(v) (((v) & 3) << 22)
153#define CNST_L1_QUAL_MASK CNST_L1_QUAL_VAL(3) 168#define CNST_L1_QUAL_MASK CNST_L1_QUAL_VAL(3)
154 169
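Each CNST_*_VAL()/CNST_*_MASK pair feeds the generic powerpc constraint solver: for select-style fields like the new EBB one, two events can be scheduled together only when their constraint values agree everywhere both masks are set (the P1..P6/NC adder fields mentioned below are accumulated instead of matched). The core agreement test reduces to something like this sketch:

    /* Sketch of the select-field compatibility test implied by the
     * CNST_* encoding: two events conflict if their values differ in
     * any field where both constraint masks have bits set. */
    static int constraints_compatible(unsigned long m1, unsigned long v1,
                                      unsigned long m2, unsigned long v2)
    {
            return ((v1 ^ v2) & (m1 & m2)) == 0;
    }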
@@ -207,14 +222,21 @@ static inline bool event_is_fab_match(u64 event)
207 222
208static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) 223static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
209{ 224{
210 unsigned int unit, pmc, cache; 225 unsigned int unit, pmc, cache, ebb;
211 unsigned long mask, value; 226 unsigned long mask, value;
212 227
213 mask = value = 0; 228 mask = value = 0;
214 229
215 pmc = (event >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK; 230 if (event & ~EVENT_VALID_MASK)
216 unit = (event >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK; 231 return -1;
217 cache = (event >> EVENT_CACHE_SEL_SHIFT) & EVENT_CACHE_SEL_MASK; 232
233 pmc = (event >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK;
234 unit = (event >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK;
235 cache = (event >> EVENT_CACHE_SEL_SHIFT) & EVENT_CACHE_SEL_MASK;
236 ebb = (event >> PERF_EVENT_CONFIG_EBB_SHIFT) & EVENT_EBB_MASK;
237
238 /* Clear the EBB bit in the event, so event checks work below */
239 event &= ~(EVENT_EBB_MASK << PERF_EVENT_CONFIG_EBB_SHIFT);
218 240
219 if (pmc) { 241 if (pmc) {
220 if (pmc > 6) 242 if (pmc > 6)
@@ -284,6 +306,18 @@ static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long
284 value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT); 306 value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT);
285 } 307 }
286 308
309 if (!pmc && ebb)
310 /* EBB events must specify the PMC */
311 return -1;
312
313 /*
314 * All events must agree on EBB, either all request it or none.
315 * EBB events are pinned & exclusive, so this should never actually
316 * hit, but we leave it as a fallback in case.
317 */
318 mask |= CNST_EBB_VAL(ebb);
319 value |= CNST_EBB_MASK;
320
287 *maskp = mask; 321 *maskp = mask;
288 *valp = value; 322 *valp = value;
289 323
@@ -378,6 +412,10 @@ static int power8_compute_mmcr(u64 event[], int n_ev,
378 if (pmc_inuse & 0x7c) 412 if (pmc_inuse & 0x7c)
379 mmcr[0] |= MMCR0_PMCjCE; 413 mmcr[0] |= MMCR0_PMCjCE;
380 414
415 /* If we're not using PMC 5 or 6, freeze them */
416 if (!(pmc_inuse & 0x60))
417 mmcr[0] |= MMCR0_FC56;
418
381 mmcr[1] = mmcr1; 419 mmcr[1] = mmcr1;
382 mmcr[2] = mmcra; 420 mmcr[2] = mmcra;
383 421
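pmc_inuse is a bitmap with bit n set once PMCn has been assigned, so the 0x60 test covers PMC5 and PMC6 together; when neither is claimed they are frozen through the POWER8 MMCR0_FC56 bit so they do not count needlessly. The test, spelled out:

    /* Sketch: 0x60 == (1 << 5) | (1 << 6), i.e. "is PMC5 or PMC6
     * claimed in the pmc_inuse bitmap?"; freeze both when not. */
    static int can_freeze_pmc56(unsigned int pmc_inuse)
    {
            return (pmc_inuse & ((1 << 5) | (1 << 6))) == 0;
    }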
@@ -523,18 +561,13 @@ static int power8_generic_events[] = {
523static u64 power8_bhrb_filter_map(u64 branch_sample_type) 561static u64 power8_bhrb_filter_map(u64 branch_sample_type)
524{ 562{
525 u64 pmu_bhrb_filter = 0; 563 u64 pmu_bhrb_filter = 0;
526 u64 br_privilege = branch_sample_type & ONLY_PLM;
527 564
528 /* BHRB and regular PMU events share the same prvillege state 565 /* BHRB and regular PMU events share the same privilege state
529 * filter configuration. BHRB is always recorded along with a 566 * filter configuration. BHRB is always recorded along with a
530 * regular PMU event. So privilege state filter criteria for BHRB 567 * regular PMU event. As the privilege state filter is handled
531 * and the companion PMU events has to be the same. As a default 568 * in the basic PMC configuration of the accompanying regular
532 * "perf record" tool sets all privillege bits ON when no filter 569 * PMU event, we ignore any separate BHRB specific request.
533 * criteria is provided in the command line. So as along as all
534 * privillege bits are ON or they are OFF, we are good to go.
535 */ 570 */
536 if ((br_privilege != 7) && (br_privilege != 0))
537 return -1;
538 571
539 /* No branch filter requested */ 572 /* No branch filter requested */
540 if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY) 573 if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY)
@@ -574,7 +607,7 @@ static struct power_pmu power8_pmu = {
574 .get_constraint = power8_get_constraint, 607 .get_constraint = power8_get_constraint,
575 .get_alternatives = power8_get_alternatives, 608 .get_alternatives = power8_get_alternatives,
576 .disable_pmc = power8_disable_pmc, 609 .disable_pmc = power8_disable_pmc,
577 .flags = PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_BHRB, 610 .flags = PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_BHRB | PPMU_EBB,
578 .n_generic = ARRAY_SIZE(power8_generic_events), 611 .n_generic = ARRAY_SIZE(power8_generic_events),
579 .generic_events = power8_generic_events, 612 .generic_events = power8_generic_events,
580 .attr_groups = power8_pmu_attr_groups, 613 .attr_groups = power8_pmu_attr_groups,
@@ -583,10 +616,19 @@ static struct power_pmu power8_pmu = {
583 616
584static int __init init_power8_pmu(void) 617static int __init init_power8_pmu(void)
585{ 618{
619 int rc;
620
586 if (!cur_cpu_spec->oprofile_cpu_type || 621 if (!cur_cpu_spec->oprofile_cpu_type ||
587 strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8")) 622 strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8"))
588 return -ENODEV; 623 return -ENODEV;
589 624
590 return register_power_pmu(&power8_pmu); 625 rc = register_power_pmu(&power8_pmu);
626 if (rc)
627 return rc;
628
629 /* Tell userspace that EBB is supported */
630 cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB;
631
632 return 0;
591} 633}
592early_initcall(init_power8_pmu); 634early_initcall(init_power8_pmu);
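Setting PPC_FEATURE2_EBB in cpu_user_features2 surfaces the capability to userspace through the AT_HWCAP2 auxiliary vector. A hedged detection sketch (both constant values below are our assumptions from the uapi headers of this era; glibc must be new enough to provide getauxval()):

    #include <stdio.h>
    #include <sys/auxv.h>

    #ifndef AT_HWCAP2
    #define AT_HWCAP2               26              /* assumed auxvec value */
    #endif
    #ifndef PPC_FEATURE2_EBB
    #define PPC_FEATURE2_EBB        0x10000000      /* assumed uapi value */
    #endif

    int main(void)
    {
            unsigned long hwcap2 = getauxval(AT_HWCAP2);

            printf("EBB %ssupported\n",
                   (hwcap2 & PPC_FEATURE2_EBB) ? "" : "not ");
            return 0;
    }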
diff --git a/arch/powerpc/platforms/44x/currituck.c b/arch/powerpc/platforms/44x/currituck.c
index ecd3890c40d7..7f1b71a01c6a 100644
--- a/arch/powerpc/platforms/44x/currituck.c
+++ b/arch/powerpc/platforms/44x/currituck.c
@@ -91,12 +91,12 @@ static void __init ppc47x_init_irq(void)
91} 91}
92 92
93#ifdef CONFIG_SMP 93#ifdef CONFIG_SMP
94static void __cpuinit smp_ppc47x_setup_cpu(int cpu) 94static void smp_ppc47x_setup_cpu(int cpu)
95{ 95{
96 mpic_setup_this_cpu(); 96 mpic_setup_this_cpu();
97} 97}
98 98
99static int __cpuinit smp_ppc47x_kick_cpu(int cpu) 99static int smp_ppc47x_kick_cpu(int cpu)
100{ 100{
101 struct device_node *cpunode = of_get_cpu_node(cpu, NULL); 101 struct device_node *cpunode = of_get_cpu_node(cpu, NULL);
102 const u64 *spin_table_addr_prop; 102 const u64 *spin_table_addr_prop;
@@ -176,13 +176,48 @@ static int __init ppc47x_probe(void)
176 return 1; 176 return 1;
177} 177}
178 178
179static int board_rev = -1;
180static int __init ppc47x_get_board_rev(void)
181{
182 u8 fpga_reg0;
183 void *fpga;
184 struct device_node *np;
185
186 np = of_find_compatible_node(NULL, NULL, "ibm,currituck-fpga");
187 if (!np)
188 goto fail;
189
190 fpga = of_iomap(np, 0);
191 of_node_put(np);
192 if (!fpga)
193 goto fail;
194
195 fpga_reg0 = ioread8(fpga);
196 board_rev = fpga_reg0 & 0x03;
197 pr_info("%s: Found board revision %d\n", __func__, board_rev);
198 iounmap(fpga);
199 return 0;
200
201fail:
202 pr_info("%s: Unable to find board revision\n", __func__);
203 return 0;
204}
205machine_arch_initcall(ppc47x, ppc47x_get_board_rev);
206
 179/* The USB controller should have been hardware swizzled but it wasn't :( */ 207/* The USB controller should have been hardware swizzled but it wasn't :( */
180static void ppc47x_pci_irq_fixup(struct pci_dev *dev) 208static void ppc47x_pci_irq_fixup(struct pci_dev *dev)
181{ 209{
182 if (dev->vendor == 0x1033 && (dev->device == 0x0035 || 210 if (dev->vendor == 0x1033 && (dev->device == 0x0035 ||
183 dev->device == 0x00e0)) { 211 dev->device == 0x00e0)) {
184 dev->irq = irq_create_mapping(NULL, 47); 212 if (board_rev == 0) {
185 pr_info("%s: Mapping irq 47 %d\n", __func__, dev->irq); 213 dev->irq = irq_create_mapping(NULL, 47);
214 pr_info("%s: Mapping irq %d\n", __func__, dev->irq);
215 } else if (board_rev == 2) {
216 dev->irq = irq_create_mapping(NULL, 49);
217 pr_info("%s: Mapping irq %d\n", __func__, dev->irq);
218 } else {
219 pr_alert("%s: Unknown board revision\n", __func__);
220 }
186 } 221 }
187} 222}
188 223
diff --git a/arch/powerpc/platforms/44x/iss4xx.c b/arch/powerpc/platforms/44x/iss4xx.c
index a28a8629727e..4241bc825800 100644
--- a/arch/powerpc/platforms/44x/iss4xx.c
+++ b/arch/powerpc/platforms/44x/iss4xx.c
@@ -81,12 +81,12 @@ static void __init iss4xx_init_irq(void)
81} 81}
82 82
83#ifdef CONFIG_SMP 83#ifdef CONFIG_SMP
84static void __cpuinit smp_iss4xx_setup_cpu(int cpu) 84static void smp_iss4xx_setup_cpu(int cpu)
85{ 85{
86 mpic_setup_this_cpu(); 86 mpic_setup_this_cpu();
87} 87}
88 88
89static int __cpuinit smp_iss4xx_kick_cpu(int cpu) 89static int smp_iss4xx_kick_cpu(int cpu)
90{ 90{
91 struct device_node *cpunode = of_get_cpu_node(cpu, NULL); 91 struct device_node *cpunode = of_get_cpu_node(cpu, NULL);
92 const u64 *spin_table_addr_prop; 92 const u64 *spin_table_addr_prop;
diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.c b/arch/powerpc/platforms/512x/mpc5121_ads.c
index 0a134e0469ef..3e90ece10ae9 100644
--- a/arch/powerpc/platforms/512x/mpc5121_ads.c
+++ b/arch/powerpc/platforms/512x/mpc5121_ads.c
@@ -43,9 +43,7 @@ static void __init mpc5121_ads_setup_arch(void)
43 mpc83xx_add_bridge(np); 43 mpc83xx_add_bridge(np);
44#endif 44#endif
45 45
46#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) 46 mpc512x_setup_arch();
47 mpc512x_setup_diu();
48#endif
49} 47}
50 48
51static void __init mpc5121_ads_init_IRQ(void) 49static void __init mpc5121_ads_init_IRQ(void)
@@ -69,7 +67,7 @@ define_machine(mpc5121_ads) {
69 .probe = mpc5121_ads_probe, 67 .probe = mpc5121_ads_probe,
70 .setup_arch = mpc5121_ads_setup_arch, 68 .setup_arch = mpc5121_ads_setup_arch,
71 .init = mpc512x_init, 69 .init = mpc512x_init,
72 .init_early = mpc512x_init_diu, 70 .init_early = mpc512x_init_early,
73 .init_IRQ = mpc5121_ads_init_IRQ, 71 .init_IRQ = mpc5121_ads_init_IRQ,
74 .get_irq = ipic_get_irq, 72 .get_irq = ipic_get_irq,
75 .calibrate_decr = generic_calibrate_decr, 73 .calibrate_decr = generic_calibrate_decr,
diff --git a/arch/powerpc/platforms/512x/mpc512x.h b/arch/powerpc/platforms/512x/mpc512x.h
index 0a8e60023944..cc97f022d028 100644
--- a/arch/powerpc/platforms/512x/mpc512x.h
+++ b/arch/powerpc/platforms/512x/mpc512x.h
@@ -12,18 +12,12 @@
12#ifndef __MPC512X_H__ 12#ifndef __MPC512X_H__
13#define __MPC512X_H__ 13#define __MPC512X_H__
14extern void __init mpc512x_init_IRQ(void); 14extern void __init mpc512x_init_IRQ(void);
15extern void __init mpc512x_init_early(void);
15extern void __init mpc512x_init(void); 16extern void __init mpc512x_init(void);
17extern void __init mpc512x_setup_arch(void);
16extern int __init mpc5121_clk_init(void); 18extern int __init mpc5121_clk_init(void);
17void __init mpc512x_declare_of_platform_devices(void);
18extern const char *mpc512x_select_psc_compat(void); 19extern const char *mpc512x_select_psc_compat(void);
20extern const char *mpc512x_select_reset_compat(void);
19extern void mpc512x_restart(char *cmd); 21extern void mpc512x_restart(char *cmd);
20 22
21#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
22void mpc512x_init_diu(void);
23void mpc512x_setup_diu(void);
24#else
25#define mpc512x_init_diu NULL
26#define mpc512x_setup_diu NULL
27#endif
28
29#endif /* __MPC512X_H__ */ 23#endif /* __MPC512X_H__ */
diff --git a/arch/powerpc/platforms/512x/mpc512x_generic.c b/arch/powerpc/platforms/512x/mpc512x_generic.c
index 5fb919b30924..ce71408781a0 100644
--- a/arch/powerpc/platforms/512x/mpc512x_generic.c
+++ b/arch/powerpc/platforms/512x/mpc512x_generic.c
@@ -45,8 +45,8 @@ define_machine(mpc512x_generic) {
45 .name = "MPC512x generic", 45 .name = "MPC512x generic",
46 .probe = mpc512x_generic_probe, 46 .probe = mpc512x_generic_probe,
47 .init = mpc512x_init, 47 .init = mpc512x_init,
48 .init_early = mpc512x_init_diu, 48 .init_early = mpc512x_init_early,
49 .setup_arch = mpc512x_setup_diu, 49 .setup_arch = mpc512x_setup_arch,
50 .init_IRQ = mpc512x_init_IRQ, 50 .init_IRQ = mpc512x_init_IRQ,
51 .get_irq = ipic_get_irq, 51 .get_irq = ipic_get_irq,
52 .calibrate_decr = generic_calibrate_decr, 52 .calibrate_decr = generic_calibrate_decr,
diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c
index 6eb94ab99d39..a82a41b4fd91 100644
--- a/arch/powerpc/platforms/512x/mpc512x_shared.c
+++ b/arch/powerpc/platforms/512x/mpc512x_shared.c
@@ -35,8 +35,10 @@ static struct mpc512x_reset_module __iomem *reset_module_base;
35static void __init mpc512x_restart_init(void) 35static void __init mpc512x_restart_init(void)
36{ 36{
37 struct device_node *np; 37 struct device_node *np;
38 const char *reset_compat;
38 39
39 np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-reset"); 40 reset_compat = mpc512x_select_reset_compat();
41 np = of_find_compatible_node(NULL, NULL, reset_compat);
40 if (!np) 42 if (!np)
41 return; 43 return;
42 44
@@ -58,7 +60,7 @@ void mpc512x_restart(char *cmd)
58 ; 60 ;
59} 61}
60 62
61#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) 63#if IS_ENABLED(CONFIG_FB_FSL_DIU)
62 64
63struct fsl_diu_shared_fb { 65struct fsl_diu_shared_fb {
64 u8 gamma[0x300]; /* 32-bit aligned! */ 66 u8 gamma[0x300]; /* 32-bit aligned! */
@@ -355,6 +357,17 @@ const char *mpc512x_select_psc_compat(void)
355 return NULL; 357 return NULL;
356} 358}
357 359
360const char *mpc512x_select_reset_compat(void)
361{
362 if (of_machine_is_compatible("fsl,mpc5121"))
363 return "fsl,mpc5121-reset";
364
365 if (of_machine_is_compatible("fsl,mpc5125"))
366 return "fsl,mpc5125-reset";
367
368 return NULL;
369}
370
358static unsigned int __init get_fifo_size(struct device_node *np, 371static unsigned int __init get_fifo_size(struct device_node *np,
359 char *prop_name) 372 char *prop_name)
360{ 373{
@@ -436,14 +449,26 @@ void __init mpc512x_psc_fifo_init(void)
436 } 449 }
437} 450}
438 451
452void __init mpc512x_init_early(void)
453{
454 mpc512x_restart_init();
455 if (IS_ENABLED(CONFIG_FB_FSL_DIU))
456 mpc512x_init_diu();
457}
458
439void __init mpc512x_init(void) 459void __init mpc512x_init(void)
440{ 460{
441 mpc5121_clk_init(); 461 mpc5121_clk_init();
442 mpc512x_declare_of_platform_devices(); 462 mpc512x_declare_of_platform_devices();
443 mpc512x_restart_init();
444 mpc512x_psc_fifo_init(); 463 mpc512x_psc_fifo_init();
445} 464}
446 465
466void __init mpc512x_setup_arch(void)
467{
468 if (IS_ENABLED(CONFIG_FB_FSL_DIU))
469 mpc512x_setup_diu();
470}
471
447/** 472/**
448 * mpc512x_cs_config - Setup chip select configuration 473 * mpc512x_cs_config - Setup chip select configuration
449 * @cs: chip select number 474 * @cs: chip select number
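The refactor above trades the #if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) guards for IS_ENABLED(), which evaluates to 1 when the option is =y or =m and 0 otherwise. The DIU calls are then discarded as dead code when the option is off while still being type-checked, which is why mpc512x.h now always declares them instead of #define-ing them to NULL. The pattern in isolation, as a sketch (function names are ours):

    #include <linux/kconfig.h>

    /* Sketch: the callee must always be declared, but the call site
     * compiles away entirely when CONFIG_FB_FSL_DIU is not set. */
    void example_diu_setup(void);

    static void example_setup_arch(void)
    {
            if (IS_ENABLED(CONFIG_FB_FSL_DIU))
                    example_diu_setup();    /* dead code when DIU is off */
    }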
diff --git a/arch/powerpc/platforms/512x/pdm360ng.c b/arch/powerpc/platforms/512x/pdm360ng.c
index 0575e858291c..24b314d7bd5f 100644
--- a/arch/powerpc/platforms/512x/pdm360ng.c
+++ b/arch/powerpc/platforms/512x/pdm360ng.c
@@ -119,9 +119,9 @@ static int __init pdm360ng_probe(void)
119define_machine(pdm360ng) { 119define_machine(pdm360ng) {
120 .name = "PDM360NG", 120 .name = "PDM360NG",
121 .probe = pdm360ng_probe, 121 .probe = pdm360ng_probe,
122 .setup_arch = mpc512x_setup_diu, 122 .setup_arch = mpc512x_setup_arch,
123 .init = pdm360ng_init, 123 .init = pdm360ng_init,
124 .init_early = mpc512x_init_diu, 124 .init_early = mpc512x_init_early,
125 .init_IRQ = mpc512x_init_IRQ, 125 .init_IRQ = mpc512x_init_IRQ,
126 .get_irq = ipic_get_irq, 126 .get_irq = ipic_get_irq,
127 .calibrate_decr = generic_calibrate_decr, 127 .calibrate_decr = generic_calibrate_decr,
diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
index 624cb51d19c9..7bc315822935 100644
--- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
+++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
@@ -231,17 +231,7 @@ static struct i2c_driver mcu_driver = {
231 .id_table = mcu_ids, 231 .id_table = mcu_ids,
232}; 232};
233 233
234static int __init mcu_init(void) 234module_i2c_driver(mcu_driver);
235{
236 return i2c_add_driver(&mcu_driver);
237}
238module_init(mcu_init);
239
240static void __exit mcu_exit(void)
241{
242 i2c_del_driver(&mcu_driver);
243}
244module_exit(mcu_exit);
245 235
246MODULE_DESCRIPTION("Power Management and GPIO expander driver for " 236MODULE_DESCRIPTION("Power Management and GPIO expander driver for "
247 "MPC8349E-mITX-compatible MCU"); 237 "MPC8349E-mITX-compatible MCU");
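module_i2c_driver() is the generic module_driver() helper specialized for I2C, so this conversion is purely mechanical; it generates roughly the init/exit pair that was deleted (names per the token-pasting in include/linux/i2c.h, expansion paraphrased):

    /* Approximate expansion of module_i2c_driver(mcu_driver): */
    static int __init mcu_driver_init(void)
    {
            return i2c_add_driver(&mcu_driver);
    }
    module_init(mcu_driver_init);

    static void __exit mcu_driver_exit(void)
    {
            i2c_del_driver(&mcu_driver);
    }
    module_exit(mcu_driver_exit);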
diff --git a/arch/powerpc/platforms/85xx/p5020_ds.c b/arch/powerpc/platforms/85xx/p5020_ds.c
index 753a42c29d4d..39cfa4044e6c 100644
--- a/arch/powerpc/platforms/85xx/p5020_ds.c
+++ b/arch/powerpc/platforms/85xx/p5020_ds.c
@@ -75,12 +75,7 @@ define_machine(p5020_ds) {
75#ifdef CONFIG_PCI 75#ifdef CONFIG_PCI
76 .pcibios_fixup_bus = fsl_pcibios_fixup_bus, 76 .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
77#endif 77#endif
78/* coreint doesn't play nice with lazy EE, use legacy mpic for now */
79#ifdef CONFIG_PPC64
80 .get_irq = mpic_get_irq,
81#else
82 .get_irq = mpic_get_coreint_irq, 78 .get_irq = mpic_get_coreint_irq,
83#endif
84 .restart = fsl_rstcr_restart, 79 .restart = fsl_rstcr_restart,
85 .calibrate_decr = generic_calibrate_decr, 80 .calibrate_decr = generic_calibrate_decr,
86 .progress = udbg_progress, 81 .progress = udbg_progress,
diff --git a/arch/powerpc/platforms/85xx/p5040_ds.c b/arch/powerpc/platforms/85xx/p5040_ds.c
index 11381851828e..f70e74cddf97 100644
--- a/arch/powerpc/platforms/85xx/p5040_ds.c
+++ b/arch/powerpc/platforms/85xx/p5040_ds.c
@@ -66,12 +66,7 @@ define_machine(p5040_ds) {
66#ifdef CONFIG_PCI 66#ifdef CONFIG_PCI
67 .pcibios_fixup_bus = fsl_pcibios_fixup_bus, 67 .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
68#endif 68#endif
69/* coreint doesn't play nice with lazy EE, use legacy mpic for now */
70#ifdef CONFIG_PPC64
71 .get_irq = mpic_get_irq,
72#else
73 .get_irq = mpic_get_coreint_irq, 69 .get_irq = mpic_get_coreint_irq,
74#endif
75 .restart = fsl_rstcr_restart, 70 .restart = fsl_rstcr_restart,
76 .calibrate_decr = generic_calibrate_decr, 71 .calibrate_decr = generic_calibrate_decr,
77 .progress = udbg_progress, 72 .progress = udbg_progress,
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index 6a1759939c6b..5ced4f5bb2b2 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -99,7 +99,7 @@ static void mpc85xx_take_timebase(void)
99} 99}
100 100
101#ifdef CONFIG_HOTPLUG_CPU 101#ifdef CONFIG_HOTPLUG_CPU
102static void __cpuinit smp_85xx_mach_cpu_die(void) 102static void smp_85xx_mach_cpu_die(void)
103{ 103{
104 unsigned int cpu = smp_processor_id(); 104 unsigned int cpu = smp_processor_id();
105 u32 tmp; 105 u32 tmp;
@@ -141,7 +141,7 @@ static inline u32 read_spin_table_addr_l(void *spin_table)
141 return in_be32(&((struct epapr_spin_table *)spin_table)->addr_l); 141 return in_be32(&((struct epapr_spin_table *)spin_table)->addr_l);
142} 142}
143 143
144static int __cpuinit smp_85xx_kick_cpu(int nr) 144static int smp_85xx_kick_cpu(int nr)
145{ 145{
146 unsigned long flags; 146 unsigned long flags;
147 const u64 *cpu_rel_addr; 147 const u64 *cpu_rel_addr;
@@ -362,7 +362,7 @@ static void mpc85xx_smp_machine_kexec(struct kimage *image)
362} 362}
363#endif /* CONFIG_KEXEC */ 363#endif /* CONFIG_KEXEC */
364 364
365static void __cpuinit smp_85xx_setup_cpu(int cpu_nr) 365static void smp_85xx_setup_cpu(int cpu_nr)
366{ 366{
367 if (smp_85xx_ops.probe == smp_mpic_probe) 367 if (smp_85xx_ops.probe == smp_mpic_probe)
368 mpic_setup_this_cpu(); 368 mpic_setup_this_cpu();
diff --git a/arch/powerpc/platforms/85xx/t4240_qds.c b/arch/powerpc/platforms/85xx/t4240_qds.c
index 5998e9f33304..91ead6b1b8af 100644
--- a/arch/powerpc/platforms/85xx/t4240_qds.c
+++ b/arch/powerpc/platforms/85xx/t4240_qds.c
@@ -75,12 +75,7 @@ define_machine(t4240_qds) {
75#ifdef CONFIG_PCI 75#ifdef CONFIG_PCI
76 .pcibios_fixup_bus = fsl_pcibios_fixup_bus, 76 .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
77#endif 77#endif
78/* coreint doesn't play nice with lazy EE, use legacy mpic for now */
79#ifdef CONFIG_PPC64
80 .get_irq = mpic_get_irq,
81#else
82 .get_irq = mpic_get_coreint_irq, 78 .get_irq = mpic_get_coreint_irq,
83#endif
84 .restart = fsl_rstcr_restart, 79 .restart = fsl_rstcr_restart,
85 .calibrate_decr = generic_calibrate_decr, 80 .calibrate_decr = generic_calibrate_decr,
86 .progress = udbg_progress, 81 .progress = udbg_progress,
diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c
index 1e121088826f..587a2828b06c 100644
--- a/arch/powerpc/platforms/8xx/m8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -43,6 +43,7 @@ static irqreturn_t timebase_interrupt(int irq, void *dev)
43 43
44static struct irqaction tbint_irqaction = { 44static struct irqaction tbint_irqaction = {
45 .handler = timebase_interrupt, 45 .handler = timebase_interrupt,
46 .flags = IRQF_NO_THREAD,
46 .name = "tbint", 47 .name = "tbint",
47}; 48};
48 49
@@ -218,19 +219,12 @@ void mpc8xx_restart(char *cmd)
218 219
219static void cpm_cascade(unsigned int irq, struct irq_desc *desc) 220static void cpm_cascade(unsigned int irq, struct irq_desc *desc)
220{ 221{
221 struct irq_chip *chip; 222 struct irq_chip *chip = irq_desc_get_chip(desc);
222 int cascade_irq; 223 int cascade_irq = cpm_get_irq();
223
224 if ((cascade_irq = cpm_get_irq()) >= 0) {
225 struct irq_desc *cdesc = irq_to_desc(cascade_irq);
226 224
225 if (cascade_irq >= 0)
227 generic_handle_irq(cascade_irq); 226 generic_handle_irq(cascade_irq);
228 227
229 chip = irq_desc_get_chip(cdesc);
230 chip->irq_eoi(&cdesc->irq_data);
231 }
232
233 chip = irq_desc_get_chip(desc);
234 chip->irq_eoi(&desc->irq_data); 228 chip->irq_eoi(&desc->irq_data);
235} 229}
236 230
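The simplified cascade handler works because generic_handle_irq() runs the child interrupt's own flow handler, which performs any per-interrupt EOI itself; only the cascade's parent descriptor still needs the explicit chip->irq_eoi() here, and a negative return from cpm_get_irq() now simply falls through to that EOI instead of being special-cased.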
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index b62aab3e22ec..d703775bda30 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -86,6 +86,27 @@ config MPIC
86 bool 86 bool
87 default n 87 default n
88 88
89config MPIC_TIMER
90 bool "MPIC Global Timer"
91 depends on MPIC && FSL_SOC
92 default n
93 help
94 The MPIC global timer is a hardware timer inside the
95 Freescale PIC complying with OpenPIC standard. When the
96 specified interval times out, the hardware timer generates
97 an interrupt. The driver currently is only tested on fsl
98 chip, but it can potentially support other global timers
99 complying with the OpenPIC standard.
100
101config FSL_MPIC_TIMER_WAKEUP
102 tristate "Freescale MPIC global timer wakeup driver"
103 depends on FSL_SOC && MPIC_TIMER && PM
104 default n
105 help
 106	  The driver provides a way to wake up the system via the
 107	  MPIC timer, e.g.:
 108	  "echo 5 > /sys/devices/system/mpic/timer_wakeup"
109
89config PPC_EPAPR_HV_PIC 110config PPC_EPAPR_HV_PIC
90 bool 111 bool
91 default n 112 default n
@@ -164,6 +185,11 @@ config IBMEBUS
164 help 185 help
165 Bus device driver for GX bus based adapters. 186 Bus device driver for GX bus based adapters.
166 187
188config EEH
189 bool
190 depends on (PPC_POWERNV || PPC_PSERIES) && PCI
191 default y
192
167config PPC_MPC106 193config PPC_MPC106
168 bool 194 bool
169 default n 195 default n
@@ -193,37 +219,6 @@ config PPC_IO_WORKAROUNDS
193 219
194source "drivers/cpufreq/Kconfig" 220source "drivers/cpufreq/Kconfig"
195 221
196menu "CPU Frequency drivers"
197 depends on CPU_FREQ
198
199config CPU_FREQ_PMAC
200 bool "Support for Apple PowerBooks"
201 depends on ADB_PMU && PPC32
202 select CPU_FREQ_TABLE
203 help
204 This adds support for frequency switching on Apple PowerBooks,
205 this currently includes some models of iBook & Titanium
206 PowerBook.
207
208config CPU_FREQ_PMAC64
209 bool "Support for some Apple G5s"
210 depends on PPC_PMAC && PPC64
211 select CPU_FREQ_TABLE
212 help
213 This adds support for frequency switching on Apple iMac G5,
214 and some of the more recent desktop G5 machines as well.
215
216config PPC_PASEMI_CPUFREQ
217 bool "Support for PA Semi PWRficient"
218 depends on PPC_PASEMI
219 default y
220 select CPU_FREQ_TABLE
221 help
222 This adds the support for frequency switching on PA Semi
223 PWRficient processors.
224
225endmenu
226
227menu "CPUIdle driver" 222menu "CPUIdle driver"
228 223
229source "drivers/cpuidle/Kconfig" 224source "drivers/cpuidle/Kconfig"
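The "CPU Frequency drivers" menu disappears here because CPU_FREQ_PMAC, CPU_FREQ_PMAC64 and PPC_PASEMI_CPUFREQ were migrated under drivers/cpufreq/ in this cycle, which matches the platform copies of those drivers being deleted later in this diff; the source "drivers/cpufreq/Kconfig" line retained above picks up their new homes.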
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 54f3936001aa..47d9a03dd415 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -71,6 +71,7 @@ config PPC_BOOK3S_64
71 select PPC_FPU 71 select PPC_FPU
72 select PPC_HAVE_PMU_SUPPORT 72 select PPC_HAVE_PMU_SUPPORT
73 select SYS_SUPPORTS_HUGETLBFS 73 select SYS_SUPPORTS_HUGETLBFS
74 select HAVE_ARCH_TRANSPARENT_HUGEPAGE if PPC_64K_PAGES
74 75
75config PPC_BOOK3E_64 76config PPC_BOOK3E_64
76 bool "Embedded processors" 77 bool "Embedded processors"
@@ -158,6 +159,7 @@ config E500
158config PPC_E500MC 159config PPC_E500MC
159 bool "e500mc Support" 160 bool "e500mc Support"
160 select PPC_FPU 161 select PPC_FPU
162 select COMMON_CLK
161 depends on E500 163 depends on E500
162 help 164 help
163 This must be enabled for running on e500mc (and derivatives 165 This must be enabled for running on e500mc (and derivatives
diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c
index 246e1d8b3af3..c34ee4e60873 100644
--- a/arch/powerpc/platforms/cell/beat_htab.c
+++ b/arch/powerpc/platforms/cell/beat_htab.c
@@ -185,7 +185,8 @@ static void beat_lpar_hptab_clear(void)
185static long beat_lpar_hpte_updatepp(unsigned long slot, 185static long beat_lpar_hpte_updatepp(unsigned long slot,
186 unsigned long newpp, 186 unsigned long newpp,
187 unsigned long vpn, 187 unsigned long vpn,
188 int psize, int ssize, int local) 188 int psize, int apsize,
189 int ssize, int local)
189{ 190{
190 unsigned long lpar_rc; 191 unsigned long lpar_rc;
191 u64 dummy0, dummy1; 192 u64 dummy0, dummy1;
@@ -274,7 +275,8 @@ static void beat_lpar_hpte_updateboltedpp(unsigned long newpp,
274} 275}
275 276
276static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn, 277static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
277 int psize, int ssize, int local) 278 int psize, int apsize,
279 int ssize, int local)
278{ 280{
279 unsigned long want_v; 281 unsigned long want_v;
280 unsigned long lpar_rc; 282 unsigned long lpar_rc;
@@ -364,9 +366,10 @@ static long beat_lpar_hpte_insert_v3(unsigned long hpte_group,
364 * already zero. For now I am paranoid. 366 * already zero. For now I am paranoid.
365 */ 367 */
366static long beat_lpar_hpte_updatepp_v3(unsigned long slot, 368static long beat_lpar_hpte_updatepp_v3(unsigned long slot,
367 unsigned long newpp, 369 unsigned long newpp,
368 unsigned long vpn, 370 unsigned long vpn,
369 int psize, int ssize, int local) 371 int psize, int apsize,
372 int ssize, int local)
370{ 373{
371 unsigned long lpar_rc; 374 unsigned long lpar_rc;
372 unsigned long want_v; 375 unsigned long want_v;
@@ -394,7 +397,8 @@ static long beat_lpar_hpte_updatepp_v3(unsigned long slot,
394} 397}
395 398
396static void beat_lpar_hpte_invalidate_v3(unsigned long slot, unsigned long vpn, 399static void beat_lpar_hpte_invalidate_v3(unsigned long slot, unsigned long vpn,
397 int psize, int ssize, int local) 400 int psize, int apsize,
401 int ssize, int local)
398{ 402{
399 unsigned long want_v; 403 unsigned long want_v;
400 unsigned long lpar_rc; 404 unsigned long lpar_rc;
diff --git a/arch/powerpc/platforms/cell/beat_interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c
index 8c6dc42ecf65..9e5dfbcc00af 100644
--- a/arch/powerpc/platforms/cell/beat_interrupt.c
+++ b/arch/powerpc/platforms/cell/beat_interrupt.c
@@ -239,7 +239,7 @@ void __init beatic_init_IRQ(void)
239 ppc_md.get_irq = beatic_get_irq; 239 ppc_md.get_irq = beatic_get_irq;
240 240
241 /* Allocate an irq host */ 241 /* Allocate an irq host */
242 beatic_host = irq_domain_add_nomap(NULL, 0, &beatic_pic_host_ops, NULL); 242 beatic_host = irq_domain_add_nomap(NULL, ~0, &beatic_pic_host_ops, NULL);
243 BUG_ON(beatic_host == NULL); 243 BUG_ON(beatic_host == NULL);
244 irq_set_default_host(beatic_host); 244 irq_set_default_host(beatic_host);
245} 245}
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
index d35dbbc8ec79..f75f6fcac729 100644
--- a/arch/powerpc/platforms/cell/smp.c
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -142,7 +142,7 @@ static int smp_cell_cpu_bootable(unsigned int nr)
142 * during boot if the user requests it. Odd-numbered 142 * during boot if the user requests it. Odd-numbered
143 * cpus are assumed to be secondary threads. 143 * cpus are assumed to be secondary threads.
144 */ 144 */
145 if (system_state < SYSTEM_RUNNING && 145 if (system_state == SYSTEM_BOOTING &&
146 cpu_has_feature(CPU_FTR_SMT) && 146 cpu_has_feature(CPU_FTR_SMT) &&
147 !smt_enabled_at_boot && cpu_thread_in_core(nr) != 0) 147 !smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
148 return 0; 148 return 0;
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 35f77a42bedf..f3900427ffab 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -238,7 +238,7 @@ const struct file_operations spufs_context_fops = {
238 .release = spufs_dir_close, 238 .release = spufs_dir_close,
239 .llseek = dcache_dir_lseek, 239 .llseek = dcache_dir_lseek,
240 .read = generic_read_dir, 240 .read = generic_read_dir,
241 .readdir = dcache_readdir, 241 .iterate = dcache_readdir,
242 .fsync = noop_fsync, 242 .fsync = noop_fsync,
243}; 243};
244EXPORT_SYMBOL_GPL(spufs_context_fops); 244EXPORT_SYMBOL_GPL(spufs_context_fops);
diff --git a/arch/powerpc/platforms/pasemi/Makefile b/arch/powerpc/platforms/pasemi/Makefile
index ce6d789e0741..8e8d4cae5ebe 100644
--- a/arch/powerpc/platforms/pasemi/Makefile
+++ b/arch/powerpc/platforms/pasemi/Makefile
@@ -1,3 +1,2 @@
1obj-y += setup.o pci.o time.o idle.o powersave.o iommu.o dma_lib.o misc.o 1obj-y += setup.o pci.o time.o idle.o powersave.o iommu.o dma_lib.o misc.o
2obj-$(CONFIG_PPC_PASEMI_MDIO) += gpio_mdio.o 2obj-$(CONFIG_PPC_PASEMI_MDIO) += gpio_mdio.o
3obj-$(CONFIG_PPC_PASEMI_CPUFREQ) += cpufreq.o
diff --git a/arch/powerpc/platforms/pasemi/cpufreq.c b/arch/powerpc/platforms/pasemi/cpufreq.c
deleted file mode 100644
index be1e7958909e..000000000000
--- a/arch/powerpc/platforms/pasemi/cpufreq.c
+++ /dev/null
@@ -1,330 +0,0 @@
1/*
2 * Copyright (C) 2007 PA Semi, Inc
3 *
4 * Authors: Egor Martovetsky <egor@pasemi.com>
5 * Olof Johansson <olof@lixom.net>
6 *
7 * Maintained by: Olof Johansson <olof@lixom.net>
8 *
9 * Based on arch/powerpc/platforms/cell/cbe_cpufreq.c:
10 * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2, or (at your option)
15 * any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *
26 */
27
28#include <linux/cpufreq.h>
29#include <linux/timer.h>
30#include <linux/module.h>
31
32#include <asm/hw_irq.h>
33#include <asm/io.h>
34#include <asm/prom.h>
35#include <asm/time.h>
36#include <asm/smp.h>
37
38#define SDCASR_REG 0x0100
39#define SDCASR_REG_STRIDE 0x1000
40#define SDCPWR_CFGA0_REG 0x0100
41#define SDCPWR_PWST0_REG 0x0000
42#define SDCPWR_GIZTIME_REG 0x0440
43
44/* SDCPWR_GIZTIME_REG fields */
45#define SDCPWR_GIZTIME_GR 0x80000000
46#define SDCPWR_GIZTIME_LONGLOCK 0x000000ff
47
48/* Offset of ASR registers from SDC base */
49#define SDCASR_OFFSET 0x120000
50
51static void __iomem *sdcpwr_mapbase;
52static void __iomem *sdcasr_mapbase;
53
54static DEFINE_MUTEX(pas_switch_mutex);
55
 56/* The current astate; used when waking up from power savings on
57 * one core, in case the other core has switched states during
58 * the idle time.
59 */
60static int current_astate;
61
 62/* We support 5 (A0-A4) power states excluding turbo (A5-A6) modes */
63static struct cpufreq_frequency_table pas_freqs[] = {
64 {0, 0},
65 {1, 0},
66 {2, 0},
67 {3, 0},
68 {4, 0},
69 {0, CPUFREQ_TABLE_END},
70};
71
72static struct freq_attr *pas_cpu_freqs_attr[] = {
73 &cpufreq_freq_attr_scaling_available_freqs,
74 NULL,
75};
76
77/*
78 * hardware specific functions
79 */
80
81static int get_astate_freq(int astate)
82{
83 u32 ret;
84 ret = in_le32(sdcpwr_mapbase + SDCPWR_CFGA0_REG + (astate * 0x10));
85
86 return ret & 0x3f;
87}
88
89static int get_cur_astate(int cpu)
90{
91 u32 ret;
92
93 ret = in_le32(sdcpwr_mapbase + SDCPWR_PWST0_REG);
94 ret = (ret >> (cpu * 4)) & 0x7;
95
96 return ret;
97}
98
99static int get_gizmo_latency(void)
100{
101 u32 giztime, ret;
102
103 giztime = in_le32(sdcpwr_mapbase + SDCPWR_GIZTIME_REG);
104
105 /* just provide the upper bound */
106 if (giztime & SDCPWR_GIZTIME_GR)
107 ret = (giztime & SDCPWR_GIZTIME_LONGLOCK) * 128000;
108 else
109 ret = (giztime & SDCPWR_GIZTIME_LONGLOCK) * 1000;
110
111 return ret;
112}
113
114static void set_astate(int cpu, unsigned int astate)
115{
116 unsigned long flags;
117
118 /* Return if called before init has run */
119 if (unlikely(!sdcasr_mapbase))
120 return;
121
122 local_irq_save(flags);
123
124 out_le32(sdcasr_mapbase + SDCASR_REG + SDCASR_REG_STRIDE*cpu, astate);
125
126 local_irq_restore(flags);
127}
128
129int check_astate(void)
130{
131 return get_cur_astate(hard_smp_processor_id());
132}
133
134void restore_astate(int cpu)
135{
136 set_astate(cpu, current_astate);
137}
138
139/*
140 * cpufreq functions
141 */
142
143static int pas_cpufreq_cpu_init(struct cpufreq_policy *policy)
144{
145 const u32 *max_freqp;
146 u32 max_freq;
147 int i, cur_astate;
148 struct resource res;
149 struct device_node *cpu, *dn;
150 int err = -ENODEV;
151
152 cpu = of_get_cpu_node(policy->cpu, NULL);
153
154 if (!cpu)
155 goto out;
156
157 dn = of_find_compatible_node(NULL, NULL, "1682m-sdc");
158 if (!dn)
159 dn = of_find_compatible_node(NULL, NULL,
160 "pasemi,pwrficient-sdc");
161 if (!dn)
162 goto out;
163 err = of_address_to_resource(dn, 0, &res);
164 of_node_put(dn);
165 if (err)
166 goto out;
167 sdcasr_mapbase = ioremap(res.start + SDCASR_OFFSET, 0x2000);
168 if (!sdcasr_mapbase) {
169 err = -EINVAL;
170 goto out;
171 }
172
173 dn = of_find_compatible_node(NULL, NULL, "1682m-gizmo");
174 if (!dn)
175 dn = of_find_compatible_node(NULL, NULL,
176 "pasemi,pwrficient-gizmo");
177 if (!dn) {
178 err = -ENODEV;
179 goto out_unmap_sdcasr;
180 }
181 err = of_address_to_resource(dn, 0, &res);
182 of_node_put(dn);
183 if (err)
184 goto out_unmap_sdcasr;
185 sdcpwr_mapbase = ioremap(res.start, 0x1000);
186 if (!sdcpwr_mapbase) {
187 err = -EINVAL;
188 goto out_unmap_sdcasr;
189 }
190
191 pr_debug("init cpufreq on CPU %d\n", policy->cpu);
192
193 max_freqp = of_get_property(cpu, "clock-frequency", NULL);
194 if (!max_freqp) {
195 err = -EINVAL;
196 goto out_unmap_sdcpwr;
197 }
198
199 /* we need the freq in kHz */
200 max_freq = *max_freqp / 1000;
201
202 pr_debug("max clock-frequency is at %u kHz\n", max_freq);
203 pr_debug("initializing frequency table\n");
204
205 /* initialize frequency table */
206 for (i=0; pas_freqs[i].frequency!=CPUFREQ_TABLE_END; i++) {
207 pas_freqs[i].frequency = get_astate_freq(pas_freqs[i].index) * 100000;
208 pr_debug("%d: %d\n", i, pas_freqs[i].frequency);
209 }
210
211 policy->cpuinfo.transition_latency = get_gizmo_latency();
212
213 cur_astate = get_cur_astate(policy->cpu);
214 pr_debug("current astate is at %d\n",cur_astate);
215
216 policy->cur = pas_freqs[cur_astate].frequency;
217 cpumask_copy(policy->cpus, cpu_online_mask);
218
219 ppc_proc_freq = policy->cur * 1000ul;
220
221 cpufreq_frequency_table_get_attr(pas_freqs, policy->cpu);
222
223 /* this ensures that policy->cpuinfo_min and policy->cpuinfo_max
224 * are set correctly
225 */
226 return cpufreq_frequency_table_cpuinfo(policy, pas_freqs);
227
228out_unmap_sdcpwr:
229 iounmap(sdcpwr_mapbase);
230
231out_unmap_sdcasr:
232 iounmap(sdcasr_mapbase);
233out:
234 return err;
235}
236
237static int pas_cpufreq_cpu_exit(struct cpufreq_policy *policy)
238{
239 /*
240 * We don't support CPU hotplug. Don't unmap after the system
241 * has already made it to a running state.
242 */
243 if (system_state != SYSTEM_BOOTING)
244 return 0;
245
246 if (sdcasr_mapbase)
247 iounmap(sdcasr_mapbase);
248 if (sdcpwr_mapbase)
249 iounmap(sdcpwr_mapbase);
250
251 cpufreq_frequency_table_put_attr(policy->cpu);
252 return 0;
253}
254
255static int pas_cpufreq_verify(struct cpufreq_policy *policy)
256{
257 return cpufreq_frequency_table_verify(policy, pas_freqs);
258}
259
260static int pas_cpufreq_target(struct cpufreq_policy *policy,
261 unsigned int target_freq,
262 unsigned int relation)
263{
264 struct cpufreq_freqs freqs;
265 int pas_astate_new;
266 int i;
267
268 cpufreq_frequency_table_target(policy,
269 pas_freqs,
270 target_freq,
271 relation,
272 &pas_astate_new);
273
274 freqs.old = policy->cur;
275 freqs.new = pas_freqs[pas_astate_new].frequency;
276
277 mutex_lock(&pas_switch_mutex);
278 cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
279
280 pr_debug("setting frequency for cpu %d to %d kHz, 1/%d of max frequency\n",
281 policy->cpu,
282 pas_freqs[pas_astate_new].frequency,
283 pas_freqs[pas_astate_new].index);
284
285 current_astate = pas_astate_new;
286
287 for_each_online_cpu(i)
288 set_astate(i, pas_astate_new);
289
290 cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
291 mutex_unlock(&pas_switch_mutex);
292
293 ppc_proc_freq = freqs.new * 1000ul;
294 return 0;
295}
296
297static struct cpufreq_driver pas_cpufreq_driver = {
298 .name = "pas-cpufreq",
299 .owner = THIS_MODULE,
300 .flags = CPUFREQ_CONST_LOOPS,
301 .init = pas_cpufreq_cpu_init,
302 .exit = pas_cpufreq_cpu_exit,
303 .verify = pas_cpufreq_verify,
304 .target = pas_cpufreq_target,
305 .attr = pas_cpu_freqs_attr,
306};
307
308/*
 309 * module init and destroy
310 */
311
312static int __init pas_cpufreq_init(void)
313{
314 if (!of_machine_is_compatible("PA6T-1682M") &&
315 !of_machine_is_compatible("pasemi,pwrficient"))
316 return -ENODEV;
317
318 return cpufreq_register_driver(&pas_cpufreq_driver);
319}
320
321static void __exit pas_cpufreq_exit(void)
322{
323 cpufreq_unregister_driver(&pas_cpufreq_driver);
324}
325
326module_init(pas_cpufreq_init);
327module_exit(pas_cpufreq_exit);
328
329MODULE_LICENSE("GPL");
330MODULE_AUTHOR("Egor Martovetsky <egor@pasemi.com>, Olof Johansson <olof@lixom.net>");
diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile
index ea47df66fee5..52c6ce1cc985 100644
--- a/arch/powerpc/platforms/powermac/Makefile
+++ b/arch/powerpc/platforms/powermac/Makefile
@@ -9,8 +9,6 @@ obj-y += pic.o setup.o time.o feature.o pci.o \
9 sleep.o low_i2c.o cache.o pfunc_core.o \ 9 sleep.o low_i2c.o cache.o pfunc_core.o \
10 pfunc_base.o udbg_scc.o udbg_adb.o 10 pfunc_base.o udbg_scc.o udbg_adb.o
11obj-$(CONFIG_PMAC_BACKLIGHT) += backlight.o 11obj-$(CONFIG_PMAC_BACKLIGHT) += backlight.o
12obj-$(CONFIG_CPU_FREQ_PMAC) += cpufreq_32.o
13obj-$(CONFIG_CPU_FREQ_PMAC64) += cpufreq_64.o
14# CONFIG_NVRAM is an arch. independent tristate symbol, for pmac32 we really 12# CONFIG_NVRAM is an arch. independent tristate symbol, for pmac32 we really
15# need this to be a bool. Cheat here and pretend CONFIG_NVRAM=m is really 13# need this to be a bool. Cheat here and pretend CONFIG_NVRAM=m is really
16# CONFIG_NVRAM=y 14# CONFIG_NVRAM=y
diff --git a/arch/powerpc/platforms/powermac/cpufreq_32.c b/arch/powerpc/platforms/powermac/cpufreq_32.c
deleted file mode 100644
index 3104fad82480..000000000000
--- a/arch/powerpc/platforms/powermac/cpufreq_32.c
+++ /dev/null
@@ -1,721 +0,0 @@
1/*
2 * Copyright (C) 2002 - 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>
3 * Copyright (C) 2004 John Steele Scott <toojays@toojays.net>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * TODO: Need a big cleanup here. Basically, we need to have different
 10 * cpufreq_driver structures for the different types of HW instead of the
11 * current mess. We also need to better deal with the detection of the
12 * type of machine.
13 *
14 */
15
16#include <linux/module.h>
17#include <linux/types.h>
18#include <linux/errno.h>
19#include <linux/kernel.h>
20#include <linux/delay.h>
21#include <linux/sched.h>
22#include <linux/adb.h>
23#include <linux/pmu.h>
24#include <linux/cpufreq.h>
25#include <linux/init.h>
26#include <linux/device.h>
27#include <linux/hardirq.h>
28#include <asm/prom.h>
29#include <asm/machdep.h>
30#include <asm/irq.h>
31#include <asm/pmac_feature.h>
32#include <asm/mmu_context.h>
33#include <asm/sections.h>
34#include <asm/cputable.h>
35#include <asm/time.h>
36#include <asm/mpic.h>
37#include <asm/keylargo.h>
38#include <asm/switch_to.h>
39
40/* WARNING !!! This will cause calibrate_delay() to be called,
41 * but this is an __init function ! So you MUST go edit
42 * init/main.c to make it non-init before enabling DEBUG_FREQ
43 */
44#undef DEBUG_FREQ
45
46extern void low_choose_7447a_dfs(int dfs);
47extern void low_choose_750fx_pll(int pll);
48extern void low_sleep_handler(void);
49
50/*
51 * Currently, PowerMac cpufreq supports only high & low frequencies
52 * that are set by the firmware
53 */
54static unsigned int low_freq;
55static unsigned int hi_freq;
56static unsigned int cur_freq;
57static unsigned int sleep_freq;
58static unsigned long transition_latency;
59
60/*
 61 * Different models use different mechanisms to switch the frequency
62 */
63static int (*set_speed_proc)(int low_speed);
64static unsigned int (*get_speed_proc)(void);
65
66/*
67 * Some definitions used by the various speedprocs
68 */
69static u32 voltage_gpio;
70static u32 frequency_gpio;
71static u32 slew_done_gpio;
72static int no_schedule;
73static int has_cpu_l2lve;
74static int is_pmu_based;
75
76/* There are only two frequency states for each processor. Values
77 * are in kHz for the time being.
78 */
79#define CPUFREQ_HIGH 0
80#define CPUFREQ_LOW 1
81
82static struct cpufreq_frequency_table pmac_cpu_freqs[] = {
83 {CPUFREQ_HIGH, 0},
84 {CPUFREQ_LOW, 0},
85 {0, CPUFREQ_TABLE_END},
86};
87
88static struct freq_attr* pmac_cpu_freqs_attr[] = {
89 &cpufreq_freq_attr_scaling_available_freqs,
90 NULL,
91};
92
93static inline void local_delay(unsigned long ms)
94{
95 if (no_schedule)
96 mdelay(ms);
97 else
98 msleep(ms);
99}
100
101#ifdef DEBUG_FREQ
102static inline void debug_calc_bogomips(void)
103{
104 /* This will cause a recalc of bogomips and display the
105 * result. We backup/restore the value to avoid affecting the
106 * core cpufreq framework's own calculation.
107 */
108 unsigned long save_lpj = loops_per_jiffy;
109 calibrate_delay();
110 loops_per_jiffy = save_lpj;
111}
112#endif /* DEBUG_FREQ */
113
114/* Switch CPU speed under 750FX CPU control
115 */
116static int cpu_750fx_cpu_speed(int low_speed)
117{
118 u32 hid2;
119
120 if (low_speed == 0) {
121 /* ramping up, set voltage first */
122 pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05);
123 /* Make sure we sleep for at least 1ms */
124 local_delay(10);
125
126 /* tweak L2 for high voltage */
127 if (has_cpu_l2lve) {
128 hid2 = mfspr(SPRN_HID2);
129 hid2 &= ~0x2000;
130 mtspr(SPRN_HID2, hid2);
131 }
132 }
133#ifdef CONFIG_6xx
134 low_choose_750fx_pll(low_speed);
135#endif
136 if (low_speed == 1) {
137 /* tweak L2 for low voltage */
138 if (has_cpu_l2lve) {
139 hid2 = mfspr(SPRN_HID2);
140 hid2 |= 0x2000;
141 mtspr(SPRN_HID2, hid2);
142 }
143
144 /* ramping down, set voltage last */
145 pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04);
146 local_delay(10);
147 }
148
149 return 0;
150}
151
152static unsigned int cpu_750fx_get_cpu_speed(void)
153{
154 if (mfspr(SPRN_HID1) & HID1_PS)
155 return low_freq;
156 else
157 return hi_freq;
158}
159
160/* Switch CPU speed using DFS */
161static int dfs_set_cpu_speed(int low_speed)
162{
163 if (low_speed == 0) {
164 /* ramping up, set voltage first */
165 pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05);
166 /* Make sure we sleep for at least 1ms */
167 local_delay(1);
168 }
169
170 /* set frequency */
171#ifdef CONFIG_6xx
172 low_choose_7447a_dfs(low_speed);
173#endif
174 udelay(100);
175
176 if (low_speed == 1) {
177 /* ramping down, set voltage last */
178 pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04);
179 local_delay(1);
180 }
181
182 return 0;
183}
184
185static unsigned int dfs_get_cpu_speed(void)
186{
187 if (mfspr(SPRN_HID1) & HID1_DFS)
188 return low_freq;
189 else
190 return hi_freq;
191}
192
193
194/* Switch CPU speed using slewing GPIOs
195 */
196static int gpios_set_cpu_speed(int low_speed)
197{
198 int gpio, timeout = 0;
199
200 /* If ramping up, set voltage first */
201 if (low_speed == 0) {
202 pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05);
203 /* Delay is way too big but it's ok, we schedule */
204 local_delay(10);
205 }
206
207 /* Set frequency */
208 gpio = pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, frequency_gpio, 0);
209 if (low_speed == ((gpio & 0x01) == 0))
210 goto skip;
211
212 pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, frequency_gpio,
213 low_speed ? 0x04 : 0x05);
214 udelay(200);
215 do {
216 if (++timeout > 100)
217 break;
218 local_delay(1);
219 gpio = pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, slew_done_gpio, 0);
220 } while((gpio & 0x02) == 0);
221 skip:
222 /* If ramping down, set voltage last */
223 if (low_speed == 1) {
224 pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04);
225 /* Delay is way too big but it's ok, we schedule */
226 local_delay(10);
227 }
228
229#ifdef DEBUG_FREQ
230 debug_calc_bogomips();
231#endif
232
233 return 0;
234}
235
236/* Switch CPU speed under PMU control
237 */
238static int pmu_set_cpu_speed(int low_speed)
239{
240 struct adb_request req;
241 unsigned long save_l2cr;
242 unsigned long save_l3cr;
243 unsigned int pic_prio;
244 unsigned long flags;
245
246 preempt_disable();
247
248#ifdef DEBUG_FREQ
249 printk(KERN_DEBUG "HID1, before: %x\n", mfspr(SPRN_HID1));
250#endif
251 pmu_suspend();
252
253 /* Disable all interrupt sources on openpic */
254 pic_prio = mpic_cpu_get_priority();
255 mpic_cpu_set_priority(0xf);
256
257 /* Make sure the decrementer won't interrupt us */
258 asm volatile("mtdec %0" : : "r" (0x7fffffff));
259 /* Make sure any pending DEC interrupt occurring while we did
260 * the above didn't re-enable the DEC */
261 mb();
262 asm volatile("mtdec %0" : : "r" (0x7fffffff));
263
264 /* We can now disable MSR_EE */
265 local_irq_save(flags);
266
267 /* Giveup the FPU & vec */
268 enable_kernel_fp();
269
270#ifdef CONFIG_ALTIVEC
271 if (cpu_has_feature(CPU_FTR_ALTIVEC))
272 enable_kernel_altivec();
273#endif /* CONFIG_ALTIVEC */
274
275 /* Save & disable L2 and L3 caches */
276 save_l3cr = _get_L3CR(); /* (returns -1 if not available) */
277 save_l2cr = _get_L2CR(); /* (returns -1 if not available) */
278
279 /* Send the new speed command. My assumption is that this command
280 * will cause PLL_CFG[0..3] to be changed next time CPU goes to sleep
281 */
282 pmu_request(&req, NULL, 6, PMU_CPU_SPEED, 'W', 'O', 'O', 'F', low_speed);
283 while (!req.complete)
284 pmu_poll();
285
286 /* Prepare the northbridge for the speed transition */
287 pmac_call_feature(PMAC_FTR_SLEEP_STATE,NULL,1,1);
288
289 /* Call low level code to backup CPU state and recover from
290 * hardware reset
291 */
292 low_sleep_handler();
293
294 /* Restore the northbridge */
295 pmac_call_feature(PMAC_FTR_SLEEP_STATE,NULL,1,0);
296
297 /* Restore L2 cache */
298 if (save_l2cr != 0xffffffff && (save_l2cr & L2CR_L2E) != 0)
299 _set_L2CR(save_l2cr);
300 /* Restore L3 cache */
301 if (save_l3cr != 0xffffffff && (save_l3cr & L3CR_L3E) != 0)
302 _set_L3CR(save_l3cr);
303
304 /* Restore userland MMU context */
305 switch_mmu_context(NULL, current->active_mm);
306
307#ifdef DEBUG_FREQ
308 printk(KERN_DEBUG "HID1, after: %x\n", mfspr(SPRN_HID1));
309#endif
310
311 /* Restore low level PMU operations */
312 pmu_unlock();
313
314 /*
315 * Restore decrementer; we'll take a decrementer interrupt
316 * as soon as interrupts are re-enabled and the generic
317 * clockevents code will reprogram it with the right value.
318 */
319 set_dec(1);
320
321 /* Restore interrupts */
322 mpic_cpu_set_priority(pic_prio);
323
324 /* Let interrupts flow again ... */
325 local_irq_restore(flags);
326
327#ifdef DEBUG_FREQ
328 debug_calc_bogomips();
329#endif
330
331 pmu_resume();
332
333 preempt_enable();
334
335 return 0;
336}
337
338static int do_set_cpu_speed(struct cpufreq_policy *policy, int speed_mode,
339 int notify)
340{
341 struct cpufreq_freqs freqs;
342 unsigned long l3cr;
343 static unsigned long prev_l3cr;
344
345 freqs.old = cur_freq;
346 freqs.new = (speed_mode == CPUFREQ_HIGH) ? hi_freq : low_freq;
347
348 if (freqs.old == freqs.new)
349 return 0;
350
351 if (notify)
352 cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
353 if (speed_mode == CPUFREQ_LOW &&
354 cpu_has_feature(CPU_FTR_L3CR)) {
355 l3cr = _get_L3CR();
356 if (l3cr & L3CR_L3E) {
357 prev_l3cr = l3cr;
358 _set_L3CR(0);
359 }
360 }
361 set_speed_proc(speed_mode == CPUFREQ_LOW);
362 if (speed_mode == CPUFREQ_HIGH &&
363 cpu_has_feature(CPU_FTR_L3CR)) {
364 l3cr = _get_L3CR();
365 if ((prev_l3cr & L3CR_L3E) && l3cr != prev_l3cr)
366 _set_L3CR(prev_l3cr);
367 }
368 if (notify)
369 cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
370 cur_freq = (speed_mode == CPUFREQ_HIGH) ? hi_freq : low_freq;
371
372 return 0;
373}
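/*
 * A minimal sketch of the notification protocol used above; this
 * example_switch() helper is hypothetical and only illustrates how each
 * hardware speed change is bracketed by PRECHANGE/POSTCHANGE events so
 * that the cpufreq core (and things like loops_per_jiffy scaling) stay
 * in sync:
 */
static int example_switch(struct cpufreq_policy *policy,
			  unsigned int old_khz, unsigned int new_khz)
{
	struct cpufreq_freqs freqs = { .old = old_khz, .new = new_khz };

	cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
	/* ... program the hardware here ... */
	cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
	return 0;
}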
374
375static unsigned int pmac_cpufreq_get_speed(unsigned int cpu)
376{
377 return cur_freq;
378}
379
380static int pmac_cpufreq_verify(struct cpufreq_policy *policy)
381{
382 return cpufreq_frequency_table_verify(policy, pmac_cpu_freqs);
383}
384
385static int pmac_cpufreq_target( struct cpufreq_policy *policy,
386 unsigned int target_freq,
387 unsigned int relation)
388{
389 unsigned int newstate = 0;
390 int rc;
391
392 if (cpufreq_frequency_table_target(policy, pmac_cpu_freqs,
393 target_freq, relation, &newstate))
394 return -EINVAL;
395
396 rc = do_set_cpu_speed(policy, newstate, 1);
397
398 ppc_proc_freq = cur_freq * 1000ul;
399 return rc;
400}
401
402static int pmac_cpufreq_cpu_init(struct cpufreq_policy *policy)
403{
404 if (policy->cpu != 0)
405 return -ENODEV;
406
407 policy->cpuinfo.transition_latency = transition_latency;
408 policy->cur = cur_freq;
409
410 cpufreq_frequency_table_get_attr(pmac_cpu_freqs, policy->cpu);
411 return cpufreq_frequency_table_cpuinfo(policy, pmac_cpu_freqs);
412}
413
414static u32 read_gpio(struct device_node *np)
415{
416 const u32 *reg = of_get_property(np, "reg", NULL);
417 u32 offset;
418
419 if (reg == NULL)
420 return 0;
421 /* That works for all keylargos but shall be fixed properly
422 * some day... The problem is that it seems we can't rely
423 * on the "reg" property of the GPIO nodes; they are either
424 * relative to the base of KeyLargo or to the base of the
425 * GPIO space, and the device-tree doesn't help.
426 */
427 offset = *reg;
428 if (offset < KEYLARGO_GPIO_LEVELS0)
429 offset += KEYLARGO_GPIO_LEVELS0;
430 return offset;
431}
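/*
 * Worked example of the rebasing above, assuming KEYLARGO_GPIO_LEVELS0
 * is 0x50 (per the KeyLargo headers; the exact value is an assumption
 * here): a "reg" of 0x0b, relative to the GPIO space, becomes
 * 0x50 + 0x0b = 0x5b, while a value such as 0x6f that is already
 * KeyLargo-relative is returned unchanged.
 */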
432
433static int pmac_cpufreq_suspend(struct cpufreq_policy *policy)
434{
435 /* Ok, this could be made a bit smarter, but let's be robust for now. We
436 * always force a speed change to high speed before sleep, to make sure
437 * we have appropriate voltage and/or bus speed for the wakeup process,
438 * and to make sure our loops_per_jiffy is "good enough", that is, will
439 * not cause too-short delays if we sleep in low speed and wake in high
440 * speed.
441 */
442 no_schedule = 1;
443 sleep_freq = cur_freq;
444 if (cur_freq == low_freq && !is_pmu_based)
445 do_set_cpu_speed(policy, CPUFREQ_HIGH, 0);
446 return 0;
447}
448
449static int pmac_cpufreq_resume(struct cpufreq_policy *policy)
450{
451 /* If we resume, first check if we have a get() function */
452 if (get_speed_proc)
453 cur_freq = get_speed_proc();
454 else
455 cur_freq = 0;
456
457 /* If we don't, we don't really know our speed here; the best
458 * we can do is force a switch to whatever it was before, which
459 * is probably high speed due to our suspend() routine
460 */
461 do_set_cpu_speed(policy, sleep_freq == low_freq ?
462 CPUFREQ_LOW : CPUFREQ_HIGH, 0);
463
464 ppc_proc_freq = cur_freq * 1000ul;
465
466 no_schedule = 0;
467 return 0;
468}
469
470static struct cpufreq_driver pmac_cpufreq_driver = {
471 .verify = pmac_cpufreq_verify,
472 .target = pmac_cpufreq_target,
473 .get = pmac_cpufreq_get_speed,
474 .init = pmac_cpufreq_cpu_init,
475 .suspend = pmac_cpufreq_suspend,
476 .resume = pmac_cpufreq_resume,
477 .flags = CPUFREQ_PM_NO_WARN,
478 .attr = pmac_cpu_freqs_attr,
479 .name = "powermac",
480 .owner = THIS_MODULE,
481};
482
483
484static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode)
485{
486 struct device_node *volt_gpio_np = of_find_node_by_name(NULL,
487 "voltage-gpio");
488 struct device_node *freq_gpio_np = of_find_node_by_name(NULL,
489 "frequency-gpio");
490 struct device_node *slew_done_gpio_np = of_find_node_by_name(NULL,
491 "slewing-done");
492 const u32 *value;
493
494 /*
495 * Check to see if it's GPIO driven or PMU only
496 *
497 * The way we extract the GPIO address is slightly hackish, but it
498 * works well enough for now. We need to abstract the whole GPIO
499 * stuff sooner or later anyway
500 */
501
502 if (volt_gpio_np)
503 voltage_gpio = read_gpio(volt_gpio_np);
504 if (freq_gpio_np)
505 frequency_gpio = read_gpio(freq_gpio_np);
506 if (slew_done_gpio_np)
507 slew_done_gpio = read_gpio(slew_done_gpio_np);
508
509 /* If we use the frequency GPIOs, calculate the min/max speeds based
510 * on the bus frequencies
511 */
512 if (frequency_gpio && slew_done_gpio) {
513 int lenp, rc;
514 const u32 *freqs, *ratio;
515
516 freqs = of_get_property(cpunode, "bus-frequencies", &lenp);
517 lenp /= sizeof(u32);
518 if (freqs == NULL || lenp != 2) {
519 printk(KERN_ERR "cpufreq: bus-frequencies incorrect or missing\n");
520 return 1;
521 }
522 ratio = of_get_property(cpunode, "processor-to-bus-ratio*2",
523 NULL);
524 if (ratio == NULL) {
525 printk(KERN_ERR "cpufreq: processor-to-bus-ratio*2 missing\n");
526 return 1;
527 }
528
529 /* Get the min/max bus frequencies */
530 low_freq = min(freqs[0], freqs[1]);
531 hi_freq = max(freqs[0], freqs[1]);
532
533 /* Grrrr.. It _seems_ that the device-tree is lying about the low bus
534 * frequency: it claims it is around 84 MHz on some models while
535 * it appears to be approx. 101 MHz on all. Let's hack around here...
536 * fortunately, we don't need to be too precise
537 */
538 if (low_freq < 98000000)
539 low_freq = 101000000;
540
541 /* Convert those to CPU core clocks */
542 low_freq = (low_freq * (*ratio)) / 2000;
543 hi_freq = (hi_freq * (*ratio)) / 2000;
544
545 /* Now that we have the frequencies, read the GPIO to see what
546 * our current speed is
547 */
548 rc = pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, frequency_gpio, 0);
549 cur_freq = (rc & 0x01) ? hi_freq : low_freq;
550
551 set_speed_proc = gpios_set_cpu_speed;
552 return 1;
553 }
554
555 /* If we use the PMU, look for the min & max frequencies in the
556 * device-tree
557 */
558 value = of_get_property(cpunode, "min-clock-frequency", NULL);
559 if (!value)
560 return 1;
561 low_freq = (*value) / 1000;
562 /* The PowerBook G4 12" (PowerBook6,1) has an error in the device-tree
563 * here */
564 if (low_freq < 100000)
565 low_freq *= 10;
566
567 value = of_get_property(cpunode, "max-clock-frequency", NULL);
568 if (!value)
569 return 1;
570 hi_freq = (*value) / 1000;
571 set_speed_proc = pmu_set_cpu_speed;
572 is_pmu_based = 1;
573
574 return 0;
575}
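/*
 * Worked example of the core-clock conversion done above (illustrative
 * numbers): with a 101 MHz bus and a "processor-to-bus-ratio*2" of 15
 * (a 7.5x multiplier), low_freq = (101000000 * 15) / 2000 = 757500 kHz,
 * i.e. a 757.5 MHz core clock expressed in the kHz units cpufreq uses.
 */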
576
577static int pmac_cpufreq_init_7447A(struct device_node *cpunode)
578{
579 struct device_node *volt_gpio_np;
580
581 if (of_get_property(cpunode, "dynamic-power-step", NULL) == NULL)
582 return 1;
583
584 volt_gpio_np = of_find_node_by_name(NULL, "cpu-vcore-select");
585 if (volt_gpio_np)
586 voltage_gpio = read_gpio(volt_gpio_np);
587 if (!voltage_gpio){
588 printk(KERN_ERR "cpufreq: missing cpu-vcore-select gpio\n");
589 return 1;
590 }
591
592 /* OF only reports the high frequency */
593 hi_freq = cur_freq;
594 low_freq = cur_freq/2;
595
596 /* Read actual frequency from CPU */
597 cur_freq = dfs_get_cpu_speed();
598 set_speed_proc = dfs_set_cpu_speed;
599 get_speed_proc = dfs_get_cpu_speed;
600
601 return 0;
602}
603
604static int pmac_cpufreq_init_750FX(struct device_node *cpunode)
605{
606 struct device_node *volt_gpio_np;
607 u32 pvr;
608 const u32 *value;
609
610 if (of_get_property(cpunode, "dynamic-power-step", NULL) == NULL)
611 return 1;
612
613 hi_freq = cur_freq;
614 value = of_get_property(cpunode, "reduced-clock-frequency", NULL);
615 if (!value)
616 return 1;
617 low_freq = (*value) / 1000;
618
619 volt_gpio_np = of_find_node_by_name(NULL, "cpu-vcore-select");
620 if (volt_gpio_np)
621 voltage_gpio = read_gpio(volt_gpio_np);
622
623 pvr = mfspr(SPRN_PVR);
624 has_cpu_l2lve = !((pvr & 0xf00) == 0x100);
625
626 set_speed_proc = cpu_750fx_cpu_speed;
627 get_speed_proc = cpu_750fx_get_cpu_speed;
628 cur_freq = cpu_750fx_get_cpu_speed();
629
630 return 0;
631}
632
633/* Currently, we support the following machines:
634 *
635 * - Titanium PowerBook 1Ghz (PMU based, 667Mhz & 1Ghz)
636 * - Titanium PowerBook 800 (PMU based, 667Mhz & 800Mhz)
637 * - Titanium PowerBook 400 (PMU based, 300Mhz & 400Mhz)
638 * - Titanium PowerBook 500 (PMU based, 300Mhz & 500Mhz)
639 * - iBook2 500/600 (PMU based, 400Mhz & 500/600Mhz)
640 * - iBook2 700 (CPU based, 400Mhz & 700Mhz, support low voltage)
641 * - Recent MacRISC3 laptops
642 * - All new machines with 7447A CPUs
643 */
644static int __init pmac_cpufreq_setup(void)
645{
646 struct device_node *cpunode;
647 const u32 *value;
648
649 if (strstr(cmd_line, "nocpufreq"))
650 return 0;
651
652 /* Assume only one CPU */
653 cpunode = of_find_node_by_type(NULL, "cpu");
654 if (!cpunode)
655 goto out;
656
657 /* Get current cpu clock freq */
658 value = of_get_property(cpunode, "clock-frequency", NULL);
659 if (!value)
660 goto out;
661 cur_freq = (*value) / 1000;
662 transition_latency = CPUFREQ_ETERNAL;
663
664 /* Check for 7447A based MacRISC3 */
665 if (of_machine_is_compatible("MacRISC3") &&
666 of_get_property(cpunode, "dynamic-power-step", NULL) &&
667 PVR_VER(mfspr(SPRN_PVR)) == 0x8003) {
668 pmac_cpufreq_init_7447A(cpunode);
669 transition_latency = 8000000;
670 /* Check for other MacRISC3 machines */
671 } else if (of_machine_is_compatible("PowerBook3,4") ||
672 of_machine_is_compatible("PowerBook3,5") ||
673 of_machine_is_compatible("MacRISC3")) {
674 pmac_cpufreq_init_MacRISC3(cpunode);
675 /* Else check for iBook2 500/600 */
676 } else if (of_machine_is_compatible("PowerBook4,1")) {
677 hi_freq = cur_freq;
678 low_freq = 400000;
679 set_speed_proc = pmu_set_cpu_speed;
680 is_pmu_based = 1;
681 }
682 /* Else check for TiPb 550 */
683 else if (of_machine_is_compatible("PowerBook3,3") && cur_freq == 550000) {
684 hi_freq = cur_freq;
685 low_freq = 500000;
686 set_speed_proc = pmu_set_cpu_speed;
687 is_pmu_based = 1;
688 }
689 /* Else check for TiPb 400 & 500 */
690 else if (of_machine_is_compatible("PowerBook3,2")) {
691 /* We only know about the 400 MHz and the 500 MHz models;
692 * they both have 300 MHz as the low frequency
693 */
694 if (cur_freq < 350000 || cur_freq > 550000)
695 goto out;
696 hi_freq = cur_freq;
697 low_freq = 300000;
698 set_speed_proc = pmu_set_cpu_speed;
699 is_pmu_based = 1;
700 }
701 /* Else check for 750FX */
702 else if (PVR_VER(mfspr(SPRN_PVR)) == 0x7000)
703 pmac_cpufreq_init_750FX(cpunode);
704out:
705 of_node_put(cpunode);
706 if (set_speed_proc == NULL)
707 return -ENODEV;
708
709 pmac_cpu_freqs[CPUFREQ_LOW].frequency = low_freq;
710 pmac_cpu_freqs[CPUFREQ_HIGH].frequency = hi_freq;
711 ppc_proc_freq = cur_freq * 1000ul;
712
713 printk(KERN_INFO "Registering PowerMac CPU frequency driver\n");
714 printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Boot: %d Mhz\n",
715 low_freq/1000, hi_freq/1000, cur_freq/1000);
716
717 return cpufreq_register_driver(&pmac_cpufreq_driver);
718}
719
720module_init(pmac_cpufreq_setup);
721
diff --git a/arch/powerpc/platforms/powermac/cpufreq_64.c b/arch/powerpc/platforms/powermac/cpufreq_64.c
deleted file mode 100644
index 7ba423431cfe..000000000000
--- a/arch/powerpc/platforms/powermac/cpufreq_64.c
+++ /dev/null
@@ -1,746 +0,0 @@
1/*
2 * Copyright (C) 2002 - 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>
3 * and Markus Demleitner <msdemlei@cl.uni-heidelberg.de>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This driver adds basic cpufreq support for SMU & 970FX based G5 Macs,
10 * that is, iMac G5 and the latest single-CPU desktops.
11 */
12
13#undef DEBUG
14
15#include <linux/module.h>
16#include <linux/types.h>
17#include <linux/errno.h>
18#include <linux/kernel.h>
19#include <linux/delay.h>
20#include <linux/sched.h>
21#include <linux/cpufreq.h>
22#include <linux/init.h>
23#include <linux/completion.h>
24#include <linux/mutex.h>
25#include <asm/prom.h>
26#include <asm/machdep.h>
27#include <asm/irq.h>
28#include <asm/sections.h>
29#include <asm/cputable.h>
30#include <asm/time.h>
31#include <asm/smu.h>
32#include <asm/pmac_pfunc.h>
33
34#define DBG(fmt...) pr_debug(fmt)
35
36/* see 970FX user manual */
37
38#define SCOM_PCR 0x0aa001 /* PCR scom addr */
39
40#define PCR_HILO_SELECT 0x80000000U /* 1 = PCR, 0 = PCRH */
41#define PCR_SPEED_FULL 0x00000000U /* 1:1 speed value */
42#define PCR_SPEED_HALF 0x00020000U /* 1:2 speed value */
43#define PCR_SPEED_QUARTER 0x00040000U /* 1:4 speed value */
44#define PCR_SPEED_MASK 0x000e0000U /* speed mask */
45#define PCR_SPEED_SHIFT 17
46#define PCR_FREQ_REQ_VALID 0x00010000U /* freq request valid */
47#define PCR_VOLT_REQ_VALID 0x00008000U /* volt request valid */
48#define PCR_TARGET_TIME_MASK 0x00006000U /* target time */
49#define PCR_STATLAT_MASK 0x00001f00U /* STATLAT value */
50#define PCR_SNOOPLAT_MASK 0x000000f0U /* SNOOPLAT value */
51#define PCR_SNOOPACC_MASK 0x0000000fU /* SNOOPACC value */
52
53#define SCOM_PSR 0x408001 /* PSR scom addr */
54/* warning: PSR is a 64-bit register */
55#define PSR_CMD_RECEIVED 0x2000000000000000U /* command received */
56#define PSR_CMD_COMPLETED 0x1000000000000000U /* command completed */
57#define PSR_CUR_SPEED_MASK 0x0300000000000000U /* current speed */
58#define PSR_CUR_SPEED_SHIFT (56)
59
60/*
61 * The G5 only supports two frequencies (Quarter speed is not supported)
62 */
63#define CPUFREQ_HIGH 0
64#define CPUFREQ_LOW 1
65
66static struct cpufreq_frequency_table g5_cpu_freqs[] = {
67 {CPUFREQ_HIGH, 0},
68 {CPUFREQ_LOW, 0},
69 {0, CPUFREQ_TABLE_END},
70};
71
72static struct freq_attr* g5_cpu_freqs_attr[] = {
73 &cpufreq_freq_attr_scaling_available_freqs,
74 NULL,
75};
76
77/* Power mode data is an array of the 32-bit PCR values to use for
78 * the various frequencies, retrieved from the device-tree
79 */
80static int g5_pmode_cur;
81
82static void (*g5_switch_volt)(int speed_mode);
83static int (*g5_switch_freq)(int speed_mode);
84static int (*g5_query_freq)(void);
85
86static DEFINE_MUTEX(g5_switch_mutex);
87
88static unsigned long transition_latency;
89
90#ifdef CONFIG_PMAC_SMU
91
92static const u32 *g5_pmode_data;
93static int g5_pmode_max;
94
95static struct smu_sdbp_fvt *g5_fvt_table; /* table of op. points */
96static int g5_fvt_count; /* number of op. points */
97static int g5_fvt_cur; /* current op. point */
98
99/*
100 * SMU based voltage switching for Neo2 platforms
101 */
102
103static void g5_smu_switch_volt(int speed_mode)
104{
105 struct smu_simple_cmd cmd;
106
107 DECLARE_COMPLETION_ONSTACK(comp);
108 smu_queue_simple(&cmd, SMU_CMD_POWER_COMMAND, 8, smu_done_complete,
109 &comp, 'V', 'S', 'L', 'E', 'W',
110 0xff, g5_fvt_cur+1, speed_mode);
111 wait_for_completion(&comp);
112}
113
114/*
115 * Platform function based voltage/vdnap switching for Neo2
116 */
117
118static struct pmf_function *pfunc_set_vdnap0;
119static struct pmf_function *pfunc_vdnap0_complete;
120
121static void g5_vdnap_switch_volt(int speed_mode)
122{
123 struct pmf_args args;
124 u32 slew, done = 0;
125 unsigned long timeout;
126
127 slew = (speed_mode == CPUFREQ_LOW) ? 1 : 0;
128 args.count = 1;
129 args.u[0].p = &slew;
130
131 pmf_call_one(pfunc_set_vdnap0, &args);
132
133 /* It's an irq GPIO so we should be able to just block here,
134 * I'll do that later after I've properly tested the IRQ code for
135 * platform functions
136 */
137 timeout = jiffies + HZ/10;
138 while(!time_after(jiffies, timeout)) {
139 args.count = 1;
140 args.u[0].p = &done;
141 pmf_call_one(pfunc_vdnap0_complete, &args);
142 if (done)
143 break;
144 msleep(1);
145 }
146 if (done == 0)
147 printk(KERN_WARNING "cpufreq: Timeout in clock slewing !\n");
148}
149
150
151/*
152 * SCOM based frequency switching for 970FX rev3
153 */
154static int g5_scom_switch_freq(int speed_mode)
155{
156 unsigned long flags;
157 int to;
158
159 /* If frequency is going up, first ramp up the voltage */
160 if (speed_mode < g5_pmode_cur)
161 g5_switch_volt(speed_mode);
162
163 local_irq_save(flags);
164
165 /* Clear PCR high */
166 scom970_write(SCOM_PCR, 0);
167 /* Clear PCR low */
168 scom970_write(SCOM_PCR, PCR_HILO_SELECT | 0);
169 /* Set PCR low */
170 scom970_write(SCOM_PCR, PCR_HILO_SELECT |
171 g5_pmode_data[speed_mode]);
172
173 /* Wait for completion */
174 for (to = 0; to < 10; to++) {
175 unsigned long psr = scom970_read(SCOM_PSR);
176
177 if ((psr & PSR_CMD_RECEIVED) == 0 &&
178 (((psr >> PSR_CUR_SPEED_SHIFT) ^
179 (g5_pmode_data[speed_mode] >> PCR_SPEED_SHIFT)) & 0x3)
180 == 0)
181 break;
182 if (psr & PSR_CMD_COMPLETED)
183 break;
184 udelay(100);
185 }
186
187 local_irq_restore(flags);
188
189 /* If frequency is going down, last ramp the voltage */
190 if (speed_mode > g5_pmode_cur)
191 g5_switch_volt(speed_mode);
192
193 g5_pmode_cur = speed_mode;
194 ppc_proc_freq = g5_cpu_freqs[speed_mode].frequency * 1000ul;
195
196 return 0;
197}
198
199static int g5_scom_query_freq(void)
200{
201 unsigned long psr = scom970_read(SCOM_PSR);
202 int i;
203
204 for (i = 0; i <= g5_pmode_max; i++)
205 if ((((psr >> PSR_CUR_SPEED_SHIFT) ^
206 (g5_pmode_data[i] >> PCR_SPEED_SHIFT)) & 0x3) == 0)
207 break;
208 return i;
209}
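/*
 * A hypothetical standalone helper, shown only to make the PSR decoding
 * above explicit: the current speed is a 2-bit field starting at bit 56
 * of the 64-bit PSR, matched against the PCR_SPEED_* values shifted
 * down by PCR_SPEED_SHIFT (0 = full speed, 1 = half speed).
 */
static inline u32 example_psr_speed_field(u64 psr)
{
	return (psr >> PSR_CUR_SPEED_SHIFT) & 0x3;
}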
210
211/*
212 * Fake voltage switching for platforms with missing support
213 */
214
215static void g5_dummy_switch_volt(int speed_mode)
216{
217}
218
219#endif /* CONFIG_PMAC_SMU */
220
221/*
222 * Platform function based voltage switching for PowerMac7,2 & 7,3
223 */
224
225static struct pmf_function *pfunc_cpu0_volt_high;
226static struct pmf_function *pfunc_cpu0_volt_low;
227static struct pmf_function *pfunc_cpu1_volt_high;
228static struct pmf_function *pfunc_cpu1_volt_low;
229
230static void g5_pfunc_switch_volt(int speed_mode)
231{
232 if (speed_mode == CPUFREQ_HIGH) {
233 if (pfunc_cpu0_volt_high)
234 pmf_call_one(pfunc_cpu0_volt_high, NULL);
235 if (pfunc_cpu1_volt_high)
236 pmf_call_one(pfunc_cpu1_volt_high, NULL);
237 } else {
238 if (pfunc_cpu0_volt_low)
239 pmf_call_one(pfunc_cpu0_volt_low, NULL);
240 if (pfunc_cpu1_volt_low)
241 pmf_call_one(pfunc_cpu1_volt_low, NULL);
242 }
243 msleep(10); /* should be faster, to be fixed */
244}
245
246/*
247 * Platform function based frequency switching for PowerMac7,2 & 7,3
248 */
249
250static struct pmf_function *pfunc_cpu_setfreq_high;
251static struct pmf_function *pfunc_cpu_setfreq_low;
252static struct pmf_function *pfunc_cpu_getfreq;
253static struct pmf_function *pfunc_slewing_done;
254
255static int g5_pfunc_switch_freq(int speed_mode)
256{
257 struct pmf_args args;
258 u32 done = 0;
259 unsigned long timeout;
260 int rc;
261
262 DBG("g5_pfunc_switch_freq(%d)\n", speed_mode);
263
264 /* If frequency is going up, first ramp up the voltage */
265 if (speed_mode < g5_pmode_cur)
266 g5_switch_volt(speed_mode);
267
268 /* Do it */
269 if (speed_mode == CPUFREQ_HIGH)
270 rc = pmf_call_one(pfunc_cpu_setfreq_high, NULL);
271 else
272 rc = pmf_call_one(pfunc_cpu_setfreq_low, NULL);
273
274 if (rc)
275 printk(KERN_WARNING "cpufreq: pfunc switch error %d\n", rc);
276
277 /* It's an irq GPIO so we should be able to just block here,
278 * I'll do that later after I've properly tested the IRQ code for
279 * platform functions
280 */
281 timeout = jiffies + HZ/10;
282 while(!time_after(jiffies, timeout)) {
283 args.count = 1;
284 args.u[0].p = &done;
285 pmf_call_one(pfunc_slewing_done, &args);
286 if (done)
287 break;
288 msleep(1);
289 }
290 if (done == 0)
291 printk(KERN_WARNING "cpufreq: Timeout in clock slewing !\n");
292
293 /* If frequency is going down, last ramp the voltage */
294 if (speed_mode > g5_pmode_cur)
295 g5_switch_volt(speed_mode);
296
297 g5_pmode_cur = speed_mode;
298 ppc_proc_freq = g5_cpu_freqs[speed_mode].frequency * 1000ul;
299
300 return 0;
301}
302
303static int g5_pfunc_query_freq(void)
304{
305 struct pmf_args args;
306 u32 val = 0;
307
308 args.count = 1;
309 args.u[0].p = &val;
310 pmf_call_one(pfunc_cpu_getfreq, &args);
311 return val ? CPUFREQ_HIGH : CPUFREQ_LOW;
312}
313
314
315/*
316 * Common interface to the cpufreq core
317 */
318
319static int g5_cpufreq_verify(struct cpufreq_policy *policy)
320{
321 return cpufreq_frequency_table_verify(policy, g5_cpu_freqs);
322}
323
324static int g5_cpufreq_target(struct cpufreq_policy *policy,
325 unsigned int target_freq, unsigned int relation)
326{
327 unsigned int newstate = 0;
328 struct cpufreq_freqs freqs;
329 int rc;
330
331 if (cpufreq_frequency_table_target(policy, g5_cpu_freqs,
332 target_freq, relation, &newstate))
333 return -EINVAL;
334
335 if (g5_pmode_cur == newstate)
336 return 0;
337
338 mutex_lock(&g5_switch_mutex);
339
340 freqs.old = g5_cpu_freqs[g5_pmode_cur].frequency;
341 freqs.new = g5_cpu_freqs[newstate].frequency;
342
343 cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
344 rc = g5_switch_freq(newstate);
345 cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
346
347 mutex_unlock(&g5_switch_mutex);
348
349 return rc;
350}
351
352static unsigned int g5_cpufreq_get_speed(unsigned int cpu)
353{
354 return g5_cpu_freqs[g5_pmode_cur].frequency;
355}
356
357static int g5_cpufreq_cpu_init(struct cpufreq_policy *policy)
358{
359 policy->cpuinfo.transition_latency = transition_latency;
360 policy->cur = g5_cpu_freqs[g5_query_freq()].frequency;
361 /* Secondary CPUs are tied to the primary one by the cpufreq
362 * core; by copying cpu_online_mask into the policy we tell it
363 * that all CPUs must actually share one policy. */
364 cpumask_copy(policy->cpus, cpu_online_mask);
365 cpufreq_frequency_table_get_attr(g5_cpu_freqs, policy->cpu);
366
367 return cpufreq_frequency_table_cpuinfo(policy,
368 g5_cpu_freqs);
369}
370
371
372static struct cpufreq_driver g5_cpufreq_driver = {
373 .name = "powermac",
374 .owner = THIS_MODULE,
375 .flags = CPUFREQ_CONST_LOOPS,
376 .init = g5_cpufreq_cpu_init,
377 .verify = g5_cpufreq_verify,
378 .target = g5_cpufreq_target,
379 .get = g5_cpufreq_get_speed,
380 .attr = g5_cpu_freqs_attr,
381};
382
383
384#ifdef CONFIG_PMAC_SMU
385
386static int __init g5_neo2_cpufreq_init(struct device_node *cpus)
387{
388 struct device_node *cpunode;
389 unsigned int psize, ssize;
390 unsigned long max_freq;
391 char *freq_method, *volt_method;
392 const u32 *valp;
393 u32 pvr_hi;
394 int use_volts_vdnap = 0;
395 int use_volts_smu = 0;
396 int rc = -ENODEV;
397
398 /* Check supported platforms */
399 if (of_machine_is_compatible("PowerMac8,1") ||
400 of_machine_is_compatible("PowerMac8,2") ||
401 of_machine_is_compatible("PowerMac9,1"))
402 use_volts_smu = 1;
403 else if (of_machine_is_compatible("PowerMac11,2"))
404 use_volts_vdnap = 1;
405 else
406 return -ENODEV;
407
408 /* Get first CPU node */
409 for (cpunode = NULL;
410 (cpunode = of_get_next_child(cpus, cpunode)) != NULL;) {
411 const u32 *reg = of_get_property(cpunode, "reg", NULL);
412 if (reg == NULL || (*reg) != 0)
413 continue;
414 if (!strcmp(cpunode->type, "cpu"))
415 break;
416 }
417 if (cpunode == NULL) {
418 printk(KERN_ERR "cpufreq: Can't find any CPU 0 node\n");
419 return -ENODEV;
420 }
421
422 /* Check 970FX for now */
423 valp = of_get_property(cpunode, "cpu-version", NULL);
424 if (!valp) {
425 DBG("No cpu-version property !\n");
426 goto bail_noprops;
427 }
428 pvr_hi = (*valp) >> 16;
429 if (pvr_hi != 0x3c && pvr_hi != 0x44) {
430 printk(KERN_ERR "cpufreq: Unsupported CPU version\n");
431 goto bail_noprops;
432 }
433
434 /* Look for the powertune data in the device-tree */
435 g5_pmode_data = of_get_property(cpunode, "power-mode-data",&psize);
436 if (!g5_pmode_data) {
437 DBG("No power-mode-data !\n");
438 goto bail_noprops;
439 }
440 g5_pmode_max = psize / sizeof(u32) - 1;
441
442 if (use_volts_smu) {
443 const struct smu_sdbp_header *shdr;
444
445 /* Look for the FVT table */
446 shdr = smu_get_sdb_partition(SMU_SDB_FVT_ID, NULL);
447 if (!shdr)
448 goto bail_noprops;
449 g5_fvt_table = (struct smu_sdbp_fvt *)&shdr[1];
450 ssize = (shdr->len * sizeof(u32)) -
451 sizeof(struct smu_sdbp_header);
452 g5_fvt_count = ssize / sizeof(struct smu_sdbp_fvt);
453 g5_fvt_cur = 0;
454
455 /* Sanity checking */
456 if (g5_fvt_count < 1 || g5_pmode_max < 1)
457 goto bail_noprops;
458
459 g5_switch_volt = g5_smu_switch_volt;
460 volt_method = "SMU";
461 } else if (use_volts_vdnap) {
462 struct device_node *root;
463
464 root = of_find_node_by_path("/");
465 if (root == NULL) {
466 printk(KERN_ERR "cpufreq: Can't find root of "
467 "device tree\n");
468 goto bail_noprops;
469 }
470 pfunc_set_vdnap0 = pmf_find_function(root, "set-vdnap0");
471 pfunc_vdnap0_complete =
472 pmf_find_function(root, "slewing-done");
473 if (pfunc_set_vdnap0 == NULL ||
474 pfunc_vdnap0_complete == NULL) {
475 printk(KERN_ERR "cpufreq: Can't find required "
476 "platform function\n");
477 goto bail_noprops;
478 }
479
480 g5_switch_volt = g5_vdnap_switch_volt;
481 volt_method = "GPIO";
482 } else {
483 g5_switch_volt = g5_dummy_switch_volt;
484 volt_method = "none";
485 }
486
487 /*
488 * From what I see, clock-frequency is always the maximal frequency.
489 * The current driver can not slew sysclk yet, so we really only deal
490 * with powertune steps for now. We also only implement full freq and
491 * half freq in this version. So far, I haven't yet seen a machine
492 * supporting anything else.
493 */
494 valp = of_get_property(cpunode, "clock-frequency", NULL);
495 if (!valp)
496 goto bail_noprops; /* drop the cpunode reference on error */
497 max_freq = (*valp)/1000;
498 g5_cpu_freqs[0].frequency = max_freq;
499 g5_cpu_freqs[1].frequency = max_freq/2;
500
501 /* Set callbacks */
502 transition_latency = 12000;
503 g5_switch_freq = g5_scom_switch_freq;
504 g5_query_freq = g5_scom_query_freq;
505 freq_method = "SCOM";
506
507 /* Force apply current frequency to make sure everything is in
508 * sync (voltage is right for example). Firmware may leave us with
509 * a strange setting ...
510 */
511 g5_switch_volt(CPUFREQ_HIGH);
512 msleep(10);
513 g5_pmode_cur = -1;
514 g5_switch_freq(g5_query_freq());
515
516 printk(KERN_INFO "Registering G5 CPU frequency driver\n");
517 printk(KERN_INFO "Frequency method: %s, Voltage method: %s\n",
518 freq_method, volt_method);
519 printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n",
520 g5_cpu_freqs[1].frequency/1000,
521 g5_cpu_freqs[0].frequency/1000,
522 g5_cpu_freqs[g5_pmode_cur].frequency/1000);
523
524 rc = cpufreq_register_driver(&g5_cpufreq_driver);
525
526 /* We keep the CPU node on hold... hopefully, Apple G5s don't have
527 * hotplug CPUs with a dynamic device-tree ...
528 */
529 return rc;
530
531 bail_noprops:
532 of_node_put(cpunode);
533
534 return rc;
535}
536
537#endif /* CONFIG_PMAC_SMU */
538
539
540static int __init g5_pm72_cpufreq_init(struct device_node *cpus)
541{
542 struct device_node *cpuid = NULL, *hwclock = NULL, *cpunode = NULL;
543 const u8 *eeprom = NULL;
544 const u32 *valp;
545 u64 max_freq, min_freq, ih, il;
546 int has_volt = 1, rc = 0;
547
548 DBG("cpufreq: Initializing for PowerMac7,2, PowerMac7,3 and"
549 " RackMac3,1...\n");
550
551 /* Get first CPU node */
552 for (cpunode = NULL;
553 (cpunode = of_get_next_child(cpus, cpunode)) != NULL;) {
554 if (!strcmp(cpunode->type, "cpu"))
555 break;
556 }
557 if (cpunode == NULL) {
558 printk(KERN_ERR "cpufreq: Can't find any CPU node\n");
559 return -ENODEV;
560 }
561
562 /* Lookup the cpuid eeprom node */
563 cpuid = of_find_node_by_path("/u3@0,f8000000/i2c@f8001000/cpuid@a0");
564 if (cpuid != NULL)
565 eeprom = of_get_property(cpuid, "cpuid", NULL);
566 if (eeprom == NULL) {
567 printk(KERN_ERR "cpufreq: Can't find cpuid EEPROM !\n");
568 rc = -ENODEV;
569 goto bail;
570 }
571
572 /* Lookup the i2c hwclock */
573 for (hwclock = NULL;
574 (hwclock = of_find_node_by_name(hwclock, "i2c-hwclock")) != NULL;){
575 const char *loc = of_get_property(hwclock,
576 "hwctrl-location", NULL);
577 if (loc == NULL)
578 continue;
579 if (strcmp(loc, "CPU CLOCK"))
580 continue;
581 if (!of_get_property(hwclock, "platform-get-frequency", NULL))
582 continue;
583 break;
584 }
585 if (hwclock == NULL) {
586 printk(KERN_ERR "cpufreq: Can't find i2c clock chip !\n");
587 rc = -ENODEV;
588 goto bail;
589 }
590
591 DBG("cpufreq: i2c clock chip found: %s\n", hwclock->full_name);
592
593 /* Now get all the platform functions */
594 pfunc_cpu_getfreq =
595 pmf_find_function(hwclock, "get-frequency");
596 pfunc_cpu_setfreq_high =
597 pmf_find_function(hwclock, "set-frequency-high");
598 pfunc_cpu_setfreq_low =
599 pmf_find_function(hwclock, "set-frequency-low");
600 pfunc_slewing_done =
601 pmf_find_function(hwclock, "slewing-done");
602 pfunc_cpu0_volt_high =
603 pmf_find_function(hwclock, "set-voltage-high-0");
604 pfunc_cpu0_volt_low =
605 pmf_find_function(hwclock, "set-voltage-low-0");
606 pfunc_cpu1_volt_high =
607 pmf_find_function(hwclock, "set-voltage-high-1");
608 pfunc_cpu1_volt_low =
609 pmf_find_function(hwclock, "set-voltage-low-1");
610
611 /* Check we have minimum requirements */
612 if (pfunc_cpu_getfreq == NULL || pfunc_cpu_setfreq_high == NULL ||
613 pfunc_cpu_setfreq_low == NULL || pfunc_slewing_done == NULL) {
614 printk(KERN_ERR "cpufreq: Can't find platform functions !\n");
615 rc = -ENODEV;
616 goto bail;
617 }
618
619 /* Check that we have complete sets */
620 if (pfunc_cpu0_volt_high == NULL || pfunc_cpu0_volt_low == NULL) {
621 pmf_put_function(pfunc_cpu0_volt_high);
622 pmf_put_function(pfunc_cpu0_volt_low);
623 pfunc_cpu0_volt_high = pfunc_cpu0_volt_low = NULL;
624 has_volt = 0;
625 }
626 if (!has_volt ||
627 pfunc_cpu1_volt_high == NULL || pfunc_cpu1_volt_low == NULL) {
628 pmf_put_function(pfunc_cpu1_volt_high);
629 pmf_put_function(pfunc_cpu1_volt_low);
630 pfunc_cpu1_volt_high = pfunc_cpu1_volt_low = NULL;
631 }
632
633 /* Note: The device tree also contains a "platform-set-values"
634 * function for which I haven't quite figured out the usage. It
635 * might have to be called on init and/or wakeup, I'm not too sure
636 * but things seem to work fine without it so far ...
637 */
638
639 /* Get max frequency from device-tree */
640 valp = of_get_property(cpunode, "clock-frequency", NULL);
641 if (!valp) {
642 printk(KERN_ERR "cpufreq: Can't find CPU frequency !\n");
643 rc = -ENODEV;
644 goto bail;
645 }
646
647 max_freq = (*valp)/1000;
648
649 /* Now calculate reduced frequency by using the cpuid input freq
650 * ratio. This requires 64 bits math unless we are willing to lose
651 * some precision
652 */
653 ih = *((u32 *)(eeprom + 0x10));
654 il = *((u32 *)(eeprom + 0x20));
655
656 /* Check for machines with no useful settings */
657 if (il == ih) {
658 printk(KERN_WARNING "cpufreq: No low frequency mode available"
659 " on this model !\n");
660 rc = -ENODEV;
661 goto bail;
662 }
663
664 min_freq = 0;
665 if (ih != 0 && il != 0)
666 min_freq = (max_freq * il) / ih;
667
668 /* Sanity check */
669 if (min_freq >= max_freq || min_freq < 1000) {
670 printk(KERN_ERR "cpufreq: Can't calculate low frequency !\n");
671 rc = -ENXIO;
672 goto bail;
673 }
674 g5_cpu_freqs[0].frequency = max_freq;
675 g5_cpu_freqs[1].frequency = min_freq;
676
677 /* Set callbacks */
678 transition_latency = CPUFREQ_ETERNAL;
679 g5_switch_volt = g5_pfunc_switch_volt;
680 g5_switch_freq = g5_pfunc_switch_freq;
681 g5_query_freq = g5_pfunc_query_freq;
682
683 /* Force apply current frequency to make sure everything is in
684 * sync (voltage is right for example). Firmware may leave us with
685 * a strange setting ...
686 */
687 g5_switch_volt(CPUFREQ_HIGH);
688 msleep(10);
689 g5_pmode_cur = -1;
690 g5_switch_freq(g5_query_freq());
691
692 printk(KERN_INFO "Registering G5 CPU frequency driver\n");
693 printk(KERN_INFO "Frequency method: i2c/pfunc, "
694 "Voltage method: %s\n", has_volt ? "i2c/pfunc" : "none");
695 printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n",
696 g5_cpu_freqs[1].frequency/1000,
697 g5_cpu_freqs[0].frequency/1000,
698 g5_cpu_freqs[g5_pmode_cur].frequency/1000);
699
700 rc = cpufreq_register_driver(&g5_cpufreq_driver);
701 bail:
702 if (rc != 0) {
703 pmf_put_function(pfunc_cpu_getfreq);
704 pmf_put_function(pfunc_cpu_setfreq_high);
705 pmf_put_function(pfunc_cpu_setfreq_low);
706 pmf_put_function(pfunc_slewing_done);
707 pmf_put_function(pfunc_cpu0_volt_high);
708 pmf_put_function(pfunc_cpu0_volt_low);
709 pmf_put_function(pfunc_cpu1_volt_high);
710 pmf_put_function(pfunc_cpu1_volt_low);
711 }
712 of_node_put(hwclock);
713 of_node_put(cpuid);
714 of_node_put(cpunode);
715
716 return rc;
717}
718
719static int __init g5_cpufreq_init(void)
720{
721 struct device_node *cpus;
722 int rc = 0;
723
724 cpus = of_find_node_by_path("/cpus");
725 if (cpus == NULL) {
726 DBG("No /cpus node !\n");
727 return -ENODEV;
728 }
729
730 if (of_machine_is_compatible("PowerMac7,2") ||
731 of_machine_is_compatible("PowerMac7,3") ||
732 of_machine_is_compatible("RackMac3,1"))
733 rc = g5_pm72_cpufreq_init(cpus);
734#ifdef CONFIG_PMAC_SMU
735 else
736 rc = g5_neo2_cpufreq_init(cpus);
737#endif /* CONFIG_PMAC_SMU */
738
739 of_node_put(cpus);
740 return rc;
741}
742
743module_init(g5_cpufreq_init);
744
745
746MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index bdb738a69e41..5cbd4d67d5c4 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -192,7 +192,7 @@ static int psurge_secondary_ipi_init(void)
 {
 	int rc = -ENOMEM;
 
-	psurge_host = irq_domain_add_nomap(NULL, 0, &psurge_host_ops, NULL);
+	psurge_host = irq_domain_add_nomap(NULL, ~0, &psurge_host_ops, NULL);
 
 	if (psurge_host)
 		psurge_secondary_virq = irq_create_direct_mapping(psurge_host);
@@ -885,7 +885,7 @@ static int smp_core99_cpu_notify(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __cpuinitdata smp_core99_cpu_nb = {
+static struct notifier_block smp_core99_cpu_nb = {
 	.notifier_call = smp_core99_cpu_notify,
 };
 #endif /* CONFIG_HOTPLUG_CPU */
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index bcc3cb48a44e..7fe595152478 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -3,3 +3,4 @@ obj-y += opal-rtc.o opal-nvram.o
 
 obj-$(CONFIG_SMP)	+= smp.o
 obj-$(CONFIG_PCI)	+= pci.o pci-p5ioc2.o pci-ioda.o
+obj-$(CONFIG_EEH)	+= eeh-ioda.o eeh-powernv.o
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
new file mode 100644
index 000000000000..0cd1c4a71755
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -0,0 +1,916 @@
1/*
2 * This file implements the functions needed by EEH on IODA-compliant
3 * chips. Most of the EEH support here is built on top of the OPAL
4 * APIs.
5 *
6 * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 */
13
14#include <linux/bootmem.h>
15#include <linux/debugfs.h>
16#include <linux/delay.h>
17#include <linux/init.h>
18#include <linux/io.h>
19#include <linux/irq.h>
20#include <linux/kernel.h>
21#include <linux/msi.h>
22#include <linux/notifier.h>
23#include <linux/pci.h>
24#include <linux/string.h>
25
26#include <asm/eeh.h>
27#include <asm/eeh_event.h>
28#include <asm/io.h>
29#include <asm/iommu.h>
30#include <asm/msi_bitmap.h>
31#include <asm/opal.h>
32#include <asm/pci-bridge.h>
33#include <asm/ppc-pci.h>
34#include <asm/tce.h>
35
36#include "powernv.h"
37#include "pci.h"
38
39/* Debugging option */
40#ifdef IODA_EEH_DBG_ON
41#define IODA_EEH_DBG(args...) pr_info(args)
42#else
43#define IODA_EEH_DBG(args...)
44#endif
45
46static char *hub_diag = NULL;
47static int ioda_eeh_nb_init = 0;
48
49static int ioda_eeh_event(struct notifier_block *nb,
50 unsigned long events, void *change)
51{
52 uint64_t changed_evts = (uint64_t)change;
53
54 /* We simply send a special EEH event */
55 if ((changed_evts & OPAL_EVENT_PCI_ERROR) &&
56 (events & OPAL_EVENT_PCI_ERROR))
57 eeh_send_failure_event(NULL);
58
59 return 0;
60}
61
62static struct notifier_block ioda_eeh_nb = {
63 .notifier_call = ioda_eeh_event,
64 .next = NULL,
65 .priority = 0
66};
67
68#ifdef CONFIG_DEBUG_FS
69static int ioda_eeh_dbgfs_set(void *data, u64 val)
70{
71 struct pci_controller *hose = data;
72 struct pnv_phb *phb = hose->private_data;
73
74 out_be64(phb->regs + 0xD10, val);
75 return 0;
76}
77
78static int ioda_eeh_dbgfs_get(void *data, u64 *val)
79{
80 struct pci_controller *hose = data;
81 struct pnv_phb *phb = hose->private_data;
82
83 *val = in_be64(phb->regs + 0xD10);
84 return 0;
85}
86
87DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_dbgfs_ops, ioda_eeh_dbgfs_get,
88 ioda_eeh_dbgfs_set, "0x%llx\n");
89#endif /* CONFIG_DEBUG_FS */
90
91/**
92 * ioda_eeh_post_init - Chip dependent post initialization
93 * @hose: PCI controller
94 *
95 * The function will be called after the EEH PEs and devices
96 * have been built, which means EEH is ready to provide
97 * service, with the I/O cache in place.
98 */
99static int ioda_eeh_post_init(struct pci_controller *hose)
100{
101 struct pnv_phb *phb = hose->private_data;
102 int ret;
103
104 /* Register OPAL event notifier */
105 if (!ioda_eeh_nb_init) {
106 ret = opal_notifier_register(&ioda_eeh_nb);
107 if (ret) {
108 pr_err("%s: Can't register OPAL event notifier (%d)\n",
109 __func__, ret);
110 return ret;
111 }
112
113 ioda_eeh_nb_init = 1;
114 }
115
116 /* FIXME: Enable it for PHB3 later */
117 if (phb->type == PNV_PHB_IODA1) {
118 if (!hub_diag) {
119 hub_diag = (char *)__get_free_page(GFP_KERNEL |
120 __GFP_ZERO);
121 if (!hub_diag) {
122 pr_err("%s: Out of memory !\n",
123 __func__);
124 return -ENOMEM;
125 }
126 }
127
128#ifdef CONFIG_DEBUG_FS
129 if (phb->dbgfs)
130 debugfs_create_file("err_injct", 0600,
131 phb->dbgfs, hose,
132 &ioda_eeh_dbgfs_ops);
133#endif
134
135 phb->eeh_state |= PNV_EEH_STATE_ENABLED;
136 }
137
138 return 0;
139}
140
141/**
142 * ioda_eeh_set_option - Set EEH operation or I/O setting
143 * @pe: EEH PE
144 * @option: options
145 *
146 * Enable or disable EEH option for the indicated PE. The
147 * function can also be used to enable I/O or DMA for the
148 * PE.
149 */
150static int ioda_eeh_set_option(struct eeh_pe *pe, int option)
151{
152 s64 ret;
153 u32 pe_no;
154 struct pci_controller *hose = pe->phb;
155 struct pnv_phb *phb = hose->private_data;
156
157 /* Check on PE number */
158 if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) {
159 pr_err("%s: PE address %x out of range [0, %x] "
160 "on PHB#%x\n",
161 __func__, pe->addr, phb->ioda.total_pe,
162 hose->global_number);
163 return -EINVAL;
164 }
165
166 pe_no = pe->addr;
167 switch (option) {
168 case EEH_OPT_DISABLE:
169 ret = -EEXIST;
170 break;
171 case EEH_OPT_ENABLE:
172 ret = 0;
173 break;
174 case EEH_OPT_THAW_MMIO:
175 ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
176 OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO);
177 if (ret) {
178 pr_warning("%s: Failed to enable MMIO for "
179 "PHB#%x-PE#%x, err=%lld\n",
180 __func__, hose->global_number, pe_no, ret);
181 return -EIO;
182 }
183
184 break;
185 case EEH_OPT_THAW_DMA:
186 ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
187 OPAL_EEH_ACTION_CLEAR_FREEZE_DMA);
188 if (ret) {
189 pr_warning("%s: Failed to enable DMA for "
190 "PHB#%x-PE#%x, err=%lld\n",
191 __func__, hose->global_number, pe_no, ret);
192 return -EIO;
193 }
194
195 break;
196 default:
197 pr_warning("%s: Invalid option %d\n", __func__, option);
198 return -EINVAL;
199 }
200
201 return ret;
202}
203
204/**
205 * ioda_eeh_get_state - Retrieve the state of PE
206 * @pe: EEH PE
207 *
208 * The PE's state should be retrieved from the PEEV, PEST
209 * IODA tables. Since OPAL exports a function to do
210 * exactly that, we had better use it.
211 */
212static int ioda_eeh_get_state(struct eeh_pe *pe)
213{
214 s64 ret = 0;
215 u8 fstate;
216 u16 pcierr;
217 u32 pe_no;
218 int result;
219 struct pci_controller *hose = pe->phb;
220 struct pnv_phb *phb = hose->private_data;
221
222 /*
223 * Sanity check on PE address. The PHB PE address should
224 * be zero.
225 */
226 if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) {
227 pr_err("%s: PE address %x out of range [0, %x] "
228 "on PHB#%x\n",
229 __func__, pe->addr, phb->ioda.total_pe,
230 hose->global_number);
231 return EEH_STATE_NOT_SUPPORT;
232 }
233
234 /* Retrieve PE status through OPAL */
235 pe_no = pe->addr;
236 ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no,
237 &fstate, &pcierr, NULL);
238 if (ret) {
239 pr_err("%s: Failed to get EEH status on "
240 "PHB#%x-PE#%x\n, err=%lld\n",
241 __func__, hose->global_number, pe_no, ret);
242 return EEH_STATE_NOT_SUPPORT;
243 }
244
245 /* Check PHB status */
246 if (pe->type & EEH_PE_PHB) {
247 result = 0;
248 result &= ~EEH_STATE_RESET_ACTIVE;
249
250 if (pcierr != OPAL_EEH_PHB_ERROR) {
251 result |= EEH_STATE_MMIO_ACTIVE;
252 result |= EEH_STATE_DMA_ACTIVE;
253 result |= EEH_STATE_MMIO_ENABLED;
254 result |= EEH_STATE_DMA_ENABLED;
255 }
256
257 return result;
258 }
259
260 /* Parse result out */
261 result = 0;
262 switch (fstate) {
263 case OPAL_EEH_STOPPED_NOT_FROZEN:
264 result &= ~EEH_STATE_RESET_ACTIVE;
265 result |= EEH_STATE_MMIO_ACTIVE;
266 result |= EEH_STATE_DMA_ACTIVE;
267 result |= EEH_STATE_MMIO_ENABLED;
268 result |= EEH_STATE_DMA_ENABLED;
269 break;
270 case OPAL_EEH_STOPPED_MMIO_FREEZE:
271 result &= ~EEH_STATE_RESET_ACTIVE;
272 result |= EEH_STATE_DMA_ACTIVE;
273 result |= EEH_STATE_DMA_ENABLED;
274 break;
275 case OPAL_EEH_STOPPED_DMA_FREEZE:
276 result &= ~EEH_STATE_RESET_ACTIVE;
277 result |= EEH_STATE_MMIO_ACTIVE;
278 result |= EEH_STATE_MMIO_ENABLED;
279 break;
280 case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE:
281 result &= ~EEH_STATE_RESET_ACTIVE;
282 break;
283 case OPAL_EEH_STOPPED_RESET:
284 result |= EEH_STATE_RESET_ACTIVE;
285 break;
286 case OPAL_EEH_STOPPED_TEMP_UNAVAIL:
287 result |= EEH_STATE_UNAVAILABLE;
288 break;
289 case OPAL_EEH_STOPPED_PERM_UNAVAIL:
290 result |= EEH_STATE_NOT_SUPPORT;
291 break;
292 default:
293 pr_warning("%s: Unexpected EEH status 0x%x "
294 "on PHB#%x-PE#%x\n",
295 __func__, fstate, hose->global_number, pe_no);
296 }
297
298 return result;
299}
300
301static int ioda_eeh_pe_clear(struct eeh_pe *pe)
302{
303 struct pci_controller *hose;
304 struct pnv_phb *phb;
305 u32 pe_no;
306 u8 fstate;
307 u16 pcierr;
308 s64 ret;
309
310 pe_no = pe->addr;
311 hose = pe->phb;
312 phb = pe->phb->private_data;
313
314 /* Clear the EEH error on the PE */
315 ret = opal_pci_eeh_freeze_clear(phb->opal_id,
316 pe_no, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
317 if (ret) {
318 pr_err("%s: Failed to clear EEH error for "
319 "PHB#%x-PE#%x, err=%lld\n",
320 __func__, hose->global_number, pe_no, ret);
321 return -EIO;
322 }
323
324 /*
325 * Read the PE state back and verify that the frozen
326 * state has been removed.
327 */
328 ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no,
329 &fstate, &pcierr, NULL);
330 if (ret) {
331 pr_err("%s: Failed to get EEH status on "
332 "PHB#%x-PE#%x\n, err=%lld\n",
333 __func__, hose->global_number, pe_no, ret);
334 return -EIO;
335 }
336
337 if (fstate != OPAL_EEH_STOPPED_NOT_FROZEN) {
338 pr_err("%s: Frozen state not cleared on "
339 "PHB#%x-PE#%x, sts=%x\n",
340 __func__, hose->global_number, pe_no, fstate);
341 return -EIO;
342 }
343
344 return 0;
345}
346
347static s64 ioda_eeh_phb_poll(struct pnv_phb *phb)
348{
349 s64 rc = OPAL_HARDWARE;
350
351 while (1) {
352 rc = opal_pci_poll(phb->opal_id);
353 if (rc <= 0)
354 break;
355
356 msleep(rc);
357 }
358
359 return rc;
360}
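/*
 * Convention assumed by the loop above: a positive return value from
 * opal_pci_poll() is the number of milliseconds the firmware asks us to
 * wait before polling again, while zero or a negative OPAL error code
 * means the operation has finished; callers then compare the result
 * against OPAL_SUCCESS.
 */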
361
362static int ioda_eeh_phb_reset(struct pci_controller *hose, int option)
363{
364 struct pnv_phb *phb = hose->private_data;
365 s64 rc = OPAL_HARDWARE;
366
367 pr_debug("%s: Reset PHB#%x, option=%d\n",
368 __func__, hose->global_number, option);
369
370 /* Issue PHB complete reset request */
371 if (option == EEH_RESET_FUNDAMENTAL ||
372 option == EEH_RESET_HOT)
373 rc = opal_pci_reset(phb->opal_id,
374 OPAL_PHB_COMPLETE,
375 OPAL_ASSERT_RESET);
376 else if (option == EEH_RESET_DEACTIVATE)
377 rc = opal_pci_reset(phb->opal_id,
378 OPAL_PHB_COMPLETE,
379 OPAL_DEASSERT_RESET);
380 if (rc < 0)
381 goto out;
382
383 /*
384 * Poll state of the PHB until the request is done
385 * successfully.
386 */
387 rc = ioda_eeh_phb_poll(phb);
388out:
389 if (rc != OPAL_SUCCESS)
390 return -EIO;
391
392 return 0;
393}
394
395static int ioda_eeh_root_reset(struct pci_controller *hose, int option)
396{
397 struct pnv_phb *phb = hose->private_data;
398 s64 rc = OPAL_SUCCESS;
399
400 pr_debug("%s: Reset PHB#%x, option=%d\n",
401 __func__, hose->global_number, option);
402
403 /*
404 * During the reset deassert time, we needn't care about
405 * the reset scope because the firmware does nothing
406 * for fundamental or hot reset during the deassert phase.
407 */
408 if (option == EEH_RESET_FUNDAMENTAL)
409 rc = opal_pci_reset(phb->opal_id,
410 OPAL_PCI_FUNDAMENTAL_RESET,
411 OPAL_ASSERT_RESET);
412 else if (option == EEH_RESET_HOT)
413 rc = opal_pci_reset(phb->opal_id,
414 OPAL_PCI_HOT_RESET,
415 OPAL_ASSERT_RESET);
416 else if (option == EEH_RESET_DEACTIVATE)
417 rc = opal_pci_reset(phb->opal_id,
418 OPAL_PCI_HOT_RESET,
419 OPAL_DEASSERT_RESET);
420 if (rc < 0)
421 goto out;
422
423 /* Poll state of the PHB until the request is done */
424 rc = ioda_eeh_phb_poll(phb);
425out:
426 if (rc != OPAL_SUCCESS)
427 return -EIO;
428
429 return 0;
430}
431
432static int ioda_eeh_bridge_reset(struct pci_controller *hose,
433 struct pci_dev *dev, int option)
434{
435 u16 ctrl;
436
437 pr_debug("%s: Reset device %04x:%02x:%02x.%01x with option %d\n",
438 __func__, hose->global_number, dev->bus->number,
439 PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), option);
440
441 switch (option) {
442 case EEH_RESET_FUNDAMENTAL:
443 case EEH_RESET_HOT:
444 pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl);
445 ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
446 pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
447 break;
448 case EEH_RESET_DEACTIVATE:
449 pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl);
450 ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
451 pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
452 break;
453 }
454
455 return 0;
456}
457
458/**
459 * ioda_eeh_reset - Reset the indicated PE
460 * @pe: EEH PE
461 * @option: reset option
462 *
463 * Do reset on the indicated PE. For PCI bus sensitive PE,
464 * we need to reset the parent p2p bridge. The PHB has to
465 * be reinitialized if the p2p bridge is root bridge. For
466 * PCI device sensitive PE, we will try to reset the device
467 * through FLR. For now, we don't have OPAL APIs to do HARD
468 * reset yet, so all reset would be SOFT (HOT) reset.
469 */
470static int ioda_eeh_reset(struct eeh_pe *pe, int option)
471{
472 struct pci_controller *hose = pe->phb;
473 struct eeh_dev *edev;
474 struct pci_dev *dev;
475 int ret;
476
477 /*
478 * Anyway, we have to clear the problematic state for the
479 * corresponding PE. However, we needn't do it if the PE
480 * is PHB associated. That means the PHB is having fatal
481 * errors and needs a reset. Furthermore, the AIB interface
482 * isn't reliable anymore.
483 */
484 if (!(pe->type & EEH_PE_PHB) &&
485 (option == EEH_RESET_HOT ||
486 option == EEH_RESET_FUNDAMENTAL)) {
487 ret = ioda_eeh_pe_clear(pe);
488 if (ret)
489 return -EIO;
490 }
491
492 /*
493 * The rules applied to reset, either fundamental or hot reset:
494 *
495 * We always reset the direct upstream bridge of the PE. If the
496 * direct upstream bridge isn't root bridge, we always take hot
497 * reset no matter what option (fundamental or hot) is. Otherwise,
498 * we should do the reset according to the required option.
499 */
500 if (pe->type & EEH_PE_PHB) {
501 ret = ioda_eeh_phb_reset(hose, option);
502 } else {
503 if (pe->type & EEH_PE_DEVICE) {
504 /*
505 * If it's device PE, we didn't refer to the parent
506 * PCI bus yet. So we have to figure it out indirectly.
507 */
508 edev = list_first_entry(&pe->edevs,
509 struct eeh_dev, list);
510 dev = eeh_dev_to_pci_dev(edev);
511 dev = dev->bus->self;
512 } else {
513 /*
514 * If it's bus PE, the parent PCI bus is already there
515 * and just pick it up.
516 */
517 dev = pe->bus->self;
518 }
519
520 /*
521 * Do reset based on the fact that the direct upstream bridge
522 * is root bridge (port) or not.
523 */
524 if (dev->bus->number == 0)
525 ret = ioda_eeh_root_reset(hose, option);
526 else
527 ret = ioda_eeh_bridge_reset(hose, dev, option);
528 }
529
530 return ret;
531}
532
533/**
534 * ioda_eeh_get_log - Retrieve error log
535 * @pe: EEH PE
536 * @severity: Severity level of the log
537 * @drv_log: buffer to store the log
538 * @len: space of the log buffer
539 *
540 * The function is used to retrieve error log from P7IOC.
541 */
542static int ioda_eeh_get_log(struct eeh_pe *pe, int severity,
543 char *drv_log, unsigned long len)
544{
545 s64 ret;
546 unsigned long flags;
547 struct pci_controller *hose = pe->phb;
548 struct pnv_phb *phb = hose->private_data;
549
550 spin_lock_irqsave(&phb->lock, flags);
551
552 ret = opal_pci_get_phb_diag_data2(phb->opal_id,
553 phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE);
554 if (ret) {
555 spin_unlock_irqrestore(&phb->lock, flags);
556 pr_warning("%s: Failed to get log for PHB#%x-PE#%x\n",
557 __func__, hose->global_number, pe->addr);
558 return -EIO;
559 }
560
561 /*
562 * FIXME: We probably need to log the error somewhere.
563 * Let's fix that up in the future.
564 */
565 /* pr_info("%s", phb->diag.blob); */
566
567 spin_unlock_irqrestore(&phb->lock, flags);
568
569 return 0;
570}
571
572/**
573 * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE
574 * @pe: EEH PE
575 *
576 * For particular PE, it might have included PCI bridges. In order
577 * to make the PE work properly, those PCI bridges should be configured
578 * correctly. However, we need do nothing on P7IOC since the reset
579 * function will do everything that should be covered by the function.
580 */
581static int ioda_eeh_configure_bridge(struct eeh_pe *pe)
582{
583 return 0;
584}
585
586static void ioda_eeh_hub_diag_common(struct OpalIoP7IOCErrorData *data)
587{
588 /* GEM */
589 pr_info(" GEM XFIR: %016llx\n", data->gemXfir);
590 pr_info(" GEM RFIR: %016llx\n", data->gemRfir);
591 pr_info(" GEM RIRQFIR: %016llx\n", data->gemRirqfir);
592 pr_info(" GEM Mask: %016llx\n", data->gemMask);
593 pr_info(" GEM RWOF: %016llx\n", data->gemRwof);
594
595 /* LEM */
596 pr_info(" LEM FIR: %016llx\n", data->lemFir);
597 pr_info(" LEM Error Mask: %016llx\n", data->lemErrMask);
598 pr_info(" LEM Action 0: %016llx\n", data->lemAction0);
599 pr_info(" LEM Action 1: %016llx\n", data->lemAction1);
600 pr_info(" LEM WOF: %016llx\n", data->lemWof);
601}
602
603static void ioda_eeh_hub_diag(struct pci_controller *hose)
604{
605 struct pnv_phb *phb = hose->private_data;
606 struct OpalIoP7IOCErrorData *data;
607 long rc;
608
609 data = (struct OpalIoP7IOCErrorData *)hub_diag; /* the buffer allocated in ioda_eeh_post_init(), not the function address */
610 rc = opal_pci_get_hub_diag_data(phb->hub_id, data, PAGE_SIZE);
611 if (rc != OPAL_SUCCESS) {
612 pr_warning("%s: Failed to get HUB#%llx diag-data (%ld)\n",
613 __func__, phb->hub_id, rc);
614 return;
615 }
616
617 switch (data->type) {
618 case OPAL_P7IOC_DIAG_TYPE_RGC:
619 pr_info("P7IOC diag-data for RGC\n\n");
620 ioda_eeh_hub_diag_common(data);
621 pr_info(" RGC Status: %016llx\n", data->rgc.rgcStatus);
622 pr_info(" RGC LDCP: %016llx\n", data->rgc.rgcLdcp);
623 break;
624 case OPAL_P7IOC_DIAG_TYPE_BI:
625 pr_info("P7IOC diag-data for BI %s\n\n",
626 data->bi.biDownbound ? "Downbound" : "Upbound");
627 ioda_eeh_hub_diag_common(data);
628 pr_info(" BI LDCP 0: %016llx\n", data->bi.biLdcp0);
629 pr_info(" BI LDCP 1: %016llx\n", data->bi.biLdcp1);
630 pr_info(" BI LDCP 2: %016llx\n", data->bi.biLdcp2);
631 pr_info(" BI Fence Status: %016llx\n", data->bi.biFenceStatus);
632 break;
633 case OPAL_P7IOC_DIAG_TYPE_CI:
634 pr_info("P7IOC diag-data for CI Port %d\\nn",
635 data->ci.ciPort);
636 ioda_eeh_hub_diag_common(data);
637 pr_info(" CI Port Status: %016llx\n", data->ci.ciPortStatus);
638 pr_info(" CI Port LDCP: %016llx\n", data->ci.ciPortLdcp);
639 break;
640 case OPAL_P7IOC_DIAG_TYPE_MISC:
641 pr_info("P7IOC diag-data for MISC\n\n");
642 ioda_eeh_hub_diag_common(data);
643 break;
644 case OPAL_P7IOC_DIAG_TYPE_I2C:
645 pr_info("P7IOC diag-data for I2C\n\n");
646 ioda_eeh_hub_diag_common(data);
647 break;
648 default:
649 pr_warning("%s: Invalid type of HUB#%llx diag-data (%d)\n",
650 __func__, phb->hub_id, data->type);
651 }
652}
653
654static void ioda_eeh_p7ioc_phb_diag(struct pci_controller *hose,
655 struct OpalIoPhbErrorCommon *common)
656{
657 struct OpalIoP7IOCPhbErrorData *data;
658 int i;
659
660 data = (struct OpalIoP7IOCPhbErrorData *)common;
661
662 pr_info("P7IOC PHB#%x Diag-data (Version: %d)\n\n",
663 hose->global_number, common->version);
664
665 pr_info(" brdgCtl: %08x\n", data->brdgCtl);
666
667 pr_info(" portStatusReg: %08x\n", data->portStatusReg);
668 pr_info(" rootCmplxStatus: %08x\n", data->rootCmplxStatus);
669 pr_info(" busAgentStatus: %08x\n", data->busAgentStatus);
670
671 pr_info(" deviceStatus: %08x\n", data->deviceStatus);
672 pr_info(" slotStatus: %08x\n", data->slotStatus);
673 pr_info(" linkStatus: %08x\n", data->linkStatus);
674 pr_info(" devCmdStatus: %08x\n", data->devCmdStatus);
675 pr_info(" devSecStatus: %08x\n", data->devSecStatus);
676
677 pr_info(" rootErrorStatus: %08x\n", data->rootErrorStatus);
678 pr_info(" uncorrErrorStatus: %08x\n", data->uncorrErrorStatus);
679 pr_info(" corrErrorStatus: %08x\n", data->corrErrorStatus);
680 pr_info(" tlpHdr1: %08x\n", data->tlpHdr1);
681 pr_info(" tlpHdr2: %08x\n", data->tlpHdr2);
682 pr_info(" tlpHdr3: %08x\n", data->tlpHdr3);
683 pr_info(" tlpHdr4: %08x\n", data->tlpHdr4);
684 pr_info(" sourceId: %08x\n", data->sourceId);
685
686 pr_info(" errorClass: %016llx\n", data->errorClass);
687 pr_info(" correlator: %016llx\n", data->correlator);
688 pr_info(" p7iocPlssr: %016llx\n", data->p7iocPlssr);
689 pr_info(" p7iocCsr: %016llx\n", data->p7iocCsr);
690 pr_info(" lemFir: %016llx\n", data->lemFir);
691 pr_info(" lemErrorMask: %016llx\n", data->lemErrorMask);
692 pr_info(" lemWOF: %016llx\n", data->lemWOF);
693 pr_info(" phbErrorStatus: %016llx\n", data->phbErrorStatus);
694 pr_info(" phbFirstErrorStatus: %016llx\n", data->phbFirstErrorStatus);
695 pr_info(" phbErrorLog0: %016llx\n", data->phbErrorLog0);
696 pr_info(" phbErrorLog1: %016llx\n", data->phbErrorLog1);
697 pr_info(" mmioErrorStatus: %016llx\n", data->mmioErrorStatus);
698 pr_info(" mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus);
699 pr_info(" mmioErrorLog0: %016llx\n", data->mmioErrorLog0);
700 pr_info(" mmioErrorLog1: %016llx\n", data->mmioErrorLog1);
701 pr_info(" dma0ErrorStatus: %016llx\n", data->dma0ErrorStatus);
702 pr_info(" dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus);
703 pr_info(" dma0ErrorLog0: %016llx\n", data->dma0ErrorLog0);
704 pr_info(" dma0ErrorLog1: %016llx\n", data->dma0ErrorLog1);
705 pr_info(" dma1ErrorStatus: %016llx\n", data->dma1ErrorStatus);
706 pr_info(" dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus);
707 pr_info(" dma1ErrorLog0: %016llx\n", data->dma1ErrorLog0);
708 pr_info(" dma1ErrorLog1: %016llx\n", data->dma1ErrorLog1);
709
710 for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) {
711 if ((data->pestA[i] >> 63) == 0 &&
712 (data->pestB[i] >> 63) == 0)
713 continue;
714
715 pr_info(" PE[%3d] PESTA: %016llx\n", i, data->pestA[i]);
716 pr_info(" PESTB: %016llx\n", data->pestB[i]);
717 }
718}
719
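The PEST (PE State Table) loop above keeps only entries whose PESTA or PESTB most-significant bit is set; on IODA that bit flags a frozen PE (PESTA for the MMIO path, PESTB for the DMA path; treat that mapping as an assumption here). A standalone sketch of the same test, with made-up register values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t pestA = 0x8000000000000000ULL;	/* MSB set: frozen */
	uint64_t pestB = 0x0000000000000000ULL;

	/* Same filter as the kernel loop: skip when both MSBs are clear */
	if ((pestA >> 63) != 0 || (pestB >> 63) != 0)
		printf("PE frozen: PESTA=%016llx PESTB=%016llx\n",
		       (unsigned long long)pestA,
		       (unsigned long long)pestB);
	return 0;
}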
720static void ioda_eeh_phb_diag(struct pci_controller *hose)
721{
722 struct pnv_phb *phb = hose->private_data;
723 struct OpalIoPhbErrorCommon *common;
724 long rc;
725
726 common = (struct OpalIoPhbErrorCommon *)phb->diag.blob;
727 rc = opal_pci_get_phb_diag_data2(phb->opal_id, common, PNV_PCI_DIAG_BUF_SIZE);
728 if (rc != OPAL_SUCCESS) {
729 pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n",
730 __func__, hose->global_number, rc);
731 return;
732 }
733
734 switch (common->ioType) {
735 case OPAL_PHB_ERROR_DATA_TYPE_P7IOC:
736 ioda_eeh_p7ioc_phb_diag(hose, common);
737 break;
738 default:
739 pr_warning("%s: Unrecognized I/O chip %d\n",
740 __func__, common->ioType);
741 }
742}
743
744static int ioda_eeh_get_phb_pe(struct pci_controller *hose,
745 struct eeh_pe **pe)
746{
747 struct eeh_pe *phb_pe;
748
749 phb_pe = eeh_phb_pe_get(hose);
750 if (!phb_pe) {
751 pr_warning("%s: Can't find PE for PHB#%x\n",
752 __func__, hose->global_number);
753 return -EEXIST;
754 }
755
756 *pe = phb_pe;
757 return 0;
758}
759
760static int ioda_eeh_get_pe(struct pci_controller *hose,
761 u16 pe_no, struct eeh_pe **pe)
762{
763 struct eeh_pe *phb_pe, *dev_pe;
764 struct eeh_dev dev;
765
766 /* Find the PHB PE */
767 if (ioda_eeh_get_phb_pe(hose, &phb_pe))
768 return -EEXIST;
769
770 /* Find the PE according to PE# */
771 memset(&dev, 0, sizeof(struct eeh_dev));
772 dev.phb = hose;
773 dev.pe_config_addr = pe_no;
774 dev_pe = eeh_pe_get(&dev);
775 if (!dev_pe) {
776 pr_warning("%s: Can't find PE for PHB#%x - PE#%x\n",
777 __func__, hose->global_number, pe_no);
778 return -EEXIST;
779 }
780
781 *pe = dev_pe;
782 return 0;
783}
784
785/**
786 * ioda_eeh_next_error - Retrieve next error for EEH core to handle
787 * @pe: The affected PE
788 *
789 * The function is expected to be called by the EEH core when it
790 * gets a special EEH event (one without a bound PE). It calls the
791 * OPAL APIs to fetch the next error to handle. Informational errors
792 * are handled internally by the platform. However, a dead IOC, dead
793 * PHB, fenced PHB or frozen PE must eventually be handled by the EEH core.
794 */
795static int ioda_eeh_next_error(struct eeh_pe **pe)
796{
797 struct pci_controller *hose, *tmp;
798 struct pnv_phb *phb;
799 u64 frozen_pe_no;
800 u16 err_type, severity;
801 long rc;
802 int ret = 1;
803
804 /*
805 * While running here, it's safe to purge the event queue.
806 * And we should keep the cached OPAL notifier event synchronized
807 * between the kernel and firmware.
808 */
809 eeh_remove_event(NULL);
810 opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
811
812 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
813 /*
814 * If the subordinate PCI buses of the PHB have been
815 * removed, we needn't take care of it any more.
816 */
817 phb = hose->private_data;
818 if (phb->eeh_state & PNV_EEH_STATE_REMOVED)
819 continue;
820
821 rc = opal_pci_next_error(phb->opal_id,
822 &frozen_pe_no, &err_type, &severity);
823
824 /* If OPAL API returns error, we needn't proceed */
825 if (rc != OPAL_SUCCESS) {
826 IODA_EEH_DBG("%s: Invalid return value on "
827 "PHB#%x (0x%lx) from opal_pci_next_error",
828 __func__, hose->global_number, rc);
829 continue;
830 }
831
832 /* If the PHB doesn't have error, stop processing */
833 if (err_type == OPAL_EEH_NO_ERROR ||
834 severity == OPAL_EEH_SEV_NO_ERROR) {
835 IODA_EEH_DBG("%s: No error found on PHB#%x\n",
836 __func__, hose->global_number);
837 continue;
838 }
839
840 /*
841 * Process the error. When multiple errors are present on the
842 * specific PHB, we expect the one with the highest priority
843 * to be reported.
844 */
845 IODA_EEH_DBG("%s: Error (%d, %d, %llu) on PHB#%x\n",
846 __func__, err_type, severity, frozen_pe_no, hose->global_number);
847 switch (err_type) {
848 case OPAL_EEH_IOC_ERROR:
849 if (severity == OPAL_EEH_SEV_IOC_DEAD) {
850 list_for_each_entry_safe(hose, tmp,
851 &hose_list, list_node) {
852 phb = hose->private_data;
853 phb->eeh_state |= PNV_EEH_STATE_REMOVED;
854 }
855
856 pr_err("EEH: dead IOC detected\n");
857 ret = 4;
858 goto out;
859 } else if (severity == OPAL_EEH_SEV_INF) {
860 pr_info("EEH: IOC informative error "
861 "detected\n");
862 ioda_eeh_hub_diag(hose);
863 }
864
865 break;
866 case OPAL_EEH_PHB_ERROR:
867 if (severity == OPAL_EEH_SEV_PHB_DEAD) {
868 if (ioda_eeh_get_phb_pe(hose, pe))
869 break;
870
871 pr_err("EEH: dead PHB#%x detected\n",
872 hose->global_number);
873 phb->eeh_state |= PNV_EEH_STATE_REMOVED;
874 ret = 3;
875 goto out;
876 } else if (severity == OPAL_EEH_SEV_PHB_FENCED) {
877 if (ioda_eeh_get_phb_pe(hose, pe))
878 break;
879
880 pr_err("EEH: fenced PHB#%x detected\n",
881 hose->global_number);
882 ret = 2;
883 goto out;
884 } else if (severity == OPAL_EEH_SEV_INF) {
885 pr_info("EEH: PHB#%x informative error "
886 "detected\n",
887 hose->global_number);
888 ioda_eeh_phb_diag(hose);
889 }
890
891 break;
892 case OPAL_EEH_PE_ERROR:
893 if (ioda_eeh_get_pe(hose, frozen_pe_no, pe))
894 break;
895
896 pr_err("EEH: Frozen PE#%x on PHB#%x detected\n",
897 (*pe)->addr, (*pe)->phb->global_number);
898 ret = 1;
899 goto out;
900 }
901 }
902
903 ret = 0;
904out:
905 return ret;
906}
907
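A minimal sketch of how a consumer might act on the return codes above (1 = frozen PE, 2 = fenced PHB, 3 = dead PHB, 4 = dead IOC, 0 = nothing left to process). The example_* handlers are hypothetical; the real dispatch lives in the EEH core's event handling path.

/* Hypothetical handlers, for illustration only */
static void example_recover_pe(struct eeh_pe *pe) { }
static void example_handle_phb(struct eeh_pe *pe, int sev) { }
static void example_handle_dead_ioc(void) { }

static void example_consume_errors(void)
{
	struct eeh_pe *pe = NULL;
	int rc;

	while ((rc = ioda_eeh_next_error(&pe)) > 0) {
		switch (rc) {
		case 1:	/* frozen PE: recover just this PE */
			example_recover_pe(pe);
			break;
		case 2:	/* fenced PHB */
		case 3:	/* dead PHB */
			example_handle_phb(pe, rc);
			break;
		case 4:	/* dead IOC: every PHB was marked removed */
			example_handle_dead_ioc();
			return;
		}
	}
}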
908struct pnv_eeh_ops ioda_eeh_ops = {
909 .post_init = ioda_eeh_post_init,
910 .set_option = ioda_eeh_set_option,
911 .get_state = ioda_eeh_get_state,
912 .reset = ioda_eeh_reset,
913 .get_log = ioda_eeh_get_log,
914 .configure_bridge = ioda_eeh_configure_bridge,
915 .next_error = ioda_eeh_next_error
916};
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
new file mode 100644
index 000000000000..79663d26e6ea
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -0,0 +1,390 @@
1/*
2 * This file implements the platform-dependent EEH operations for the
3 * powernv platform, which runs the kernel in hypervisor mode on
4 * bare metal.
5 *
6 * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 */
13
14#include <linux/atomic.h>
15#include <linux/delay.h>
16#include <linux/export.h>
17#include <linux/init.h>
18#include <linux/list.h>
19#include <linux/msi.h>
20#include <linux/of.h>
21#include <linux/pci.h>
22#include <linux/proc_fs.h>
23#include <linux/rbtree.h>
24#include <linux/sched.h>
25#include <linux/seq_file.h>
26#include <linux/spinlock.h>
27
28#include <asm/eeh.h>
29#include <asm/eeh_event.h>
30#include <asm/firmware.h>
31#include <asm/io.h>
32#include <asm/iommu.h>
33#include <asm/machdep.h>
34#include <asm/msi_bitmap.h>
35#include <asm/opal.h>
36#include <asm/ppc-pci.h>
37
38#include "powernv.h"
39#include "pci.h"
40
41/**
42 * powernv_eeh_init - EEH platform dependent initialization
43 *
44 * EEH platform dependent initialization on powernv
45 */
46static int powernv_eeh_init(void)
47{
48 /* We require OPALv3 */
49 if (!firmware_has_feature(FW_FEATURE_OPALv3)) {
50 pr_warning("%s: OPALv3 is required!\n", __func__);
51 return -EINVAL;
52 }
53
54 /* Set EEH probe mode */
55 eeh_probe_mode_set(EEH_PROBE_MODE_DEV);
56
57 return 0;
58}
59
60/**
61 * powernv_eeh_post_init - EEH platform dependent post initialization
62 *
63 * EEH platform dependent post initialization on powernv. When
64 * the function is called, the EEH PEs and devices should have
65 * been built. Once the I/O address cache has been built as well,
66 * EEH is ready to provide service.
67 */
68static int powernv_eeh_post_init(void)
69{
70 struct pci_controller *hose;
71 struct pnv_phb *phb;
72 int ret = 0;
73
74 list_for_each_entry(hose, &hose_list, list_node) {
75 phb = hose->private_data;
76
77 if (phb->eeh_ops && phb->eeh_ops->post_init) {
78 ret = phb->eeh_ops->post_init(hose);
79 if (ret)
80 break;
81 }
82 }
83
84 return ret;
85}
86
87/**
88 * powernv_eeh_dev_probe - Do probe on PCI device
89 * @dev: PCI device
90 * @flag: unused
91 *
92 * When the EEH module is installed during system boot, all PCI
93 * devices are checked one by one to see whether they support EEH.
94 * The function is introduced for that purpose. By default, EEH is
95 * enabled on all PCI devices, so we only need to do the necessary
96 * initialization on the corresponding eeh device and create the PE
97 * accordingly.
98 *
99 * Note that it's unsafe to retrieve the EEH device through the
100 * corresponding PCI device: during a PCI device hotplug, which may
101 * have been triggered by the EEH core, the binding between the EEH
102 * device and the PCI device hasn't been built yet.
103 */
104static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
105{
106 struct pci_controller *hose = pci_bus_to_host(dev->bus);
107 struct pnv_phb *phb = hose->private_data;
108 struct device_node *dn = pci_device_to_OF_node(dev);
109 struct eeh_dev *edev = of_node_to_eeh_dev(dn);
110
111 /*
112 * We don't have an OF node for the root bridge, which
113 * doesn't have any subordinate PCI devices anyway. So
114 * when we hit the root bridge it's not reasonable to
115 * continue the probing.
116 */
117 if (!dn || !edev || edev->pe)
118 return 0;
119
120 /* Skip for PCI-ISA bridge */
121 if ((dev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
122 return 0;
123
124 /* Initialize eeh device */
125 edev->class_code = dev->class;
126 edev->mode &= 0xFFFFFF00;
127 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
128 edev->mode |= EEH_DEV_BRIDGE;
129 if (pci_is_pcie(dev)) {
130 edev->pcie_cap = pci_pcie_cap(dev);
131
132 if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT)
133 edev->mode |= EEH_DEV_ROOT_PORT;
134 else if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM)
135 edev->mode |= EEH_DEV_DS_PORT;
136 }
137
138 edev->config_addr = ((dev->bus->number << 8) | dev->devfn);
139 edev->pe_config_addr = phb->bdfn_to_pe(phb, dev->bus, dev->devfn & 0xff);
140
141 /* Create PE */
142 eeh_add_to_parent_pe(edev);
143
144 /*
145 * Enable EEH explicitly so that we will do EEH check
146 * while accessing I/O stuff
147 *
148 * FIXME: Enable that for PHB3 later
149 */
150 if (phb->type == PNV_PHB_IODA1)
151 eeh_subsystem_enabled = 1;
152
153 /* Save memory bars */
154 eeh_save_bars(edev);
155
156 return 0;
157}
158
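The config_addr packing above follows the standard PCI BDF layout: bus number in bits 15:8, device in bits 7:3, function in bits 2:0. A tiny standalone illustration with made-up values:

#include <stdio.h>

int main(void)
{
	unsigned int bus = 0x01, dev = 0x05, fn = 0x0;
	unsigned int devfn = (dev << 3) | fn;		/* PCI_DEVFN(5, 0) == 0x28 */
	unsigned int config_addr = (bus << 8) | devfn;	/* 0x0128 */

	printf("config_addr = 0x%04x\n", config_addr);
	return 0;
}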
159/**
160 * powernv_eeh_set_option - Initialize EEH or MMIO/DMA reenable
161 * @pe: EEH PE
162 * @option: operation to be issued
163 *
164 * The function is used to control the EEH functionality globally.
165 * Currently, the following options are supported according to PAPR:
166 * Enable EEH, Disable EEH, Enable MMIO and Enable DMA
167 */
168static int powernv_eeh_set_option(struct eeh_pe *pe, int option)
169{
170 struct pci_controller *hose = pe->phb;
171 struct pnv_phb *phb = hose->private_data;
172 int ret = -EEXIST;
173
174 /*
175 * What we need to do is pass it down to the hardware
176 * implementation to handle.
177 */
178 if (phb->eeh_ops && phb->eeh_ops->set_option)
179 ret = phb->eeh_ops->set_option(pe, option);
180
181 return ret;
182}
183
184/**
185 * powernv_eeh_get_pe_addr - Retrieve PE address
186 * @pe: EEH PE
187 *
188 * Retrieve the PE address according to the given traditional
189 * PCI BDF (Bus/Device/Function) address.
190 */
191static int powernv_eeh_get_pe_addr(struct eeh_pe *pe)
192{
193 return pe->addr;
194}
195
196/**
197 * powernv_eeh_get_state - Retrieve PE state
198 * @pe: EEH PE
199 * @delay: delay while PE state is temporarily unavailable
200 *
201 * Retrieve the state of the specified PE. For IODA-compatible
202 * platforms, it should be retrieved from the IODA table. Therefore,
203 * we prefer passing it down to the hardware implementation to
204 * handle.
205 */
206static int powernv_eeh_get_state(struct eeh_pe *pe, int *delay)
207{
208 struct pci_controller *hose = pe->phb;
209 struct pnv_phb *phb = hose->private_data;
210 int ret = EEH_STATE_NOT_SUPPORT;
211
212 if (phb->eeh_ops && phb->eeh_ops->get_state) {
213 ret = phb->eeh_ops->get_state(pe);
214
215 /*
216 * If the PE state is temporarily unavailable,
217 * inform the EEH core to delay for the default
218 * period (1 second)
219 */
220 if (delay) {
221 *delay = 0;
222 if (ret & EEH_STATE_UNAVAILABLE)
223 *delay = 1000;
224 }
225 }
226
227 return ret;
228}
229
230/**
231 * powernv_eeh_reset - Reset the specified PE
232 * @pe: EEH PE
233 * @option: reset option
234 *
235 * Reset the specified PE
236 */
237static int powernv_eeh_reset(struct eeh_pe *pe, int option)
238{
239 struct pci_controller *hose = pe->phb;
240 struct pnv_phb *phb = hose->private_data;
241 int ret = -EEXIST;
242
243 if (phb->eeh_ops && phb->eeh_ops->reset)
244 ret = phb->eeh_ops->reset(pe, option);
245
246 return ret;
247}
248
249/**
250 * powernv_eeh_wait_state - Wait for PE state
251 * @pe: EEH PE
252 * @max_wait: maximal period in milliseconds
253 *
254 * Wait for the state of the associated PE. It might take some time
255 * to retrieve the PE's state.
256 */
257static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait)
258{
259 int ret;
260 int mwait;
261
262 while (1) {
263 ret = powernv_eeh_get_state(pe, &mwait);
264
265 /*
266 * If the PE's state is temporarily unavailable,
267 * we have to wait for the specified time. Otherwise,
268 * the PE's state will be returned immediately.
269 */
270 if (ret != EEH_STATE_UNAVAILABLE)
271 return ret;
272
273 max_wait -= mwait;
274 if (max_wait <= 0) {
275 pr_warning("%s: Timeout getting PE#%x's state (%d)\n",
276 __func__, pe->addr, max_wait);
277 return EEH_STATE_NOT_SUPPORT;
278 }
279
280 msleep(mwait);
281 }
282
283 return EEH_STATE_NOT_SUPPORT;
284}
285
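A hypothetical usage sketch of the polling helper above: wait up to five seconds for the PE to report a usable MMIO path after a reset. The timeout value and the EEH_STATE_MMIO_ACTIVE check are illustrative, not taken from this patch.

static int example_wait_for_mmio(struct eeh_pe *pe)
{
	/* Poll for at most 5000ms; the helper sleeps while unavailable */
	int state = powernv_eeh_wait_state(pe, 5000);

	if (state & EEH_STATE_MMIO_ACTIVE)
		return 0;	/* MMIO path has recovered */

	return -EIO;
}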
286/**
287 * powernv_eeh_get_log - Retrieve error log
288 * @pe: EEH PE
289 * @severity: temporary or permanent error log
290 * @drv_log: driver log to be combined with retrieved error log
291 * @len: length of driver log
292 *
293 * Retrieve the temporary or permanent error from the PE.
294 */
295static int powernv_eeh_get_log(struct eeh_pe *pe, int severity,
296 char *drv_log, unsigned long len)
297{
298 struct pci_controller *hose = pe->phb;
299 struct pnv_phb *phb = hose->private_data;
300 int ret = -EEXIST;
301
302 if (phb->eeh_ops && phb->eeh_ops->get_log)
303 ret = phb->eeh_ops->get_log(pe, severity, drv_log, len);
304
305 return ret;
306}
307
308/**
309 * powernv_eeh_configure_bridge - Configure PCI bridges in the indicated PE
310 * @pe: EEH PE
311 *
312 * The function will be called to reconfigure the bridges included
313 * in the specified PE so that the malfunctioning PE can be
314 * recovered again.
315 */
316static int powernv_eeh_configure_bridge(struct eeh_pe *pe)
317{
318 struct pci_controller *hose = pe->phb;
319 struct pnv_phb *phb = hose->private_data;
320 int ret = 0;
321
322 if (phb->eeh_ops && phb->eeh_ops->configure_bridge)
323 ret = phb->eeh_ops->configure_bridge(pe);
324
325 return ret;
326}
327
328/**
329 * powernv_eeh_next_error - Retrieve next EEH error to handle
330 * @pe: Affected PE
331 *
332 * Use the OPAL API to retrieve the next EEH error for the EEH core to handle
333 */
334static int powernv_eeh_next_error(struct eeh_pe **pe)
335{
336 struct pci_controller *hose;
337 struct pnv_phb *phb = NULL;
338
339 list_for_each_entry(hose, &hose_list, list_node) {
340 phb = hose->private_data;
341 break;
342 }
343
344 if (phb && phb->eeh_ops && phb->eeh_ops->next_error)
345 return phb->eeh_ops->next_error(pe);
346
347 return -EEXIST;
348}
349
350static struct eeh_ops powernv_eeh_ops = {
351 .name = "powernv",
352 .init = powernv_eeh_init,
353 .post_init = powernv_eeh_post_init,
354 .of_probe = NULL,
355 .dev_probe = powernv_eeh_dev_probe,
356 .set_option = powernv_eeh_set_option,
357 .get_pe_addr = powernv_eeh_get_pe_addr,
358 .get_state = powernv_eeh_get_state,
359 .reset = powernv_eeh_reset,
360 .wait_state = powernv_eeh_wait_state,
361 .get_log = powernv_eeh_get_log,
362 .configure_bridge = powernv_eeh_configure_bridge,
363 .read_config = pnv_pci_cfg_read,
364 .write_config = pnv_pci_cfg_write,
365 .next_error = powernv_eeh_next_error
366};
367
368/**
369 * eeh_powernv_init - Register platform dependent EEH operations
370 *
371 * EEH initialization on powernv platform. This function should be
372 * called before any EEH related functions.
373 */
374static int __init eeh_powernv_init(void)
375{
376 int ret = -EINVAL;
377
378 if (!machine_is(powernv))
379 return ret;
380
381 ret = eeh_ops_register(&powernv_eeh_ops);
382 if (!ret)
383 pr_info("EEH: PowerNV platform initialized\n");
384 else
385 pr_info("EEH: Failed to initialize PowerNV platform (%d)\n", ret);
386
387 return ret;
388}
389
390early_initcall(eeh_powernv_init);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 6fabe92eafb6..e88863ffb135 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -107,4 +107,7 @@ OPAL_CALL(opal_pci_mask_pe_error, OPAL_PCI_MASK_PE_ERROR);
107OPAL_CALL(opal_set_slot_led_status, OPAL_SET_SLOT_LED_STATUS); 107OPAL_CALL(opal_set_slot_led_status, OPAL_SET_SLOT_LED_STATUS);
108OPAL_CALL(opal_get_epow_status, OPAL_GET_EPOW_STATUS); 108OPAL_CALL(opal_get_epow_status, OPAL_GET_EPOW_STATUS);
109OPAL_CALL(opal_set_system_attention_led, OPAL_SET_SYSTEM_ATTENTION_LED); 109OPAL_CALL(opal_set_system_attention_led, OPAL_SET_SYSTEM_ATTENTION_LED);
110OPAL_CALL(opal_pci_next_error, OPAL_PCI_NEXT_ERROR);
111OPAL_CALL(opal_pci_poll, OPAL_PCI_POLL);
110OPAL_CALL(opal_pci_msi_eoi, OPAL_PCI_MSI_EOI); 112OPAL_CALL(opal_pci_msi_eoi, OPAL_PCI_MSI_EOI);
113OPAL_CALL(opal_pci_get_phb_diag_data2, OPAL_PCI_GET_PHB_DIAG_DATA2);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 628c564ceadb..106301fd2fa5 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -15,6 +15,7 @@
15#include <linux/of.h> 15#include <linux/of.h>
16#include <linux/of_platform.h> 16#include <linux/of_platform.h>
17#include <linux/interrupt.h> 17#include <linux/interrupt.h>
18#include <linux/notifier.h>
18#include <linux/slab.h> 19#include <linux/slab.h>
19#include <asm/opal.h> 20#include <asm/opal.h>
20#include <asm/firmware.h> 21#include <asm/firmware.h>
@@ -31,6 +32,10 @@ static DEFINE_SPINLOCK(opal_write_lock);
31extern u64 opal_mc_secondary_handler[]; 32extern u64 opal_mc_secondary_handler[];
32static unsigned int *opal_irqs; 33static unsigned int *opal_irqs;
33static unsigned int opal_irq_count; 34static unsigned int opal_irq_count;
35static ATOMIC_NOTIFIER_HEAD(opal_notifier_head);
36static DEFINE_SPINLOCK(opal_notifier_lock);
37static uint64_t last_notified_mask = 0x0ul;
38static atomic_t opal_notifier_hold = ATOMIC_INIT(0);
34 39
35int __init early_init_dt_scan_opal(unsigned long node, 40int __init early_init_dt_scan_opal(unsigned long node,
36 const char *uname, int depth, void *data) 41 const char *uname, int depth, void *data)
@@ -95,6 +100,68 @@ static int __init opal_register_exception_handlers(void)
95 100
96early_initcall(opal_register_exception_handlers); 101early_initcall(opal_register_exception_handlers);
97 102
103int opal_notifier_register(struct notifier_block *nb)
104{
105 if (!nb) {
106 pr_warning("%s: Invalid argument (%p)\n",
107 __func__, nb);
108 return -EINVAL;
109 }
110
111 atomic_notifier_chain_register(&opal_notifier_head, nb);
112 return 0;
113}
114
115static void opal_do_notifier(uint64_t events)
116{
117 unsigned long flags;
118 uint64_t changed_mask;
119
120 if (atomic_read(&opal_notifier_hold))
121 return;
122
123 spin_lock_irqsave(&opal_notifier_lock, flags);
124 changed_mask = last_notified_mask ^ events;
125 last_notified_mask = events;
126 spin_unlock_irqrestore(&opal_notifier_lock, flags);
127
128 /*
129 * We feed the callback with both the event bits and the
130 * changed bits, so it has enough information to act on.
131 */
132 atomic_notifier_call_chain(&opal_notifier_head,
133 events, (void *)changed_mask);
134}
135
136void opal_notifier_update_evt(uint64_t evt_mask,
137 uint64_t evt_val)
138{
139 unsigned long flags;
140
141 spin_lock_irqsave(&opal_notifier_lock, flags);
142 last_notified_mask &= ~evt_mask;
143 last_notified_mask |= evt_val;
144 spin_unlock_irqrestore(&opal_notifier_lock, flags);
145}
146
147void opal_notifier_enable(void)
148{
149 int64_t rc;
150 uint64_t evt = 0;
151
152 atomic_set(&opal_notifier_hold, 0);
153
154 /* Process pending events */
155 rc = opal_poll_events(&evt);
156 if (rc == OPAL_SUCCESS && evt)
157 opal_do_notifier(evt);
158}
159
160void opal_notifier_disable(void)
161{
162 atomic_set(&opal_notifier_hold, 1);
163}
164
98int opal_get_chars(uint32_t vtermno, char *buf, int count) 165int opal_get_chars(uint32_t vtermno, char *buf, int count)
99{ 166{
100 s64 len, rc; 167 s64 len, rc;
@@ -297,7 +364,7 @@ static irqreturn_t opal_interrupt(int irq, void *data)
297 364
298 opal_handle_interrupt(virq_to_hw(irq), &events); 365 opal_handle_interrupt(virq_to_hw(irq), &events);
299 366
300 /* XXX TODO: Do something with the events */ 367 opal_do_notifier(events);
301 368
302 return IRQ_HANDLED; 369 return IRQ_HANDLED;
303} 370}
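A minimal, hypothetical consumer of the OPAL notifier chain added above. opal_do_notifier() passes the full event mask as the 'events' argument and the changed bits through the opaque pointer; the handler name and the choice of OPAL_EVENT_PCI_ERROR are illustrative.

static int example_opal_event(struct notifier_block *nb,
			      unsigned long events, void *change)
{
	uint64_t changed = (uint64_t)(unsigned long)change;

	/* React only when the PCI error bit just flipped on */
	if ((changed & OPAL_EVENT_PCI_ERROR) &&
	    (events & OPAL_EVENT_PCI_ERROR))
		pr_info("OPAL reported a PCI error event\n");

	return NOTIFY_OK;
}

static struct notifier_block example_opal_nb = {
	.notifier_call	= example_opal_event,
};

/* Registered once during boot: opal_notifier_register(&example_opal_nb); */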
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 9c9d15e4cdf2..d8140b125e62 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -13,6 +13,7 @@
13 13
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include <linux/pci.h> 15#include <linux/pci.h>
16#include <linux/debugfs.h>
16#include <linux/delay.h> 17#include <linux/delay.h>
17#include <linux/string.h> 18#include <linux/string.h>
18#include <linux/init.h> 19#include <linux/init.h>
@@ -32,6 +33,7 @@
32#include <asm/iommu.h> 33#include <asm/iommu.h>
33#include <asm/tce.h> 34#include <asm/tce.h>
34#include <asm/xics.h> 35#include <asm/xics.h>
36#include <asm/debug.h>
35 37
36#include "powernv.h" 38#include "powernv.h"
37#include "pci.h" 39#include "pci.h"
@@ -441,6 +443,17 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
441 set_iommu_table_base(&pdev->dev, &pe->tce32_table); 443 set_iommu_table_base(&pdev->dev, &pe->tce32_table);
442} 444}
443 445
446static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
447{
448 struct pci_dev *dev;
449
450 list_for_each_entry(dev, &bus->devices, bus_list) {
451 set_iommu_table_base(&dev->dev, &pe->tce32_table);
452 if (dev->subordinate)
453 pnv_ioda_setup_bus_dma(pe, dev->subordinate);
454 }
455}
456
444static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl, 457static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
445 u64 *startp, u64 *endp) 458 u64 *startp, u64 *endp)
446{ 459{
@@ -595,6 +608,12 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
595 TCE_PCI_SWINV_PAIR; 608 TCE_PCI_SWINV_PAIR;
596 } 609 }
597 iommu_init_table(tbl, phb->hose->node); 610 iommu_init_table(tbl, phb->hose->node);
611 iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number);
612
613 if (pe->pdev)
614 set_iommu_table_base(&pe->pdev->dev, tbl);
615 else
616 pnv_ioda_setup_bus_dma(pe, pe->pbus);
598 617
599 return; 618 return;
600 fail: 619 fail:
@@ -667,6 +686,11 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
667 } 686 }
668 iommu_init_table(tbl, phb->hose->node); 687 iommu_init_table(tbl, phb->hose->node);
669 688
689 if (pe->pdev)
690 set_iommu_table_base(&pe->pdev->dev, tbl);
691 else
692 pnv_ioda_setup_bus_dma(pe, pe->pbus);
693
670 return; 694 return;
671fail: 695fail:
672 if (pe->tce32_seg >= 0) 696 if (pe->tce32_seg >= 0)
@@ -968,11 +992,38 @@ static void pnv_pci_ioda_setup_DMA(void)
968 } 992 }
969} 993}
970 994
995static void pnv_pci_ioda_create_dbgfs(void)
996{
997#ifdef CONFIG_DEBUG_FS
998 struct pci_controller *hose, *tmp;
999 struct pnv_phb *phb;
1000 char name[16];
1001
1002 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
1003 phb = hose->private_data;
1004
1005 sprintf(name, "PCI%04x", hose->global_number);
1006 phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root);
1007 if (!phb->dbgfs)
1008 pr_warning("%s: Error on creating debugfs on PHB#%x\n",
1009 __func__, hose->global_number);
1010 }
1011#endif /* CONFIG_DEBUG_FS */
1012}
1013
971static void pnv_pci_ioda_fixup(void) 1014static void pnv_pci_ioda_fixup(void)
972{ 1015{
973 pnv_pci_ioda_setup_PEs(); 1016 pnv_pci_ioda_setup_PEs();
974 pnv_pci_ioda_setup_seg(); 1017 pnv_pci_ioda_setup_seg();
975 pnv_pci_ioda_setup_DMA(); 1018 pnv_pci_ioda_setup_DMA();
1019
1020 pnv_pci_ioda_create_dbgfs();
1021
1022#ifdef CONFIG_EEH
1023 eeh_probe_mode_set(EEH_PROBE_MODE_DEV);
1024 eeh_addr_cache_build();
1025 eeh_init();
1026#endif
976} 1027}
977 1028
978/* 1029/*
@@ -1049,7 +1100,8 @@ static void pnv_pci_ioda_shutdown(struct pnv_phb *phb)
1049 OPAL_ASSERT_RESET); 1100 OPAL_ASSERT_RESET);
1050} 1101}
1051 1102
1052void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type) 1103void __init pnv_pci_init_ioda_phb(struct device_node *np,
1104 u64 hub_id, int ioda_type)
1053{ 1105{
1054 struct pci_controller *hose; 1106 struct pci_controller *hose;
1055 static int primary = 1; 1107 static int primary = 1;
@@ -1087,6 +1139,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
1087 hose->first_busno = 0; 1139 hose->first_busno = 0;
1088 hose->last_busno = 0xff; 1140 hose->last_busno = 0xff;
1089 hose->private_data = phb; 1141 hose->private_data = phb;
1142 phb->hub_id = hub_id;
1090 phb->opal_id = phb_id; 1143 phb->opal_id = phb_id;
1091 phb->type = ioda_type; 1144 phb->type = ioda_type;
1092 1145
@@ -1172,6 +1225,9 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
1172 phb->ioda.io_size, phb->ioda.io_segsize); 1225 phb->ioda.io_size, phb->ioda.io_segsize);
1173 1226
1174 phb->hose->ops = &pnv_pci_ops; 1227 phb->hose->ops = &pnv_pci_ops;
1228#ifdef CONFIG_EEH
1229 phb->eeh_ops = &ioda_eeh_ops;
1230#endif
1175 1231
1176 /* Setup RID -> PE mapping function */ 1232 /* Setup RID -> PE mapping function */
1177 phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe; 1233 phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;
@@ -1210,9 +1266,9 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
1210 opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1 , OPAL_MAP_PE); 1266 opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1 , OPAL_MAP_PE);
1211} 1267}
1212 1268
1213void pnv_pci_init_ioda2_phb(struct device_node *np) 1269void __init pnv_pci_init_ioda2_phb(struct device_node *np)
1214{ 1270{
1215 pnv_pci_init_ioda_phb(np, PNV_PHB_IODA2); 1271 pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);
1216} 1272}
1217 1273
1218void __init pnv_pci_init_ioda_hub(struct device_node *np) 1274void __init pnv_pci_init_ioda_hub(struct device_node *np)
@@ -1235,6 +1291,6 @@ void __init pnv_pci_init_ioda_hub(struct device_node *np)
1235 for_each_child_of_node(np, phbn) { 1291 for_each_child_of_node(np, phbn) {
1236 /* Look for IODA1 PHBs */ 1292 /* Look for IODA1 PHBs */
1237 if (of_device_is_compatible(phbn, "ibm,ioda-phb")) 1293 if (of_device_is_compatible(phbn, "ibm,ioda-phb"))
1238 pnv_pci_init_ioda_phb(phbn, PNV_PHB_IODA1); 1294 pnv_pci_init_ioda_phb(phbn, hub_id, PNV_PHB_IODA1);
1239 } 1295 }
1240} 1296}
diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
index 92b37a0186c9..b68db6325c1b 100644
--- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c
+++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
@@ -86,13 +86,16 @@ static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { }
86static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb, 86static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb,
87 struct pci_dev *pdev) 87 struct pci_dev *pdev)
88{ 88{
89 if (phb->p5ioc2.iommu_table.it_map == NULL) 89 if (phb->p5ioc2.iommu_table.it_map == NULL) {
90 iommu_init_table(&phb->p5ioc2.iommu_table, phb->hose->node); 90 iommu_init_table(&phb->p5ioc2.iommu_table, phb->hose->node);
91 iommu_register_group(&phb->p5ioc2.iommu_table,
92 pci_domain_nr(phb->hose->bus), phb->opal_id);
93 }
91 94
92 set_iommu_table_base(&pdev->dev, &phb->p5ioc2.iommu_table); 95 set_iommu_table_base(&pdev->dev, &phb->p5ioc2.iommu_table);
93} 96}
94 97
95static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, 98static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id,
96 void *tce_mem, u64 tce_size) 99 void *tce_mem, u64 tce_size)
97{ 100{
98 struct pnv_phb *phb; 101 struct pnv_phb *phb;
@@ -133,6 +136,7 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np,
133 phb->hose->first_busno = 0; 136 phb->hose->first_busno = 0;
134 phb->hose->last_busno = 0xff; 137 phb->hose->last_busno = 0xff;
135 phb->hose->private_data = phb; 138 phb->hose->private_data = phb;
139 phb->hub_id = hub_id;
136 phb->opal_id = phb_id; 140 phb->opal_id = phb_id;
137 phb->type = PNV_PHB_P5IOC2; 141 phb->type = PNV_PHB_P5IOC2;
138 phb->model = PNV_PHB_MODEL_P5IOC2; 142 phb->model = PNV_PHB_MODEL_P5IOC2;
@@ -226,7 +230,8 @@ void __init pnv_pci_init_p5ioc2_hub(struct device_node *np)
226 for_each_child_of_node(np, phbn) { 230 for_each_child_of_node(np, phbn) {
227 if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") || 231 if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") ||
228 of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) { 232 of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) {
229 pnv_pci_init_p5ioc2_phb(phbn, tce_mem, tce_per_phb); 233 pnv_pci_init_p5ioc2_phb(phbn, hub_id,
234 tce_mem, tce_per_phb);
230 tce_mem += tce_per_phb; 235 tce_mem += tce_per_phb;
231 } 236 }
232 } 237 }
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 277343cc6a3d..a28d3b5e6393 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -20,6 +20,7 @@
20#include <linux/irq.h> 20#include <linux/irq.h>
21#include <linux/io.h> 21#include <linux/io.h>
22#include <linux/msi.h> 22#include <linux/msi.h>
23#include <linux/iommu.h>
23 24
24#include <asm/sections.h> 25#include <asm/sections.h>
25#include <asm/io.h> 26#include <asm/io.h>
@@ -32,6 +33,8 @@
32#include <asm/iommu.h> 33#include <asm/iommu.h>
33#include <asm/tce.h> 34#include <asm/tce.h>
34#include <asm/firmware.h> 35#include <asm/firmware.h>
36#include <asm/eeh_event.h>
37#include <asm/eeh.h>
35 38
36#include "powernv.h" 39#include "powernv.h"
37#include "pci.h" 40#include "pci.h"
@@ -202,7 +205,8 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
202 205
203 spin_lock_irqsave(&phb->lock, flags); 206 spin_lock_irqsave(&phb->lock, flags);
204 207
205 rc = opal_pci_get_phb_diag_data(phb->opal_id, phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE); 208 rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
209 PNV_PCI_DIAG_BUF_SIZE);
206 has_diag = (rc == OPAL_SUCCESS); 210 has_diag = (rc == OPAL_SUCCESS);
207 211
208 rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, 212 rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
@@ -227,43 +231,50 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
227 spin_unlock_irqrestore(&phb->lock, flags); 231 spin_unlock_irqrestore(&phb->lock, flags);
228} 232}
229 233
230static void pnv_pci_config_check_eeh(struct pnv_phb *phb, struct pci_bus *bus, 234static void pnv_pci_config_check_eeh(struct pnv_phb *phb,
231 u32 bdfn) 235 struct device_node *dn)
232{ 236{
233 s64 rc; 237 s64 rc;
234 u8 fstate; 238 u8 fstate;
235 u16 pcierr; 239 u16 pcierr;
236 u32 pe_no; 240 u32 pe_no;
237 241
238 /* Get PE# if we support IODA */ 242 /*
239 pe_no = phb->bdfn_to_pe ? phb->bdfn_to_pe(phb, bus, bdfn & 0xff) : 0; 243 * Get the PE#. During the PCI probe stage, we might not
244 * have set that up yet. So all ER errors should be mapped to
245 * PE#0
246 */
247 pe_no = PCI_DN(dn)->pe_number;
248 if (pe_no == IODA_INVALID_PE)
249 pe_no = 0;
240 250
241 /* Read freeze status */ 251 /* Read freeze status */
242 rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, &fstate, &pcierr, 252 rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, &fstate, &pcierr,
243 NULL); 253 NULL);
244 if (rc) { 254 if (rc) {
245 pr_warning("PCI %d: Failed to read EEH status for PE#%d," 255 pr_warning("%s: Can't read EEH status (PE#%d) for "
246 " err %lld\n", phb->hose->global_number, pe_no, rc); 256 "%s, err %lld\n",
257 __func__, pe_no, dn->full_name, rc);
247 return; 258 return;
248 } 259 }
249 cfg_dbg(" -> EEH check, bdfn=%04x PE%d fstate=%x\n", 260 cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n",
250 bdfn, pe_no, fstate); 261 (PCI_DN(dn)->busno << 8) | (PCI_DN(dn)->devfn),
262 pe_no, fstate);
251 if (fstate != 0) 263 if (fstate != 0)
252 pnv_pci_handle_eeh_config(phb, pe_no); 264 pnv_pci_handle_eeh_config(phb, pe_no);
253} 265}
254 266
255static int pnv_pci_read_config(struct pci_bus *bus, 267int pnv_pci_cfg_read(struct device_node *dn,
256 unsigned int devfn, 268 int where, int size, u32 *val)
257 int where, int size, u32 *val)
258{ 269{
259 struct pci_controller *hose = pci_bus_to_host(bus); 270 struct pci_dn *pdn = PCI_DN(dn);
260 struct pnv_phb *phb = hose->private_data; 271 struct pnv_phb *phb = pdn->phb->private_data;
261 u32 bdfn = (((uint64_t)bus->number) << 8) | devfn; 272 u32 bdfn = (pdn->busno << 8) | pdn->devfn;
273#ifdef CONFIG_EEH
274 struct eeh_pe *phb_pe = NULL;
275#endif
262 s64 rc; 276 s64 rc;
263 277
264 if (hose == NULL)
265 return PCIBIOS_DEVICE_NOT_FOUND;
266
267 switch (size) { 278 switch (size) {
268 case 1: { 279 case 1: {
269 u8 v8; 280 u8 v8;
@@ -287,28 +298,43 @@ static int pnv_pci_read_config(struct pci_bus *bus,
287 default: 298 default:
288 return PCIBIOS_FUNC_NOT_SUPPORTED; 299 return PCIBIOS_FUNC_NOT_SUPPORTED;
289 } 300 }
290 cfg_dbg("pnv_pci_read_config bus: %x devfn: %x +%x/%x -> %08x\n", 301 cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
291 bus->number, devfn, where, size, *val); 302 __func__, pdn->busno, pdn->devfn, where, size, *val);
292 303
293 /* Check if the PHB got frozen due to an error (no response) */ 304 /*
294 pnv_pci_config_check_eeh(phb, bus, bdfn); 305 * Check if the specified PE has been put into frozen
306 * state. On the other hand, we needn't do that when the
307 * PHB itself has been put into frozen state because of
308 * PHB-fatal errors.
309 */
310#ifdef CONFIG_EEH
311 phb_pe = eeh_phb_pe_get(pdn->phb);
312 if (phb_pe && (phb_pe->state & EEH_PE_ISOLATED))
313 return PCIBIOS_SUCCESSFUL;
314
315 if (phb->eeh_state & PNV_EEH_STATE_ENABLED) {
316 if (*val == EEH_IO_ERROR_VALUE(size) &&
317 eeh_dev_check_failure(of_node_to_eeh_dev(dn)))
318 return PCIBIOS_DEVICE_NOT_FOUND;
319 } else {
320 pnv_pci_config_check_eeh(phb, dn);
321 }
322#else
323 pnv_pci_config_check_eeh(phb, dn);
324#endif
295 325
296 return PCIBIOS_SUCCESSFUL; 326 return PCIBIOS_SUCCESSFUL;
297} 327}
298 328
299static int pnv_pci_write_config(struct pci_bus *bus, 329int pnv_pci_cfg_write(struct device_node *dn,
300 unsigned int devfn, 330 int where, int size, u32 val)
301 int where, int size, u32 val)
302{ 331{
303 struct pci_controller *hose = pci_bus_to_host(bus); 332 struct pci_dn *pdn = PCI_DN(dn);
304 struct pnv_phb *phb = hose->private_data; 333 struct pnv_phb *phb = pdn->phb->private_data;
305 u32 bdfn = (((uint64_t)bus->number) << 8) | devfn; 334 u32 bdfn = (pdn->busno << 8) | pdn->devfn;
306
307 if (hose == NULL)
308 return PCIBIOS_DEVICE_NOT_FOUND;
309 335
310 cfg_dbg("pnv_pci_write_config bus: %x devfn: %x +%x/%x -> %08x\n", 336 cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
311 bus->number, devfn, where, size, val); 337 __func__, pdn->busno, pdn->devfn, where, size, val);
312 switch (size) { 338 switch (size) {
313 case 1: 339 case 1:
314 opal_pci_config_write_byte(phb->opal_id, bdfn, where, val); 340 opal_pci_config_write_byte(phb->opal_id, bdfn, where, val);
@@ -322,14 +348,54 @@ static int pnv_pci_write_config(struct pci_bus *bus,
322 default: 348 default:
323 return PCIBIOS_FUNC_NOT_SUPPORTED; 349 return PCIBIOS_FUNC_NOT_SUPPORTED;
324 } 350 }
351
325 /* Check if the PHB got frozen due to an error (no response) */ 352 /* Check if the PHB got frozen due to an error (no response) */
326 pnv_pci_config_check_eeh(phb, bus, bdfn); 353#ifdef CONFIG_EEH
354 if (!(phb->eeh_state & PNV_EEH_STATE_ENABLED))
355 pnv_pci_config_check_eeh(phb, dn);
356#else
357 pnv_pci_config_check_eeh(phb, dn);
358#endif
327 359
328 return PCIBIOS_SUCCESSFUL; 360 return PCIBIOS_SUCCESSFUL;
329} 361}
330 362
363static int pnv_pci_read_config(struct pci_bus *bus,
364 unsigned int devfn,
365 int where, int size, u32 *val)
366{
367 struct device_node *dn, *busdn = pci_bus_to_OF_node(bus);
368 struct pci_dn *pdn;
369
370 for (dn = busdn->child; dn; dn = dn->sibling) {
371 pdn = PCI_DN(dn);
372 if (pdn && pdn->devfn == devfn)
373 return pnv_pci_cfg_read(dn, where, size, val);
374 }
375
376 *val = 0xFFFFFFFF;
377 return PCIBIOS_DEVICE_NOT_FOUND;
378
379}
380
381static int pnv_pci_write_config(struct pci_bus *bus,
382 unsigned int devfn,
383 int where, int size, u32 val)
384{
385 struct device_node *dn, *busdn = pci_bus_to_OF_node(bus);
386 struct pci_dn *pdn;
387
388 for (dn = busdn->child; dn; dn = dn->sibling) {
389 pdn = PCI_DN(dn);
390 if (pdn && pdn->devfn == devfn)
391 return pnv_pci_cfg_write(dn, where, size, val);
392 }
393
394 return PCIBIOS_DEVICE_NOT_FOUND;
395}
396
331struct pci_ops pnv_pci_ops = { 397struct pci_ops pnv_pci_ops = {
332 .read = pnv_pci_read_config, 398 .read = pnv_pci_read_config,
333 .write = pnv_pci_write_config, 399 .write = pnv_pci_write_config,
334}; 400};
335 401
@@ -412,6 +478,7 @@ static struct iommu_table *pnv_pci_setup_bml_iommu(struct pci_controller *hose)
412 pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)), 478 pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)),
413 be32_to_cpup(sizep), 0); 479 be32_to_cpup(sizep), 0);
414 iommu_init_table(tbl, hose->node); 480 iommu_init_table(tbl, hose->node);
481 iommu_register_group(tbl, pci_domain_nr(hose->bus), 0);
415 482
416 /* Deal with SW invalidated TCEs when needed (BML way) */ 483 /* Deal with SW invalidated TCEs when needed (BML way) */
417 swinvp = of_get_property(hose->dn, "linux,tce-sw-invalidate-info", 484 swinvp = of_get_property(hose->dn, "linux,tce-sw-invalidate-info",
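The read path above treats an all-ones result as a possible symptom of a frozen PE and asks the EEH core to confirm via eeh_dev_check_failure(). A simplified stand-in for what EEH_IO_ERROR_VALUE(size) expands to, offered as an assumption rather than the kernel's exact definition:

/* All bits set for a 1-, 2- or 4-byte config access */
#define EXAMPLE_IO_ERROR_VALUE(size)	(~0U >> ((4 - (size)) * 8))
/* size 1 -> 0xFF, size 2 -> 0xFFFF, size 4 -> 0xFFFFFFFF */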
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 25d76c4df50b..d633c64e05a1 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -66,15 +66,43 @@ struct pnv_ioda_pe {
66 struct list_head list; 66 struct list_head list;
67}; 67};
68 68
69/* IOC dependent EEH operations */
70#ifdef CONFIG_EEH
71struct pnv_eeh_ops {
72 int (*post_init)(struct pci_controller *hose);
73 int (*set_option)(struct eeh_pe *pe, int option);
74 int (*get_state)(struct eeh_pe *pe);
75 int (*reset)(struct eeh_pe *pe, int option);
76 int (*get_log)(struct eeh_pe *pe, int severity,
77 char *drv_log, unsigned long len);
78 int (*configure_bridge)(struct eeh_pe *pe);
79 int (*next_error)(struct eeh_pe **pe);
80};
81
82#define PNV_EEH_STATE_ENABLED (1 << 0) /* EEH enabled */
83#define PNV_EEH_STATE_REMOVED (1 << 1) /* PHB removed */
84
85#endif /* CONFIG_EEH */
86
69struct pnv_phb { 87struct pnv_phb {
70 struct pci_controller *hose; 88 struct pci_controller *hose;
71 enum pnv_phb_type type; 89 enum pnv_phb_type type;
72 enum pnv_phb_model model; 90 enum pnv_phb_model model;
91 u64 hub_id;
73 u64 opal_id; 92 u64 opal_id;
74 void __iomem *regs; 93 void __iomem *regs;
75 int initialized; 94 int initialized;
76 spinlock_t lock; 95 spinlock_t lock;
77 96
97#ifdef CONFIG_EEH
98 struct pnv_eeh_ops *eeh_ops;
99 int eeh_state;
100#endif
101
102#ifdef CONFIG_DEBUG_FS
103 struct dentry *dbgfs;
104#endif
105
78#ifdef CONFIG_PCI_MSI 106#ifdef CONFIG_PCI_MSI
79 unsigned int msi_base; 107 unsigned int msi_base;
80 unsigned int msi32_support; 108 unsigned int msi32_support;
@@ -150,7 +178,14 @@ struct pnv_phb {
150}; 178};
151 179
152extern struct pci_ops pnv_pci_ops; 180extern struct pci_ops pnv_pci_ops;
181#ifdef CONFIG_EEH
182extern struct pnv_eeh_ops ioda_eeh_ops;
183#endif
153 184
185int pnv_pci_cfg_read(struct device_node *dn,
186 int where, int size, u32 *val);
187int pnv_pci_cfg_write(struct device_node *dn,
188 int where, int size, u32 val);
154extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, 189extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
155 void *tce_mem, u64 tce_size, 190 void *tce_mem, u64 tce_size,
156 u64 dma_offset); 191 u64 dma_offset);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index d4459bfc92f7..84438af96c05 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -93,6 +93,8 @@ static void __noreturn pnv_restart(char *cmd)
93{ 93{
94 long rc = OPAL_BUSY; 94 long rc = OPAL_BUSY;
95 95
96 opal_notifier_disable();
97
96 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { 98 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
97 rc = opal_cec_reboot(); 99 rc = opal_cec_reboot();
98 if (rc == OPAL_BUSY_EVENT) 100 if (rc == OPAL_BUSY_EVENT)
@@ -108,6 +110,8 @@ static void __noreturn pnv_power_off(void)
108{ 110{
109 long rc = OPAL_BUSY; 111 long rc = OPAL_BUSY;
110 112
113 opal_notifier_disable();
114
111 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { 115 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
112 rc = opal_cec_power_down(0); 116 rc = opal_cec_power_down(0);
113 if (rc == OPAL_BUSY_EVENT) 117 if (rc == OPAL_BUSY_EVENT)
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 88c9459c3e07..89e3857af4e0 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -40,7 +40,7 @@
40#define DBG(fmt...) 40#define DBG(fmt...)
41#endif 41#endif
42 42
43static void __cpuinit pnv_smp_setup_cpu(int cpu) 43static void pnv_smp_setup_cpu(int cpu)
44{ 44{
45 if (cpu != boot_cpuid) 45 if (cpu != boot_cpuid)
46 xics_setup_cpu(); 46 xics_setup_cpu();
@@ -51,7 +51,7 @@ static int pnv_smp_cpu_bootable(unsigned int nr)
51 /* Special case - we inhibit secondary thread startup 51 /* Special case - we inhibit secondary thread startup
52 * during boot if the user requests it. 52 * during boot if the user requests it.
53 */ 53 */
54 if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) { 54 if (system_state == SYSTEM_BOOTING && cpu_has_feature(CPU_FTR_SMT)) {
55 if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0) 55 if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
56 return 0; 56 return 0;
57 if (smt_enabled_at_boot 57 if (smt_enabled_at_boot
diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c
index 177a2f70700c..3e270e3412ae 100644
--- a/arch/powerpc/platforms/ps3/htab.c
+++ b/arch/powerpc/platforms/ps3/htab.c
@@ -109,7 +109,8 @@ static long ps3_hpte_remove(unsigned long hpte_group)
109} 109}
110 110
111static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp, 111static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp,
112 unsigned long vpn, int psize, int ssize, int local) 112 unsigned long vpn, int psize, int apsize,
113 int ssize, int local)
113{ 114{
114 int result; 115 int result;
115 u64 hpte_v, want_v, hpte_rs; 116 u64 hpte_v, want_v, hpte_rs;
@@ -162,7 +163,7 @@ static void ps3_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
162} 163}
163 164
164static void ps3_hpte_invalidate(unsigned long slot, unsigned long vpn, 165static void ps3_hpte_invalidate(unsigned long slot, unsigned long vpn,
165 int psize, int ssize, int local) 166 int psize, int apsize, int ssize, int local)
166{ 167{
167 unsigned long flags; 168 unsigned long flags;
168 int result; 169 int result;
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 4459eff7a75a..62b4f8025de0 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -19,7 +19,6 @@ config PPC_PSERIES
19 select ZLIB_DEFLATE 19 select ZLIB_DEFLATE
20 select PPC_DOORBELL 20 select PPC_DOORBELL
21 select HAVE_CONTEXT_TRACKING 21 select HAVE_CONTEXT_TRACKING
22 select HOTPLUG if SMP
23 select HOTPLUG_CPU if SMP 22 select HOTPLUG_CPU if SMP
24 default y 23 default y
25 24
@@ -33,11 +32,6 @@ config PPC_SPLPAR
33 processors, that is, which share physical processors between 32 processors, that is, which share physical processors between
34 two or more partitions. 33 two or more partitions.
35 34
36config EEH
37 bool
38 depends on PPC_PSERIES && PCI
39 default y
40
41config PSERIES_MSI 35config PSERIES_MSI
42 bool 36 bool
43 depends on PCI_MSI && EEH 37 depends on PCI_MSI && EEH
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 53866e537a92..8ae010381316 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -6,9 +6,7 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \
6 firmware.o power.o dlpar.o mobility.o 6 firmware.o power.o dlpar.o mobility.o
7obj-$(CONFIG_SMP) += smp.o 7obj-$(CONFIG_SMP) += smp.o
8obj-$(CONFIG_SCANLOG) += scanlog.o 8obj-$(CONFIG_SCANLOG) += scanlog.o
9obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \ 9obj-$(CONFIG_EEH) += eeh_pseries.o
10 eeh_driver.o eeh_event.o eeh_sysfs.o \
11 eeh_pseries.o
12obj-$(CONFIG_KEXEC) += kexec.o 10obj-$(CONFIG_KEXEC) += kexec.o
13obj-$(CONFIG_PCI) += pci.o pci_dlpar.o 11obj-$(CONFIG_PCI) += pci.o pci_dlpar.o
14obj-$(CONFIG_PSERIES_MSI) += msi.o 12obj-$(CONFIG_PSERIES_MSI) += msi.o
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index b456b157d33d..7fbc25b1813f 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -133,6 +133,48 @@ static int pseries_eeh_init(void)
133 return 0; 133 return 0;
134} 134}
135 135
136static int pseries_eeh_cap_start(struct device_node *dn)
137{
138 struct pci_dn *pdn = PCI_DN(dn);
139 u32 status;
140
141 if (!pdn)
142 return 0;
143
144 rtas_read_config(pdn, PCI_STATUS, 2, &status);
145 if (!(status & PCI_STATUS_CAP_LIST))
146 return 0;
147
148 return PCI_CAPABILITY_LIST;
149}
150
151
152static int pseries_eeh_find_cap(struct device_node *dn, int cap)
153{
154 struct pci_dn *pdn = PCI_DN(dn);
155 int pos = pseries_eeh_cap_start(dn);
156 int cnt = 48; /* Maximal number of capabilities */
157 u32 id;
158
159 if (!pos)
160 return 0;
161
162 while (cnt--) {
163 rtas_read_config(pdn, pos, 1, &pos);
164 if (pos < 0x40)
165 break;
166 pos &= ~3;
167 rtas_read_config(pdn, pos + PCI_CAP_LIST_ID, 1, &id);
168 if (id == 0xff)
169 break;
170 if (id == cap)
171 return pos;
172 pos += PCI_CAP_LIST_NEXT;
173 }
174
175 return 0;
176}
177
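The helper above implements the standard PCI capability-list walk: read the head pointer at PCI_CAPABILITY_LIST (0x34), stop on pointers below 0x40 (they would point into the standard header) or on an 0xFF capability ID, and bound the iteration count to defend against malformed, cyclic lists. A standalone sketch of the same walk over an in-memory copy of config space:

#include <stdint.h>

static int example_find_cap(const uint8_t cfg[256], uint8_t cap)
{
	int pos = cfg[0x34];		/* PCI_CAPABILITY_LIST */
	int cnt = 48;			/* maximal number of capabilities */

	while (cnt-- && pos >= 0x40) {
		pos &= ~3;
		if (cfg[pos] == 0xff)	/* exhausted: invalid ID */
			break;
		if (cfg[pos] == cap)
			return pos;
		pos = cfg[pos + 1];	/* PCI_CAP_LIST_NEXT */
	}

	return 0;
}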
136/** 178/**
137 * pseries_eeh_of_probe - EEH probe on the given device 179 * pseries_eeh_of_probe - EEH probe on the given device
138 * @dn: OF node 180 * @dn: OF node
@@ -146,14 +188,16 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
146{ 188{
147 struct eeh_dev *edev; 189 struct eeh_dev *edev;
148 struct eeh_pe pe; 190 struct eeh_pe pe;
191 struct pci_dn *pdn = PCI_DN(dn);
149 const u32 *class_code, *vendor_id, *device_id; 192 const u32 *class_code, *vendor_id, *device_id;
150 const u32 *regs; 193 const u32 *regs;
194 u32 pcie_flags;
151 int enable = 0; 195 int enable = 0;
152 int ret; 196 int ret;
153 197
154 /* Retrieve OF node and eeh device */ 198 /* Retrieve OF node and eeh device */
155 edev = of_node_to_eeh_dev(dn); 199 edev = of_node_to_eeh_dev(dn);
156 if (!of_device_is_available(dn)) 200 if (edev->pe || !of_device_is_available(dn))
157 return NULL; 201 return NULL;
158 202
159 /* Retrieve class/vendor/device IDs */ 203 /* Retrieve class/vendor/device IDs */
@@ -167,9 +211,26 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
167 if (dn->type && !strcmp(dn->type, "isa")) 211 if (dn->type && !strcmp(dn->type, "isa"))
168 return NULL; 212 return NULL;
169 213
170 /* Update class code and mode of eeh device */ 214 /*
215 * Update the class code and mode of the eeh device. We need
216 * to correctly reflect whether the current device is a root
217 * port or a PCIe switch downstream port.
218 */
171 edev->class_code = *class_code; 219 edev->class_code = *class_code;
172 edev->mode = 0; 220 edev->pcie_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_EXP);
221 edev->mode &= 0xFFFFFF00;
222 if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
223 edev->mode |= EEH_DEV_BRIDGE;
224 if (edev->pcie_cap) {
225 rtas_read_config(pdn, edev->pcie_cap + PCI_EXP_FLAGS,
226 2, &pcie_flags);
227 pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4;
228 if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT)
229 edev->mode |= EEH_DEV_ROOT_PORT;
230 else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM)
231 edev->mode |= EEH_DEV_DS_PORT;
232 }
233 }
173 234
174 /* Retrieve the device address */ 235 /* Retrieve the device address */
175 regs = of_get_property(dn, "reg", NULL); 236 regs = of_get_property(dn, "reg", NULL);
diff --git a/arch/powerpc/platforms/pseries/io_event_irq.c b/arch/powerpc/platforms/pseries/io_event_irq.c
index ef9d9d84c7d5..5ea88d1541f7 100644
--- a/arch/powerpc/platforms/pseries/io_event_irq.c
+++ b/arch/powerpc/platforms/pseries/io_event_irq.c
@@ -115,7 +115,7 @@ static struct pseries_io_event * ioei_find_event(struct rtas_error_log *elog)
115 * by scope or event type alone. For example, Torrent ISR route change 115 * by scope or event type alone. For example, Torrent ISR route change
116 * event is reported with scope 0x00 (Not Applicatable) rather than 116 * event is reported with scope 0x00 (Not Applicatable) rather than
117 * 0x3B (Torrent-hub). It is better to let the clients to identify 117 * 0x3B (Torrent-hub). It is better to let the clients to identify
118 * who owns the the event. 118 * who owns the event.
119 */ 119 */
120 120
121static irqreturn_t ioei_interrupt(int irq, void *dev_id) 121static irqreturn_t ioei_interrupt(int irq, void *dev_id)
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 86ae364900d6..23fc1dcf4434 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -614,6 +614,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
614 614
615 iommu_table_setparms(pci->phb, dn, tbl); 615 iommu_table_setparms(pci->phb, dn, tbl);
616 pci->iommu_table = iommu_init_table(tbl, pci->phb->node); 616 pci->iommu_table = iommu_init_table(tbl, pci->phb->node);
617 iommu_register_group(tbl, pci_domain_nr(bus), 0);
617 618
618 /* Divide the rest (1.75GB) among the children */ 619 /* Divide the rest (1.75GB) among the children */
619 pci->phb->dma_window_size = 0x80000000ul; 620 pci->phb->dma_window_size = 0x80000000ul;
@@ -658,6 +659,7 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
658 ppci->phb->node); 659 ppci->phb->node);
659 iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window); 660 iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window);
660 ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node); 661 ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node);
662 iommu_register_group(tbl, pci_domain_nr(bus), 0);
661 pr_debug(" created table: %p\n", ppci->iommu_table); 663 pr_debug(" created table: %p\n", ppci->iommu_table);
662 } 664 }
663} 665}
@@ -684,6 +686,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
684 phb->node); 686 phb->node);
685 iommu_table_setparms(phb, dn, tbl); 687 iommu_table_setparms(phb, dn, tbl);
686 PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node); 688 PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node);
689 iommu_register_group(tbl, pci_domain_nr(phb->bus), 0);
687 set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table); 690 set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table);
688 return; 691 return;
689 } 692 }
@@ -1184,6 +1187,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
1184 pci->phb->node); 1187 pci->phb->node);
1185 iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window); 1188 iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window);
1186 pci->iommu_table = iommu_init_table(tbl, pci->phb->node); 1189 pci->iommu_table = iommu_init_table(tbl, pci->phb->node);
1190 iommu_register_group(tbl, pci_domain_nr(pci->phb->bus), 0);
1187 pr_debug(" created table: %p\n", pci->iommu_table); 1191 pr_debug(" created table: %p\n", pci->iommu_table);
1188 } else { 1192 } else {
1189 pr_debug(" found DMA window, table: %p\n", pci->iommu_table); 1193 pr_debug(" found DMA window, table: %p\n", pci->iommu_table);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 6d62072a7d5a..8bad880bd177 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -45,6 +45,13 @@
45#include "plpar_wrappers.h" 45#include "plpar_wrappers.h"
46#include "pseries.h" 46#include "pseries.h"
47 47
48/* Flag bits for H_BULK_REMOVE */
49#define HBR_REQUEST 0x4000000000000000UL
50#define HBR_RESPONSE 0x8000000000000000UL
51#define HBR_END 0xc000000000000000UL
52#define HBR_AVPN 0x0200000000000000UL
53#define HBR_ANDCOND 0x0100000000000000UL
54
48 55
49/* in hvCall.S */ 56/* in hvCall.S */
50EXPORT_SYMBOL(plpar_hcall); 57EXPORT_SYMBOL(plpar_hcall);
@@ -64,6 +71,9 @@ void vpa_init(int cpu)
64 if (cpu_has_feature(CPU_FTR_ALTIVEC)) 71 if (cpu_has_feature(CPU_FTR_ALTIVEC))
65 lppaca_of(cpu).vmxregs_in_use = 1; 72 lppaca_of(cpu).vmxregs_in_use = 1;
66 73
74 if (cpu_has_feature(CPU_FTR_ARCH_207S))
75 lppaca_of(cpu).ebb_regs_in_use = 1;
76
67 addr = __pa(&lppaca_of(cpu)); 77 addr = __pa(&lppaca_of(cpu));
68 ret = register_vpa(hwcpu, addr); 78 ret = register_vpa(hwcpu, addr);
69 79
@@ -136,7 +146,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
136 flags = 0; 146 flags = 0;
137 147
138 /* Make pHyp happy */ 148 /* Make pHyp happy */
139 if ((rflags & _PAGE_NO_CACHE) & !(rflags & _PAGE_WRITETHRU)) 149 if ((rflags & _PAGE_NO_CACHE) && !(rflags & _PAGE_WRITETHRU))
140 hpte_r &= ~_PAGE_COHERENT; 150 hpte_r &= ~_PAGE_COHERENT;
141 if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N)) 151 if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N))
142 flags |= H_COALESCE_CAND; 152 flags |= H_COALESCE_CAND;
@@ -240,7 +250,8 @@ static void pSeries_lpar_hptab_clear(void)
240static long pSeries_lpar_hpte_updatepp(unsigned long slot, 250static long pSeries_lpar_hpte_updatepp(unsigned long slot,
241 unsigned long newpp, 251 unsigned long newpp,
242 unsigned long vpn, 252 unsigned long vpn,
243 int psize, int ssize, int local) 253 int psize, int apsize,
254 int ssize, int local)
244{ 255{
245 unsigned long lpar_rc; 256 unsigned long lpar_rc;
246 unsigned long flags = (newpp & 7) | H_AVPN; 257 unsigned long flags = (newpp & 7) | H_AVPN;
@@ -328,7 +339,8 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
328} 339}
329 340
330static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn, 341static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
331 int psize, int ssize, int local) 342 int psize, int apsize,
343 int ssize, int local)
332{ 344{
333 unsigned long want_v; 345 unsigned long want_v;
334 unsigned long lpar_rc; 346 unsigned long lpar_rc;
@@ -345,6 +357,113 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
345 BUG_ON(lpar_rc != H_SUCCESS); 357 BUG_ON(lpar_rc != H_SUCCESS);
346} 358}
347 359
360/*
361 * Limit iterations holding pSeries_lpar_tlbie_lock to 3. We also need
362 * to make sure that we avoid bouncing the hypervisor tlbie lock.
363 */
364#define PPC64_HUGE_HPTE_BATCH 12
365
366static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
367 unsigned long *vpn, int count,
368 int psize, int ssize)
369{
370 unsigned long param[8];
371 int i = 0, pix = 0, rc;
372 unsigned long flags = 0;
373 int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
374
375 if (lock_tlbie)
376 spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
377
378 for (i = 0; i < count; i++) {
379
380 if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
381 pSeries_lpar_hpte_invalidate(slot[i], vpn[i], psize, 0,
382 ssize, 0);
383 } else {
384 param[pix] = HBR_REQUEST | HBR_AVPN | slot[i];
385 param[pix+1] = hpte_encode_avpn(vpn[i], psize, ssize);
386 pix += 2;
387 if (pix == 8) {
388 rc = plpar_hcall9(H_BULK_REMOVE, param,
389 param[0], param[1], param[2],
390 param[3], param[4], param[5],
391 param[6], param[7]);
392 BUG_ON(rc != H_SUCCESS);
393 pix = 0;
394 }
395 }
396 }
397 if (pix) {
398 param[pix] = HBR_END;
399 rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1],
400 param[2], param[3], param[4], param[5],
401 param[6], param[7]);
402 BUG_ON(rc != H_SUCCESS);
403 }
404
405 if (lock_tlbie)
406 spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
407}
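
plpar_hcall9() carries eight parameter words per call, i.e. four flags+AVPN pairs, so a PPC64_HUGE_HPTE_BATCH of 12 entries drains in exactly three hcalls, which is the iteration limit the comment above refers to. A standalone sketch of the fill-and-flush arithmetic:

    #include <stdio.h>

    #define BATCH     12 /* PPC64_HUGE_HPTE_BATCH */
    #define HCALL_MAX 8  /* parameter words per plpar_hcall9() */

    int main(void)
    {
    	int i, pix = 0, hcalls = 0;

    	for (i = 0; i < BATCH; i++) {
    		pix += 2;               /* request word + AVPN word */
    		if (pix == HCALL_MAX) { /* buffer full: one H_BULK_REMOVE */
    			hcalls++;
    			pix = 0;
    		}
    	}
    	if (pix)                        /* flush a trailing partial batch */
    		hcalls++;

    	/* 12 entries * 2 words / 8 words per call -> 3 hcalls */
    	printf("%d hcalls while holding the tlbie lock\n", hcalls);
    	return 0;
    }
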
408
409static void pSeries_lpar_hugepage_invalidate(struct mm_struct *mm,
410 unsigned char *hpte_slot_array,
411 unsigned long addr, int psize)
412{
413 int ssize = 0, i, index = 0;
414 unsigned long s_addr = addr;
415 unsigned int max_hpte_count, valid;
416 unsigned long vpn_array[PPC64_HUGE_HPTE_BATCH];
417 unsigned long slot_array[PPC64_HUGE_HPTE_BATCH];
418 unsigned long shift, hidx, vpn = 0, vsid, hash, slot;
419
420 shift = mmu_psize_defs[psize].shift;
421 max_hpte_count = 1U << (PMD_SHIFT - shift);
422
423 for (i = 0; i < max_hpte_count; i++) {
424 valid = hpte_valid(hpte_slot_array, i);
425 if (!valid)
426 continue;
427 hidx = hpte_hash_index(hpte_slot_array, i);
428
429 /* get the vpn */
430 addr = s_addr + (i * (1ul << shift));
431 if (!is_kernel_addr(addr)) {
432 ssize = user_segment_size(addr);
433 vsid = get_vsid(mm->context.id, addr, ssize);
434 WARN_ON(vsid == 0);
435 } else {
436 vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
437 ssize = mmu_kernel_ssize;
438 }
439
440 vpn = hpt_vpn(addr, vsid, ssize);
441 hash = hpt_hash(vpn, shift, ssize);
442 if (hidx & _PTEIDX_SECONDARY)
443 hash = ~hash;
444
445 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
446 slot += hidx & _PTEIDX_GROUP_IX;
447
448 slot_array[index] = slot;
449 vpn_array[index] = vpn;
450 if (index == PPC64_HUGE_HPTE_BATCH - 1) {
451 /*
452					 * Now do a bulk invalidate
453 */
454 __pSeries_lpar_hugepage_invalidate(slot_array,
455 vpn_array,
456 PPC64_HUGE_HPTE_BATCH,
457 psize, ssize);
458 index = 0;
459 } else
460 index++;
461 }
462 if (index)
463 __pSeries_lpar_hugepage_invalidate(slot_array, vpn_array,
464 index, psize, ssize);
465}
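
The hash-to-slot computation in the loop above can be exercised on its own; HPTES_PER_GROUP and the _PTEIDX_* masks are the kernel's values, while htab_hash_mask, hash, and hidx below are stand-ins for what boot-time setup, hpt_hash(), and the slot array provide:

    #include <inttypes.h>
    #include <stdio.h>

    #define HPTES_PER_GROUP   8
    #define _PTEIDX_SECONDARY 0x8
    #define _PTEIDX_GROUP_IX  0x7

    int main(void)
    {
    	uint64_t htab_hash_mask = 0x3ffff;  /* stand-in; set at boot */
    	uint64_t hash = 0xdeadbeef;         /* stand-in hpt_hash() result */
    	unsigned int hidx = 0xb;            /* stand-in slot-array entry */
    	uint64_t slot;

    	if (hidx & _PTEIDX_SECONDARY)       /* secondary hash: complement */
    		hash = ~hash;

    	slot  = (hash & htab_hash_mask) * HPTES_PER_GROUP;
    	slot += hidx & _PTEIDX_GROUP_IX;    /* index within the group */

    	printf("slot = 0x%" PRIx64 "\n", slot);
    	return 0;
    }
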
466
348static void pSeries_lpar_hpte_removebolted(unsigned long ea, 467static void pSeries_lpar_hpte_removebolted(unsigned long ea,
349 int psize, int ssize) 468 int psize, int ssize)
350{ 469{
@@ -356,17 +475,12 @@ static void pSeries_lpar_hpte_removebolted(unsigned long ea,
356 475
357 slot = pSeries_lpar_hpte_find(vpn, psize, ssize); 476 slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
358 BUG_ON(slot == -1); 477 BUG_ON(slot == -1);
359 478 /*
360 pSeries_lpar_hpte_invalidate(slot, vpn, psize, ssize, 0); 479 * lpar doesn't use the passed actual page size
480 */
481 pSeries_lpar_hpte_invalidate(slot, vpn, psize, 0, ssize, 0);
361} 482}
362 483
363/* Flag bits for H_BULK_REMOVE */
364#define HBR_REQUEST 0x4000000000000000UL
365#define HBR_RESPONSE 0x8000000000000000UL
366#define HBR_END 0xc000000000000000UL
367#define HBR_AVPN 0x0200000000000000UL
368#define HBR_ANDCOND 0x0100000000000000UL
369
370/* 484/*
371 * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie 485 * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
372 * lock. 486 * lock.
@@ -400,8 +514,11 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
400 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 514 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
401 slot += hidx & _PTEIDX_GROUP_IX; 515 slot += hidx & _PTEIDX_GROUP_IX;
402 if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { 516 if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
517 /*
518 * lpar doesn't use the passed actual page size
519 */
403 pSeries_lpar_hpte_invalidate(slot, vpn, psize, 520 pSeries_lpar_hpte_invalidate(slot, vpn, psize,
404 ssize, local); 521 0, ssize, local);
405 } else { 522 } else {
406 param[pix] = HBR_REQUEST | HBR_AVPN | slot; 523 param[pix] = HBR_REQUEST | HBR_AVPN | slot;
407 param[pix+1] = hpte_encode_avpn(vpn, psize, 524 param[pix+1] = hpte_encode_avpn(vpn, psize,
@@ -452,6 +569,7 @@ void __init hpte_init_lpar(void)
452 ppc_md.hpte_removebolted = pSeries_lpar_hpte_removebolted; 569 ppc_md.hpte_removebolted = pSeries_lpar_hpte_removebolted;
453 ppc_md.flush_hash_range = pSeries_lpar_flush_hash_range; 570 ppc_md.flush_hash_range = pSeries_lpar_flush_hash_range;
454 ppc_md.hpte_clear_all = pSeries_lpar_hptab_clear; 571 ppc_md.hpte_clear_all = pSeries_lpar_hptab_clear;
572 ppc_md.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
455} 573}
456 574
457#ifdef CONFIG_PPC_SMLPAR 575#ifdef CONFIG_PPC_SMLPAR
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index 8733a86ad52e..6a5f2b1f32ca 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -18,6 +18,7 @@
18#include <linux/spinlock.h> 18#include <linux/spinlock.h>
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include <linux/kmsg_dump.h> 20#include <linux/kmsg_dump.h>
21#include <linux/pstore.h>
21#include <linux/ctype.h> 22#include <linux/ctype.h>
22#include <linux/zlib.h> 23#include <linux/zlib.h>
23#include <asm/uaccess.h> 24#include <asm/uaccess.h>
@@ -29,6 +30,13 @@
29/* Max bytes to read/write in one go */ 30/* Max bytes to read/write in one go */
30#define NVRW_CNT 0x20 31#define NVRW_CNT 0x20
31 32
33/*
34 * Set the oops header version to distinguish between old and new format
35 * headers. The lnx,oops-log partition max size is 4000, so a header
36 * version > 4000 identifies the new header format.
37 */
38#define OOPS_HDR_VERSION 5000
39
32static unsigned int nvram_size; 40static unsigned int nvram_size;
33static int nvram_fetch, nvram_store; 41static int nvram_fetch, nvram_store;
34static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */ 42static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */
@@ -45,20 +53,23 @@ struct nvram_os_partition {
45 int min_size; /* minimum acceptable size (0 means req_size) */ 53 int min_size; /* minimum acceptable size (0 means req_size) */
46 long size; /* size of data portion (excluding err_log_info) */ 54 long size; /* size of data portion (excluding err_log_info) */
47 long index; /* offset of data portion of partition */ 55 long index; /* offset of data portion of partition */
56 bool os_partition; /* partition initialized by OS, not FW */
48}; 57};
49 58
50static struct nvram_os_partition rtas_log_partition = { 59static struct nvram_os_partition rtas_log_partition = {
51 .name = "ibm,rtas-log", 60 .name = "ibm,rtas-log",
52 .req_size = 2079, 61 .req_size = 2079,
53 .min_size = 1055, 62 .min_size = 1055,
54 .index = -1 63 .index = -1,
64 .os_partition = true
55}; 65};
56 66
57static struct nvram_os_partition oops_log_partition = { 67static struct nvram_os_partition oops_log_partition = {
58 .name = "lnx,oops-log", 68 .name = "lnx,oops-log",
59 .req_size = 4000, 69 .req_size = 4000,
60 .min_size = 2000, 70 .min_size = 2000,
61 .index = -1 71 .index = -1,
72 .os_partition = true
62}; 73};
63 74
64static const char *pseries_nvram_os_partitions[] = { 75static const char *pseries_nvram_os_partitions[] = {
@@ -67,6 +78,12 @@ static const char *pseries_nvram_os_partitions[] = {
67 NULL 78 NULL
68}; 79};
69 80
81struct oops_log_info {
82 u16 version;
83 u16 report_length;
84 u64 timestamp;
85} __attribute__((packed));
86
70static void oops_to_nvram(struct kmsg_dumper *dumper, 87static void oops_to_nvram(struct kmsg_dumper *dumper,
71 enum kmsg_dump_reason reason); 88 enum kmsg_dump_reason reason);
72 89
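
The packed header is 12 bytes. Because the old-format prefix was a bare u16 length that can never exceed the 4000-byte partition, any first word below OOPS_HDR_VERSION reliably marks an old-format record; nvram_pstore_read() below relies on exactly this. A standalone sketch:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define OOPS_HDR_VERSION 5000

    struct oops_log_info {
    	uint16_t version;
    	uint16_t report_length;
    	uint64_t timestamp;
    } __attribute__((packed));

    int main(void)
    {
    	/* 2 + 2 + 8 bytes, no padding thanks to packed */
    	assert(sizeof(struct oops_log_info) == 12);

    	uint16_t first_word = 1800; /* stand-in value read back from NVRAM */

    	/* Old headers stored only a u16 length (<= 4000), so a small
    	 * first word must be an old-style length, not a version. */
    	if (first_word < OOPS_HDR_VERSION)
    		printf("old header: length=%u\n", first_word);
    	else
    		printf("new header: version=%u\n", first_word);
    	return 0;
    }
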
@@ -83,28 +100,28 @@ static unsigned long last_unread_rtas_event; /* timestamp */
83 100
84 * big_oops_buf[] holds the uncompressed text we're capturing. 101 * big_oops_buf[] holds the uncompressed text we're capturing.
85 * 102 *
86 * oops_buf[] holds the compressed text, preceded by a prefix. 103 * oops_buf[] holds the compressed text, preceded by an oops header.
87 * The prefix is just a u16 holding the length of the compressed* text. 104 * The oops header has a u16 holding the header version (to differentiate
88 * (*Or uncompressed, if compression fails.) oops_buf[] gets written 105 * between old and new format headers), a u16 holding the length of
89 * to NVRAM. 106 * the compressed* text (*or uncompressed, if compression fails), and a u64
107 * holding the timestamp. oops_buf[] gets written to NVRAM.
90 * 108 *
91 * oops_len points to the prefix. oops_data points to the compressed text. 109 * oops_log_info points to the header. oops_data points to the compressed text.
92 * 110 *
93 * +- oops_buf 111 * +- oops_buf
94 * | +- oops_data 112 * | +- oops_data
95 * v v 113 * v v
96 * +------------+-----------------------------------------------+ 114 * +-----------+-----------+-----------+------------------------+
97 * | length | text | 115 * | version | length | timestamp | text |
98 * | (2 bytes) | (oops_data_sz bytes) | 116 * | (2 bytes) | (2 bytes) | (8 bytes) | (oops_data_sz bytes) |
99 * +------------+-----------------------------------------------+ 117 * +-----------+-----------+-----------+------------------------+
100 * ^ 118 * ^
101 * +- oops_len 119 * +- oops_log_info
102 * 120 *
103 * We preallocate these buffers during init to avoid kmalloc during oops/panic. 121 * We preallocate these buffers during init to avoid kmalloc during oops/panic.
104 */ 122 */
105static size_t big_oops_buf_sz; 123static size_t big_oops_buf_sz;
106static char *big_oops_buf, *oops_buf; 124static char *big_oops_buf, *oops_buf;
107static u16 *oops_len;
108static char *oops_data; 125static char *oops_data;
109static size_t oops_data_sz; 126static size_t oops_data_sz;
110 127
@@ -114,6 +131,30 @@ static size_t oops_data_sz;
114#define MEM_LEVEL 4 131#define MEM_LEVEL 4
115static struct z_stream_s stream; 132static struct z_stream_s stream;
116 133
134#ifdef CONFIG_PSTORE
135static struct nvram_os_partition of_config_partition = {
136 .name = "of-config",
137 .index = -1,
138 .os_partition = false
139};
140
141static struct nvram_os_partition common_partition = {
142 .name = "common",
143 .index = -1,
144 .os_partition = false
145};
146
147static enum pstore_type_id nvram_type_ids[] = {
148 PSTORE_TYPE_DMESG,
149 PSTORE_TYPE_PPC_RTAS,
150 PSTORE_TYPE_PPC_OF,
151 PSTORE_TYPE_PPC_COMMON,
152 -1
153};
154static int read_type;
155static unsigned long last_rtas_event;
156#endif
157
117static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index) 158static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index)
118{ 159{
119 unsigned int i; 160 unsigned int i;
@@ -275,48 +316,72 @@ int nvram_write_error_log(char * buff, int length,
275{ 316{
276 int rc = nvram_write_os_partition(&rtas_log_partition, buff, length, 317 int rc = nvram_write_os_partition(&rtas_log_partition, buff, length,
277 err_type, error_log_cnt); 318 err_type, error_log_cnt);
278 if (!rc) 319 if (!rc) {
279 last_unread_rtas_event = get_seconds(); 320 last_unread_rtas_event = get_seconds();
321#ifdef CONFIG_PSTORE
322 last_rtas_event = get_seconds();
323#endif
324 }
325
280 return rc; 326 return rc;
281} 327}
282 328
283/* nvram_read_error_log 329/* nvram_read_partition
284 * 330 *
285 * Reads nvram for error log for at most 'length' 331 * Reads nvram partition for at most 'length'
286 */ 332 */
287int nvram_read_error_log(char * buff, int length, 333int nvram_read_partition(struct nvram_os_partition *part, char *buff,
288 unsigned int * err_type, unsigned int * error_log_cnt) 334 int length, unsigned int *err_type,
335 unsigned int *error_log_cnt)
289{ 336{
290 int rc; 337 int rc;
291 loff_t tmp_index; 338 loff_t tmp_index;
292 struct err_log_info info; 339 struct err_log_info info;
293 340
294 if (rtas_log_partition.index == -1) 341 if (part->index == -1)
295 return -1; 342 return -1;
296 343
297 if (length > rtas_log_partition.size) 344 if (length > part->size)
298 length = rtas_log_partition.size; 345 length = part->size;
299 346
300 tmp_index = rtas_log_partition.index; 347 tmp_index = part->index;
301 348
302 rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index); 349 if (part->os_partition) {
303 if (rc <= 0) { 350 rc = ppc_md.nvram_read((char *)&info,
304 printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc); 351 sizeof(struct err_log_info),
305 return rc; 352 &tmp_index);
353 if (rc <= 0) {
354 pr_err("%s: Failed nvram_read (%d)\n", __FUNCTION__,
355 rc);
356 return rc;
357 }
306 } 358 }
307 359
308 rc = ppc_md.nvram_read(buff, length, &tmp_index); 360 rc = ppc_md.nvram_read(buff, length, &tmp_index);
309 if (rc <= 0) { 361 if (rc <= 0) {
310 printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc); 362 pr_err("%s: Failed nvram_read (%d)\n", __FUNCTION__, rc);
311 return rc; 363 return rc;
312 } 364 }
313 365
314 *error_log_cnt = info.seq_num; 366 if (part->os_partition) {
315 *err_type = info.error_type; 367 *error_log_cnt = info.seq_num;
368 *err_type = info.error_type;
369 }
316 370
317 return 0; 371 return 0;
318} 372}
319 373
374/* nvram_read_error_log
375 *
376 * Reads nvram for error log for at most 'length'
377 */
378int nvram_read_error_log(char *buff, int length,
379 unsigned int *err_type, unsigned int *error_log_cnt)
380{
381 return nvram_read_partition(&rtas_log_partition, buff, length,
382 err_type, error_log_cnt);
383}
384
320/* This doesn't actually zero anything, but it sets the event_logged 385/* This doesn't actually zero anything, but it sets the event_logged
321 * word to tell that this event is safely in syslog. 386 * word to tell that this event is safely in syslog.
322 */ 387 */
@@ -405,9 +470,339 @@ static int __init pseries_nvram_init_os_partition(struct nvram_os_partition
405 return 0; 470 return 0;
406} 471}
407 472
473/*
474 * Are we using the ibm,rtas-log for oops/panic reports? And if so,
475 * would logging this oops/panic overwrite an RTAS event that rtas_errd
476 * hasn't had a chance to read and process? Return 1 if so, else 0.
477 *
478 * We assume that if rtas_errd hasn't read the RTAS event in
479 * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to.
480 */
481static int clobbering_unread_rtas_event(void)
482{
483 return (oops_log_partition.index == rtas_log_partition.index
484 && last_unread_rtas_event
485 && get_seconds() - last_unread_rtas_event <=
486 NVRAM_RTAS_READ_TIMEOUT);
487}
488
489/* Derived from logfs_compress() */
490static int nvram_compress(const void *in, void *out, size_t inlen,
491 size_t outlen)
492{
493 int err, ret;
494
495 ret = -EIO;
496 err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS,
497 MEM_LEVEL, Z_DEFAULT_STRATEGY);
498 if (err != Z_OK)
499 goto error;
500
501 stream.next_in = in;
502 stream.avail_in = inlen;
503 stream.total_in = 0;
504 stream.next_out = out;
505 stream.avail_out = outlen;
506 stream.total_out = 0;
507
508 err = zlib_deflate(&stream, Z_FINISH);
509 if (err != Z_STREAM_END)
510 goto error;
511
512 err = zlib_deflateEnd(&stream);
513 if (err != Z_OK)
514 goto error;
515
516 if (stream.total_out >= stream.total_in)
517 goto error;
518
519 ret = stream.total_out;
520error:
521 return ret;
522}
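
nvram_compress() is a one-shot zlib deflate: anything short of Z_STREAM_END counts as failure, and output that did not actually shrink is rejected so the caller can fall back to logging uncompressed text. A standalone userspace analogue; the level, window, and memory values mirror COMPR_LEVEL, WINDOW_BITS, and MEM_LEVEL from this file and are assumptions here:

    /* build: cc demo.c -lz */
    #include <stdio.h>
    #include <string.h>
    #include <zlib.h>

    static int one_shot_compress(const void *in, void *out,
    			     size_t inlen, size_t outlen)
    {
    	z_stream s;
    	int ret = -1;

    	memset(&s, 0, sizeof(s));
    	if (deflateInit2(&s, 6 /* COMPR_LEVEL */, Z_DEFLATED,
    			 12 /* WINDOW_BITS */, 4 /* MEM_LEVEL */,
    			 Z_DEFAULT_STRATEGY) != Z_OK)
    		return ret;

    	s.next_in = (Bytef *)in;
    	s.avail_in = inlen;
    	s.next_out = (Bytef *)out;
    	s.avail_out = outlen;

    	/* Single Z_FINISH pass; reject output that failed to shrink. */
    	if (deflate(&s, Z_FINISH) == Z_STREAM_END && s.total_out < s.total_in)
    		ret = (int)s.total_out;
    	deflateEnd(&s);
    	return ret;
    }

    int main(void)
    {
    	char in[512], out[512];

    	memset(in, 'A', sizeof(in)); /* highly compressible input */
    	printf("compressed %zu -> %d bytes\n", sizeof(in),
    	       one_shot_compress(in, out, sizeof(in), sizeof(out)));
    	return 0;
    }
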
523
524/* Compress the text from big_oops_buf into oops_buf. */
525static int zip_oops(size_t text_len)
526{
527 struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
528 int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len,
529 oops_data_sz);
530 if (zipped_len < 0) {
531 pr_err("nvram: compression failed; returned %d\n", zipped_len);
532 pr_err("nvram: logging uncompressed oops/panic report\n");
533 return -1;
534 }
535 oops_hdr->version = OOPS_HDR_VERSION;
536 oops_hdr->report_length = (u16) zipped_len;
537 oops_hdr->timestamp = get_seconds();
538 return 0;
539}
540
541#ifdef CONFIG_PSTORE
542/* Derived from logfs_uncompress */
543int nvram_decompress(void *in, void *out, size_t inlen, size_t outlen)
544{
545 int err, ret;
546
547 ret = -EIO;
548 err = zlib_inflateInit(&stream);
549 if (err != Z_OK)
550 goto error;
551
552 stream.next_in = in;
553 stream.avail_in = inlen;
554 stream.total_in = 0;
555 stream.next_out = out;
556 stream.avail_out = outlen;
557 stream.total_out = 0;
558
559 err = zlib_inflate(&stream, Z_FINISH);
560 if (err != Z_STREAM_END)
561 goto error;
562
563 err = zlib_inflateEnd(&stream);
564 if (err != Z_OK)
565 goto error;
566
567 ret = stream.total_out;
568error:
569 return ret;
570}
571
572static int nvram_pstore_open(struct pstore_info *psi)
573{
574 /* Reset the iterator to start reading partitions again */
575 read_type = -1;
576 return 0;
577}
578
579/**
580 * nvram_pstore_write - pstore write callback for nvram
581 * @type: Type of message logged
582 * @reason: reason behind dump (oops/panic)
583 * @id: identifier to indicate the write performed
584 * @part: pstore writes data to the registered buffer in parts;
585 * this is the number of the part being written.
586 * @count: Indicates oops count
587 * @hsize: Size of header added by pstore
588 * @size: number of bytes written to the registered buffer
589 * @psi: registered pstore_info structure
590 *
591 * Called by pstore_dump() when an oops or panic report is logged in the
592 * printk buffer.
593 * Returns 0 on successful write.
594 */
595static int nvram_pstore_write(enum pstore_type_id type,
596 enum kmsg_dump_reason reason,
597 u64 *id, unsigned int part, int count,
598 size_t hsize, size_t size,
599 struct pstore_info *psi)
600{
601 int rc;
602 unsigned int err_type = ERR_TYPE_KERNEL_PANIC;
603 struct oops_log_info *oops_hdr = (struct oops_log_info *) oops_buf;
604
605 /* part 1 has the recent messages from printk buffer */
606 if (part > 1 || type != PSTORE_TYPE_DMESG ||
607 clobbering_unread_rtas_event())
608 return -1;
609
610 oops_hdr->version = OOPS_HDR_VERSION;
611 oops_hdr->report_length = (u16) size;
612 oops_hdr->timestamp = get_seconds();
613
614 if (big_oops_buf) {
615 rc = zip_oops(size);
616 /*
617 * If compression fails copy recent log messages from
618 * big_oops_buf to oops_data.
619 */
620 if (rc != 0) {
621 size_t diff = size - oops_data_sz + hsize;
622
623 if (size > oops_data_sz) {
624 memcpy(oops_data, big_oops_buf, hsize);
625 memcpy(oops_data + hsize, big_oops_buf + diff,
626 oops_data_sz - hsize);
627
628 oops_hdr->report_length = (u16) oops_data_sz;
629 } else
630 memcpy(oops_data, big_oops_buf, size);
631 } else
632 err_type = ERR_TYPE_KERNEL_PANIC_GZ;
633 }
634
635 rc = nvram_write_os_partition(&oops_log_partition, oops_buf,
636 (int) (sizeof(*oops_hdr) + oops_hdr->report_length), err_type,
637 count);
638
639 if (rc != 0)
640 return rc;
641
642 *id = part;
643 return 0;
644}
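
When compression fails and the captured text exceeds the partition's data area, the write path above keeps the pstore-generated header plus only the newest messages. The arithmetic always lands on exactly oops_data_sz bytes; the sizes below are stand-ins:

    #include <stdio.h>

    int main(void)
    {
    	size_t size = 6000;         /* stand-in: bytes pstore captured */
    	size_t oops_data_sz = 3986; /* stand-in: partition data capacity */
    	size_t hsize = 50;          /* stand-in: pstore header size */
    	size_t diff = size - oops_data_sz + hsize;

    	/* Keep the header, then skip 'diff' bytes so the most recent
    	 * (oops_data_sz - hsize) bytes of text remain. */
    	printf("copy [0,%zu) + [%zu,%zu) = %zu bytes\n",
    	       hsize, diff, size, hsize + (size - diff));
    	return 0;
    }
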
645
646/*
647 * Reads the oops/panic report, rtas, of-config and common partition.
648 * Returns the length of the data we read from each partition.
649 * Returns 0 once all partitions have been read.
650 */
651static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type,
652 int *count, struct timespec *time, char **buf,
653 struct pstore_info *psi)
654{
655 struct oops_log_info *oops_hdr;
656 unsigned int err_type, id_no, size = 0;
657 struct nvram_os_partition *part = NULL;
658 char *buff = NULL, *big_buff = NULL;
659 int sig = 0;
660 loff_t p;
661
662 read_type++;
663
664 switch (nvram_type_ids[read_type]) {
665 case PSTORE_TYPE_DMESG:
666 part = &oops_log_partition;
667 *type = PSTORE_TYPE_DMESG;
668 break;
669 case PSTORE_TYPE_PPC_RTAS:
670 part = &rtas_log_partition;
671 *type = PSTORE_TYPE_PPC_RTAS;
672 time->tv_sec = last_rtas_event;
673 time->tv_nsec = 0;
674 break;
675 case PSTORE_TYPE_PPC_OF:
676 sig = NVRAM_SIG_OF;
677 part = &of_config_partition;
678 *type = PSTORE_TYPE_PPC_OF;
679 *id = PSTORE_TYPE_PPC_OF;
680 time->tv_sec = 0;
681 time->tv_nsec = 0;
682 break;
683 case PSTORE_TYPE_PPC_COMMON:
684 sig = NVRAM_SIG_SYS;
685 part = &common_partition;
686 *type = PSTORE_TYPE_PPC_COMMON;
687 *id = PSTORE_TYPE_PPC_COMMON;
688 time->tv_sec = 0;
689 time->tv_nsec = 0;
690 break;
691 default:
692 return 0;
693 }
694
695 if (!part->os_partition) {
696 p = nvram_find_partition(part->name, sig, &size);
697 if (p <= 0) {
698 pr_err("nvram: Failed to find partition %s, "
699 "err %d\n", part->name, (int)p);
700 return 0;
701 }
702 part->index = p;
703 part->size = size;
704 }
705
706 buff = kmalloc(part->size, GFP_KERNEL);
707
708 if (!buff)
709 return -ENOMEM;
710
711 if (nvram_read_partition(part, buff, part->size, &err_type, &id_no)) {
712 kfree(buff);
713 return 0;
714 }
715
716 *count = 0;
717
718 if (part->os_partition)
719 *id = id_no;
720
721 if (nvram_type_ids[read_type] == PSTORE_TYPE_DMESG) {
722 int length, unzipped_len;
723 size_t hdr_size;
724
725 oops_hdr = (struct oops_log_info *)buff;
726 if (oops_hdr->version < OOPS_HDR_VERSION) {
727 /* Old format oops header had 2-byte record size */
728 hdr_size = sizeof(u16);
729 length = oops_hdr->version;
730 time->tv_sec = 0;
731 time->tv_nsec = 0;
732 } else {
733 hdr_size = sizeof(*oops_hdr);
734 length = oops_hdr->report_length;
735 time->tv_sec = oops_hdr->timestamp;
736 time->tv_nsec = 0;
737 }
738 *buf = kmalloc(length, GFP_KERNEL);
739 if (*buf == NULL)
740 return -ENOMEM;
741 memcpy(*buf, buff + hdr_size, length);
742 kfree(buff);
743
744 if (err_type == ERR_TYPE_KERNEL_PANIC_GZ) {
745 big_buff = kmalloc(big_oops_buf_sz, GFP_KERNEL);
746 if (!big_buff)
747 return -ENOMEM;
748
749 unzipped_len = nvram_decompress(*buf, big_buff,
750 length, big_oops_buf_sz);
751
752 if (unzipped_len < 0) {
753 pr_err("nvram: decompression failed, returned "
754 "rc %d\n", unzipped_len);
755 kfree(big_buff);
756 } else {
757 *buf = big_buff;
758 length = unzipped_len;
759 }
760 }
761 return length;
762 }
763
764 *buf = buff;
765 return part->size;
766}
767
768static struct pstore_info nvram_pstore_info = {
769 .owner = THIS_MODULE,
770 .name = "nvram",
771 .open = nvram_pstore_open,
772 .read = nvram_pstore_read,
773 .write = nvram_pstore_write,
774};
775
776static int nvram_pstore_init(void)
777{
778 int rc = 0;
779
780 if (big_oops_buf) {
781 nvram_pstore_info.buf = big_oops_buf;
782 nvram_pstore_info.bufsize = big_oops_buf_sz;
783 } else {
784 nvram_pstore_info.buf = oops_data;
785 nvram_pstore_info.bufsize = oops_data_sz;
786 }
787
788 rc = pstore_register(&nvram_pstore_info);
789 if (rc != 0)
790 pr_err("nvram: pstore_register() failed, defaults to "
791 "kmsg_dump; returned %d\n", rc);
792
793 return rc;
794}
795#else
796static int nvram_pstore_init(void)
797{
798 return -1;
799}
800#endif
801
408static void __init nvram_init_oops_partition(int rtas_partition_exists) 802static void __init nvram_init_oops_partition(int rtas_partition_exists)
409{ 803{
410 int rc; 804 int rc;
805 size_t size;
411 806
412 rc = pseries_nvram_init_os_partition(&oops_log_partition); 807 rc = pseries_nvram_init_os_partition(&oops_log_partition);
413 if (rc != 0) { 808 if (rc != 0) {
@@ -425,9 +820,8 @@ static void __init nvram_init_oops_partition(int rtas_partition_exists)
425 oops_log_partition.name); 820 oops_log_partition.name);
426 return; 821 return;
427 } 822 }
428 oops_len = (u16*) oops_buf; 823 oops_data = oops_buf + sizeof(struct oops_log_info);
429 oops_data = oops_buf + sizeof(u16); 824 oops_data_sz = oops_log_partition.size - sizeof(struct oops_log_info);
430 oops_data_sz = oops_log_partition.size - sizeof(u16);
431 825
432 /* 826 /*
433 * Figure compression (preceded by elimination of each line's <n> 827 * Figure compression (preceded by elimination of each line's <n>
@@ -437,8 +831,9 @@ static void __init nvram_init_oops_partition(int rtas_partition_exists)
437 big_oops_buf_sz = (oops_data_sz * 100) / 45; 831 big_oops_buf_sz = (oops_data_sz * 100) / 45;
438 big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL); 832 big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
439 if (big_oops_buf) { 833 if (big_oops_buf) {
440 stream.workspace = kmalloc(zlib_deflate_workspacesize( 834 size = max(zlib_deflate_workspacesize(WINDOW_BITS, MEM_LEVEL),
441 WINDOW_BITS, MEM_LEVEL), GFP_KERNEL); 835 zlib_inflate_workspacesize());
836 stream.workspace = kmalloc(size, GFP_KERNEL);
442 if (!stream.workspace) { 837 if (!stream.workspace) {
443 pr_err("nvram: No memory for compression workspace; " 838 pr_err("nvram: No memory for compression workspace; "
444 "skipping compression of %s partition data\n", 839 "skipping compression of %s partition data\n",
@@ -452,6 +847,11 @@ static void __init nvram_init_oops_partition(int rtas_partition_exists)
452 stream.workspace = NULL; 847 stream.workspace = NULL;
453 } 848 }
454 849
850 rc = nvram_pstore_init();
851
852 if (!rc)
853 return;
854
455 rc = kmsg_dump_register(&nvram_kmsg_dumper); 855 rc = kmsg_dump_register(&nvram_kmsg_dumper);
456 if (rc != 0) { 856 if (rc != 0) {
457 pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc); 857 pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc);
@@ -501,70 +901,6 @@ int __init pSeries_nvram_init(void)
501 return 0; 901 return 0;
502} 902}
503 903
504/*
505 * Are we using the ibm,rtas-log for oops/panic reports? And if so,
506 * would logging this oops/panic overwrite an RTAS event that rtas_errd
507 * hasn't had a chance to read and process? Return 1 if so, else 0.
508 *
509 * We assume that if rtas_errd hasn't read the RTAS event in
510 * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to.
511 */
512static int clobbering_unread_rtas_event(void)
513{
514 return (oops_log_partition.index == rtas_log_partition.index
515 && last_unread_rtas_event
516 && get_seconds() - last_unread_rtas_event <=
517 NVRAM_RTAS_READ_TIMEOUT);
518}
519
520/* Derived from logfs_compress() */
521static int nvram_compress(const void *in, void *out, size_t inlen,
522 size_t outlen)
523{
524 int err, ret;
525
526 ret = -EIO;
527 err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS,
528 MEM_LEVEL, Z_DEFAULT_STRATEGY);
529 if (err != Z_OK)
530 goto error;
531
532 stream.next_in = in;
533 stream.avail_in = inlen;
534 stream.total_in = 0;
535 stream.next_out = out;
536 stream.avail_out = outlen;
537 stream.total_out = 0;
538
539 err = zlib_deflate(&stream, Z_FINISH);
540 if (err != Z_STREAM_END)
541 goto error;
542
543 err = zlib_deflateEnd(&stream);
544 if (err != Z_OK)
545 goto error;
546
547 if (stream.total_out >= stream.total_in)
548 goto error;
549
550 ret = stream.total_out;
551error:
552 return ret;
553}
554
555/* Compress the text from big_oops_buf into oops_buf. */
556static int zip_oops(size_t text_len)
557{
558 int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len,
559 oops_data_sz);
560 if (zipped_len < 0) {
561 pr_err("nvram: compression failed; returned %d\n", zipped_len);
562 pr_err("nvram: logging uncompressed oops/panic report\n");
563 return -1;
564 }
565 *oops_len = (u16) zipped_len;
566 return 0;
567}
568 904
569/* 905/*
570 * This is our kmsg_dump callback, called after an oops or panic report 906 * This is our kmsg_dump callback, called after an oops or panic report
@@ -576,6 +912,7 @@ static int zip_oops(size_t text_len)
576static void oops_to_nvram(struct kmsg_dumper *dumper, 912static void oops_to_nvram(struct kmsg_dumper *dumper,
577 enum kmsg_dump_reason reason) 913 enum kmsg_dump_reason reason)
578{ 914{
915 struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
579 static unsigned int oops_count = 0; 916 static unsigned int oops_count = 0;
580 static bool panicking = false; 917 static bool panicking = false;
581 static DEFINE_SPINLOCK(lock); 918 static DEFINE_SPINLOCK(lock);
@@ -619,14 +956,17 @@ static void oops_to_nvram(struct kmsg_dumper *dumper,
619 } 956 }
620 if (rc != 0) { 957 if (rc != 0) {
621 kmsg_dump_rewind(dumper); 958 kmsg_dump_rewind(dumper);
622 kmsg_dump_get_buffer(dumper, true, 959 kmsg_dump_get_buffer(dumper, false,
623 oops_data, oops_data_sz, &text_len); 960 oops_data, oops_data_sz, &text_len);
624 err_type = ERR_TYPE_KERNEL_PANIC; 961 err_type = ERR_TYPE_KERNEL_PANIC;
625 *oops_len = (u16) text_len; 962 oops_hdr->version = OOPS_HDR_VERSION;
963 oops_hdr->report_length = (u16) text_len;
964 oops_hdr->timestamp = get_seconds();
626 } 965 }
627 966
628 (void) nvram_write_os_partition(&oops_log_partition, oops_buf, 967 (void) nvram_write_os_partition(&oops_log_partition, oops_buf,
629 (int) (sizeof(*oops_len) + *oops_len), err_type, ++oops_count); 968 (int) (sizeof(*oops_hdr) + oops_hdr->report_length), err_type,
969 ++oops_count);
630 970
631 spin_unlock_irqrestore(&lock, flags); 971 spin_unlock_irqrestore(&lock, flags);
632} 972}
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index c91b22be9288..efe61374f6ea 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -64,91 +64,6 @@ pcibios_find_pci_bus(struct device_node *dn)
64} 64}
65EXPORT_SYMBOL_GPL(pcibios_find_pci_bus); 65EXPORT_SYMBOL_GPL(pcibios_find_pci_bus);
66 66
67/**
68 * __pcibios_remove_pci_devices - remove all devices under this bus
69 * @bus: the indicated PCI bus
70 * @purge_pe: destroy the PE on removal of PCI devices
71 *
72 * Remove all of the PCI devices under this bus both from the
73 * linux pci device tree, and from the powerpc EEH address cache.
74 * By default, the corresponding PE will be destroyed during the
75 * normal PCI hotplug path. For PCI hotplug during EEH recovery,
76 * the corresponding PE won't be destroyed and deallocated.
77 */
78void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe)
79{
80 struct pci_dev *dev, *tmp;
81 struct pci_bus *child_bus;
82
83 /* First go down child busses */
84 list_for_each_entry(child_bus, &bus->children, node)
85 __pcibios_remove_pci_devices(child_bus, purge_pe);
86
87 pr_debug("PCI: Removing devices on bus %04x:%02x\n",
88 pci_domain_nr(bus), bus->number);
89 list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) {
90 pr_debug(" * Removing %s...\n", pci_name(dev));
91 eeh_remove_bus_device(dev, purge_pe);
92 pci_stop_and_remove_bus_device(dev);
93 }
94}
95
96/**
97 * pcibios_remove_pci_devices - remove all devices under this bus
98 *
99 * Remove all of the PCI devices under this bus both from the
100 * linux pci device tree, and from the powerpc EEH address cache.
101 */
102void pcibios_remove_pci_devices(struct pci_bus *bus)
103{
104 __pcibios_remove_pci_devices(bus, 1);
105}
106EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices);
107
108/**
109 * pcibios_add_pci_devices - adds new pci devices to bus
110 *
111 * This routine will find and fixup new pci devices under
112 * the indicated bus. This routine presumes that there
113 * might already be some devices under this bridge, so
114 * it carefully tries to add only new devices. (And that
115 * is how this routine differs from other, similar pcibios
116 * routines.)
117 */
118void pcibios_add_pci_devices(struct pci_bus * bus)
119{
120 int slotno, num, mode, pass, max;
121 struct pci_dev *dev;
122 struct device_node *dn = pci_bus_to_OF_node(bus);
123
124 eeh_add_device_tree_early(dn);
125
126 mode = PCI_PROBE_NORMAL;
127 if (ppc_md.pci_probe_mode)
128 mode = ppc_md.pci_probe_mode(bus);
129
130 if (mode == PCI_PROBE_DEVTREE) {
131 /* use ofdt-based probe */
132 of_rescan_bus(dn, bus);
133 } else if (mode == PCI_PROBE_NORMAL) {
134 /* use legacy probe */
135 slotno = PCI_SLOT(PCI_DN(dn->child)->devfn);
136 num = pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
137 if (!num)
138 return;
139 pcibios_setup_bus_devices(bus);
140 max = bus->busn_res.start;
141 for (pass=0; pass < 2; pass++)
142 list_for_each_entry(dev, &bus->devices, bus_list) {
143 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
144 dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
145 max = pci_scan_bridge(bus, dev, max, pass);
146 }
147 }
148 pcibios_finish_adding_to_bus(bus);
149}
150EXPORT_SYMBOL_GPL(pcibios_add_pci_devices);
151
152struct pci_controller *init_phb_dynamic(struct device_node *dn) 67struct pci_controller *init_phb_dynamic(struct device_node *dn)
153{ 68{
154 struct pci_controller *phb; 69 struct pci_controller *phb;
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index c4dfccd3a3d9..721c0586b284 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -83,7 +83,7 @@ static void handle_system_shutdown(char event_modifier)
83 switch (event_modifier) { 83 switch (event_modifier) {
84 case EPOW_SHUTDOWN_NORMAL: 84 case EPOW_SHUTDOWN_NORMAL:
85 pr_emerg("Firmware initiated power off"); 85 pr_emerg("Firmware initiated power off");
86 orderly_poweroff(1); 86 orderly_poweroff(true);
87 break; 87 break;
88 88
89 case EPOW_SHUTDOWN_ON_UPS: 89 case EPOW_SHUTDOWN_ON_UPS:
@@ -95,13 +95,13 @@ static void handle_system_shutdown(char event_modifier)
95 pr_emerg("Loss of system critical functions reported by " 95 pr_emerg("Loss of system critical functions reported by "
96 "firmware"); 96 "firmware");
97 pr_emerg("Check RTAS error log for details"); 97 pr_emerg("Check RTAS error log for details");
98 orderly_poweroff(1); 98 orderly_poweroff(true);
99 break; 99 break;
100 100
101 case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH: 101 case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH:
102 pr_emerg("Ambient temperature too high reported by firmware"); 102 pr_emerg("Ambient temperature too high reported by firmware");
103 pr_emerg("Check RTAS error log for details"); 103 pr_emerg("Check RTAS error log for details");
104 orderly_poweroff(1); 104 orderly_poweroff(true);
105 break; 105 break;
106 106
107 default: 107 default:
@@ -162,7 +162,7 @@ void rtas_parse_epow_errlog(struct rtas_error_log *log)
162 162
163 case EPOW_SYSTEM_HALT: 163 case EPOW_SYSTEM_HALT:
164 pr_emerg("Firmware initiated power off"); 164 pr_emerg("Firmware initiated power off");
165 orderly_poweroff(1); 165 orderly_poweroff(true);
166 break; 166 break;
167 167
168 case EPOW_MAIN_ENCLOSURE: 168 case EPOW_MAIN_ENCLOSURE:
@@ -287,6 +287,9 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
287 unsigned long *savep; 287 unsigned long *savep;
288 struct rtas_error_log *h, *errhdr = NULL; 288 struct rtas_error_log *h, *errhdr = NULL;
289 289
290 /* Mask top two bits */
291 regs->gpr[3] &= ~(0x3UL << 62);
292
290 if (!VALID_FWNMI_BUFFER(regs->gpr[3])) { 293 if (!VALID_FWNMI_BUFFER(regs->gpr[3])) {
291 printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]); 294 printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
292 return NULL; 295 return NULL;
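
The added masking clears the top two bits of r3 before the range check; without it, a buffer pointer handed back with those bits set would fail VALID_FWNMI_BUFFER even though the underlying offset is valid. A standalone illustration of the mask (the r3 value is a stand-in):

    #include <inttypes.h>
    #include <stdio.h>

    int main(void)
    {
    	/* stand-in r3: a valid buffer offset with the top two bits set */
    	uint64_t r3 = 0xc000000000007400ULL;

    	r3 &= ~(0x3ULL << 62); /* mask top two bits, as the patch does */

    	printf("masked r3 = 0x%" PRIx64 "\n", r3);
    	return 0;
    }
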
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 12bc8c3663ad..306643cc9dbc 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -192,7 +192,7 @@ static int smp_pSeries_cpu_bootable(unsigned int nr)
192 /* Special case - we inhibit secondary thread startup 192 /* Special case - we inhibit secondary thread startup
193 * during boot if the user requests it. 193 * during boot if the user requests it.
194 */ 194 */
195 if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) { 195 if (system_state == SYSTEM_BOOTING && cpu_has_feature(CPU_FTR_SMT)) {
196 if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0) 196 if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
197 return 0; 197 return 0;
198 if (smt_enabled_at_boot 198 if (smt_enabled_at_boot
diff --git a/arch/powerpc/relocs_check.pl b/arch/powerpc/relocs_check.pl
index 7f5b83808862..3f46e8b9c56d 100755
--- a/arch/powerpc/relocs_check.pl
+++ b/arch/powerpc/relocs_check.pl
@@ -7,7 +7,7 @@
7# as published by the Free Software Foundation; either version 7# as published by the Free Software Foundation; either version
8# 2 of the License, or (at your option) any later version. 8# 2 of the License, or (at your option) any later version.
9 9
10# This script checks the relcoations of a vmlinux for "suspicious" 10# This script checks the relocations of a vmlinux for "suspicious"
11# relocations. 11# relocations.
12 12
13use strict; 13use strict;
@@ -28,7 +28,7 @@ open(FD, "$objdump -R $vmlinux|") or die;
28while (<FD>) { 28while (<FD>) {
29 study $_; 29 study $_;
30 30
31 # Only look at relcoation lines. 31 # Only look at relocation lines.
32 next if (!/\s+R_/); 32 next if (!/\s+R_/);
33 33
34 # These relocations are okay 34 # These relocations are okay
@@ -45,7 +45,7 @@ while (<FD>) {
45 /\bR_PPC_ADDR16_HA\b/ or /\bR_PPC_RELATIVE\b/ or 45 /\bR_PPC_ADDR16_HA\b/ or /\bR_PPC_RELATIVE\b/ or
46 /\bR_PPC_NONE\b/); 46 /\bR_PPC_NONE\b/);
47 47
48 # If we see this type of relcoation it's an idication that 48 # If we see this type of relocation it's an indication that
49 # we /may/ be using an old version of binutils. 49 # we /may/ be using an old version of binutils.
50 if (/R_PPC64_UADDR64/) { 50 if (/R_PPC64_UADDR64/) {
51 $old_binutils++; 51 $old_binutils++;
@@ -61,6 +61,6 @@ if ($bad_relocs_count) {
61} 61}
62 62
63if ($old_binutils) { 63if ($old_binutils) {
64 print "WARNING: You need at binutils >= 2.19 to build a ". 64 print "WARNING: You need at least binutils >= 2.19 to build a ".
65 "CONFIG_RELCOATABLE kernel\n"; 65 "CONFIG_RELOCATABLE kernel\n";
66} 66}
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index 99464a7bdb3b..f67ac900d870 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -4,6 +4,8 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
4 4
5mpic-msi-obj-$(CONFIG_PCI_MSI) += mpic_msi.o mpic_u3msi.o mpic_pasemi_msi.o 5mpic-msi-obj-$(CONFIG_PCI_MSI) += mpic_msi.o mpic_u3msi.o mpic_pasemi_msi.o
6obj-$(CONFIG_MPIC) += mpic.o $(mpic-msi-obj-y) 6obj-$(CONFIG_MPIC) += mpic.o $(mpic-msi-obj-y)
7obj-$(CONFIG_MPIC_TIMER) += mpic_timer.o
8obj-$(CONFIG_FSL_MPIC_TIMER_WAKEUP) += fsl_mpic_timer_wakeup.o
7mpic-msgr-obj-$(CONFIG_MPIC_MSGR) += mpic_msgr.o 9mpic-msgr-obj-$(CONFIG_MPIC_MSGR) += mpic_msgr.o
8obj-$(CONFIG_MPIC) += mpic.o $(mpic-msi-obj-y) $(mpic-msgr-obj-y) 10obj-$(CONFIG_MPIC) += mpic.o $(mpic-msi-obj-y) $(mpic-msgr-obj-y)
9obj-$(CONFIG_PPC_EPAPR_HV_PIC) += ehv_pic.o 11obj-$(CONFIG_PPC_EPAPR_HV_PIC) += ehv_pic.o
diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c
index d4fa03f2b6ac..5e6ff38ea69f 100644
--- a/arch/powerpc/sysdev/cpm1.c
+++ b/arch/powerpc/sysdev/cpm1.c
@@ -120,6 +120,7 @@ static irqreturn_t cpm_error_interrupt(int irq, void *dev)
120 120
121static struct irqaction cpm_error_irqaction = { 121static struct irqaction cpm_error_irqaction = {
122 .handler = cpm_error_interrupt, 122 .handler = cpm_error_interrupt,
123 .flags = IRQF_NO_THREAD,
123 .name = "error", 124 .name = "error",
124}; 125};
125 126
diff --git a/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c b/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c
new file mode 100644
index 000000000000..1707bf04dec6
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c
@@ -0,0 +1,161 @@
1/*
2 * MPIC timer wakeup driver
3 *
4 * Copyright 2013 Freescale Semiconductor, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2 of the License, or (at your
9 * option) any later version.
10 */
11
12#include <linux/kernel.h>
13#include <linux/slab.h>
14#include <linux/errno.h>
15#include <linux/module.h>
16#include <linux/interrupt.h>
17#include <linux/device.h>
18
19#include <asm/mpic_timer.h>
20#include <asm/mpic.h>
21
22struct fsl_mpic_timer_wakeup {
23 struct mpic_timer *timer;
24 struct work_struct free_work;
25};
26
27static struct fsl_mpic_timer_wakeup *fsl_wakeup;
28static DEFINE_MUTEX(sysfs_lock);
29
30static void fsl_free_resource(struct work_struct *ws)
31{
32 struct fsl_mpic_timer_wakeup *wakeup =
33 container_of(ws, struct fsl_mpic_timer_wakeup, free_work);
34
35 mutex_lock(&sysfs_lock);
36
37 if (wakeup->timer) {
38 disable_irq_wake(wakeup->timer->irq);
39 mpic_free_timer(wakeup->timer);
40 }
41
42 wakeup->timer = NULL;
43 mutex_unlock(&sysfs_lock);
44}
45
46static irqreturn_t fsl_mpic_timer_irq(int irq, void *dev_id)
47{
48 struct fsl_mpic_timer_wakeup *wakeup = dev_id;
49
50 schedule_work(&wakeup->free_work);
51
52 return wakeup->timer ? IRQ_HANDLED : IRQ_NONE;
53}
54
55static ssize_t fsl_timer_wakeup_show(struct device *dev,
56 struct device_attribute *attr,
57 char *buf)
58{
59 struct timeval interval;
60 int val = 0;
61
62 mutex_lock(&sysfs_lock);
63 if (fsl_wakeup->timer) {
64 mpic_get_remain_time(fsl_wakeup->timer, &interval);
65 val = interval.tv_sec + 1;
66 }
67 mutex_unlock(&sysfs_lock);
68
69 return sprintf(buf, "%d\n", val);
70}
71
72static ssize_t fsl_timer_wakeup_store(struct device *dev,
73 struct device_attribute *attr,
74 const char *buf,
75 size_t count)
76{
77 struct timeval interval;
78 int ret;
79
80 interval.tv_usec = 0;
81 if (kstrtol(buf, 0, &interval.tv_sec))
82 return -EINVAL;
83
84 mutex_lock(&sysfs_lock);
85
86 if (fsl_wakeup->timer) {
87 disable_irq_wake(fsl_wakeup->timer->irq);
88 mpic_free_timer(fsl_wakeup->timer);
89 fsl_wakeup->timer = NULL;
90 }
91
92 if (!interval.tv_sec) {
93 mutex_unlock(&sysfs_lock);
94 return count;
95 }
96
97 fsl_wakeup->timer = mpic_request_timer(fsl_mpic_timer_irq,
98 fsl_wakeup, &interval);
99 if (!fsl_wakeup->timer) {
100 mutex_unlock(&sysfs_lock);
101 return -EINVAL;
102 }
103
104 ret = enable_irq_wake(fsl_wakeup->timer->irq);
105 if (ret) {
106 mpic_free_timer(fsl_wakeup->timer);
107 fsl_wakeup->timer = NULL;
108 mutex_unlock(&sysfs_lock);
109
110 return ret;
111 }
112
113 mpic_start_timer(fsl_wakeup->timer);
114
115 mutex_unlock(&sysfs_lock);
116
117 return count;
118}
119
120static struct device_attribute mpic_attributes = __ATTR(timer_wakeup, 0644,
121 fsl_timer_wakeup_show, fsl_timer_wakeup_store);
122
123static int __init fsl_wakeup_sys_init(void)
124{
125 int ret;
126
127 fsl_wakeup = kzalloc(sizeof(struct fsl_mpic_timer_wakeup), GFP_KERNEL);
128 if (!fsl_wakeup)
129 return -ENOMEM;
130
131 INIT_WORK(&fsl_wakeup->free_work, fsl_free_resource);
132
133 ret = device_create_file(mpic_subsys.dev_root, &mpic_attributes);
134 if (ret)
135 kfree(fsl_wakeup);
136
137 return ret;
138}
139
140static void __exit fsl_wakeup_sys_exit(void)
141{
142 device_remove_file(mpic_subsys.dev_root, &mpic_attributes);
143
144 mutex_lock(&sysfs_lock);
145
146 if (fsl_wakeup->timer) {
147 disable_irq_wake(fsl_wakeup->timer->irq);
148 mpic_free_timer(fsl_wakeup->timer);
149 }
150
151 kfree(fsl_wakeup);
152
153 mutex_unlock(&sysfs_lock);
154}
155
156module_init(fsl_wakeup_sys_init);
157module_exit(fsl_wakeup_sys_exit);
158
159MODULE_DESCRIPTION("Freescale MPIC global timer wakeup driver");
160MODULE_LICENSE("GPL v2");
161MODULE_AUTHOR("Wang Dongsheng <dongsheng.wang@freescale.com>");
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 3cc2f9159ab1..1be54faf60dd 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -48,6 +48,12 @@
48#define DBG(fmt...) 48#define DBG(fmt...)
49#endif 49#endif
50 50
51struct bus_type mpic_subsys = {
52 .name = "mpic",
53 .dev_name = "mpic",
54};
55EXPORT_SYMBOL_GPL(mpic_subsys);
56
51static struct mpic *mpics; 57static struct mpic *mpics;
52static struct mpic *mpic_primary; 58static struct mpic *mpic_primary;
53static DEFINE_RAW_SPINLOCK(mpic_lock); 59static DEFINE_RAW_SPINLOCK(mpic_lock);
@@ -920,6 +926,22 @@ int mpic_set_irq_type(struct irq_data *d, unsigned int flow_type)
920 return IRQ_SET_MASK_OK_NOCOPY; 926 return IRQ_SET_MASK_OK_NOCOPY;
921} 927}
922 928
929static int mpic_irq_set_wake(struct irq_data *d, unsigned int on)
930{
931 struct irq_desc *desc = container_of(d, struct irq_desc, irq_data);
932 struct mpic *mpic = mpic_from_irq_data(d);
933
934 if (!(mpic->flags & MPIC_FSL))
935 return -ENXIO;
936
937 if (on)
938 desc->action->flags |= IRQF_NO_SUSPEND;
939 else
940 desc->action->flags &= ~IRQF_NO_SUSPEND;
941
942 return 0;
943}
944
923void mpic_set_vector(unsigned int virq, unsigned int vector) 945void mpic_set_vector(unsigned int virq, unsigned int vector)
924{ 946{
925 struct mpic *mpic = mpic_from_irq(virq); 947 struct mpic *mpic = mpic_from_irq(virq);
@@ -957,6 +979,7 @@ static struct irq_chip mpic_irq_chip = {
957 .irq_unmask = mpic_unmask_irq, 979 .irq_unmask = mpic_unmask_irq,
958 .irq_eoi = mpic_end_irq, 980 .irq_eoi = mpic_end_irq,
959 .irq_set_type = mpic_set_irq_type, 981 .irq_set_type = mpic_set_irq_type,
982 .irq_set_wake = mpic_irq_set_wake,
960}; 983};
961 984
962#ifdef CONFIG_SMP 985#ifdef CONFIG_SMP
@@ -971,6 +994,7 @@ static struct irq_chip mpic_tm_chip = {
971 .irq_mask = mpic_mask_tm, 994 .irq_mask = mpic_mask_tm,
972 .irq_unmask = mpic_unmask_tm, 995 .irq_unmask = mpic_unmask_tm,
973 .irq_eoi = mpic_end_irq, 996 .irq_eoi = mpic_end_irq,
997 .irq_set_wake = mpic_irq_set_wake,
974}; 998};
975 999
976#ifdef CONFIG_MPIC_U3_HT_IRQS 1000#ifdef CONFIG_MPIC_U3_HT_IRQS
@@ -1173,10 +1197,33 @@ static struct irq_domain_ops mpic_host_ops = {
1173 .xlate = mpic_host_xlate, 1197 .xlate = mpic_host_xlate,
1174}; 1198};
1175 1199
1200static u32 fsl_mpic_get_version(struct mpic *mpic)
1201{
1202 u32 brr1;
1203
1204 if (!(mpic->flags & MPIC_FSL))
1205 return 0;
1206
1207 brr1 = _mpic_read(mpic->reg_type, &mpic->thiscpuregs,
1208 MPIC_FSL_BRR1);
1209
1210 return brr1 & MPIC_FSL_BRR1_VER;
1211}
1212
1176/* 1213/*
1177 * Exported functions 1214 * Exported functions
1178 */ 1215 */
1179 1216
1217u32 fsl_mpic_primary_get_version(void)
1218{
1219 struct mpic *mpic = mpic_primary;
1220
1221 if (mpic)
1222 return fsl_mpic_get_version(mpic);
1223
1224 return 0;
1225}
1226
1180struct mpic * __init mpic_alloc(struct device_node *node, 1227struct mpic * __init mpic_alloc(struct device_node *node,
1181 phys_addr_t phys_addr, 1228 phys_addr_t phys_addr,
1182 unsigned int flags, 1229 unsigned int flags,
@@ -1323,7 +1370,6 @@ struct mpic * __init mpic_alloc(struct device_node *node,
1323 mpic_map(mpic, mpic->paddr, &mpic->tmregs, MPIC_INFO(TIMER_BASE), 0x1000); 1370 mpic_map(mpic, mpic->paddr, &mpic->tmregs, MPIC_INFO(TIMER_BASE), 0x1000);
1324 1371
1325 if (mpic->flags & MPIC_FSL) { 1372 if (mpic->flags & MPIC_FSL) {
1326 u32 brr1;
1327 int ret; 1373 int ret;
1328 1374
1329 /* 1375 /*
@@ -1334,9 +1380,7 @@ struct mpic * __init mpic_alloc(struct device_node *node,
1334 mpic_map(mpic, mpic->paddr, &mpic->thiscpuregs, 1380 mpic_map(mpic, mpic->paddr, &mpic->thiscpuregs,
1335 MPIC_CPU_THISBASE, 0x1000); 1381 MPIC_CPU_THISBASE, 0x1000);
1336 1382
1337 brr1 = _mpic_read(mpic->reg_type, &mpic->thiscpuregs, 1383 fsl_version = fsl_mpic_get_version(mpic);
1338 MPIC_FSL_BRR1);
1339 fsl_version = brr1 & MPIC_FSL_BRR1_VER;
1340 1384
1341 /* Error interrupt mask register (EIMR) is required for 1385 /* Error interrupt mask register (EIMR) is required for
1342 * handling individual device error interrupts. EIMR 1386 * handling individual device error interrupts. EIMR
@@ -1526,9 +1570,7 @@ void __init mpic_init(struct mpic *mpic)
1526 mpic_cpu_write(MPIC_INFO(CPU_CURRENT_TASK_PRI), 0xf); 1570 mpic_cpu_write(MPIC_INFO(CPU_CURRENT_TASK_PRI), 0xf);
1527 1571
1528 if (mpic->flags & MPIC_FSL) { 1572 if (mpic->flags & MPIC_FSL) {
1529 u32 brr1 = _mpic_read(mpic->reg_type, &mpic->thiscpuregs, 1573 u32 version = fsl_mpic_get_version(mpic);
1530 MPIC_FSL_BRR1);
1531 u32 version = brr1 & MPIC_FSL_BRR1_VER;
1532 1574
1533 /* 1575 /*
1534 * Timer group B is present at the latest in MPIC 3.1 (e.g. 1576 * Timer group B is present at the latest in MPIC 3.1 (e.g.
@@ -1999,6 +2041,8 @@ static struct syscore_ops mpic_syscore_ops = {
1999static int mpic_init_sys(void) 2041static int mpic_init_sys(void)
2000{ 2042{
2001 register_syscore_ops(&mpic_syscore_ops); 2043 register_syscore_ops(&mpic_syscore_ops);
2044 subsys_system_register(&mpic_subsys, NULL);
2045
2002 return 0; 2046 return 0;
2003} 2047}
2004 2048
diff --git a/arch/powerpc/sysdev/mpic_timer.c b/arch/powerpc/sysdev/mpic_timer.c
new file mode 100644
index 000000000000..c06db92a4fb1
--- /dev/null
+++ b/arch/powerpc/sysdev/mpic_timer.c
@@ -0,0 +1,593 @@
1/*
2 * MPIC timer driver
3 *
4 * Copyright 2013 Freescale Semiconductor, Inc.
5 * Author: Dongsheng Wang <Dongsheng.Wang@freescale.com>
6 * Li Yang <leoli@freescale.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2 of the License, or (at your
11 * option) any later version.
12 */
13
14#include <linux/kernel.h>
15#include <linux/init.h>
16#include <linux/module.h>
17#include <linux/errno.h>
18#include <linux/mm.h>
19#include <linux/interrupt.h>
20#include <linux/slab.h>
21#include <linux/of.h>
22#include <linux/of_device.h>
23#include <linux/syscore_ops.h>
24#include <sysdev/fsl_soc.h>
25#include <asm/io.h>
26
27#include <asm/mpic_timer.h>
28
29#define FSL_GLOBAL_TIMER 0x1
30
31/* Clock Ratio
32 * Divide by 64 0x00000300
33 * Divide by 32 0x00000200
34 * Divide by 16 0x00000100
35 * Divide by 8 0x00000000 (Hardware default div)
36 */
37#define MPIC_TIMER_TCR_CLKDIV 0x00000300
38
39#define MPIC_TIMER_TCR_ROVR_OFFSET 24
40
41#define TIMER_STOP 0x80000000
42#define TIMERS_PER_GROUP 4
43#define MAX_TICKS (~0U >> 1)
44#define MAX_TICKS_CASCADE (~0U)
45#define TIMER_OFFSET(num) (1 << (TIMERS_PER_GROUP - 1 - num))
46
47/* tv_usec should be less than ONE_SECOND, otherwise use tv_sec */
48#define ONE_SECOND 1000000
49
50struct timer_regs {
51 u32 gtccr;
52 u32 res0[3];
53 u32 gtbcr;
54 u32 res1[3];
55 u32 gtvpr;
56 u32 res2[3];
57 u32 gtdr;
58 u32 res3[3];
59};
60
61struct cascade_priv {
62 u32 tcr_value; /* TCR register: CASC & ROVR value */
63 unsigned int cascade_map; /* cascade map */
64 unsigned int timer_num; /* cascade control timer */
65};
66
67struct timer_group_priv {
68 struct timer_regs __iomem *regs;
69 struct mpic_timer timer[TIMERS_PER_GROUP];
70 struct list_head node;
71 unsigned int timerfreq;
72 unsigned int idle;
73 unsigned int flags;
74 spinlock_t lock;
75 void __iomem *group_tcr;
76};
77
78static struct cascade_priv cascade_timer[] = {
79 /* cascade timer 0 and 1 */
80 {0x1, 0xc, 0x1},
81 /* cascade timer 1 and 2 */
82 {0x2, 0x6, 0x2},
83 /* cascade timer 2 and 3 */
84 {0x4, 0x3, 0x3}
85};
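
The cascade_map values follow from TIMER_OFFSET() above: timer n owns idle bit (TIMERS_PER_GROUP - 1 - n), so timers 0 and 1 together give the 0xc map of the first pair. A standalone check:

    #include <stdio.h>

    #define TIMERS_PER_GROUP 4
    #define TIMER_OFFSET(num) (1 << (TIMERS_PER_GROUP - 1 - (num)))

    int main(void)
    {
    	int num;

    	for (num = 0; num < TIMERS_PER_GROUP; num++)
    		printf("timer %d -> idle bit 0x%x\n", num, TIMER_OFFSET(num));

    	/* timers 0 and 1: 0x8 | 0x4 == 0xc, the first cascade_map above */
    	printf("timers 0+1 map = 0x%x\n", TIMER_OFFSET(0) | TIMER_OFFSET(1));
    	return 0;
    }
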
86
87static LIST_HEAD(timer_group_list);
88
89static void convert_ticks_to_time(struct timer_group_priv *priv,
90 const u64 ticks, struct timeval *time)
91{
92 u64 tmp_sec;
93
94 time->tv_sec = (__kernel_time_t)div_u64(ticks, priv->timerfreq);
95 tmp_sec = (u64)time->tv_sec * (u64)priv->timerfreq;
96
97 time->tv_usec = (__kernel_suseconds_t)
98 div_u64((ticks - tmp_sec) * 1000000, priv->timerfreq);
99
100 return;
101}
102
103/* the time set by the user is converted to "ticks" */
104static int convert_time_to_ticks(struct timer_group_priv *priv,
105 const struct timeval *time, u64 *ticks)
106{
107 u64 max_value; /* prevent u64 overflow */
108 u64 tmp = 0;
109
110 u64 tmp_sec;
111 u64 tmp_ms;
112 u64 tmp_us;
113
114 max_value = div_u64(ULLONG_MAX, priv->timerfreq);
115
116 if (time->tv_sec > max_value ||
117 (time->tv_sec == max_value && time->tv_usec > 0))
118 return -EINVAL;
119
120 tmp_sec = (u64)time->tv_sec * (u64)priv->timerfreq;
121 tmp += tmp_sec;
122
123 tmp_ms = time->tv_usec / 1000;
124 tmp_ms = div_u64((u64)tmp_ms * (u64)priv->timerfreq, 1000);
125 tmp += tmp_ms;
126
127 tmp_us = time->tv_usec % 1000;
128 tmp_us = div_u64((u64)tmp_us * (u64)priv->timerfreq, 1000000);
129 tmp += tmp_us;
130
131 *ticks = tmp;
132
133 return 0;
134}
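
convert_time_to_ticks() splits the microsecond part into milliseconds plus a sub-millisecond remainder so each multiply stays comfortably inside a u64. The same arithmetic standalone, with a stand-in timer frequency:

    #include <inttypes.h>
    #include <stdio.h>

    int main(void)
    {
    	uint64_t freq = 12500000; /* stand-in; the driver reads the real rate */
    	uint64_t sec = 3, usec = 250000;
    	uint64_t ticks;

    	ticks  = sec * freq;                       /* whole seconds */
    	ticks += (usec / 1000) * freq / 1000;      /* millisecond part */
    	ticks += (usec % 1000) * freq / 1000000;   /* sub-millisecond part */

    	/* 3.25 s at 12.5 MHz -> 40625000 ticks */
    	printf("%" PRIu64 ".%06" PRIu64 " s -> %" PRIu64 " ticks\n",
    	       sec, usec, ticks);
    	return 0;
    }
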
135
136/* detect whether there is a cascade timer available */
137static struct mpic_timer *detect_idle_cascade_timer(
138 struct timer_group_priv *priv)
139{
140 struct cascade_priv *casc_priv;
141 unsigned int map;
142 unsigned int array_size = ARRAY_SIZE(cascade_timer);
143 unsigned int num;
144 unsigned int i;
145 unsigned long flags;
146
147 casc_priv = cascade_timer;
148 for (i = 0; i < array_size; i++) {
149 spin_lock_irqsave(&priv->lock, flags);
150 map = casc_priv->cascade_map & priv->idle;
151 if (map == casc_priv->cascade_map) {
152 num = casc_priv->timer_num;
153 priv->timer[num].cascade_handle = casc_priv;
154
155 /* set timer busy */
156 priv->idle &= ~casc_priv->cascade_map;
157 spin_unlock_irqrestore(&priv->lock, flags);
158 return &priv->timer[num];
159 }
160 spin_unlock_irqrestore(&priv->lock, flags);
161 casc_priv++;
162 }
163
164 return NULL;
165}
166
167static int set_cascade_timer(struct timer_group_priv *priv, u64 ticks,
168 unsigned int num)
169{
170 struct cascade_priv *casc_priv;
171 u32 tcr;
172 u32 tmp_ticks;
173 u32 rem_ticks;
174
175 /* set group tcr reg for cascade */
176 casc_priv = priv->timer[num].cascade_handle;
177 if (!casc_priv)
178 return -EINVAL;
179
180 tcr = casc_priv->tcr_value |
181 (casc_priv->tcr_value << MPIC_TIMER_TCR_ROVR_OFFSET);
182 setbits32(priv->group_tcr, tcr);
183
184 tmp_ticks = div_u64_rem(ticks, MAX_TICKS_CASCADE, &rem_ticks);
185
186 out_be32(&priv->regs[num].gtccr, 0);
187 out_be32(&priv->regs[num].gtbcr, tmp_ticks | TIMER_STOP);
188
189 out_be32(&priv->regs[num - 1].gtccr, 0);
190 out_be32(&priv->regs[num - 1].gtbcr, rem_ticks);
191
192 return 0;
193}
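
set_cascade_timer() splits the count with div_u64_rem(): the cascade control timer holds the quotient (multiples of MAX_TICKS_CASCADE) and its partner the remainder. The same split standalone, with a stand-in count:

    #include <inttypes.h>
    #include <stdio.h>

    #define MAX_TICKS_CASCADE (~0U) /* capacity of one 32-bit timer */

    int main(void)
    {
    	uint64_t ticks = 12884901885ULL; /* stand-in: needs more than 32 bits */
    	uint64_t quot = ticks / MAX_TICKS_CASCADE;
    	uint32_t rem  = ticks % MAX_TICKS_CASCADE;

    	/* quot -> control timer's gtbcr, rem -> its partner's gtbcr */
    	printf("quot=%" PRIu64 " rem=%" PRIu32 "\n", quot, rem);
    	return 0;
    }
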
194
195static struct mpic_timer *get_cascade_timer(struct timer_group_priv *priv,
196 u64 ticks)
197{
198 struct mpic_timer *allocated_timer;
199
200 /* Two cascade timers: Support the maximum time */
201 const u64 max_ticks = (u64)MAX_TICKS * (u64)MAX_TICKS_CASCADE;
202 int ret;
203
204 if (ticks > max_ticks)
205 return NULL;
206
207 /* detect idle timer */
208 allocated_timer = detect_idle_cascade_timer(priv);
209 if (!allocated_timer)
210 return NULL;
211
212 /* set ticks to timer */
213 ret = set_cascade_timer(priv, ticks, allocated_timer->num);
214 if (ret < 0)
215 return NULL;
216
217 return allocated_timer;
218}

static struct mpic_timer *get_timer(const struct timeval *time)
{
	struct timer_group_priv *priv;
	struct mpic_timer *timer;

	u64 ticks;
	unsigned int num;
	unsigned int i;
	unsigned long flags;
	int ret;

	list_for_each_entry(priv, &timer_group_list, node) {
		ret = convert_time_to_ticks(priv, time, &ticks);
		if (ret < 0)
			return NULL;

		if (ticks > MAX_TICKS) {
			if (!(priv->flags & FSL_GLOBAL_TIMER))
				return NULL;

			timer = get_cascade_timer(priv, ticks);
			if (!timer)
				continue;

			return timer;
		}

		for (i = 0; i < TIMERS_PER_GROUP; i++) {
			/* single timer: allocate from the highest-numbered timer down */
			num = TIMERS_PER_GROUP - 1 - i;
			spin_lock_irqsave(&priv->lock, flags);
			if (priv->idle & (1 << num)) {
				/* mark timer busy */
				priv->idle &= ~(1 << num);
				/* set ticks & stop timer */
				out_be32(&priv->regs[num].gtbcr,
						ticks | TIMER_STOP);
				out_be32(&priv->regs[num].gtccr, 0);
				priv->timer[num].cascade_handle = NULL;
				spin_unlock_irqrestore(&priv->lock, flags);
				return &priv->timer[num];
			}
			spin_unlock_irqrestore(&priv->lock, flags);
		}
	}

	return NULL;
}
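
/*
 * Illustration (not driver code) of the idle bitmap used above, assuming
 * TIMERS_PER_GROUP == 4: with priv->idle == 0xb (timers 0, 1 and 3 free),
 * the reverse scan tries num = 3, 2, 1, 0 in order and hands out timer 3
 * first, leaving priv->idle == 0x3; the same bit, TIMER_OFFSET(num), is
 * set again by mpic_free_timer() when the timer is released.
 */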

/**
 * mpic_start_timer - start hardware timer
 * @handle: the timer to be started.
 *
 * Once the interval programmed by mpic_request_timer() elapses, the
 * hardware interrupt fires and the registered fn(dev) handler is called.
 */
void mpic_start_timer(struct mpic_timer *handle)
{
	struct timer_group_priv *priv = container_of(handle,
			struct timer_group_priv, timer[handle->num]);

	clrbits32(&priv->regs[handle->num].gtbcr, TIMER_STOP);
}
EXPORT_SYMBOL(mpic_start_timer);

/**
 * mpic_stop_timer - stop hardware timer
 * @handle: the timer to be stopped
 *
 * The timer keeps generating periodic interrupts until it is stopped
 * with this function, which also clears the current count.
 */
void mpic_stop_timer(struct mpic_timer *handle)
{
	struct timer_group_priv *priv = container_of(handle,
			struct timer_group_priv, timer[handle->num]);
	struct cascade_priv *casc_priv;

	setbits32(&priv->regs[handle->num].gtbcr, TIMER_STOP);

	/* clear the current count; for a cascade pair, clear both halves */
	casc_priv = priv->timer[handle->num].cascade_handle;
	out_be32(&priv->regs[handle->num].gtccr, 0);
	if (casc_priv)
		out_be32(&priv->regs[handle->num - 1].gtccr, 0);
}
EXPORT_SYMBOL(mpic_stop_timer);

/**
 * mpic_get_remain_time - get timer remaining time
 * @handle: the timer to be queried.
 * @time: returns the time remaining until the timer expires
 *
 * Query the time remaining before the timer next expires.
 */
void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time)
{
	struct timer_group_priv *priv = container_of(handle,
			struct timer_group_priv, timer[handle->num]);
	struct cascade_priv *casc_priv;

	u64 ticks;
	u32 tmp_ticks;

	casc_priv = priv->timer[handle->num].cascade_handle;
	if (casc_priv) {
		/* combine both counters of the cascade pair */
		tmp_ticks = in_be32(&priv->regs[handle->num].gtccr);
		ticks = (u64)tmp_ticks * (u64)MAX_TICKS_CASCADE;
		tmp_ticks = in_be32(&priv->regs[handle->num - 1].gtccr);
		ticks += tmp_ticks;
	} else {
		ticks = in_be32(&priv->regs[handle->num].gtccr);
	}

	convert_ticks_to_time(priv, ticks, time);
}
EXPORT_SYMBOL(mpic_get_remain_time);

/**
 * mpic_free_timer - free hardware timer
 * @handle: the timer to be removed.
 *
 * Free the timer.
 *
 * Note: cannot be called from interrupt context, since it calls
 * free_irq(), which may sleep.
 */
void mpic_free_timer(struct mpic_timer *handle)
{
	struct timer_group_priv *priv = container_of(handle,
			struct timer_group_priv, timer[handle->num]);

	struct cascade_priv *casc_priv;
	unsigned long flags;

	mpic_stop_timer(handle);

	casc_priv = priv->timer[handle->num].cascade_handle;

	free_irq(priv->timer[handle->num].irq, priv->timer[handle->num].dev);

	spin_lock_irqsave(&priv->lock, flags);
	if (casc_priv) {
		u32 tcr;

		tcr = casc_priv->tcr_value | (casc_priv->tcr_value <<
				MPIC_TIMER_TCR_ROVR_OFFSET);
		clrbits32(priv->group_tcr, tcr);
		priv->idle |= casc_priv->cascade_map;
		priv->timer[handle->num].cascade_handle = NULL;
	} else {
		priv->idle |= TIMER_OFFSET(handle->num);
	}
	spin_unlock_irqrestore(&priv->lock, flags);
}
EXPORT_SYMBOL(mpic_free_timer);

/**
 * mpic_request_timer - get a hardware timer
 * @fn: interrupt handler function
 * @dev: cookie passed to @fn, as with request_irq()
 * @time: interval until the timer expires
 *
 * Allocates an idle hardware timer, programs the requested interval and
 * requests its interrupt. Returns a timer handle on success, or NULL on
 * failure.
 */
struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev,
		const struct timeval *time)
{
	struct mpic_timer *allocated_timer;
	int ret;

	if (list_empty(&timer_group_list))
		return NULL;

	if (!(time->tv_sec + time->tv_usec) ||
	    time->tv_sec < 0 || time->tv_usec < 0)
		return NULL;

	if (time->tv_usec > ONE_SECOND)
		return NULL;

	allocated_timer = get_timer(time);
	if (!allocated_timer)
		return NULL;

	ret = request_irq(allocated_timer->irq, fn,
			IRQF_TRIGGER_LOW, "global-timer", dev);
	if (ret) {
		mpic_free_timer(allocated_timer);
		return NULL;
	}

	allocated_timer->dev = dev;

	return allocated_timer;
}
EXPORT_SYMBOL(mpic_request_timer);
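
/*
 * Minimal usage sketch for the API above. This is hypothetical caller
 * code, not part of this file; my_timer_handler and my_dev are made-up
 * names:
 *
 *	static irqreturn_t my_timer_handler(int irq, void *dev)
 *	{
 *		// "dev" is the my_dev cookie passed below
 *		return IRQ_HANDLED;
 *	}
 *
 *	struct timeval tv = { .tv_sec = 1, .tv_usec = 0 };
 *	struct mpic_timer *t;
 *
 *	t = mpic_request_timer(my_timer_handler, my_dev, &tv);
 *	if (!t)
 *		return -ENODEV;
 *	mpic_start_timer(t);	// begin counting down
 *	...
 *	mpic_stop_timer(t);	// stop and clear the count
 *	mpic_free_timer(t);	// releases the irq; not from irq context
 */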

static int timer_group_get_freq(struct device_node *np,
		struct timer_group_priv *priv)
{
	u32 div;

	if (priv->flags & FSL_GLOBAL_TIMER) {
		struct device_node *dn;

		dn = of_find_compatible_node(NULL, NULL, "fsl,mpic");
		if (dn) {
			of_property_read_u32(dn, "clock-frequency",
					&priv->timerfreq);
			of_node_put(dn);
		}
	}

	if (!priv->timerfreq)
		return -EINVAL;

	if (priv->flags & FSL_GLOBAL_TIMER) {
		div = (1 << (MPIC_TIMER_TCR_CLKDIV >> 8)) * 8;
		priv->timerfreq /= div;
	}

	return 0;
}
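
/*
 * Example of the divider math above, assuming MPIC_TIMER_TCR_CLKDIV is
 * defined earlier in this file as 0x00000300:
 *
 *	div = (1 << (0x300 >> 8)) * 8 = (1 << 3) * 8 = 64
 *
 * so a 400000000 Hz "clock-frequency" yields priv->timerfreq = 6250000.
 */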

static int timer_group_get_irq(struct device_node *np,
		struct timer_group_priv *priv)
{
	const u32 all_timer[] = { 0, TIMERS_PER_GROUP };
	const u32 *p;
	u32 offset;
	u32 count;

	unsigned int i;
	unsigned int j;
	unsigned int irq_index = 0;
	unsigned int irq;
	int len;

	p = of_get_property(np, "fsl,available-ranges", &len);
	if (p && len % (2 * sizeof(u32)) != 0) {
		pr_err("%s: malformed available-ranges property.\n",
				np->full_name);
		return -EINVAL;
	}

	if (!p) {
		p = all_timer;
		len = sizeof(all_timer);
	}

	len /= 2 * sizeof(u32);

	for (i = 0; i < len; i++) {
		offset = p[i * 2];
		count = p[i * 2 + 1];
		for (j = 0; j < count; j++) {
			irq = irq_of_parse_and_map(np, irq_index);
			if (!irq) {
				pr_err("%s: irq parse and map failed.\n",
						np->full_name);
				return -EINVAL;
			}

			/* set timer idle */
			priv->idle |= TIMER_OFFSET(offset + j);
			priv->timer[offset + j].irq = irq;
			priv->timer[offset + j].num = offset + j;
			irq_index++;
		}
	}

	return 0;
}

static void timer_group_init(struct device_node *np)
{
	struct timer_group_priv *priv;
	unsigned int i = 0;
	int ret;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv) {
		pr_err("%s: cannot allocate memory for group.\n",
				np->full_name);
		return;
	}

	if (of_device_is_compatible(np, "fsl,mpic-global-timer"))
		priv->flags |= FSL_GLOBAL_TIMER;

	priv->regs = of_iomap(np, i++);
	if (!priv->regs) {
		pr_err("%s: cannot ioremap timer register address.\n",
				np->full_name);
		goto out;
	}

	if (priv->flags & FSL_GLOBAL_TIMER) {
		priv->group_tcr = of_iomap(np, i++);
		if (!priv->group_tcr) {
			pr_err("%s: cannot ioremap tcr address.\n",
					np->full_name);
			goto out;
		}
	}

	ret = timer_group_get_freq(np, priv);
	if (ret < 0) {
		pr_err("%s: cannot get timer frequency.\n", np->full_name);
		goto out;
	}

	ret = timer_group_get_irq(np, priv);
	if (ret < 0) {
		pr_err("%s: cannot get timer irqs.\n", np->full_name);
		goto out;
	}

	spin_lock_init(&priv->lock);

	/* Init FSL timer hardware */
	if (priv->flags & FSL_GLOBAL_TIMER)
		setbits32(priv->group_tcr, MPIC_TIMER_TCR_CLKDIV);

	list_add_tail(&priv->node, &timer_group_list);

	return;

out:
	if (priv->regs)
		iounmap(priv->regs);

	if (priv->group_tcr)
		iounmap(priv->group_tcr);

	kfree(priv);
}

static void mpic_timer_resume(void)
{
	struct timer_group_priv *priv;

	list_for_each_entry(priv, &timer_group_list, node) {
		/* Init FSL timer hardware */
		if (priv->flags & FSL_GLOBAL_TIMER)
			setbits32(priv->group_tcr, MPIC_TIMER_TCR_CLKDIV);
	}
}
569
570static const struct of_device_id mpic_timer_ids[] = {
571 { .compatible = "fsl,mpic-global-timer", },
572 {},
573};
574
575static struct syscore_ops mpic_timer_syscore_ops = {
576 .resume = mpic_timer_resume,
577};
578
579static int __init mpic_timer_init(void)
580{
581 struct device_node *np = NULL;
582
583 for_each_matching_node(np, mpic_timer_ids)
584 timer_group_init(np);
585
586 register_syscore_ops(&mpic_timer_syscore_ops);
587
588 if (list_empty(&timer_group_list))
589 return -ENODEV;
590
591 return 0;
592}
593subsys_initcall(mpic_timer_init);