author     Ingo Molnar <mingo@kernel.org>    2016-03-16 04:01:55 -0400
committer  Ingo Molnar <mingo@kernel.org>    2016-03-16 04:01:55 -0400
commit     ba4e06d68ea4fd2be401d7226c68941892d6bbaf (patch)
tree       a9a7125a8c88ba543e4fcfb907869b97688dee3c
parent     743146db071c4a828159211a295d12ff4f61752f (diff)
parent     710d60cbf1b312a8075a2158cbfbbd9c66132dcc (diff)
Merge branch 'linus' into x86/urgent, to pick up dependencies for a fix
Signed-off-by: Ingo Molnar <mingo@kernel.org>
261 files changed, 7703 insertions, 2756 deletions
diff --git a/Documentation/devicetree/bindings/interrupt-controller/al,alpine-msix.txt b/Documentation/devicetree/bindings/interrupt-controller/al,alpine-msix.txt new file mode 100644 index 000000000000..f6f1c14bf99b --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/al,alpine-msix.txt | |||
@@ -0,0 +1,26 @@ | |||
1 | Alpine MSIX controller | ||
2 | |||
3 | See arm,gic-v3.txt for SPI and MSI definitions. | ||
4 | |||
5 | Required properties: | ||
6 | |||
7 | - compatible: should be "al,alpine-msix" | ||
8 | - reg: physical base address and size of the registers | ||
9 | - interrupt-parent: specifies the parent interrupt controller. | ||
10 | - interrupt-controller: identifies the node as an interrupt controller | ||
11 | - msi-controller: identifies the node as a PCI Message Signaled Interrupt | ||
12 | controller | ||
13 | - al,msi-base-spi: SPI base of the MSI frame | ||
14 | - al,msi-num-spis: number of SPIs assigned to the MSI frame, relative to SPI0 | ||
15 | |||
16 | Example: | ||
17 | |||
18 | msix: msix { | ||
19 | compatible = "al,alpine-msix"; | ||
20 | reg = <0x0 0xfbe00000 0x0 0x100000>; | ||
21 | interrupt-parent = <&gic>; | ||
22 | interrupt-controller; | ||
23 | msi-controller; | ||
24 | al,msi-base-spi = <160>; | ||
25 | al,msi-num-spis = <160>; | ||
26 | }; | ||
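
As a rough illustration of how a driver consumes this binding, the sketch below
reads the properties above with standard OF helpers. It is a minimal sketch, not
the actual Alpine MSIX driver; the function name and error handling are
hypothetical.

	#include <linux/kernel.h>
	#include <linux/of.h>
	#include <linux/of_address.h>

	/* Hypothetical parse helper for the binding above; illustrative only. */
	static int alpine_msix_parse_node(struct device_node *node)
	{
		struct resource res;
		u32 base_spi, num_spis;

		/* reg: physical base address and size of the registers */
		if (of_address_to_resource(node, 0, &res))
			return -EINVAL;

		/* SPI window assigned to the MSI frame */
		if (of_property_read_u32(node, "al,msi-base-spi", &base_spi) ||
		    of_property_read_u32(node, "al,msi-num-spis", &num_spis))
			return -EINVAL;

		pr_info("Alpine MSIX: SPIs %u-%u, regs %pR\n",
			base_spi, base_spi + num_spis - 1, &res);
		return 0;
	}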
diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt b/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt index 5a1cb4bc3dfe..793c20ff8fcc 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt | |||
@@ -16,6 +16,7 @@ Main node required properties: | |||
16 | "arm,cortex-a15-gic" | 16 | "arm,cortex-a15-gic" |
17 | "arm,cortex-a7-gic" | 17 | "arm,cortex-a7-gic" |
18 | "arm,cortex-a9-gic" | 18 | "arm,cortex-a9-gic" |
19 | "arm,eb11mp-gic" | ||
19 | "arm,gic-400" | 20 | "arm,gic-400" |
20 | "arm,pl390" | 21 | "arm,pl390" |
21 | "arm,tc11mp-gic" | 22 | "arm,tc11mp-gic" |
diff --git a/Documentation/devicetree/bindings/interrupt-controller/marvell,odmi-controller.txt b/Documentation/devicetree/bindings/interrupt-controller/marvell,odmi-controller.txt new file mode 100644 index 000000000000..8af0a8e613ab --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/marvell,odmi-controller.txt | |||
@@ -0,0 +1,44 @@ | |||
1 | |||
2 | * Marvell ODMI for MSI support | ||
3 | |||
4 | Some Marvell SoCs have an On-Die Message Interrupt (ODMI) controller | ||
5 | which can be used by on-board peripherals for MSI interrupts. | ||
6 | |||
7 | Required properties: | ||
8 | |||
9 | - compatible : The value here should contain: | ||
10 | |||
11 | "marvell,ap806-odmi-controller", "marvell,odmi-controller". | ||
12 | |||
13 | - interrupt-controller : Identifies the node as an interrupt controller. | ||
14 | |||
15 | - msi-controller : Identifies the node as an MSI controller. | ||
16 | |||
17 | - marvell,odmi-frames : Number of ODMI frames available. Each frame | ||
18 | provides a number of events. | ||
19 | |||
20 | - reg : List of register definitions, one for each | ||
21 | ODMI frame. | ||
22 | |||
23 | - marvell,spi-base : List of GIC base SPI interrupts, one for each | ||
24 | ODMI frame. These values are 0-based GIC interrupt IDs rather than SPI | ||
25 | numbers, i.e. marvell,spi-base = <128> will use SPI #96 (128 - 32). | ||
26 | See Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt | ||
27 | for details about the GIC Device Tree binding. | ||
28 | |||
29 | - interrupt-parent : Reference to the parent interrupt controller. | ||
30 | |||
31 | Example: | ||
32 | |||
33 | odmi: odmi@300000 { | ||
34 | compatible = "marvell,ap806-odmi-controller", | ||
35 | "marvell,odmi-controller"; | ||
36 | interrupt-controller; | ||
37 | msi-controller; | ||
38 | marvell,odmi-frames = <4>; | ||
39 | reg = <0x300000 0x4000>, | ||
40 | <0x304000 0x4000>, | ||
41 | <0x308000 0x4000>, | ||
42 | <0x30C000 0x4000>; | ||
43 | marvell,spi-base = <128>, <136>, <144>, <152>; | ||
44 | }; | ||
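
To make the 0-based convention above concrete: assuming the standard GIC layout
where SPIs begin at interrupt ID 32, the four frame bases in this example decode
to SPI #96, #104, #112 and #120. The helper below is purely illustrative, not
part of any driver.

	/* Hypothetical helper: convert a marvell,spi-base value (a 0-based GIC
	 * interrupt ID) into the corresponding SPI number. SPIs start at ID 32,
	 * so 128 -> SPI #96, 136 -> SPI #104, 144 -> SPI #112, 152 -> SPI #120. */
	static inline unsigned int odmi_base_to_spi(unsigned int gic_id)
	{
		return gic_id - 32;
	}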
diff --git a/Documentation/devicetree/bindings/interrupt-controller/mips-gic.txt b/Documentation/devicetree/bindings/interrupt-controller/mips-gic.txt index aae4c384ee1f..173595305e26 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/mips-gic.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/mips-gic.txt | |||
@@ -23,6 +23,12 @@ Optional properties: | |||
23 | - mti,reserved-cpu-vectors : Specifies the list of CPU interrupt vectors | 23 | - mti,reserved-cpu-vectors : Specifies the list of CPU interrupt vectors |
24 | to which the GIC may not route interrupts. Valid values are 2 - 7. | 24 | to which the GIC may not route interrupts. Valid values are 2 - 7. |
25 | This property is ignored if the CPU is started in EIC mode. | 25 | This property is ignored if the CPU is started in EIC mode. |
26 | - mti,reserved-ipi-vectors : Specifies the range of GIC interrupts that are | ||
27 | reserved for IPIs. | ||
28 | It accepts two values: the first is the starting interrupt and the second | ||
29 | is the size of the reserved range. | ||
30 | If not specified, the driver reserves the last (2 * number of VPEs in the | ||
31 | system) interrupts for this purpose. | ||
26 | 32 | ||
27 | Required properties for timer sub-node: | 33 | Required properties for timer sub-node: |
28 | - compatible : Should be "mti,gic-timer". | 34 | - compatible : Should be "mti,gic-timer". |
@@ -44,6 +50,7 @@ Example: | |||
44 | #interrupt-cells = <3>; | 50 | #interrupt-cells = <3>; |
45 | 51 | ||
46 | mti,reserved-cpu-vectors = <7>; | 52 | mti,reserved-cpu-vectors = <7>; |
53 | mti,reserved-ipi-vectors = <40 8>; | ||
47 | 54 | ||
48 | timer { | 55 | timer { |
49 | compatible = "mti,gic-timer"; | 56 | compatible = "mti,gic-timer"; |
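
A sketch of the reservation rule described above: with
mti,reserved-ipi-vectors = <40 8>, GIC vectors 40..47 are set aside for IPIs;
without the property, the last (2 * number of VPEs) vectors are used. This is
illustrative pseudologic under those assumptions, not the actual
irq-mips-gic.c implementation.

	#include <linux/types.h>

	/* Illustrative only: pick the GIC vector range reserved for IPIs. */
	static void gic_pick_ipi_range(unsigned int gic_shared_intrs,
				       unsigned int num_vpes, bool have_prop,
				       u32 prop_base, u32 prop_size,
				       unsigned int *base, unsigned int *size)
	{
		if (have_prop) {
			*base = prop_base;	/* e.g. 40 */
			*size = prop_size;	/* e.g. 8 -> vectors 40..47 */
		} else {
			*size = 2 * num_vpes;	/* one call + one resched IPI per VPE */
			*base = gic_shared_intrs - *size;
		}
	}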
diff --git a/Documentation/devicetree/bindings/interrupt-controller/sigma,smp8642-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/sigma,smp8642-intc.txt new file mode 100644 index 000000000000..1f441fa0ad40 --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/sigma,smp8642-intc.txt | |||
@@ -0,0 +1,49 @@ | |||
1 | Sigma Designs SMP86xx/SMP87xx secondary interrupt controller | ||
2 | |||
3 | Required properties: | ||
4 | - compatible: should be "sigma,smp8642-intc" | ||
5 | - reg: physical address of MMIO region | ||
6 | - ranges: address space mapping of child nodes | ||
7 | - interrupt-parent: phandle of parent interrupt controller | ||
8 | - interrupt-controller: boolean | ||
9 | - #address-cells: should be <1> | ||
10 | - #size-cells: should be <1> | ||
11 | |||
12 | One child node per control block with properties: | ||
13 | - reg: address of registers for this control block | ||
14 | - interrupt-controller: boolean | ||
15 | - #interrupt-cells: should be <2>, interrupt index and flags per interrupts.txt | ||
16 | - interrupts: interrupt spec of primary interrupt controller | ||
17 | |||
18 | Example: | ||
19 | |||
20 | interrupt-controller@6e000 { | ||
21 | compatible = "sigma,smp8642-intc"; | ||
22 | reg = <0x6e000 0x400>; | ||
23 | ranges = <0x0 0x6e000 0x400>; | ||
24 | interrupt-parent = <&gic>; | ||
25 | interrupt-controller; | ||
26 | #address-cells = <1>; | ||
27 | #size-cells = <1>; | ||
28 | |||
29 | irq0: interrupt-controller@0 { | ||
30 | reg = <0x000 0x100>; | ||
31 | interrupt-controller; | ||
32 | #interrupt-cells = <2>; | ||
33 | interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>; | ||
34 | }; | ||
35 | |||
36 | irq1: interrupt-controller@100 { | ||
37 | reg = <0x100 0x100>; | ||
38 | interrupt-controller; | ||
39 | #interrupt-cells = <2>; | ||
40 | interrupts = <GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>; | ||
41 | }; | ||
42 | |||
43 | irq2: interrupt-controller@300 { | ||
44 | reg = <0x300 0x100>; | ||
45 | interrupt-controller; | ||
46 | #interrupt-cells = <2>; | ||
47 | interrupts = <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>; | ||
48 | }; | ||
49 | }; | ||
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 8ae47a7b4923..4d9ca7d92a20 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -666,7 +666,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
666 | 666 | ||
667 | clearcpuid=BITNUM [X86] | 667 | clearcpuid=BITNUM [X86] |
668 | Disable CPUID feature X for the kernel. See | 668 | Disable CPUID feature X for the kernel. See |
669 | arch/x86/include/asm/cpufeature.h for the valid bit | 669 | arch/x86/include/asm/cpufeatures.h for the valid bit |
670 | numbers. Note the Linux specific bits are not necessarily | 670 | numbers. Note the Linux specific bits are not necessarily |
671 | stable over kernel options, but the vendor specific | 671 | stable over kernel options, but the vendor specific |
672 | ones should be. | 672 | ones should be. |
@@ -1687,6 +1687,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
1687 | ip= [IP_PNP] | 1687 | ip= [IP_PNP] |
1688 | See Documentation/filesystems/nfs/nfsroot.txt. | 1688 | See Documentation/filesystems/nfs/nfsroot.txt. |
1689 | 1689 | ||
1690 | irqaffinity= [SMP] Set the default irq affinity mask | ||
1691 | Format: | ||
1692 | <cpu number>,...,<cpu number> | ||
1693 | or | ||
1694 | <cpu number>-<cpu number> | ||
1695 | (must be a positive range in ascending order) | ||
1696 | or a mixture | ||
1697 | <cpu number>,...,<cpu number>-<cpu number> | ||
1698 | |||
1690 | irqfixup [HW] | 1699 | irqfixup [HW] |
1691 | When an interrupt is not handled search all handlers | 1700 | When an interrupt is not handled search all handlers |
1692 | for it. Intended to get systems with badly broken | 1701 | for it. Intended to get systems with badly broken |
@@ -2566,6 +2575,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
2566 | 2575 | ||
2567 | nointroute [IA-64] | 2576 | nointroute [IA-64] |
2568 | 2577 | ||
2578 | noinvpcid [X86] Disable the INVPCID cpu feature. | ||
2579 | |||
2569 | nojitter [IA-64] Disables jitter checking for ITC timers. | 2580 | nojitter [IA-64] Disables jitter checking for ITC timers. |
2570 | 2581 | ||
2571 | no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver | 2582 | no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver |
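
For reference, a boot option with the irqaffinity= syntax documented above is
typically consumed with cpulist_parse(), which understands exactly the
comma/range notation described there. The following is a hedged sketch of that
wiring, not the exact code the IRQ core uses.

	#include <linux/cpumask.h>
	#include <linux/init.h>
	#include <linux/interrupt.h>
	#include <linux/kernel.h>

	/* Illustrative sketch: parse "0,4-7"-style lists into the default mask. */
	static int __init irqaffinity_param(char *str)
	{
		if (cpulist_parse(str, irq_default_affinity))
			pr_warn("irqaffinity: invalid CPU list '%s'\n", str);
		return 1;
	}
	__setup("irqaffinity=", irqaffinity_param);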
diff --git a/Documentation/ptp/testptp.c b/Documentation/ptp/testptp.c index 6c6247aaa7b9..d99012f41602 100644 --- a/Documentation/ptp/testptp.c +++ b/Documentation/ptp/testptp.c | |||
@@ -277,13 +277,15 @@ int main(int argc, char *argv[]) | |||
277 | " %d external time stamp channels\n" | 277 | " %d external time stamp channels\n" |
278 | " %d programmable periodic signals\n" | 278 | " %d programmable periodic signals\n" |
279 | " %d pulse per second\n" | 279 | " %d pulse per second\n" |
280 | " %d programmable pins\n", | 280 | " %d programmable pins\n" |
281 | " %d cross timestamping\n", | ||
281 | caps.max_adj, | 282 | caps.max_adj, |
282 | caps.n_alarm, | 283 | caps.n_alarm, |
283 | caps.n_ext_ts, | 284 | caps.n_ext_ts, |
284 | caps.n_per_out, | 285 | caps.n_per_out, |
285 | caps.pps, | 286 | caps.pps, |
286 | caps.n_pins); | 287 | caps.n_pins, |
288 | caps.cross_timestamping); | ||
287 | } | 289 | } |
288 | } | 290 | } |
289 | 291 | ||
diff --git a/Documentation/x86/early-microcode.txt b/Documentation/x86/early-microcode.txt index d62bea6796da..c956d99cf1de 100644 --- a/Documentation/x86/early-microcode.txt +++ b/Documentation/x86/early-microcode.txt | |||
@@ -40,3 +40,28 @@ cp ../microcode.bin kernel/x86/microcode/GenuineIntel.bin (or AuthenticAMD.bin) | |||
40 | find . | cpio -o -H newc >../ucode.cpio | 40 | find . | cpio -o -H newc >../ucode.cpio |
41 | cd .. | 41 | cd .. |
42 | cat ucode.cpio /boot/initrd-3.5.0.img >/boot/initrd-3.5.0.ucode.img | 42 | cat ucode.cpio /boot/initrd-3.5.0.img >/boot/initrd-3.5.0.ucode.img |
43 | |||
44 | Builtin microcode | ||
45 | ================= | ||
46 | |||
47 | Microcode can also be built into the kernel through the regular firmware | ||
48 | builtin method, CONFIG_FIRMWARE_IN_KERNEL. Here's an example: | ||
49 | |||
50 | CONFIG_FIRMWARE_IN_KERNEL=y | ||
51 | CONFIG_EXTRA_FIRMWARE="intel-ucode/06-3a-09 amd-ucode/microcode_amd_fam15h.bin" | ||
52 | CONFIG_EXTRA_FIRMWARE_DIR="/lib/firmware" | ||
53 | |||
54 | This means you have the following tree structure locally: | ||
55 | |||
56 | /lib/firmware/ | ||
57 | |-- amd-ucode | ||
58 | ... | ||
59 | | |-- microcode_amd_fam15h.bin | ||
60 | ... | ||
61 | |-- intel-ucode | ||
62 | ... | ||
63 | | |-- 06-3a-09 | ||
64 | ... | ||
65 | |||
66 | so that the build system can find those files and integrate them into | ||
67 | the final kernel image. The early loader finds them and applies them. | ||
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt index 68ed3114c363..0965a71f9942 100644 --- a/Documentation/x86/x86_64/boot-options.txt +++ b/Documentation/x86/x86_64/boot-options.txt | |||
@@ -60,6 +60,8 @@ Machine check | |||
60 | threshold to 1. Enabling this may make memory predictive failure | 60 | threshold to 1. Enabling this may make memory predictive failure |
61 | analysis less effective if the bios sets thresholds for memory | 61 | analysis less effective if the bios sets thresholds for memory |
62 | errors since we will not see details for all errors. | 62 | errors since we will not see details for all errors. |
63 | mce=recovery | ||
64 | Force-enable recoverable machine check code paths | ||
63 | 65 | ||
64 | nomce (for compatibility with i386): same as mce=off | 66 | nomce (for compatibility with i386): same as mce=off |
65 | 67 | ||
diff --git a/MAINTAINERS b/MAINTAINERS index 2061ea77667c..57adf395a61f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -2422,6 +2422,7 @@ F: arch/mips/bmips/* | |||
2422 | F: arch/mips/include/asm/mach-bmips/* | 2422 | F: arch/mips/include/asm/mach-bmips/* |
2423 | F: arch/mips/kernel/*bmips* | 2423 | F: arch/mips/kernel/*bmips* |
2424 | F: arch/mips/boot/dts/brcm/bcm*.dts* | 2424 | F: arch/mips/boot/dts/brcm/bcm*.dts* |
2425 | F: drivers/irqchip/irq-bcm63* | ||
2425 | F: drivers/irqchip/irq-bcm7* | 2426 | F: drivers/irqchip/irq-bcm7* |
2426 | F: drivers/irqchip/irq-brcmstb* | 2427 | F: drivers/irqchip/irq-brcmstb* |
2427 | F: include/linux/bcm963xx_nvram.h | 2428 | F: include/linux/bcm963xx_nvram.h |
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 2f24447fef92..46bf263c3153 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c | |||
@@ -168,7 +168,7 @@ smp_callin(void) | |||
168 | cpuid, current, current->active_mm)); | 168 | cpuid, current, current->active_mm)); |
169 | 169 | ||
170 | preempt_disable(); | 170 | preempt_disable(); |
171 | cpu_startup_entry(CPUHP_ONLINE); | 171 | cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); |
172 | } | 172 | } |
173 | 173 | ||
174 | /* Wait until hwrpb->txrdy is clear for cpu. Return -1 on timeout. */ | 174 | /* Wait until hwrpb->txrdy is clear for cpu. Return -1 on timeout. */ |
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index 424e937da5c8..4cb3add77c75 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c | |||
@@ -142,7 +142,7 @@ void start_kernel_secondary(void) | |||
142 | 142 | ||
143 | local_irq_enable(); | 143 | local_irq_enable(); |
144 | preempt_disable(); | 144 | preempt_disable(); |
145 | cpu_startup_entry(CPUHP_ONLINE); | 145 | cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); |
146 | } | 146 | } |
147 | 147 | ||
148 | /* | 148 | /* |
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 37312f6749f3..baee70267f29 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c | |||
@@ -409,7 +409,7 @@ asmlinkage void secondary_start_kernel(void) | |||
409 | /* | 409 | /* |
410 | * OK, it's off to the idle thread for us | 410 | * OK, it's off to the idle thread for us |
411 | */ | 411 | */ |
412 | cpu_startup_entry(CPUHP_ONLINE); | 412 | cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); |
413 | } | 413 | } |
414 | 414 | ||
415 | void __init smp_cpus_done(unsigned int max_cpus) | 415 | void __init smp_cpus_done(unsigned int max_cpus) |
diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig index 64e3d2ce9a07..b003e3afd693 100644 --- a/arch/arm/mach-mvebu/Kconfig +++ b/arch/arm/mach-mvebu/Kconfig | |||
@@ -3,7 +3,6 @@ menuconfig ARCH_MVEBU | |||
3 | depends on ARCH_MULTI_V7 || ARCH_MULTI_V5 | 3 | depends on ARCH_MULTI_V7 || ARCH_MULTI_V5 |
4 | select ARCH_SUPPORTS_BIG_ENDIAN | 4 | select ARCH_SUPPORTS_BIG_ENDIAN |
5 | select CLKSRC_MMIO | 5 | select CLKSRC_MMIO |
6 | select GENERIC_IRQ_CHIP | ||
7 | select PINCTRL | 6 | select PINCTRL |
8 | select PLAT_ORION | 7 | select PLAT_ORION |
9 | select SOC_BUS | 8 | select SOC_BUS |
@@ -29,6 +28,7 @@ config MACH_ARMADA_370 | |||
29 | bool "Marvell Armada 370 boards" | 28 | bool "Marvell Armada 370 boards" |
30 | depends on ARCH_MULTI_V7 | 29 | depends on ARCH_MULTI_V7 |
31 | select ARMADA_370_CLK | 30 | select ARMADA_370_CLK |
31 | select ARMADA_370_XP_IRQ | ||
32 | select CPU_PJ4B | 32 | select CPU_PJ4B |
33 | select MACH_MVEBU_V7 | 33 | select MACH_MVEBU_V7 |
34 | select PINCTRL_ARMADA_370 | 34 | select PINCTRL_ARMADA_370 |
@@ -39,6 +39,7 @@ config MACH_ARMADA_370 | |||
39 | config MACH_ARMADA_375 | 39 | config MACH_ARMADA_375 |
40 | bool "Marvell Armada 375 boards" | 40 | bool "Marvell Armada 375 boards" |
41 | depends on ARCH_MULTI_V7 | 41 | depends on ARCH_MULTI_V7 |
42 | select ARMADA_370_XP_IRQ | ||
42 | select ARM_ERRATA_720789 | 43 | select ARM_ERRATA_720789 |
43 | select ARM_ERRATA_753970 | 44 | select ARM_ERRATA_753970 |
44 | select ARM_GIC | 45 | select ARM_GIC |
@@ -58,6 +59,7 @@ config MACH_ARMADA_38X | |||
58 | select ARM_ERRATA_720789 | 59 | select ARM_ERRATA_720789 |
59 | select ARM_ERRATA_753970 | 60 | select ARM_ERRATA_753970 |
60 | select ARM_GIC | 61 | select ARM_GIC |
62 | select ARMADA_370_XP_IRQ | ||
61 | select ARMADA_38X_CLK | 63 | select ARMADA_38X_CLK |
62 | select HAVE_ARM_SCU | 64 | select HAVE_ARM_SCU |
63 | select HAVE_ARM_TWD if SMP | 65 | select HAVE_ARM_TWD if SMP |
@@ -72,6 +74,7 @@ config MACH_ARMADA_39X | |||
72 | bool "Marvell Armada 39x boards" | 74 | bool "Marvell Armada 39x boards" |
73 | depends on ARCH_MULTI_V7 | 75 | depends on ARCH_MULTI_V7 |
74 | select ARM_GIC | 76 | select ARM_GIC |
77 | select ARMADA_370_XP_IRQ | ||
75 | select ARMADA_39X_CLK | 78 | select ARMADA_39X_CLK |
76 | select CACHE_L2X0 | 79 | select CACHE_L2X0 |
77 | select HAVE_ARM_SCU | 80 | select HAVE_ARM_SCU |
@@ -86,6 +89,7 @@ config MACH_ARMADA_39X | |||
86 | config MACH_ARMADA_XP | 89 | config MACH_ARMADA_XP |
87 | bool "Marvell Armada XP boards" | 90 | bool "Marvell Armada XP boards" |
88 | depends on ARCH_MULTI_V7 | 91 | depends on ARCH_MULTI_V7 |
92 | select ARMADA_370_XP_IRQ | ||
89 | select ARMADA_XP_CLK | 93 | select ARMADA_XP_CLK |
90 | select CPU_PJ4B | 94 | select CPU_PJ4B |
91 | select MACH_MVEBU_V7 | 95 | select MACH_MVEBU_V7 |
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index b1adc51b2c2e..460765799c64 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c | |||
@@ -195,7 +195,7 @@ asmlinkage void secondary_start_kernel(void) | |||
195 | /* | 195 | /* |
196 | * OK, it's off to the idle thread for us | 196 | * OK, it's off to the idle thread for us |
197 | */ | 197 | */ |
198 | cpu_startup_entry(CPUHP_ONLINE); | 198 | cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); |
199 | } | 199 | } |
200 | 200 | ||
201 | #ifdef CONFIG_HOTPLUG_CPU | 201 | #ifdef CONFIG_HOTPLUG_CPU |
diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c index 0030e21cfceb..23c4ef5f8bdc 100644 --- a/arch/blackfin/mach-common/smp.c +++ b/arch/blackfin/mach-common/smp.c | |||
@@ -333,7 +333,7 @@ void secondary_start_kernel(void) | |||
333 | 333 | ||
334 | /* We are done with local CPU inits, unblock the boot CPU. */ | 334 | /* We are done with local CPU inits, unblock the boot CPU. */ |
335 | set_cpu_online(cpu, true); | 335 | set_cpu_online(cpu, true); |
336 | cpu_startup_entry(CPUHP_ONLINE); | 336 | cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); |
337 | } | 337 | } |
338 | 338 | ||
339 | void __init smp_prepare_boot_cpu(void) | 339 | void __init smp_prepare_boot_cpu(void) |
diff --git a/arch/hexagon/kernel/smp.c b/arch/hexagon/kernel/smp.c index ff759f26b96a..983bae7d2665 100644 --- a/arch/hexagon/kernel/smp.c +++ b/arch/hexagon/kernel/smp.c | |||
@@ -180,7 +180,7 @@ void start_secondary(void) | |||
180 | 180 | ||
181 | local_irq_enable(); | 181 | local_irq_enable(); |
182 | 182 | ||
183 | cpu_startup_entry(CPUHP_ONLINE); | 183 | cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); |
184 | } | 184 | } |
185 | 185 | ||
186 | 186 | ||
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 0e76fad27975..74fe317477e6 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c | |||
@@ -454,7 +454,7 @@ start_secondary (void *unused) | |||
454 | preempt_disable(); | 454 | preempt_disable(); |
455 | smp_callin(); | 455 | smp_callin(); |
456 | 456 | ||
457 | cpu_startup_entry(CPUHP_ONLINE); | 457 | cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); |
458 | return 0; | 458 | return 0; |
459 | } | 459 | } |
460 | 460 | ||
diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c index a468467542f4..f98d2f6519d6 100644 --- a/arch/m32r/kernel/smpboot.c +++ b/arch/m32r/kernel/smpboot.c | |||
@@ -432,7 +432,7 @@ int __init start_secondary(void *unused) | |||
432 | */ | 432 | */ |
433 | local_flush_tlb_all(); | 433 | local_flush_tlb_all(); |
434 | 434 | ||
435 | cpu_startup_entry(CPUHP_ONLINE); | 435 | cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); |
436 | return 0; | 436 | return 0; |
437 | } | 437 | } |
438 | 438 | ||
diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c index c3c6f0864881..bad13232de51 100644 --- a/arch/metag/kernel/smp.c +++ b/arch/metag/kernel/smp.c | |||
@@ -396,7 +396,7 @@ asmlinkage void secondary_start_kernel(void) | |||
396 | /* | 396 | /* |
397 | * OK, it's off to the idle thread for us | 397 | * OK, it's off to the idle thread for us |
398 | */ | 398 | */ |
399 | cpu_startup_entry(CPUHP_ONLINE); | 399 | cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); |
400 | } | 400 | } |
401 | 401 | ||
402 | void __init smp_cpus_done(unsigned int max_cpus) | 402 | void __init smp_cpus_done(unsigned int max_cpus) |
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index d3da79dda629..a65eacf59918 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig | |||
@@ -151,6 +151,7 @@ config BMIPS_GENERIC | |||
151 | select CSRC_R4K | 151 | select CSRC_R4K |
152 | select SYNC_R4K | 152 | select SYNC_R4K |
153 | select COMMON_CLK | 153 | select COMMON_CLK |
154 | select BCM6345_L1_IRQ | ||
154 | select BCM7038_L1_IRQ | 155 | select BCM7038_L1_IRQ |
155 | select BCM7120_L2_IRQ | 156 | select BCM7120_L2_IRQ |
156 | select BRCMSTB_L2_IRQ | 157 | select BRCMSTB_L2_IRQ |
@@ -2169,7 +2170,6 @@ config MIPS_MT_SMP | |||
2169 | select CPU_MIPSR2_IRQ_VI | 2170 | select CPU_MIPSR2_IRQ_VI |
2170 | select CPU_MIPSR2_IRQ_EI | 2171 | select CPU_MIPSR2_IRQ_EI |
2171 | select SYNC_R4K | 2172 | select SYNC_R4K |
2172 | select MIPS_GIC_IPI if MIPS_GIC | ||
2173 | select MIPS_MT | 2173 | select MIPS_MT |
2174 | select SMP | 2174 | select SMP |
2175 | select SMP_UP | 2175 | select SMP_UP |
@@ -2267,7 +2267,6 @@ config MIPS_VPE_APSP_API_MT | |||
2267 | config MIPS_CMP | 2267 | config MIPS_CMP |
2268 | bool "MIPS CMP framework support (DEPRECATED)" | 2268 | bool "MIPS CMP framework support (DEPRECATED)" |
2269 | depends on SYS_SUPPORTS_MIPS_CMP && !CPU_MIPSR6 | 2269 | depends on SYS_SUPPORTS_MIPS_CMP && !CPU_MIPSR6 |
2270 | select MIPS_GIC_IPI if MIPS_GIC | ||
2271 | select SMP | 2270 | select SMP |
2272 | select SYNC_R4K | 2271 | select SYNC_R4K |
2273 | select SYS_SUPPORTS_SMP | 2272 | select SYS_SUPPORTS_SMP |
@@ -2287,7 +2286,6 @@ config MIPS_CPS | |||
2287 | select MIPS_CM | 2286 | select MIPS_CM |
2288 | select MIPS_CPC | 2287 | select MIPS_CPC |
2289 | select MIPS_CPS_PM if HOTPLUG_CPU | 2288 | select MIPS_CPS_PM if HOTPLUG_CPU |
2290 | select MIPS_GIC_IPI if MIPS_GIC | ||
2291 | select SMP | 2289 | select SMP |
2292 | select SYNC_R4K if (CEVT_R4K || CSRC_R4K) | 2290 | select SYNC_R4K if (CEVT_R4K || CSRC_R4K) |
2293 | select SYS_SUPPORTS_HOTPLUG_CPU | 2291 | select SYS_SUPPORTS_HOTPLUG_CPU |
@@ -2305,10 +2303,6 @@ config MIPS_CPS_PM | |||
2305 | select MIPS_CPC | 2303 | select MIPS_CPC |
2306 | bool | 2304 | bool |
2307 | 2305 | ||
2308 | config MIPS_GIC_IPI | ||
2309 | depends on MIPS_GIC | ||
2310 | bool | ||
2311 | |||
2312 | config MIPS_CM | 2306 | config MIPS_CM |
2313 | bool | 2307 | bool |
2314 | 2308 | ||
diff --git a/arch/mips/ath79/irq.c b/arch/mips/ath79/irq.c index 511c06560dc1..2dfff1f19004 100644 --- a/arch/mips/ath79/irq.c +++ b/arch/mips/ath79/irq.c | |||
@@ -26,90 +26,6 @@ | |||
26 | #include "common.h" | 26 | #include "common.h" |
27 | #include "machtypes.h" | 27 | #include "machtypes.h" |
28 | 28 | ||
29 | static void __init ath79_misc_intc_domain_init( | ||
30 | struct device_node *node, int irq); | ||
31 | |||
32 | static void ath79_misc_irq_handler(struct irq_desc *desc) | ||
33 | { | ||
34 | struct irq_domain *domain = irq_desc_get_handler_data(desc); | ||
35 | void __iomem *base = domain->host_data; | ||
36 | u32 pending; | ||
37 | |||
38 | pending = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS) & | ||
39 | __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE); | ||
40 | |||
41 | if (!pending) { | ||
42 | spurious_interrupt(); | ||
43 | return; | ||
44 | } | ||
45 | |||
46 | while (pending) { | ||
47 | int bit = __ffs(pending); | ||
48 | |||
49 | generic_handle_irq(irq_linear_revmap(domain, bit)); | ||
50 | pending &= ~BIT(bit); | ||
51 | } | ||
52 | } | ||
53 | |||
54 | static void ar71xx_misc_irq_unmask(struct irq_data *d) | ||
55 | { | ||
56 | void __iomem *base = irq_data_get_irq_chip_data(d); | ||
57 | unsigned int irq = d->hwirq; | ||
58 | u32 t; | ||
59 | |||
60 | t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE); | ||
61 | __raw_writel(t | (1 << irq), base + AR71XX_RESET_REG_MISC_INT_ENABLE); | ||
62 | |||
63 | /* flush write */ | ||
64 | __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE); | ||
65 | } | ||
66 | |||
67 | static void ar71xx_misc_irq_mask(struct irq_data *d) | ||
68 | { | ||
69 | void __iomem *base = irq_data_get_irq_chip_data(d); | ||
70 | unsigned int irq = d->hwirq; | ||
71 | u32 t; | ||
72 | |||
73 | t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE); | ||
74 | __raw_writel(t & ~(1 << irq), base + AR71XX_RESET_REG_MISC_INT_ENABLE); | ||
75 | |||
76 | /* flush write */ | ||
77 | __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE); | ||
78 | } | ||
79 | |||
80 | static void ar724x_misc_irq_ack(struct irq_data *d) | ||
81 | { | ||
82 | void __iomem *base = irq_data_get_irq_chip_data(d); | ||
83 | unsigned int irq = d->hwirq; | ||
84 | u32 t; | ||
85 | |||
86 | t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS); | ||
87 | __raw_writel(t & ~(1 << irq), base + AR71XX_RESET_REG_MISC_INT_STATUS); | ||
88 | |||
89 | /* flush write */ | ||
90 | __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS); | ||
91 | } | ||
92 | |||
93 | static struct irq_chip ath79_misc_irq_chip = { | ||
94 | .name = "MISC", | ||
95 | .irq_unmask = ar71xx_misc_irq_unmask, | ||
96 | .irq_mask = ar71xx_misc_irq_mask, | ||
97 | }; | ||
98 | |||
99 | static void __init ath79_misc_irq_init(void) | ||
100 | { | ||
101 | if (soc_is_ar71xx() || soc_is_ar913x()) | ||
102 | ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask; | ||
103 | else if (soc_is_ar724x() || | ||
104 | soc_is_ar933x() || | ||
105 | soc_is_ar934x() || | ||
106 | soc_is_qca955x()) | ||
107 | ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack; | ||
108 | else | ||
109 | BUG(); | ||
110 | |||
111 | ath79_misc_intc_domain_init(NULL, ATH79_CPU_IRQ(6)); | ||
112 | } | ||
113 | 29 | ||
114 | static void ar934x_ip2_irq_dispatch(struct irq_desc *desc) | 30 | static void ar934x_ip2_irq_dispatch(struct irq_desc *desc) |
115 | { | 31 | { |
@@ -212,142 +128,12 @@ static void qca955x_irq_init(void) | |||
212 | irq_set_chained_handler(ATH79_CPU_IRQ(3), qca955x_ip3_irq_dispatch); | 128 | irq_set_chained_handler(ATH79_CPU_IRQ(3), qca955x_ip3_irq_dispatch); |
213 | } | 129 | } |
214 | 130 | ||
215 | /* | ||
216 | * The IP2/IP3 lines are tied to a PCI/WMAC/USB device. Drivers for | ||
217 | * these devices typically allocate coherent DMA memory, however the | ||
218 | * DMA controller may still have some unsynchronized data in the FIFO. | ||
219 | * Issue a flush in the handlers to ensure that the driver sees | ||
220 | * the update. | ||
221 | * | ||
222 | * This array map the interrupt lines to the DDR write buffer channels. | ||
223 | */ | ||
224 | |||
225 | static unsigned irq_wb_chan[8] = { | ||
226 | -1, -1, -1, -1, -1, -1, -1, -1, | ||
227 | }; | ||
228 | |||
229 | asmlinkage void plat_irq_dispatch(void) | ||
230 | { | ||
231 | unsigned long pending; | ||
232 | int irq; | ||
233 | |||
234 | pending = read_c0_status() & read_c0_cause() & ST0_IM; | ||
235 | |||
236 | if (!pending) { | ||
237 | spurious_interrupt(); | ||
238 | return; | ||
239 | } | ||
240 | |||
241 | pending >>= CAUSEB_IP; | ||
242 | while (pending) { | ||
243 | irq = fls(pending) - 1; | ||
244 | if (irq < ARRAY_SIZE(irq_wb_chan) && irq_wb_chan[irq] != -1) | ||
245 | ath79_ddr_wb_flush(irq_wb_chan[irq]); | ||
246 | do_IRQ(MIPS_CPU_IRQ_BASE + irq); | ||
247 | pending &= ~BIT(irq); | ||
248 | } | ||
249 | } | ||
250 | |||
251 | static int misc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) | ||
252 | { | ||
253 | irq_set_chip_and_handler(irq, &ath79_misc_irq_chip, handle_level_irq); | ||
254 | irq_set_chip_data(irq, d->host_data); | ||
255 | return 0; | ||
256 | } | ||
257 | |||
258 | static const struct irq_domain_ops misc_irq_domain_ops = { | ||
259 | .xlate = irq_domain_xlate_onecell, | ||
260 | .map = misc_map, | ||
261 | }; | ||
262 | |||
263 | static void __init ath79_misc_intc_domain_init( | ||
264 | struct device_node *node, int irq) | ||
265 | { | ||
266 | void __iomem *base = ath79_reset_base; | ||
267 | struct irq_domain *domain; | ||
268 | |||
269 | domain = irq_domain_add_legacy(node, ATH79_MISC_IRQ_COUNT, | ||
270 | ATH79_MISC_IRQ_BASE, 0, &misc_irq_domain_ops, base); | ||
271 | if (!domain) | ||
272 | panic("Failed to add MISC irqdomain"); | ||
273 | |||
274 | /* Disable and clear all interrupts */ | ||
275 | __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE); | ||
276 | __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS); | ||
277 | |||
278 | irq_set_chained_handler_and_data(irq, ath79_misc_irq_handler, domain); | ||
279 | } | ||
280 | |||
281 | static int __init ath79_misc_intc_of_init( | ||
282 | struct device_node *node, struct device_node *parent) | ||
283 | { | ||
284 | int irq; | ||
285 | |||
286 | irq = irq_of_parse_and_map(node, 0); | ||
287 | if (!irq) | ||
288 | panic("Failed to get MISC IRQ"); | ||
289 | |||
290 | ath79_misc_intc_domain_init(node, irq); | ||
291 | return 0; | ||
292 | } | ||
293 | |||
294 | static int __init ar7100_misc_intc_of_init( | ||
295 | struct device_node *node, struct device_node *parent) | ||
296 | { | ||
297 | ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask; | ||
298 | return ath79_misc_intc_of_init(node, parent); | ||
299 | } | ||
300 | |||
301 | IRQCHIP_DECLARE(ar7100_misc_intc, "qca,ar7100-misc-intc", | ||
302 | ar7100_misc_intc_of_init); | ||
303 | |||
304 | static int __init ar7240_misc_intc_of_init( | ||
305 | struct device_node *node, struct device_node *parent) | ||
306 | { | ||
307 | ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack; | ||
308 | return ath79_misc_intc_of_init(node, parent); | ||
309 | } | ||
310 | |||
311 | IRQCHIP_DECLARE(ar7240_misc_intc, "qca,ar7240-misc-intc", | ||
312 | ar7240_misc_intc_of_init); | ||
313 | |||
314 | static int __init ar79_cpu_intc_of_init( | ||
315 | struct device_node *node, struct device_node *parent) | ||
316 | { | ||
317 | int err, i, count; | ||
318 | |||
319 | /* Fill the irq_wb_chan table */ | ||
320 | count = of_count_phandle_with_args( | ||
321 | node, "qca,ddr-wb-channels", "#qca,ddr-wb-channel-cells"); | ||
322 | |||
323 | for (i = 0; i < count; i++) { | ||
324 | struct of_phandle_args args; | ||
325 | u32 irq = i; | ||
326 | |||
327 | of_property_read_u32_index( | ||
328 | node, "qca,ddr-wb-channel-interrupts", i, &irq); | ||
329 | if (irq >= ARRAY_SIZE(irq_wb_chan)) | ||
330 | continue; | ||
331 | |||
332 | err = of_parse_phandle_with_args( | ||
333 | node, "qca,ddr-wb-channels", | ||
334 | "#qca,ddr-wb-channel-cells", | ||
335 | i, &args); | ||
336 | if (err) | ||
337 | return err; | ||
338 | |||
339 | irq_wb_chan[irq] = args.args[0]; | ||
340 | pr_info("IRQ: Set flush channel of IRQ%d to %d\n", | ||
341 | irq, args.args[0]); | ||
342 | } | ||
343 | |||
344 | return mips_cpu_irq_of_init(node, parent); | ||
345 | } | ||
346 | IRQCHIP_DECLARE(ar79_cpu_intc, "qca,ar7100-cpu-intc", | ||
347 | ar79_cpu_intc_of_init); | ||
348 | |||
349 | void __init arch_init_irq(void) | 131 | void __init arch_init_irq(void) |
350 | { | 132 | { |
133 | unsigned irq_wb_chan2 = -1; | ||
134 | unsigned irq_wb_chan3 = -1; | ||
135 | bool misc_is_ar71xx; | ||
136 | |||
351 | if (mips_machtype == ATH79_MACH_GENERIC_OF) { | 137 | if (mips_machtype == ATH79_MACH_GENERIC_OF) { |
352 | irqchip_init(); | 138 | irqchip_init(); |
353 | return; | 139 | return; |
@@ -355,14 +141,26 @@ void __init arch_init_irq(void) | |||
355 | 141 | ||
356 | if (soc_is_ar71xx() || soc_is_ar724x() || | 142 | if (soc_is_ar71xx() || soc_is_ar724x() || |
357 | soc_is_ar913x() || soc_is_ar933x()) { | 143 | soc_is_ar913x() || soc_is_ar933x()) { |
358 | irq_wb_chan[2] = 3; | 144 | irq_wb_chan2 = 3; |
359 | irq_wb_chan[3] = 2; | 145 | irq_wb_chan3 = 2; |
360 | } else if (soc_is_ar934x()) { | 146 | } else if (soc_is_ar934x()) { |
361 | irq_wb_chan[3] = 2; | 147 | irq_wb_chan3 = 2; |
362 | } | 148 | } |
363 | 149 | ||
364 | mips_cpu_irq_init(); | 150 | ath79_cpu_irq_init(irq_wb_chan2, irq_wb_chan3); |
365 | ath79_misc_irq_init(); | 151 | |
152 | if (soc_is_ar71xx() || soc_is_ar913x()) | ||
153 | misc_is_ar71xx = true; | ||
154 | else if (soc_is_ar724x() || | ||
155 | soc_is_ar933x() || | ||
156 | soc_is_ar934x() || | ||
157 | soc_is_qca955x()) | ||
158 | misc_is_ar71xx = false; | ||
159 | else | ||
160 | BUG(); | ||
161 | ath79_misc_irq_init( | ||
162 | ath79_reset_base + AR71XX_RESET_REG_MISC_INT_STATUS, | ||
163 | ATH79_CPU_IRQ(6), ATH79_MISC_IRQ_BASE, misc_is_ar71xx); | ||
366 | 164 | ||
367 | if (soc_is_ar934x()) | 165 | if (soc_is_ar934x()) |
368 | ar934x_ip2_irq_init(); | 166 | ar934x_ip2_irq_init(); |
diff --git a/arch/mips/bmips/irq.c b/arch/mips/bmips/irq.c index e7fc6f9348ba..7efefcf44033 100644 --- a/arch/mips/bmips/irq.c +++ b/arch/mips/bmips/irq.c | |||
@@ -15,6 +15,12 @@ | |||
15 | #include <asm/irq_cpu.h> | 15 | #include <asm/irq_cpu.h> |
16 | #include <asm/time.h> | 16 | #include <asm/time.h> |
17 | 17 | ||
18 | static const struct of_device_id smp_intc_dt_match[] = { | ||
19 | { .compatible = "brcm,bcm7038-l1-intc" }, | ||
20 | { .compatible = "brcm,bcm6345-l1-intc" }, | ||
21 | {} | ||
22 | }; | ||
23 | |||
18 | unsigned int get_c0_compare_int(void) | 24 | unsigned int get_c0_compare_int(void) |
19 | { | 25 | { |
20 | return CP0_LEGACY_COMPARE_IRQ; | 26 | return CP0_LEGACY_COMPARE_IRQ; |
@@ -24,8 +30,8 @@ void __init arch_init_irq(void) | |||
24 | { | 30 | { |
25 | struct device_node *dn; | 31 | struct device_node *dn; |
26 | 32 | ||
27 | /* Only the STB (bcm7038) controller supports SMP IRQ affinity */ | 33 | /* Only these controllers support SMP IRQ affinity */ |
28 | dn = of_find_compatible_node(NULL, NULL, "brcm,bcm7038-l1-intc"); | 34 | dn = of_find_matching_node(NULL, smp_intc_dt_match); |
29 | if (dn) | 35 | if (dn) |
30 | of_node_put(dn); | 36 | of_node_put(dn); |
31 | else | 37 | else |
diff --git a/arch/mips/include/asm/mach-ath79/ath79.h b/arch/mips/include/asm/mach-ath79/ath79.h index 2b3487213d1e..441faa92c3cd 100644 --- a/arch/mips/include/asm/mach-ath79/ath79.h +++ b/arch/mips/include/asm/mach-ath79/ath79.h | |||
@@ -144,4 +144,8 @@ static inline u32 ath79_reset_rr(unsigned reg) | |||
144 | void ath79_device_reset_set(u32 mask); | 144 | void ath79_device_reset_set(u32 mask); |
145 | void ath79_device_reset_clear(u32 mask); | 145 | void ath79_device_reset_clear(u32 mask); |
146 | 146 | ||
147 | void ath79_cpu_irq_init(unsigned irq_wb_chan2, unsigned irq_wb_chan3); | ||
148 | void ath79_misc_irq_init(void __iomem *regs, int irq, | ||
149 | int irq_base, bool is_ar71xx); | ||
150 | |||
147 | #endif /* __ASM_MACH_ATH79_H */ | 151 | #endif /* __ASM_MACH_ATH79_H */ |
diff --git a/arch/mips/include/asm/smp-ops.h b/arch/mips/include/asm/smp-ops.h index 6ba1fb8b11e2..db7c322f057f 100644 --- a/arch/mips/include/asm/smp-ops.h +++ b/arch/mips/include/asm/smp-ops.h | |||
@@ -44,8 +44,9 @@ static inline void plat_smp_setup(void) | |||
44 | mp_ops->smp_setup(); | 44 | mp_ops->smp_setup(); |
45 | } | 45 | } |
46 | 46 | ||
47 | extern void gic_send_ipi_single(int cpu, unsigned int action); | 47 | extern void mips_smp_send_ipi_single(int cpu, unsigned int action); |
48 | extern void gic_send_ipi_mask(const struct cpumask *mask, unsigned int action); | 48 | extern void mips_smp_send_ipi_mask(const struct cpumask *mask, |
49 | unsigned int action); | ||
49 | 50 | ||
50 | #else /* !CONFIG_SMP */ | 51 | #else /* !CONFIG_SMP */ |
51 | 52 | ||
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile index 68e2b7db9348..b0988fd62fcc 100644 --- a/arch/mips/kernel/Makefile +++ b/arch/mips/kernel/Makefile | |||
@@ -52,7 +52,6 @@ obj-$(CONFIG_MIPS_MT_SMP) += smp-mt.o | |||
52 | obj-$(CONFIG_MIPS_CMP) += smp-cmp.o | 52 | obj-$(CONFIG_MIPS_CMP) += smp-cmp.o |
53 | obj-$(CONFIG_MIPS_CPS) += smp-cps.o cps-vec.o | 53 | obj-$(CONFIG_MIPS_CPS) += smp-cps.o cps-vec.o |
54 | obj-$(CONFIG_MIPS_CPS_NS16550) += cps-vec-ns16550.o | 54 | obj-$(CONFIG_MIPS_CPS_NS16550) += cps-vec-ns16550.o |
55 | obj-$(CONFIG_MIPS_GIC_IPI) += smp-gic.o | ||
56 | obj-$(CONFIG_MIPS_SPRAM) += spram.o | 55 | obj-$(CONFIG_MIPS_SPRAM) += spram.o |
57 | 56 | ||
58 | obj-$(CONFIG_MIPS_VPE_LOADER) += vpe.o | 57 | obj-$(CONFIG_MIPS_VPE_LOADER) += vpe.o |
diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c index d5e0f949dc48..76923349b4fe 100644 --- a/arch/mips/kernel/smp-cmp.c +++ b/arch/mips/kernel/smp-cmp.c | |||
@@ -149,8 +149,8 @@ void __init cmp_prepare_cpus(unsigned int max_cpus) | |||
149 | } | 149 | } |
150 | 150 | ||
151 | struct plat_smp_ops cmp_smp_ops = { | 151 | struct plat_smp_ops cmp_smp_ops = { |
152 | .send_ipi_single = gic_send_ipi_single, | 152 | .send_ipi_single = mips_smp_send_ipi_single, |
153 | .send_ipi_mask = gic_send_ipi_mask, | 153 | .send_ipi_mask = mips_smp_send_ipi_mask, |
154 | .init_secondary = cmp_init_secondary, | 154 | .init_secondary = cmp_init_secondary, |
155 | .smp_finish = cmp_smp_finish, | 155 | .smp_finish = cmp_smp_finish, |
156 | .boot_secondary = cmp_boot_secondary, | 156 | .boot_secondary = cmp_boot_secondary, |
diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index 2ad4e4c96d61..253e1409338c 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c | |||
@@ -472,8 +472,8 @@ static struct plat_smp_ops cps_smp_ops = { | |||
472 | .boot_secondary = cps_boot_secondary, | 472 | .boot_secondary = cps_boot_secondary, |
473 | .init_secondary = cps_init_secondary, | 473 | .init_secondary = cps_init_secondary, |
474 | .smp_finish = cps_smp_finish, | 474 | .smp_finish = cps_smp_finish, |
475 | .send_ipi_single = gic_send_ipi_single, | 475 | .send_ipi_single = mips_smp_send_ipi_single, |
476 | .send_ipi_mask = gic_send_ipi_mask, | 476 | .send_ipi_mask = mips_smp_send_ipi_mask, |
477 | #ifdef CONFIG_HOTPLUG_CPU | 477 | #ifdef CONFIG_HOTPLUG_CPU |
478 | .cpu_disable = cps_cpu_disable, | 478 | .cpu_disable = cps_cpu_disable, |
479 | .cpu_die = cps_cpu_die, | 479 | .cpu_die = cps_cpu_die, |
diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c index 86311a164ef1..4f9570a57e8d 100644 --- a/arch/mips/kernel/smp-mt.c +++ b/arch/mips/kernel/smp-mt.c | |||
@@ -121,7 +121,7 @@ static void vsmp_send_ipi_single(int cpu, unsigned int action) | |||
121 | 121 | ||
122 | #ifdef CONFIG_MIPS_GIC | 122 | #ifdef CONFIG_MIPS_GIC |
123 | if (gic_present) { | 123 | if (gic_present) { |
124 | gic_send_ipi_single(cpu, action); | 124 | mips_smp_send_ipi_single(cpu, action); |
125 | return; | 125 | return; |
126 | } | 126 | } |
127 | #endif | 127 | #endif |
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 2b521e07b860..37708d9af638 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c | |||
@@ -33,12 +33,16 @@ | |||
33 | #include <linux/cpu.h> | 33 | #include <linux/cpu.h> |
34 | #include <linux/err.h> | 34 | #include <linux/err.h> |
35 | #include <linux/ftrace.h> | 35 | #include <linux/ftrace.h> |
36 | #include <linux/irqdomain.h> | ||
37 | #include <linux/of.h> | ||
38 | #include <linux/of_irq.h> | ||
36 | 39 | ||
37 | #include <linux/atomic.h> | 40 | #include <linux/atomic.h> |
38 | #include <asm/cpu.h> | 41 | #include <asm/cpu.h> |
39 | #include <asm/processor.h> | 42 | #include <asm/processor.h> |
40 | #include <asm/idle.h> | 43 | #include <asm/idle.h> |
41 | #include <asm/r4k-timer.h> | 44 | #include <asm/r4k-timer.h> |
45 | #include <asm/mips-cpc.h> | ||
42 | #include <asm/mmu_context.h> | 46 | #include <asm/mmu_context.h> |
43 | #include <asm/time.h> | 47 | #include <asm/time.h> |
44 | #include <asm/setup.h> | 48 | #include <asm/setup.h> |
@@ -79,6 +83,11 @@ static cpumask_t cpu_core_setup_map; | |||
79 | 83 | ||
80 | cpumask_t cpu_coherent_mask; | 84 | cpumask_t cpu_coherent_mask; |
81 | 85 | ||
86 | #ifdef CONFIG_GENERIC_IRQ_IPI | ||
87 | static struct irq_desc *call_desc; | ||
88 | static struct irq_desc *sched_desc; | ||
89 | #endif | ||
90 | |||
82 | static inline void set_cpu_sibling_map(int cpu) | 91 | static inline void set_cpu_sibling_map(int cpu) |
83 | { | 92 | { |
84 | int i; | 93 | int i; |
@@ -146,6 +155,133 @@ void register_smp_ops(struct plat_smp_ops *ops) | |||
146 | mp_ops = ops; | 155 | mp_ops = ops; |
147 | } | 156 | } |
148 | 157 | ||
158 | #ifdef CONFIG_GENERIC_IRQ_IPI | ||
159 | void mips_smp_send_ipi_single(int cpu, unsigned int action) | ||
160 | { | ||
161 | mips_smp_send_ipi_mask(cpumask_of(cpu), action); | ||
162 | } | ||
163 | |||
164 | void mips_smp_send_ipi_mask(const struct cpumask *mask, unsigned int action) | ||
165 | { | ||
166 | unsigned long flags; | ||
167 | unsigned int core; | ||
168 | int cpu; | ||
169 | |||
170 | local_irq_save(flags); | ||
171 | |||
172 | switch (action) { | ||
173 | case SMP_CALL_FUNCTION: | ||
174 | __ipi_send_mask(call_desc, mask); | ||
175 | break; | ||
176 | |||
177 | case SMP_RESCHEDULE_YOURSELF: | ||
178 | __ipi_send_mask(sched_desc, mask); | ||
179 | break; | ||
180 | |||
181 | default: | ||
182 | BUG(); | ||
183 | } | ||
184 | |||
185 | if (mips_cpc_present()) { | ||
186 | for_each_cpu(cpu, mask) { | ||
187 | core = cpu_data[cpu].core; | ||
188 | |||
189 | if (core == current_cpu_data.core) | ||
190 | continue; | ||
191 | |||
192 | while (!cpumask_test_cpu(cpu, &cpu_coherent_mask)) { | ||
193 | mips_cpc_lock_other(core); | ||
194 | write_cpc_co_cmd(CPC_Cx_CMD_PWRUP); | ||
195 | mips_cpc_unlock_other(); | ||
196 | } | ||
197 | } | ||
198 | } | ||
199 | |||
200 | local_irq_restore(flags); | ||
201 | } | ||
202 | |||
203 | |||
204 | static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id) | ||
205 | { | ||
206 | scheduler_ipi(); | ||
207 | |||
208 | return IRQ_HANDLED; | ||
209 | } | ||
210 | |||
211 | static irqreturn_t ipi_call_interrupt(int irq, void *dev_id) | ||
212 | { | ||
213 | generic_smp_call_function_interrupt(); | ||
214 | |||
215 | return IRQ_HANDLED; | ||
216 | } | ||
217 | |||
218 | static struct irqaction irq_resched = { | ||
219 | .handler = ipi_resched_interrupt, | ||
220 | .flags = IRQF_PERCPU, | ||
221 | .name = "IPI resched" | ||
222 | }; | ||
223 | |||
224 | static struct irqaction irq_call = { | ||
225 | .handler = ipi_call_interrupt, | ||
226 | .flags = IRQF_PERCPU, | ||
227 | .name = "IPI call" | ||
228 | }; | ||
229 | |||
230 | static __init void smp_ipi_init_one(unsigned int virq, | ||
231 | struct irqaction *action) | ||
232 | { | ||
233 | int ret; | ||
234 | |||
235 | irq_set_handler(virq, handle_percpu_irq); | ||
236 | ret = setup_irq(virq, action); | ||
237 | BUG_ON(ret); | ||
238 | } | ||
239 | |||
240 | static int __init mips_smp_ipi_init(void) | ||
241 | { | ||
242 | unsigned int call_virq, sched_virq; | ||
243 | struct irq_domain *ipidomain; | ||
244 | struct device_node *node; | ||
245 | |||
246 | node = of_irq_find_parent(of_root); | ||
247 | ipidomain = irq_find_matching_host(node, DOMAIN_BUS_IPI); | ||
248 | |||
249 | /* | ||
250 | * Some platforms have half DT setup. So if we found irq node but | ||
251 | * didn't find an ipidomain, try to search for one that is not in the | ||
252 | * DT. | ||
253 | */ | ||
254 | if (node && !ipidomain) | ||
255 | ipidomain = irq_find_matching_host(NULL, DOMAIN_BUS_IPI); | ||
256 | |||
257 | BUG_ON(!ipidomain); | ||
258 | |||
259 | call_virq = irq_reserve_ipi(ipidomain, cpu_possible_mask); | ||
260 | BUG_ON(!call_virq); | ||
261 | |||
262 | sched_virq = irq_reserve_ipi(ipidomain, cpu_possible_mask); | ||
263 | BUG_ON(!sched_virq); | ||
264 | |||
265 | if (irq_domain_is_ipi_per_cpu(ipidomain)) { | ||
266 | int cpu; | ||
267 | |||
268 | for_each_cpu(cpu, cpu_possible_mask) { | ||
269 | smp_ipi_init_one(call_virq + cpu, &irq_call); | ||
270 | smp_ipi_init_one(sched_virq + cpu, &irq_resched); | ||
271 | } | ||
272 | } else { | ||
273 | smp_ipi_init_one(call_virq, &irq_call); | ||
274 | smp_ipi_init_one(sched_virq, &irq_resched); | ||
275 | } | ||
276 | |||
277 | call_desc = irq_to_desc(call_virq); | ||
278 | sched_desc = irq_to_desc(sched_virq); | ||
279 | |||
280 | return 0; | ||
281 | } | ||
282 | early_initcall(mips_smp_ipi_init); | ||
283 | #endif | ||
284 | |||
149 | /* | 285 | /* |
150 | * First C code run on the secondary CPUs after being started up by | 286 | * First C code run on the secondary CPUs after being started up by |
151 | * the master. | 287 | * the master. |
@@ -192,7 +328,7 @@ asmlinkage void start_secondary(void) | |||
192 | WARN_ON_ONCE(!irqs_disabled()); | 328 | WARN_ON_ONCE(!irqs_disabled()); |
193 | mp_ops->smp_finish(); | 329 | mp_ops->smp_finish(); |
194 | 330 | ||
195 | cpu_startup_entry(CPUHP_ONLINE); | 331 | cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); |
196 | } | 332 | } |
197 | 333 | ||
198 | static void stop_this_cpu(void *dummy) | 334 | static void stop_this_cpu(void *dummy) |
diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c index f984193718b1..426173c4b0b9 100644 --- a/arch/mn10300/kernel/smp.c +++ b/arch/mn10300/kernel/smp.c | |||
@@ -675,7 +675,7 @@ int __init start_secondary(void *unused) | |||
675 | #ifdef CONFIG_GENERIC_CLOCKEVENTS | 675 | #ifdef CONFIG_GENERIC_CLOCKEVENTS |
676 | init_clockevents(); | 676 | init_clockevents(); |
677 | #endif | 677 | #endif |
678 | cpu_startup_entry(CPUHP_ONLINE); | 678 | cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); |
679 | return 0; | 679 | return 0; |
680 | } | 680 | } |
681 | 681 | ||
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 52e85973a283..c2a9cc55a62f 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c | |||
@@ -305,7 +305,7 @@ void __init smp_callin(void) | |||
305 | 305 | ||
306 | local_irq_enable(); /* Interrupts have been off until now */ | 306 | local_irq_enable(); /* Interrupts have been off until now */ |
307 | 307 | ||
308 | cpu_startup_entry(CPUHP_ONLINE); | 308 | cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); |
309 | 309 | ||
310 | /* NOTREACHED */ | 310 | /* NOTREACHED */ |
311 | panic("smp_callin() AAAAaaaaahhhh....\n"); | 311 | panic("smp_callin() AAAAaaaaahhhh....\n"); |
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index ec9ec2058d2d..cc13d4c83291 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c | |||
@@ -727,7 +727,7 @@ void start_secondary(void *unused) | |||
727 | 727 | ||
728 | local_irq_enable(); | 728 | local_irq_enable(); |
729 | 729 | ||
730 | cpu_startup_entry(CPUHP_ONLINE); | 730 | cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); |
731 | 731 | ||
732 | BUG(); | 732 | BUG(); |
733 | } | 733 | } |
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 3c65a8eae34d..40a6b4f9c36c 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c | |||
@@ -798,7 +798,7 @@ static void smp_start_secondary(void *cpuvoid) | |||
798 | set_cpu_online(smp_processor_id(), true); | 798 | set_cpu_online(smp_processor_id(), true); |
799 | inc_irq_stat(CPU_RST); | 799 | inc_irq_stat(CPU_RST); |
800 | local_irq_enable(); | 800 | local_irq_enable(); |
801 | cpu_startup_entry(CPUHP_ONLINE); | 801 | cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); |
802 | } | 802 | } |
803 | 803 | ||
804 | /* Upping and downing of CPUs */ | 804 | /* Upping and downing of CPUs */ |
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index de6be008fc01..13f633add29a 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c | |||
@@ -203,7 +203,7 @@ asmlinkage void start_secondary(void) | |||
203 | set_cpu_online(cpu, true); | 203 | set_cpu_online(cpu, true); |
204 | per_cpu(cpu_state, cpu) = CPU_ONLINE; | 204 | per_cpu(cpu_state, cpu) = CPU_ONLINE; |
205 | 205 | ||
206 | cpu_startup_entry(CPUHP_ONLINE); | 206 | cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); |
207 | } | 207 | } |
208 | 208 | ||
209 | extern struct { | 209 | extern struct { |
diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c index b3a5d81b20f0..fb30e7c6a5b1 100644 --- a/arch/sparc/kernel/smp_32.c +++ b/arch/sparc/kernel/smp_32.c | |||
@@ -364,7 +364,7 @@ static void sparc_start_secondary(void *arg) | |||
364 | local_irq_enable(); | 364 | local_irq_enable(); |
365 | 365 | ||
366 | wmb(); | 366 | wmb(); |
367 | cpu_startup_entry(CPUHP_ONLINE); | 367 | cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); |
368 | 368 | ||
369 | /* We should never reach here! */ | 369 | /* We should never reach here! */ |
370 | BUG(); | 370 | BUG(); |
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 19cd08d18672..8a6151a628ce 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c | |||
@@ -134,7 +134,7 @@ void smp_callin(void) | |||
134 | 134 | ||
135 | local_irq_enable(); | 135 | local_irq_enable(); |
136 | 136 | ||
137 | cpu_startup_entry(CPUHP_ONLINE); | 137 | cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); |
138 | } | 138 | } |
139 | 139 | ||
140 | void cpu_panic(void) | 140 | void cpu_panic(void) |
diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c index 20d52a98e171..6c0abaacec33 100644 --- a/arch/tile/kernel/smpboot.c +++ b/arch/tile/kernel/smpboot.c | |||
@@ -208,7 +208,7 @@ void online_secondary(void) | |||
208 | /* Set up tile-timer clock-event device on this cpu */ | 208 | /* Set up tile-timer clock-event device on this cpu */ |
209 | setup_tile_timer(); | 209 | setup_tile_timer(); |
210 | 210 | ||
211 | cpu_startup_entry(CPUHP_ONLINE); | 211 | cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); |
212 | } | 212 | } |
213 | 213 | ||
214 | int __cpu_up(unsigned int cpu, struct task_struct *tidle) | 214 | int __cpu_up(unsigned int cpu, struct task_struct *tidle) |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b1051057e5b0..8f2e6659281b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -1163,22 +1163,23 @@ config MICROCODE | |||
1163 | bool "CPU microcode loading support" | 1163 | bool "CPU microcode loading support" |
1164 | default y | 1164 | default y |
1165 | depends on CPU_SUP_AMD || CPU_SUP_INTEL | 1165 | depends on CPU_SUP_AMD || CPU_SUP_INTEL |
1166 | depends on BLK_DEV_INITRD | ||
1167 | select FW_LOADER | 1166 | select FW_LOADER |
1168 | ---help--- | 1167 | ---help--- |
1169 | |||
1170 | If you say Y here, you will be able to update the microcode on | 1168 | If you say Y here, you will be able to update the microcode on |
1171 | certain Intel and AMD processors. The Intel support is for the | 1169 | Intel and AMD processors. The Intel support is for the IA32 family, |
1172 | IA32 family, e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4, | 1170 | e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4, Xeon etc. The |
1173 | Xeon etc. The AMD support is for families 0x10 and later. You will | 1171 | AMD support is for families 0x10 and later. You will obviously need |
1174 | obviously need the actual microcode binary data itself which is not | 1172 | the actual microcode binary data itself which is not shipped with |
1175 | shipped with the Linux kernel. | 1173 | the Linux kernel. |
1176 | 1174 | ||
1177 | This option selects the general module only, you need to select | 1175 | The preferred method to load microcode from a detached initrd is described |
1178 | at least one vendor specific module as well. | 1176 | in Documentation/x86/early-microcode.txt. For that you need to enable |
1179 | 1177 | CONFIG_BLK_DEV_INITRD in order for the loader to be able to scan the | |
1180 | To compile this driver as a module, choose M here: the module | 1178 | initrd for microcode blobs. |
1181 | will be called microcode. | 1179 | |
1180 | In addition, you can build-in the microcode into the kernel. For that you | ||
1181 | need to enable FIRMWARE_IN_KERNEL and add the vendor-supplied microcode | ||
1182 | to the CONFIG_EXTRA_FIRMWARE config option. | ||
1182 | 1183 | ||
1183 | config MICROCODE_INTEL | 1184 | config MICROCODE_INTEL |
1184 | bool "Intel microcode loading support" | 1185 | bool "Intel microcode loading support" |
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 7816b7b276f4..67eec55093a5 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug | |||
@@ -338,16 +338,6 @@ config DEBUG_IMR_SELFTEST | |||
338 | 338 | ||
339 | If unsure say N here. | 339 | If unsure say N here. |
340 | 340 | ||
341 | config X86_DEBUG_STATIC_CPU_HAS | ||
342 | bool "Debug alternatives" | ||
343 | depends on DEBUG_KERNEL | ||
344 | ---help--- | ||
345 | This option causes additional code to be generated which | ||
346 | fails if static_cpu_has() is used before alternatives have | ||
347 | run. | ||
348 | |||
349 | If unsure, say N. | ||
350 | |||
351 | config X86_DEBUG_FPU | 341 | config X86_DEBUG_FPU |
352 | bool "Debug the x86 FPU code" | 342 | bool "Debug the x86 FPU code" |
353 | depends on DEBUG_KERNEL | 343 | depends on DEBUG_KERNEL |
diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h index ea97697e51e4..4cb404fd45ce 100644 --- a/arch/x86/boot/cpuflags.h +++ b/arch/x86/boot/cpuflags.h | |||
@@ -1,7 +1,7 @@ | |||
1 | #ifndef BOOT_CPUFLAGS_H | 1 | #ifndef BOOT_CPUFLAGS_H |
2 | #define BOOT_CPUFLAGS_H | 2 | #define BOOT_CPUFLAGS_H |
3 | 3 | ||
4 | #include <asm/cpufeature.h> | 4 | #include <asm/cpufeatures.h> |
5 | #include <asm/processor-flags.h> | 5 | #include <asm/processor-flags.h> |
6 | 6 | ||
7 | struct cpu_features { | 7 | struct cpu_features { |
diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c index 637097e66a62..f72498dc90d2 100644 --- a/arch/x86/boot/mkcpustr.c +++ b/arch/x86/boot/mkcpustr.c | |||
@@ -17,7 +17,7 @@ | |||
17 | 17 | ||
18 | #include "../include/asm/required-features.h" | 18 | #include "../include/asm/required-features.h" |
19 | #include "../include/asm/disabled-features.h" | 19 | #include "../include/asm/disabled-features.h" |
20 | #include "../include/asm/cpufeature.h" | 20 | #include "../include/asm/cpufeatures.h" |
21 | #include "../kernel/cpu/capflags.c" | 21 | #include "../kernel/cpu/capflags.c" |
22 | 22 | ||
23 | int main(void) | 23 | int main(void) |
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index a7661c430cd9..0702d2531bc7 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c | |||
@@ -49,7 +49,6 @@ typedef unsigned int u32; | |||
49 | 49 | ||
50 | /* This must be large enough to hold the entire setup */ | 50 | /* This must be large enough to hold the entire setup */ |
51 | u8 buf[SETUP_SECT_MAX*512]; | 51 | u8 buf[SETUP_SECT_MAX*512]; |
52 | int is_big_kernel; | ||
53 | 52 | ||
54 | #define PECOFF_RELOC_RESERVE 0x20 | 53 | #define PECOFF_RELOC_RESERVE 0x20 |
55 | 54 | ||
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 028be48c8839..e25a1630320c 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig | |||
@@ -288,7 +288,7 @@ CONFIG_NLS_ISO8859_1=y | |||
288 | CONFIG_NLS_UTF8=y | 288 | CONFIG_NLS_UTF8=y |
289 | CONFIG_PRINTK_TIME=y | 289 | CONFIG_PRINTK_TIME=y |
290 | # CONFIG_ENABLE_WARN_DEPRECATED is not set | 290 | # CONFIG_ENABLE_WARN_DEPRECATED is not set |
291 | CONFIG_FRAME_WARN=2048 | 291 | CONFIG_FRAME_WARN=1024 |
292 | CONFIG_MAGIC_SYSRQ=y | 292 | CONFIG_MAGIC_SYSRQ=y |
293 | # CONFIG_UNUSED_SYMBOLS is not set | 293 | # CONFIG_UNUSED_SYMBOLS is not set |
294 | CONFIG_DEBUG_KERNEL=y | 294 | CONFIG_DEBUG_KERNEL=y |
diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c index 07d2c6c86a54..27226df3f7d8 100644 --- a/arch/x86/crypto/crc32-pclmul_glue.c +++ b/arch/x86/crypto/crc32-pclmul_glue.c | |||
@@ -33,7 +33,7 @@ | |||
33 | #include <linux/crc32.h> | 33 | #include <linux/crc32.h> |
34 | #include <crypto/internal/hash.h> | 34 | #include <crypto/internal/hash.h> |
35 | 35 | ||
36 | #include <asm/cpufeature.h> | 36 | #include <asm/cpufeatures.h> |
37 | #include <asm/cpu_device_id.h> | 37 | #include <asm/cpu_device_id.h> |
38 | #include <asm/fpu/api.h> | 38 | #include <asm/fpu/api.h> |
39 | 39 | ||
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c index 0e9871693f24..0857b1a1de3b 100644 --- a/arch/x86/crypto/crc32c-intel_glue.c +++ b/arch/x86/crypto/crc32c-intel_glue.c | |||
@@ -30,7 +30,7 @@ | |||
30 | #include <linux/kernel.h> | 30 | #include <linux/kernel.h> |
31 | #include <crypto/internal/hash.h> | 31 | #include <crypto/internal/hash.h> |
32 | 32 | ||
33 | #include <asm/cpufeature.h> | 33 | #include <asm/cpufeatures.h> |
34 | #include <asm/cpu_device_id.h> | 34 | #include <asm/cpu_device_id.h> |
35 | #include <asm/fpu/internal.h> | 35 | #include <asm/fpu/internal.h> |
36 | 36 | ||
diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c index a3fcfc97a311..cd4df9322501 100644 --- a/arch/x86/crypto/crct10dif-pclmul_glue.c +++ b/arch/x86/crypto/crct10dif-pclmul_glue.c | |||
@@ -30,7 +30,7 @@ | |||
30 | #include <linux/string.h> | 30 | #include <linux/string.h> |
31 | #include <linux/kernel.h> | 31 | #include <linux/kernel.h> |
32 | #include <asm/fpu/api.h> | 32 | #include <asm/fpu/api.h> |
33 | #include <asm/cpufeature.h> | 33 | #include <asm/cpufeatures.h> |
34 | #include <asm/cpu_device_id.h> | 34 | #include <asm/cpu_device_id.h> |
35 | 35 | ||
36 | asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf, | 36 | asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf, |
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index e32206e09868..9a9e5884066c 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h | |||
@@ -201,37 +201,6 @@ For 32-bit we have the following conventions - kernel is built with | |||
201 | .byte 0xf1 | 201 | .byte 0xf1 |
202 | .endm | 202 | .endm |
203 | 203 | ||
204 | #else /* CONFIG_X86_64 */ | ||
205 | |||
206 | /* | ||
207 | * For 32bit only simplified versions of SAVE_ALL/RESTORE_ALL. These | ||
208 | * are different from the entry_32.S versions in not changing the segment | ||
209 | * registers. So only suitable for in kernel use, not when transitioning | ||
210 | * from or to user space. The resulting stack frame is not a standard | ||
211 | * pt_regs frame. The main use case is calling C code from assembler | ||
212 | * when all the registers need to be preserved. | ||
213 | */ | ||
214 | |||
215 | .macro SAVE_ALL | ||
216 | pushl %eax | ||
217 | pushl %ebp | ||
218 | pushl %edi | ||
219 | pushl %esi | ||
220 | pushl %edx | ||
221 | pushl %ecx | ||
222 | pushl %ebx | ||
223 | .endm | ||
224 | |||
225 | .macro RESTORE_ALL | ||
226 | popl %ebx | ||
227 | popl %ecx | ||
228 | popl %edx | ||
229 | popl %esi | ||
230 | popl %edi | ||
231 | popl %ebp | ||
232 | popl %eax | ||
233 | .endm | ||
234 | |||
235 | #endif /* CONFIG_X86_64 */ | 204 | #endif /* CONFIG_X86_64 */ |
236 | 205 | ||
237 | /* | 206 | /* |
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 03663740c866..e79d93d44ecd 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <asm/traps.h> | 26 | #include <asm/traps.h> |
27 | #include <asm/vdso.h> | 27 | #include <asm/vdso.h> |
28 | #include <asm/uaccess.h> | 28 | #include <asm/uaccess.h> |
29 | #include <asm/cpufeature.h> | ||
29 | 30 | ||
30 | #define CREATE_TRACE_POINTS | 31 | #define CREATE_TRACE_POINTS |
31 | #include <trace/events/syscalls.h> | 32 | #include <trace/events/syscalls.h> |
@@ -44,6 +45,8 @@ __visible void enter_from_user_mode(void) | |||
44 | CT_WARN_ON(ct_state() != CONTEXT_USER); | 45 | CT_WARN_ON(ct_state() != CONTEXT_USER); |
45 | user_exit(); | 46 | user_exit(); |
46 | } | 47 | } |
48 | #else | ||
49 | static inline void enter_from_user_mode(void) {} | ||
47 | #endif | 50 | #endif |
48 | 51 | ||
49 | static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch) | 52 | static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch) |
@@ -84,17 +87,6 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch) | |||
84 | 87 | ||
85 | work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY; | 88 | work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY; |
86 | 89 | ||
87 | #ifdef CONFIG_CONTEXT_TRACKING | ||
88 | /* | ||
89 | * If TIF_NOHZ is set, we are required to call user_exit() before | ||
90 | * doing anything that could touch RCU. | ||
91 | */ | ||
92 | if (work & _TIF_NOHZ) { | ||
93 | enter_from_user_mode(); | ||
94 | work &= ~_TIF_NOHZ; | ||
95 | } | ||
96 | #endif | ||
97 | |||
98 | #ifdef CONFIG_SECCOMP | 90 | #ifdef CONFIG_SECCOMP |
99 | /* | 91 | /* |
100 | * Do seccomp first -- it should minimize exposure of other | 92 | * Do seccomp first -- it should minimize exposure of other |
@@ -171,16 +163,6 @@ long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch, | |||
171 | if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) | 163 | if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) |
172 | BUG_ON(regs != task_pt_regs(current)); | 164 | BUG_ON(regs != task_pt_regs(current)); |
173 | 165 | ||
174 | /* | ||
175 | * If we stepped into a sysenter/syscall insn, it trapped in | ||
176 | * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. | ||
177 | * If user-mode had set TF itself, then it's still clear from | ||
178 | * do_debug() and we need to set it again to restore the user | ||
179 | * state. If we entered on the slow path, TF was already set. | ||
180 | */ | ||
181 | if (work & _TIF_SINGLESTEP) | ||
182 | regs->flags |= X86_EFLAGS_TF; | ||
183 | |||
184 | #ifdef CONFIG_SECCOMP | 166 | #ifdef CONFIG_SECCOMP |
185 | /* | 167 | /* |
186 | * Call seccomp_phase2 before running the other hooks so that | 168 | * Call seccomp_phase2 before running the other hooks so that |
@@ -268,6 +250,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) | |||
268 | /* Called with IRQs disabled. */ | 250 | /* Called with IRQs disabled. */ |
269 | __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) | 251 | __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) |
270 | { | 252 | { |
253 | struct thread_info *ti = pt_regs_to_thread_info(regs); | ||
271 | u32 cached_flags; | 254 | u32 cached_flags; |
272 | 255 | ||
273 | if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled())) | 256 | if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled())) |
@@ -275,12 +258,22 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) | |||
275 | 258 | ||
276 | lockdep_sys_exit(); | 259 | lockdep_sys_exit(); |
277 | 260 | ||
278 | cached_flags = | 261 | cached_flags = READ_ONCE(ti->flags); |
279 | READ_ONCE(pt_regs_to_thread_info(regs)->flags); | ||
280 | 262 | ||
281 | if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS)) | 263 | if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS)) |
282 | exit_to_usermode_loop(regs, cached_flags); | 264 | exit_to_usermode_loop(regs, cached_flags); |
283 | 265 | ||
266 | #ifdef CONFIG_COMPAT | ||
267 | /* | ||
268 | * Compat syscalls set TS_COMPAT. Make sure we clear it before | ||
269 | * returning to user mode. We need to clear it *after* signal | ||
270 | * handling, because syscall restart has a fixup for compat | ||
271 | * syscalls. The fixup is exercised by the ptrace_syscall_32 | ||
272 | * selftest. | ||
273 | */ | ||
274 | ti->status &= ~TS_COMPAT; | ||
275 | #endif | ||
276 | |||
284 | user_enter(); | 277 | user_enter(); |
285 | } | 278 | } |
286 | 279 | ||
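The rewritten prepare_exit_to_usermode() above snapshots ti->flags exactly once with READ_ONCE() and then tests only the local copy. A simplified user-space rendition of that idiom (the kernel's real READ_ONCE() in compiler.h handles more cases; the work bit here is illustrative):

    #include <stdio.h>

    /* Simplified stand-in for the kernel's READ_ONCE() (compiler.h). */
    #define READ_ONCE(x) (*(const volatile __typeof__(x) *)&(x))

    #define _TIF_SIGPENDING (1u << 2)       /* illustrative work bit */

    unsigned int thread_flags;              /* stand-in for ti->flags */

    int main(void)
    {
            /* One volatile load; all later tests use the stable copy. */
            unsigned int cached_flags = READ_ONCE(thread_flags);

            if (cached_flags & _TIF_SIGPENDING)
                    puts("exit work pending");
            else
                    puts("no exit work");
            return 0;
    }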
@@ -332,33 +325,45 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs) | |||
332 | if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS)) | 325 | if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS)) |
333 | syscall_slow_exit_work(regs, cached_flags); | 326 | syscall_slow_exit_work(regs, cached_flags); |
334 | 327 | ||
335 | #ifdef CONFIG_COMPAT | 328 | local_irq_disable(); |
329 | prepare_exit_to_usermode(regs); | ||
330 | } | ||
331 | |||
332 | #ifdef CONFIG_X86_64 | ||
333 | __visible void do_syscall_64(struct pt_regs *regs) | ||
334 | { | ||
335 | struct thread_info *ti = pt_regs_to_thread_info(regs); | ||
336 | unsigned long nr = regs->orig_ax; | ||
337 | |||
338 | enter_from_user_mode(); | ||
339 | local_irq_enable(); | ||
340 | |||
341 | if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) | ||
342 | nr = syscall_trace_enter(regs); | ||
343 | |||
336 | /* | 344 | /* |
337 | * Compat syscalls set TS_COMPAT. Make sure we clear it before | 345 | * NB: Native and x32 syscalls are dispatched from the same |
338 | * returning to user mode. | 346 | * table. The only functional difference is the x32 bit in |
347 | * regs->orig_ax, which changes the behavior of some syscalls. | ||
339 | */ | 348 | */ |
340 | ti->status &= ~TS_COMPAT; | 349 | if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) { |
341 | #endif | 350 | regs->ax = sys_call_table[nr & __SYSCALL_MASK]( |
351 | regs->di, regs->si, regs->dx, | ||
352 | regs->r10, regs->r8, regs->r9); | ||
353 | } | ||
342 | 354 | ||
343 | local_irq_disable(); | 355 | syscall_return_slowpath(regs); |
344 | prepare_exit_to_usermode(regs); | ||
345 | } | 356 | } |
357 | #endif | ||
346 | 358 | ||
347 | #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) | 359 | #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) |
348 | /* | 360 | /* |
349 | * Does a 32-bit syscall. Called with IRQs on and does all entry and | 361 | * Does a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. Does |
350 | * exit work and returns with IRQs off. This function is extremely hot | 362 | * all entry and exit work and returns with IRQs off. This function is |
351 | * in workloads that use it, and it's usually called from | 363 | * extremely hot in workloads that use it, and it's usually called from |
352 | * do_fast_syscall_32, so forcibly inline it to improve performance. | 364 | * do_fast_syscall_32, so forcibly inline it to improve performance. |
353 | */ | 365 | */ |
354 | #ifdef CONFIG_X86_32 | 366 | static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) |
355 | /* 32-bit kernels use a trap gate for INT80, and the asm code calls here. */ | ||
356 | __visible | ||
357 | #else | ||
358 | /* 64-bit kernels use do_syscall_32_irqs_off() instead. */ | ||
359 | static | ||
360 | #endif | ||
361 | __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) | ||
362 | { | 367 | { |
363 | struct thread_info *ti = pt_regs_to_thread_info(regs); | 368 | struct thread_info *ti = pt_regs_to_thread_info(regs); |
364 | unsigned int nr = (unsigned int)regs->orig_ax; | 369 | unsigned int nr = (unsigned int)regs->orig_ax; |
@@ -393,14 +398,13 @@ __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) | |||
393 | syscall_return_slowpath(regs); | 398 | syscall_return_slowpath(regs); |
394 | } | 399 | } |
395 | 400 | ||
396 | #ifdef CONFIG_X86_64 | 401 | /* Handles int $0x80 */ |
397 | /* Handles INT80 on 64-bit kernels */ | 402 | __visible void do_int80_syscall_32(struct pt_regs *regs) |
398 | __visible void do_syscall_32_irqs_off(struct pt_regs *regs) | ||
399 | { | 403 | { |
404 | enter_from_user_mode(); | ||
400 | local_irq_enable(); | 405 | local_irq_enable(); |
401 | do_syscall_32_irqs_on(regs); | 406 | do_syscall_32_irqs_on(regs); |
402 | } | 407 | } |
403 | #endif | ||
404 | 408 | ||
405 | /* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */ | 409 | /* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */ |
406 | __visible long do_fast_syscall_32(struct pt_regs *regs) | 410 | __visible long do_fast_syscall_32(struct pt_regs *regs) |
@@ -420,12 +424,11 @@ __visible long do_fast_syscall_32(struct pt_regs *regs) | |||
420 | */ | 424 | */ |
421 | regs->ip = landing_pad; | 425 | regs->ip = landing_pad; |
422 | 426 | ||
423 | /* | 427 | enter_from_user_mode(); |
424 | * Fetch EBP from where the vDSO stashed it. | 428 | |
425 | * | ||
426 | * WARNING: We are in CONTEXT_USER and RCU isn't paying attention! | ||
427 | */ | ||
428 | local_irq_enable(); | 429 | local_irq_enable(); |
430 | |||
431 | /* Fetch EBP from where the vDSO stashed it. */ | ||
429 | if ( | 432 | if ( |
430 | #ifdef CONFIG_X86_64 | 433 | #ifdef CONFIG_X86_64 |
431 | /* | 434 | /* |
@@ -443,9 +446,6 @@ __visible long do_fast_syscall_32(struct pt_regs *regs) | |||
443 | /* User code screwed up. */ | 446 | /* User code screwed up. */ |
444 | local_irq_disable(); | 447 | local_irq_disable(); |
445 | regs->ax = -EFAULT; | 448 | regs->ax = -EFAULT; |
446 | #ifdef CONFIG_CONTEXT_TRACKING | ||
447 | enter_from_user_mode(); | ||
448 | #endif | ||
449 | prepare_exit_to_usermode(regs); | 449 | prepare_exit_to_usermode(regs); |
450 | return 0; /* Keep it simple: use IRET. */ | 450 | return 0; /* Keep it simple: use IRET. */ |
451 | } | 451 | } |
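The new do_syscall_64() above dispatches native and x32 system calls through one table, with the x32 marker bit masked off for indexing. A small user-space sketch of that masking (the bit value matches asm/unistd.h; the call number is hypothetical):

    #include <stdio.h>

    #define __X32_SYSCALL_BIT 0x40000000UL  /* from asm/unistd.h */
    #define __SYSCALL_MASK (~__X32_SYSCALL_BIT)

    int main(void)
    {
            unsigned long nr = __X32_SYSCALL_BIT | 1; /* hypothetical x32 call #1 */

            /* Indexing ignores the marker bit; handlers may still test it. */
            printf("table index = %lu, x32 = %d\n",
                   nr & __SYSCALL_MASK, !!(nr & __X32_SYSCALL_BIT));
            return 0;
    }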
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index bb3e376d0f33..10868aa734dc 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S | |||
@@ -40,7 +40,7 @@ | |||
40 | #include <asm/processor-flags.h> | 40 | #include <asm/processor-flags.h> |
41 | #include <asm/ftrace.h> | 41 | #include <asm/ftrace.h> |
42 | #include <asm/irq_vectors.h> | 42 | #include <asm/irq_vectors.h> |
43 | #include <asm/cpufeature.h> | 43 | #include <asm/cpufeatures.h> |
44 | #include <asm/alternative-asm.h> | 44 | #include <asm/alternative-asm.h> |
45 | #include <asm/asm.h> | 45 | #include <asm/asm.h> |
46 | #include <asm/smap.h> | 46 | #include <asm/smap.h> |
@@ -287,14 +287,64 @@ need_resched: | |||
287 | END(resume_kernel) | 287 | END(resume_kernel) |
288 | #endif | 288 | #endif |
289 | 289 | ||
290 | # SYSENTER call handler stub | 290 | GLOBAL(__begin_SYSENTER_singlestep_region) |
291 | /* | ||
292 | * All code from here through __end_SYSENTER_singlestep_region is subject | ||
293 | * to being single-stepped if a user program sets TF and executes SYSENTER. | ||
294 | * There is absolutely nothing that we can do to prevent this from happening | ||
295 | * (thanks Intel!). To keep our handling of this situation as simple as | ||
296 | * possible, we handle TF just like AC and NT, except that our #DB handler | ||
297 | * will ignore all of the single-step traps generated in this range. | ||
298 | */ | ||
299 | |||
300 | #ifdef CONFIG_XEN | ||
301 | /* | ||
302 | * Xen doesn't set %esp to be precisely what the normal SYSENTER | ||
303 | * entry point expects, so fix it up before using the normal path. | ||
304 | */ | ||
305 | ENTRY(xen_sysenter_target) | ||
306 | addl $5*4, %esp /* remove xen-provided frame */ | ||
307 | jmp sysenter_past_esp | ||
308 | #endif | ||
309 | |||
310 | /* | ||
311 | * 32-bit SYSENTER entry. | ||
312 | * | ||
313 | * 32-bit system calls through the vDSO's __kernel_vsyscall enter here | ||
314 | * if X86_FEATURE_SEP is available. This is the preferred system call | ||
315 | * entry on 32-bit systems. | ||
316 | * | ||
317 | * The SYSENTER instruction, in principle, should *only* occur in the | ||
318 | * vDSO. In practice, a small number of Android devices were shipped | ||
319 | * with a copy of Bionic that inlined a SYSENTER instruction. This | ||
320 | * never happened in any of Google's Bionic versions -- it only happened | ||
321 | * in a narrow range of Intel-provided versions. | ||
322 | * | ||
323 | * SYSENTER loads SS, ESP, CS, and EIP from previously programmed MSRs. | ||
324 | * IF and VM in RFLAGS are cleared (IOW: interrupts are off). | ||
325 | * SYSENTER does not save anything on the stack, | ||
326 | * and does not save old EIP (!!!), ESP, or EFLAGS. | ||
327 | * | ||
328 | * To avoid losing track of EFLAGS.VM (and thus potentially corrupting | ||
329 | * user and/or vm86 state), we explicitly disable the SYSENTER | ||
330 | * instruction in vm86 mode by reprogramming the MSRs. | ||
331 | * | ||
332 | * Arguments: | ||
333 | * eax system call number | ||
334 | * ebx arg1 | ||
335 | * ecx arg2 | ||
336 | * edx arg3 | ||
337 | * esi arg4 | ||
338 | * edi arg5 | ||
339 | * ebp user stack | ||
340 | * 0(%ebp) arg6 | ||
341 | */ | ||
291 | ENTRY(entry_SYSENTER_32) | 342 | ENTRY(entry_SYSENTER_32) |
292 | movl TSS_sysenter_sp0(%esp), %esp | 343 | movl TSS_sysenter_sp0(%esp), %esp |
293 | sysenter_past_esp: | 344 | sysenter_past_esp: |
294 | pushl $__USER_DS /* pt_regs->ss */ | 345 | pushl $__USER_DS /* pt_regs->ss */ |
295 | pushl %ebp /* pt_regs->sp (stashed in bp) */ | 346 | pushl %ebp /* pt_regs->sp (stashed in bp) */ |
296 | pushfl /* pt_regs->flags (except IF = 0) */ | 347 | pushfl /* pt_regs->flags (except IF = 0) */ |
297 | ASM_CLAC /* Clear AC after saving FLAGS */ | ||
298 | orl $X86_EFLAGS_IF, (%esp) /* Fix IF */ | 348 | orl $X86_EFLAGS_IF, (%esp) /* Fix IF */ |
299 | pushl $__USER_CS /* pt_regs->cs */ | 349 | pushl $__USER_CS /* pt_regs->cs */ |
300 | pushl $0 /* pt_regs->ip = 0 (placeholder) */ | 350 | pushl $0 /* pt_regs->ip = 0 (placeholder) */ |
@@ -302,6 +352,29 @@ sysenter_past_esp: | |||
302 | SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */ | 352 | SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */ |
303 | 353 | ||
304 | /* | 354 | /* |
355 | * SYSENTER doesn't filter flags, so we need to clear NT, AC | ||
356 | * and TF ourselves. To save a few cycles, we can check whether | ||
357 | * either was set instead of doing an unconditional popfq. | ||
358 | * This needs to happen before enabling interrupts so that | ||
359 | * we don't get preempted with NT set. | ||
360 | * | ||
361 | * If TF is set, we will single-step all the way to here -- do_debug | ||
362 | * will ignore all the traps. (Yes, this is slow, but so is | ||
363 | * single-stepping in general. This allows us to avoid having | ||
364 | * more complicated code to handle the case where a user program | ||
365 | * forces us to single-step through the SYSENTER entry code.) | ||
366 | * | ||
367 | * NB.: .Lsysenter_fix_flags is a label with the code under it moved | ||
368 | * out-of-line as an optimization: NT is unlikely to be set in the | ||
369 | * majority of the cases and instead of polluting the I$ unnecessarily, | ||
370 | * we're keeping that code behind a branch which will predict as | ||
371 | * not-taken and therefore its instructions won't be fetched. | ||
372 | */ | ||
373 | testl $X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, PT_EFLAGS(%esp) | ||
374 | jnz .Lsysenter_fix_flags | ||
375 | .Lsysenter_flags_fixed: | ||
376 | |||
377 | /* | ||
305 | * User mode is traced as though IRQs are on, and SYSENTER | 378 | * User mode is traced as though IRQs are on, and SYSENTER |
306 | * turned them off. | 379 | * turned them off. |
307 | */ | 380 | */ |
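The testl added above folds three rarely-set flags into a single branch that predicts as not-taken. Reduced to C, the check and its out-of-line fixup amount to the following sketch (flag bit values as in asm/processor-flags.h):

    #include <stdio.h>

    #define X86_EFLAGS_TF    (1UL << 8)     /* single-step */
    #define X86_EFLAGS_NT    (1UL << 14)    /* nested task */
    #define X86_EFLAGS_AC    (1UL << 18)    /* alignment check */
    #define X86_EFLAGS_FIXED 0x2UL          /* bit 1 is always set */

    /* One test covers all three flags; the fixup path is rarely taken. */
    static unsigned long sysenter_flags(unsigned long flags)
    {
            if (flags & (X86_EFLAGS_NT | X86_EFLAGS_AC | X86_EFLAGS_TF))
                    return X86_EFLAGS_FIXED;        /* .Lsysenter_fix_flags */
            return flags;
    }

    int main(void)
    {
            /* 0x4202 has NT set, so it collapses to the fixed value. */
            printf("%#lx -> %#lx\n", 0x4202UL, sysenter_flags(0x4202UL));
            return 0;
    }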
@@ -327,6 +400,15 @@ sysenter_past_esp: | |||
327 | popl %eax /* pt_regs->ax */ | 400 | popl %eax /* pt_regs->ax */ |
328 | 401 | ||
329 | /* | 402 | /* |
403 | * Restore all flags except IF. (We restore IF separately because | ||
404 | * STI gives a one-instruction window in which we won't be interrupted, | ||
405 | * whereas POPF does not.) | ||
406 | */ | ||
407 | addl $PT_EFLAGS-PT_DS, %esp /* point esp at pt_regs->flags */ | ||
408 | btr $X86_EFLAGS_IF_BIT, (%esp) | ||
409 | popfl | ||
410 | |||
411 | /* | ||
330 | * Return back to the vDSO, which will pop ecx and edx. | 412 | * Return back to the vDSO, which will pop ecx and edx. |
331 | * Don't bother with DS and ES (they already contain __USER_DS). | 413 | * Don't bother with DS and ES (they already contain __USER_DS). |
332 | */ | 414 | */ |
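The addl/btr/popfl sequence above restores every saved flag except IF, so interrupts can be re-enabled by STI right before SYSEXIT, inside STI's one-instruction interrupt shadow. The manipulation of the saved flags image, in C terms (bit position as in asm/processor-flags.h):

    #include <stdio.h>

    #define X86_EFLAGS_IF_BIT 9     /* interrupt enable flag */

    /*
     * Clear IF in the memory image before POPF; STI later re-enables
     * interrupts with a one-instruction window during which no
     * interrupt can be delivered (POPF gives no such guarantee).
     */
    static unsigned long flags_for_popf(unsigned long saved)
    {
            return saved & ~(1UL << X86_EFLAGS_IF_BIT);     /* the btr */
    }

    int main(void)
    {
            printf("%#lx -> %#lx\n", 0x246UL, flags_for_popf(0x246UL));
            return 0;
    }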
@@ -339,28 +421,63 @@ sysenter_past_esp: | |||
339 | .popsection | 421 | .popsection |
340 | _ASM_EXTABLE(1b, 2b) | 422 | _ASM_EXTABLE(1b, 2b) |
341 | PTGS_TO_GS_EX | 423 | PTGS_TO_GS_EX |
424 | |||
425 | .Lsysenter_fix_flags: | ||
426 | pushl $X86_EFLAGS_FIXED | ||
427 | popfl | ||
428 | jmp .Lsysenter_flags_fixed | ||
429 | GLOBAL(__end_SYSENTER_singlestep_region) | ||
342 | ENDPROC(entry_SYSENTER_32) | 430 | ENDPROC(entry_SYSENTER_32) |
343 | 431 | ||
344 | # system call handler stub | 432 | /* |
433 | * 32-bit legacy system call entry. | ||
434 | * | ||
435 | * 32-bit x86 Linux system calls traditionally used the INT $0x80 | ||
436 | * instruction. INT $0x80 lands here. | ||
437 | * | ||
438 | * This entry point can be used by any 32-bit program to perform system calls. | ||
439 | * Instances of INT $0x80 can be found inline in various programs and | ||
440 | * libraries. It is also used by the vDSO's __kernel_vsyscall | ||
441 | * fallback for hardware that doesn't support a faster entry method. | ||
442 | * Restarted 32-bit system calls also fall back to INT $0x80 | ||
443 | * regardless of what instruction was originally used to do the system | ||
444 | * call. (64-bit programs can use INT $0x80 as well, but they can | ||
445 | * only run on 64-bit kernels and therefore land in | ||
446 | * entry_INT80_compat.) | ||
447 | * | ||
448 | * This is considered a slow path. It is not used by most libc | ||
449 | * implementations on modern hardware except during process startup. | ||
450 | * | ||
451 | * Arguments: | ||
452 | * eax system call number | ||
453 | * ebx arg1 | ||
454 | * ecx arg2 | ||
455 | * edx arg3 | ||
456 | * esi arg4 | ||
457 | * edi arg5 | ||
458 | * ebp arg6 | ||
459 | */ | ||
345 | ENTRY(entry_INT80_32) | 460 | ENTRY(entry_INT80_32) |
346 | ASM_CLAC | 461 | ASM_CLAC |
347 | pushl %eax /* pt_regs->orig_ax */ | 462 | pushl %eax /* pt_regs->orig_ax */ |
348 | SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */ | 463 | SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */ |
349 | 464 | ||
350 | /* | 465 | /* |
351 | * User mode is traced as though IRQs are on. Unlike the 64-bit | 466 | * User mode is traced as though IRQs are on, and the interrupt gate |
352 | * case, INT80 is a trap gate on 32-bit kernels, so interrupts | 467 | * turned them off. |
353 | * are already on (unless user code is messing around with iopl). | ||
354 | */ | 468 | */ |
469 | TRACE_IRQS_OFF | ||
355 | 470 | ||
356 | movl %esp, %eax | 471 | movl %esp, %eax |
357 | call do_syscall_32_irqs_on | 472 | call do_int80_syscall_32 |
358 | .Lsyscall_32_done: | 473 | .Lsyscall_32_done: |
359 | 474 | ||
360 | restore_all: | 475 | restore_all: |
361 | TRACE_IRQS_IRET | 476 | TRACE_IRQS_IRET |
362 | restore_all_notrace: | 477 | restore_all_notrace: |
363 | #ifdef CONFIG_X86_ESPFIX32 | 478 | #ifdef CONFIG_X86_ESPFIX32 |
479 | ALTERNATIVE "jmp restore_nocheck", "", X86_BUG_ESPFIX | ||
480 | |||
364 | movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS | 481 | movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS |
365 | /* | 482 | /* |
366 | * Warning: PT_OLDSS(%esp) contains the wrong/random values if we | 483 | * Warning: PT_OLDSS(%esp) contains the wrong/random values if we |
@@ -387,19 +504,6 @@ ENTRY(iret_exc ) | |||
387 | 504 | ||
388 | #ifdef CONFIG_X86_ESPFIX32 | 505 | #ifdef CONFIG_X86_ESPFIX32 |
389 | ldt_ss: | 506 | ldt_ss: |
390 | #ifdef CONFIG_PARAVIRT | ||
391 | /* | ||
392 | * The kernel can't run on a non-flat stack if paravirt mode | ||
393 | * is active. Rather than try to fixup the high bits of | ||
394 | * ESP, bypass this code entirely. This may break DOSemu | ||
395 | * and/or Wine support in a paravirt VM, although the option | ||
396 | * is still available to implement the setting of the high | ||
397 | * 16-bits in the INTERRUPT_RETURN paravirt-op. | ||
398 | */ | ||
399 | cmpl $0, pv_info+PARAVIRT_enabled | ||
400 | jne restore_nocheck | ||
401 | #endif | ||
402 | |||
403 | /* | 507 | /* |
404 | * Setup and switch to ESPFIX stack | 508 | * Setup and switch to ESPFIX stack |
405 | * | 509 | * |
@@ -632,14 +736,6 @@ ENTRY(spurious_interrupt_bug) | |||
632 | END(spurious_interrupt_bug) | 736 | END(spurious_interrupt_bug) |
633 | 737 | ||
634 | #ifdef CONFIG_XEN | 738 | #ifdef CONFIG_XEN |
635 | /* | ||
636 | * Xen doesn't set %esp to be precisely what the normal SYSENTER | ||
637 | * entry point expects, so fix it up before using the normal path. | ||
638 | */ | ||
639 | ENTRY(xen_sysenter_target) | ||
640 | addl $5*4, %esp /* remove xen-provided frame */ | ||
641 | jmp sysenter_past_esp | ||
642 | |||
643 | ENTRY(xen_hypervisor_callback) | 739 | ENTRY(xen_hypervisor_callback) |
644 | pushl $-1 /* orig_ax = -1 => not a system call */ | 740 | pushl $-1 /* orig_ax = -1 => not a system call */ |
645 | SAVE_ALL | 741 | SAVE_ALL |
@@ -939,51 +1035,48 @@ error_code: | |||
939 | jmp ret_from_exception | 1035 | jmp ret_from_exception |
940 | END(page_fault) | 1036 | END(page_fault) |
941 | 1037 | ||
942 | /* | ||
943 | * Debug traps and NMI can happen at the one SYSENTER instruction | ||
944 | * that sets up the real kernel stack. Check here, since we can't | ||
945 | * allow the wrong stack to be used. | ||
946 | * | ||
947 | * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have | ||
948 | * already pushed 3 words if it hits on the sysenter instruction: | ||
949 | * eflags, cs and eip. | ||
950 | * | ||
951 | * We just load the right stack, and push the three (known) values | ||
952 | * by hand onto the new stack - while updating the return eip past | ||
953 | * the instruction that would have done it for sysenter. | ||
954 | */ | ||
955 | .macro FIX_STACK offset ok label | ||
956 | cmpw $__KERNEL_CS, 4(%esp) | ||
957 | jne \ok | ||
958 | \label: | ||
959 | movl TSS_sysenter_sp0 + \offset(%esp), %esp | ||
960 | pushfl | ||
961 | pushl $__KERNEL_CS | ||
962 | pushl $sysenter_past_esp | ||
963 | .endm | ||
964 | |||
965 | ENTRY(debug) | 1038 | ENTRY(debug) |
1039 | /* | ||
1040 | * #DB can happen at the first instruction of | ||
1041 | * entry_SYSENTER_32 or in Xen's SYSENTER prologue. If this | ||
1042 | * happens, then we will be running on a very small stack. We | ||
1043 | * need to detect this condition and switch to the thread | ||
1044 | * stack before calling any C code at all. | ||
1045 | * | ||
1046 | * If you edit this code, keep in mind that NMIs can happen in here. | ||
1047 | */ | ||
966 | ASM_CLAC | 1048 | ASM_CLAC |
967 | cmpl $entry_SYSENTER_32, (%esp) | ||
968 | jne debug_stack_correct | ||
969 | FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn | ||
970 | debug_stack_correct: | ||
971 | pushl $-1 # mark this as an int | 1049 | pushl $-1 # mark this as an int |
972 | SAVE_ALL | 1050 | SAVE_ALL |
973 | TRACE_IRQS_OFF | ||
974 | xorl %edx, %edx # error code 0 | 1051 | xorl %edx, %edx # error code 0 |
975 | movl %esp, %eax # pt_regs pointer | 1052 | movl %esp, %eax # pt_regs pointer |
1053 | |||
1054 | /* Are we currently on the SYSENTER stack? */ | ||
1055 | PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx) | ||
1056 | subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ | ||
1057 | cmpl $SIZEOF_SYSENTER_stack, %ecx | ||
1058 | jb .Ldebug_from_sysenter_stack | ||
1059 | |||
1060 | TRACE_IRQS_OFF | ||
1061 | call do_debug | ||
1062 | jmp ret_from_exception | ||
1063 | |||
1064 | .Ldebug_from_sysenter_stack: | ||
1065 | /* We're on the SYSENTER stack. Switch off. */ | ||
1066 | movl %esp, %ebp | ||
1067 | movl PER_CPU_VAR(cpu_current_top_of_stack), %esp | ||
1068 | TRACE_IRQS_OFF | ||
976 | call do_debug | 1069 | call do_debug |
1070 | movl %ebp, %esp | ||
977 | jmp ret_from_exception | 1071 | jmp ret_from_exception |
978 | END(debug) | 1072 | END(debug) |
979 | 1073 | ||
980 | /* | 1074 | /* |
981 | * NMI is doubly nasty. It can happen _while_ we're handling | 1075 | * NMI is doubly nasty. It can happen on the first instruction of |
982 | * a debug fault, and the debug fault hasn't yet been able to | 1076 | * entry_SYSENTER_32 (just like #DB), but it can also interrupt the beginning |
983 | * clear up the stack. So we first check whether we got an | 1077 | * of the #DB handler even if that #DB in turn hit before entry_SYSENTER_32 |
984 | * NMI on the sysenter entry path, but after that we need to | 1078 | * switched stacks. We handle both conditions by simply checking whether we |
985 | * check whether we got an NMI on the debug path where the debug | 1079 | * interrupted kernel code running on the SYSENTER stack. |
986 | * fault happened on the sysenter path. | ||
987 | */ | 1080 | */ |
988 | ENTRY(nmi) | 1081 | ENTRY(nmi) |
989 | ASM_CLAC | 1082 | ASM_CLAC |
@@ -994,41 +1087,32 @@ ENTRY(nmi) | |||
994 | popl %eax | 1087 | popl %eax |
995 | je nmi_espfix_stack | 1088 | je nmi_espfix_stack |
996 | #endif | 1089 | #endif |
997 | cmpl $entry_SYSENTER_32, (%esp) | 1090 | |
998 | je nmi_stack_fixup | 1091 | pushl %eax # pt_regs->orig_ax |
999 | pushl %eax | ||
1000 | movl %esp, %eax | ||
1001 | /* | ||
1002 | * Do not access memory above the end of our stack page, | ||
1003 | * it might not exist. | ||
1004 | */ | ||
1005 | andl $(THREAD_SIZE-1), %eax | ||
1006 | cmpl $(THREAD_SIZE-20), %eax | ||
1007 | popl %eax | ||
1008 | jae nmi_stack_correct | ||
1009 | cmpl $entry_SYSENTER_32, 12(%esp) | ||
1010 | je nmi_debug_stack_check | ||
1011 | nmi_stack_correct: | ||
1012 | pushl %eax | ||
1013 | SAVE_ALL | 1092 | SAVE_ALL |
1014 | xorl %edx, %edx # zero error code | 1093 | xorl %edx, %edx # zero error code |
1015 | movl %esp, %eax # pt_regs pointer | 1094 | movl %esp, %eax # pt_regs pointer |
1095 | |||
1096 | /* Are we currently on the SYSENTER stack? */ | ||
1097 | PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx) | ||
1098 | subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ | ||
1099 | cmpl $SIZEOF_SYSENTER_stack, %ecx | ||
1100 | jb .Lnmi_from_sysenter_stack | ||
1101 | |||
1102 | /* Not on SYSENTER stack. */ | ||
1016 | call do_nmi | 1103 | call do_nmi |
1017 | jmp restore_all_notrace | 1104 | jmp restore_all_notrace |
1018 | 1105 | ||
1019 | nmi_stack_fixup: | 1106 | .Lnmi_from_sysenter_stack: |
1020 | FIX_STACK 12, nmi_stack_correct, 1 | 1107 | /* |
1021 | jmp nmi_stack_correct | 1108 | * We're on the SYSENTER stack. Switch off. No one (not even debug) |
1022 | 1109 | * is using the thread stack right now, so it's safe for us to use it. | |
1023 | nmi_debug_stack_check: | 1110 | */ |
1024 | cmpw $__KERNEL_CS, 16(%esp) | 1111 | movl %esp, %ebp |
1025 | jne nmi_stack_correct | 1112 | movl PER_CPU_VAR(cpu_current_top_of_stack), %esp |
1026 | cmpl $debug, (%esp) | 1113 | call do_nmi |
1027 | jb nmi_stack_correct | 1114 | movl %ebp, %esp |
1028 | cmpl $debug_esp_fix_insn, (%esp) | 1115 | jmp restore_all_notrace |
1029 | ja nmi_stack_correct | ||
1030 | FIX_STACK 24, nmi_stack_correct, 1 | ||
1031 | jmp nmi_stack_correct | ||
1032 | 1116 | ||
1033 | #ifdef CONFIG_X86_ESPFIX32 | 1117 | #ifdef CONFIG_X86_ESPFIX32 |
1034 | nmi_espfix_stack: | 1118 | nmi_espfix_stack: |
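Both the #DB and the NMI entries above decide whether they interrupted code on the tiny per-CPU SYSENTER stack with one unsigned subtract-and-compare against the stack's end. A sketch of that range check (the stack size here is invented for illustration):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define SIZEOF_SYSENTER_STACK 512       /* illustrative size only */

    /*
     * end - sp underflows to a huge unsigned value when sp is above
     * the stack, so one compare covers both sides of the window.
     */
    static bool on_sysenter_stack(uintptr_t sp, uintptr_t stack_end)
    {
            return stack_end - sp < SIZEOF_SYSENTER_STACK;
    }

    int main(void)
    {
            uintptr_t end = 0x10000;

            printf("%d %d\n", on_sysenter_stack(0xfff0, end),   /* inside: 1 */
                   on_sysenter_stack(0x8000, end));             /* outside: 0 */
            return 0;
    }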
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 9d34d3cfceb6..858b555e274b 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S | |||
@@ -103,6 +103,16 @@ ENDPROC(native_usergs_sysret64) | |||
103 | /* | 103 | /* |
104 | * 64-bit SYSCALL instruction entry. Up to 6 arguments in registers. | 104 | * 64-bit SYSCALL instruction entry. Up to 6 arguments in registers. |
105 | * | 105 | * |
106 | * This is the only entry point used for 64-bit system calls. The | ||
107 | * hardware interface is reasonably well designed and the register to | ||
108 | * argument mapping Linux uses fits well with the registers that are | ||
109 | * available when SYSCALL is used. | ||
110 | * | ||
111 | * SYSCALL instructions can be found inlined in libc implementations as | ||
112 | * well as some other programs and libraries. There are also a handful | ||
113 | * of SYSCALL instructions in the vDSO used, for example, as a | ||
114 | * clock_gettimeofday fallback. | ||
115 | * | ||
106 | * 64-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11, | 116 | * 64-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11, |
107 | * then loads new ss, cs, and rip from previously programmed MSRs. | 117 | * then loads new ss, cs, and rip from previously programmed MSRs. |
108 | * rflags gets masked by a value from another MSR (so CLD and CLAC | 118 | * rflags gets masked by a value from another MSR (so CLD and CLAC |
@@ -145,17 +155,11 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) | |||
145 | movq %rsp, PER_CPU_VAR(rsp_scratch) | 155 | movq %rsp, PER_CPU_VAR(rsp_scratch) |
146 | movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp | 156 | movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp |
147 | 157 | ||
158 | TRACE_IRQS_OFF | ||
159 | |||
148 | /* Construct struct pt_regs on stack */ | 160 | /* Construct struct pt_regs on stack */ |
149 | pushq $__USER_DS /* pt_regs->ss */ | 161 | pushq $__USER_DS /* pt_regs->ss */ |
150 | pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */ | 162 | pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */ |
151 | /* | ||
152 | * Re-enable interrupts. | ||
153 | * We use 'rsp_scratch' as a scratch space, hence irq-off block above | ||
154 | * must execute atomically in the face of possible interrupt-driven | ||
155 | * task preemption. We must enable interrupts only after we're done | ||
156 | * with using rsp_scratch: | ||
157 | */ | ||
158 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
159 | pushq %r11 /* pt_regs->flags */ | 163 | pushq %r11 /* pt_regs->flags */ |
160 | pushq $__USER_CS /* pt_regs->cs */ | 164 | pushq $__USER_CS /* pt_regs->cs */ |
161 | pushq %rcx /* pt_regs->ip */ | 165 | pushq %rcx /* pt_regs->ip */ |
@@ -171,9 +175,21 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) | |||
171 | pushq %r11 /* pt_regs->r11 */ | 175 | pushq %r11 /* pt_regs->r11 */ |
172 | sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ | 176 | sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ |
173 | 177 | ||
174 | testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) | 178 | /* |
175 | jnz tracesys | 179 | * If we need to do entry work or if we guess we'll need to do |
180 | * exit work, go straight to the slow path. | ||
181 | */ | ||
182 | testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) | ||
183 | jnz entry_SYSCALL64_slow_path | ||
184 | |||
176 | entry_SYSCALL_64_fastpath: | 185 | entry_SYSCALL_64_fastpath: |
186 | /* | ||
187 | * Easy case: enable interrupts and issue the syscall. If the syscall | ||
188 | * needs pt_regs, we'll call a stub that disables interrupts again | ||
189 | * and jumps to the slow path. | ||
190 | */ | ||
191 | TRACE_IRQS_ON | ||
192 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
177 | #if __SYSCALL_MASK == ~0 | 193 | #if __SYSCALL_MASK == ~0 |
178 | cmpq $__NR_syscall_max, %rax | 194 | cmpq $__NR_syscall_max, %rax |
179 | #else | 195 | #else |
@@ -182,103 +198,56 @@ entry_SYSCALL_64_fastpath: | |||
182 | #endif | 198 | #endif |
183 | ja 1f /* return -ENOSYS (already in pt_regs->ax) */ | 199 | ja 1f /* return -ENOSYS (already in pt_regs->ax) */ |
184 | movq %r10, %rcx | 200 | movq %r10, %rcx |
201 | |||
202 | /* | ||
203 | * This call instruction is handled specially in stub_ptregs_64. | ||
204 | * It might end up jumping to the slow path. If it jumps, RAX | ||
205 | * and all argument registers are clobbered. | ||
206 | */ | ||
185 | call *sys_call_table(, %rax, 8) | 207 | call *sys_call_table(, %rax, 8) |
208 | .Lentry_SYSCALL_64_after_fastpath_call: | ||
209 | |||
186 | movq %rax, RAX(%rsp) | 210 | movq %rax, RAX(%rsp) |
187 | 1: | 211 | 1: |
188 | /* | ||
189 | * Syscall return path ending with SYSRET (fast path). | ||
190 | * Has incompletely filled pt_regs. | ||
191 | */ | ||
192 | LOCKDEP_SYS_EXIT | ||
193 | /* | ||
194 | * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, | ||
195 | * it is too small to ever cause noticeable irq latency. | ||
196 | */ | ||
197 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
198 | 212 | ||
199 | /* | 213 | /* |
200 | * We must check ti flags with interrupts (or at least preemption) | 214 | * If we get here, then we know that pt_regs is clean for SYSRET64. |
201 | * off because we must *never* return to userspace without | 215 | * If we see that no exit work is required (which we are required |
202 | * processing exit work that is enqueued if we're preempted here. | 216 | * to check with IRQs off), then we can go straight to SYSRET64. |
203 | * In particular, returning to userspace with any of the one-shot | ||
204 | * flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is | ||
205 | * very bad. | ||
206 | */ | 217 | */ |
218 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
219 | TRACE_IRQS_OFF | ||
207 | testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) | 220 | testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) |
208 | jnz int_ret_from_sys_call_irqs_off /* Go to the slow path */ | 221 | jnz 1f |
209 | 222 | ||
210 | RESTORE_C_REGS_EXCEPT_RCX_R11 | 223 | LOCKDEP_SYS_EXIT |
224 | TRACE_IRQS_ON /* user mode is traced as IRQs on */ | ||
211 | movq RIP(%rsp), %rcx | 225 | movq RIP(%rsp), %rcx |
212 | movq EFLAGS(%rsp), %r11 | 226 | movq EFLAGS(%rsp), %r11 |
227 | RESTORE_C_REGS_EXCEPT_RCX_R11 | ||
213 | movq RSP(%rsp), %rsp | 228 | movq RSP(%rsp), %rsp |
214 | /* | ||
215 | * 64-bit SYSRET restores rip from rcx, | ||
216 | * rflags from r11 (but RF and VM bits are forced to 0), | ||
217 | * cs and ss are loaded from MSRs. | ||
218 | * Restoration of rflags re-enables interrupts. | ||
219 | * | ||
220 | * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss | ||
221 | * descriptor is not reinitialized. This means that we should | ||
222 | * avoid SYSRET with SS == NULL, which could happen if we schedule, | ||
223 | * exit the kernel, and re-enter using an interrupt vector. (All | ||
224 | * interrupt entries on x86_64 set SS to NULL.) We prevent that | ||
225 | * from happening by reloading SS in __switch_to. (Actually | ||
226 | * detecting the failure in 64-bit userspace is tricky but can be | ||
227 | * done.) | ||
228 | */ | ||
229 | USERGS_SYSRET64 | 229 | USERGS_SYSRET64 |
230 | 230 | ||
231 | GLOBAL(int_ret_from_sys_call_irqs_off) | 231 | 1: |
232 | /* | ||
233 | * The fast path looked good when we started, but something changed | ||
234 | * along the way and we need to switch to the slow path. Calling | ||
235 | * raise(3) will trigger this, for example. IRQs are off. | ||
236 | */ | ||
232 | TRACE_IRQS_ON | 237 | TRACE_IRQS_ON |
233 | ENABLE_INTERRUPTS(CLBR_NONE) | 238 | ENABLE_INTERRUPTS(CLBR_NONE) |
234 | jmp int_ret_from_sys_call | ||
235 | |||
236 | /* Do syscall entry tracing */ | ||
237 | tracesys: | ||
238 | movq %rsp, %rdi | ||
239 | movl $AUDIT_ARCH_X86_64, %esi | ||
240 | call syscall_trace_enter_phase1 | ||
241 | test %rax, %rax | ||
242 | jnz tracesys_phase2 /* if needed, run the slow path */ | ||
243 | RESTORE_C_REGS_EXCEPT_RAX /* else restore clobbered regs */ | ||
244 | movq ORIG_RAX(%rsp), %rax | ||
245 | jmp entry_SYSCALL_64_fastpath /* and return to the fast path */ | ||
246 | |||
247 | tracesys_phase2: | ||
248 | SAVE_EXTRA_REGS | 239 | SAVE_EXTRA_REGS |
249 | movq %rsp, %rdi | 240 | movq %rsp, %rdi |
250 | movl $AUDIT_ARCH_X86_64, %esi | 241 | call syscall_return_slowpath /* returns with IRQs disabled */ |
251 | movq %rax, %rdx | 242 | jmp return_from_SYSCALL_64 |
252 | call syscall_trace_enter_phase2 | ||
253 | |||
254 | /* | ||
255 | * Reload registers from stack in case ptrace changed them. | ||
256 | * We don't reload %rax because syscall_trace_entry_phase2() returned | ||
257 | * the value it wants us to use in the table lookup. | ||
258 | */ | ||
259 | RESTORE_C_REGS_EXCEPT_RAX | ||
260 | RESTORE_EXTRA_REGS | ||
261 | #if __SYSCALL_MASK == ~0 | ||
262 | cmpq $__NR_syscall_max, %rax | ||
263 | #else | ||
264 | andl $__SYSCALL_MASK, %eax | ||
265 | cmpl $__NR_syscall_max, %eax | ||
266 | #endif | ||
267 | ja 1f /* return -ENOSYS (already in pt_regs->ax) */ | ||
268 | movq %r10, %rcx /* fixup for C */ | ||
269 | call *sys_call_table(, %rax, 8) | ||
270 | movq %rax, RAX(%rsp) | ||
271 | 1: | ||
272 | /* Use IRET because user could have changed pt_regs->foo */ | ||
273 | 243 | ||
274 | /* | 244 | entry_SYSCALL64_slow_path: |
275 | * Syscall return path ending with IRET. | 245 | /* IRQs are off. */ |
276 | * Has correct iret frame. | ||
277 | */ | ||
278 | GLOBAL(int_ret_from_sys_call) | ||
279 | SAVE_EXTRA_REGS | 246 | SAVE_EXTRA_REGS |
280 | movq %rsp, %rdi | 247 | movq %rsp, %rdi |
281 | call syscall_return_slowpath /* returns with IRQs disabled */ | 248 | call do_syscall_64 /* returns with IRQs disabled */ |
249 | |||
250 | return_from_SYSCALL_64: | ||
282 | RESTORE_EXTRA_REGS | 251 | RESTORE_EXTRA_REGS |
283 | TRACE_IRQS_IRETQ /* we're about to change IF */ | 252 | TRACE_IRQS_IRETQ /* we're about to change IF */ |
284 | 253 | ||
@@ -355,83 +324,45 @@ opportunistic_sysret_failed: | |||
355 | jmp restore_c_regs_and_iret | 324 | jmp restore_c_regs_and_iret |
356 | END(entry_SYSCALL_64) | 325 | END(entry_SYSCALL_64) |
357 | 326 | ||
327 | ENTRY(stub_ptregs_64) | ||
328 | /* | ||
329 | * Syscalls marked as needing ptregs land here. | ||
330 | * If we are on the fast path, we need to save the extra regs, | ||
331 | * which we achieve by trying again on the slow path. If we are on | ||
332 | * the slow path, the extra regs are already saved. | ||
333 | * | ||
334 | * RAX stores a pointer to the C function implementing the syscall. | ||
335 | * IRQs are on. | ||
336 | */ | ||
337 | cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp) | ||
338 | jne 1f | ||
358 | 339 | ||
359 | .macro FORK_LIKE func | ||
360 | ENTRY(stub_\func) | ||
361 | SAVE_EXTRA_REGS 8 | ||
362 | jmp sys_\func | ||
363 | END(stub_\func) | ||
364 | .endm | ||
365 | |||
366 | FORK_LIKE clone | ||
367 | FORK_LIKE fork | ||
368 | FORK_LIKE vfork | ||
369 | |||
370 | ENTRY(stub_execve) | ||
371 | call sys_execve | ||
372 | return_from_execve: | ||
373 | testl %eax, %eax | ||
374 | jz 1f | ||
375 | /* exec failed, can use fast SYSRET code path in this case */ | ||
376 | ret | ||
377 | 1: | ||
378 | /* must use IRET code path (pt_regs->cs may have changed) */ | ||
379 | addq $8, %rsp | ||
380 | ZERO_EXTRA_REGS | ||
381 | movq %rax, RAX(%rsp) | ||
382 | jmp int_ret_from_sys_call | ||
383 | END(stub_execve) | ||
384 | /* | ||
385 | * Remaining execve stubs are only 7 bytes long. | ||
386 | * ENTRY() often aligns to 16 bytes, which in this case has no benefits. | ||
387 | */ | ||
388 | .align 8 | ||
389 | GLOBAL(stub_execveat) | ||
390 | call sys_execveat | ||
391 | jmp return_from_execve | ||
392 | END(stub_execveat) | ||
393 | |||
394 | #if defined(CONFIG_X86_X32_ABI) | ||
395 | .align 8 | ||
396 | GLOBAL(stub_x32_execve) | ||
397 | call compat_sys_execve | ||
398 | jmp return_from_execve | ||
399 | END(stub_x32_execve) | ||
400 | .align 8 | ||
401 | GLOBAL(stub_x32_execveat) | ||
402 | call compat_sys_execveat | ||
403 | jmp return_from_execve | ||
404 | END(stub_x32_execveat) | ||
405 | #endif | ||
406 | |||
407 | /* | ||
408 | * sigreturn is special because it needs to restore all registers on return. | ||
409 | * This cannot be done with SYSRET, so use the IRET return path instead. | ||
410 | */ | ||
411 | ENTRY(stub_rt_sigreturn) | ||
412 | /* | 340 | /* |
413 | * SAVE_EXTRA_REGS result is not normally needed: | 341 | * Called from fast path -- disable IRQs again, pop return address |
414 | * sigreturn overwrites all pt_regs->GPREGS. | 342 | * and jump to slow path |
415 | * But sigreturn can fail (!), and there is no easy way to detect that. | ||
416 | * To make sure RESTORE_EXTRA_REGS doesn't restore garbage on error, | ||
417 | * we SAVE_EXTRA_REGS here. | ||
418 | */ | 343 | */ |
419 | SAVE_EXTRA_REGS 8 | 344 | DISABLE_INTERRUPTS(CLBR_NONE) |
420 | call sys_rt_sigreturn | 345 | TRACE_IRQS_OFF |
421 | return_from_stub: | 346 | popq %rax |
422 | addq $8, %rsp | 347 | jmp entry_SYSCALL64_slow_path |
423 | RESTORE_EXTRA_REGS | ||
424 | movq %rax, RAX(%rsp) | ||
425 | jmp int_ret_from_sys_call | ||
426 | END(stub_rt_sigreturn) | ||
427 | 348 | ||
428 | #ifdef CONFIG_X86_X32_ABI | 349 | 1: |
429 | ENTRY(stub_x32_rt_sigreturn) | 350 | /* Called from C */ |
430 | SAVE_EXTRA_REGS 8 | 351 | jmp *%rax /* called from C */ |
431 | call sys32_x32_rt_sigreturn | 352 | END(stub_ptregs_64) |
432 | jmp return_from_stub | 353 | |
433 | END(stub_x32_rt_sigreturn) | 354 | .macro ptregs_stub func |
434 | #endif | 355 | ENTRY(ptregs_\func) |
356 | leaq \func(%rip), %rax | ||
357 | jmp stub_ptregs_64 | ||
358 | END(ptregs_\func) | ||
359 | .endm | ||
360 | |||
361 | /* Instantiate ptregs_stub for each ptregs-using syscall */ | ||
362 | #define __SYSCALL_64_QUAL_(sym) | ||
363 | #define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym | ||
364 | #define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym) | ||
365 | #include <asm/syscalls_64.h> | ||
435 | 366 | ||
436 | /* | 367 | /* |
437 | * A newly forked process directly context switches into this address. | 368 | * A newly forked process directly context switches into this address. |
@@ -439,7 +370,6 @@ END(stub_x32_rt_sigreturn) | |||
439 | * rdi: prev task we switched from | 370 | * rdi: prev task we switched from |
440 | */ | 371 | */ |
441 | ENTRY(ret_from_fork) | 372 | ENTRY(ret_from_fork) |
442 | |||
443 | LOCK ; btr $TIF_FORK, TI_flags(%r8) | 373 | LOCK ; btr $TIF_FORK, TI_flags(%r8) |
444 | 374 | ||
445 | pushq $0x0002 | 375 | pushq $0x0002 |
@@ -447,28 +377,32 @@ ENTRY(ret_from_fork) | |||
447 | 377 | ||
448 | call schedule_tail /* rdi: 'prev' task parameter */ | 378 | call schedule_tail /* rdi: 'prev' task parameter */ |
449 | 379 | ||
450 | RESTORE_EXTRA_REGS | ||
451 | |||
452 | testb $3, CS(%rsp) /* from kernel_thread? */ | 380 | testb $3, CS(%rsp) /* from kernel_thread? */ |
381 | jnz 1f | ||
453 | 382 | ||
454 | /* | 383 | /* |
455 | * By the time we get here, we have no idea whether our pt_regs, | 384 | * We came from kernel_thread. This code path is quite twisted, and |
456 | * ti flags, and ti status came from the 64-bit SYSCALL fast path, | 385 | * someone should clean it up. |
457 | * the slow path, or one of the 32-bit compat paths. | 386 | * |
458 | * Use IRET code path to return, since it can safely handle | 387 | * copy_thread_tls stashes the function pointer in RBX and the |
459 | * all of the above. | 388 | * parameter to be passed in RBP. The called function is permitted |
389 | * to call do_execve and thereby jump to user mode. | ||
460 | */ | 390 | */ |
461 | jnz int_ret_from_sys_call | 391 | movq RBP(%rsp), %rdi |
392 | call *RBX(%rsp) | ||
393 | movl $0, RAX(%rsp) | ||
462 | 394 | ||
463 | /* | 395 | /* |
464 | * We came from kernel_thread | 396 | * Fall through as though we're exiting a syscall. This makes a |
465 | * nb: we depend on RESTORE_EXTRA_REGS above | 397 | * twisted sort of sense if we just called do_execve. |
466 | */ | 398 | */ |
467 | movq %rbp, %rdi | 399 | |
468 | call *%rbx | 400 | 1: |
469 | movl $0, RAX(%rsp) | 401 | movq %rsp, %rdi |
470 | RESTORE_EXTRA_REGS | 402 | call syscall_return_slowpath /* returns with IRQs disabled */ |
471 | jmp int_ret_from_sys_call | 403 | TRACE_IRQS_ON /* user mode is traced as IRQS on */ |
404 | SWAPGS | ||
405 | jmp restore_regs_and_iret | ||
472 | END(ret_from_fork) | 406 | END(ret_from_fork) |
473 | 407 | ||
474 | /* | 408 | /* |
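The __SYSCALL_64_QUAL_* macros above make each syscall table slot point either at the C handler or at its ptregs_ stub, keyed by a per-entry qualifier (the C side of the same trick appears in the syscall_64.c hunk below). A compilable miniature of the expansion; both table entries and handlers are invented stand-ins:

    #include <stdio.h>

    typedef long (*sys_call_ptr_t)(void);

    /* Invented stand-ins for a plain handler and an asm ptregs stub. */
    static long sys_read_stub(void)     { return 0; }
    static long ptregs_sys_execve(void) { return 0; }

    /* Same shape as the macros in entry_64.S and syscall_64.c. */
    #define __SYSCALL_64_QUAL_(sym) sym
    #define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym
    #define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),

    static const sys_call_ptr_t table[64] = {
            __SYSCALL_64(0, sys_read_stub, )        /* -> sys_read_stub */
            __SYSCALL_64(59, sys_execve, ptregs)    /* -> ptregs_sys_execve */
    };

    int main(void)
    {
            printf("%p %p\n", (void *)table[0], (void *)table[59]);
            return 0;
    }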
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 3c990eeee40b..847f2f0c31e5 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S | |||
@@ -19,12 +19,21 @@ | |||
19 | .section .entry.text, "ax" | 19 | .section .entry.text, "ax" |
20 | 20 | ||
21 | /* | 21 | /* |
22 | * 32-bit SYSENTER instruction entry. | 22 | * 32-bit SYSENTER entry. |
23 | * | 23 | * |
24 | * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs. | 24 | * 32-bit system calls through the vDSO's __kernel_vsyscall enter here |
25 | * IF and VM in rflags are cleared (IOW: interrupts are off). | 25 | * on 64-bit kernels running on Intel CPUs. |
26 | * | ||
27 | * The SYSENTER instruction, in principle, should *only* occur in the | ||
28 | * vDSO. In practice, a small number of Android devices were shipped | ||
29 | * with a copy of Bionic that inlined a SYSENTER instruction. This | ||
30 | * never happened in any of Google's Bionic versions -- it only happened | ||
31 | * in a narrow range of Intel-provided versions. | ||
32 | * | ||
33 | * SYSENTER loads SS, RSP, CS, and RIP from previously programmed MSRs. | ||
34 | * IF and VM in RFLAGS are cleared (IOW: interrupts are off). | ||
26 | * SYSENTER does not save anything on the stack, | 35 | * SYSENTER does not save anything on the stack, |
27 | * and does not save old rip (!!!) and rflags. | 36 | * and does not save old RIP (!!!), RSP, or RFLAGS. |
28 | * | 37 | * |
29 | * Arguments: | 38 | * Arguments: |
30 | * eax system call number | 39 | * eax system call number |
@@ -35,10 +44,6 @@ | |||
35 | * edi arg5 | 44 | * edi arg5 |
36 | * ebp user stack | 45 | * ebp user stack |
37 | * 0(%ebp) arg6 | 46 | * 0(%ebp) arg6 |
38 | * | ||
39 | * This is purely a fast path. For anything complicated we use the int 0x80 | ||
40 | * path below. We set up a complete hardware stack frame to share code | ||
41 | * with the int 0x80 path. | ||
42 | */ | 47 | */ |
43 | ENTRY(entry_SYSENTER_compat) | 48 | ENTRY(entry_SYSENTER_compat) |
44 | /* Interrupts are off on entry. */ | 49 | /* Interrupts are off on entry. */ |
@@ -66,8 +71,6 @@ ENTRY(entry_SYSENTER_compat) | |||
66 | */ | 71 | */ |
67 | pushfq /* pt_regs->flags (except IF = 0) */ | 72 | pushfq /* pt_regs->flags (except IF = 0) */ |
68 | orl $X86_EFLAGS_IF, (%rsp) /* Fix saved flags */ | 73 | orl $X86_EFLAGS_IF, (%rsp) /* Fix saved flags */ |
69 | ASM_CLAC /* Clear AC after saving FLAGS */ | ||
70 | |||
71 | pushq $__USER32_CS /* pt_regs->cs */ | 74 | pushq $__USER32_CS /* pt_regs->cs */ |
72 | xorq %r8,%r8 | 75 | xorq %r8,%r8 |
73 | pushq %r8 /* pt_regs->ip = 0 (placeholder) */ | 76 | pushq %r8 /* pt_regs->ip = 0 (placeholder) */ |
@@ -90,19 +93,25 @@ ENTRY(entry_SYSENTER_compat) | |||
90 | cld | 93 | cld |
91 | 94 | ||
92 | /* | 95 | /* |
93 | * Sysenter doesn't filter flags, so we need to clear NT | 96 | * SYSENTER doesn't filter flags, so we need to clear NT and AC |
94 | * ourselves. To save a few cycles, we can check whether | 97 | * ourselves. To save a few cycles, we can check whether |
95 | * NT was set instead of doing an unconditional popfq. | 98 | * either was set instead of doing an unconditional popfq. |
96 | * This needs to happen before enabling interrupts so that | 99 | * This needs to happen before enabling interrupts so that |
97 | * we don't get preempted with NT set. | 100 | * we don't get preempted with NT set. |
98 | * | 101 | * |
102 | * If TF is set, we will single-step all the way to here -- do_debug | ||
103 | * will ignore all the traps. (Yes, this is slow, but so is | ||
104 | * single-stepping in general. This allows us to avoid having | ||
105 | * a more complicated code to handle the case where a user program | ||
106 | * forces us to single-step through the SYSENTER entry code.) | ||
107 | * | ||
99 | * NB.: .Lsysenter_fix_flags is a label with the code under it moved | 108 | * NB.: .Lsysenter_fix_flags is a label with the code under it moved |
100 | * out-of-line as an optimization: NT is unlikely to be set in the | 109 | * out-of-line as an optimization: NT is unlikely to be set in the |
101 | * majority of the cases and instead of polluting the I$ unnecessarily, | 110 | * majority of the cases and instead of polluting the I$ unnecessarily, |
102 | * we're keeping that code behind a branch which will predict as | 111 | * we're keeping that code behind a branch which will predict as |
103 | * not-taken and therefore its instructions won't be fetched. | 112 | * not-taken and therefore its instructions won't be fetched. |
104 | */ | 113 | */ |
105 | testl $X86_EFLAGS_NT, EFLAGS(%rsp) | 114 | testl $X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, EFLAGS(%rsp) |
106 | jnz .Lsysenter_fix_flags | 115 | jnz .Lsysenter_fix_flags |
107 | .Lsysenter_flags_fixed: | 116 | .Lsysenter_flags_fixed: |
108 | 117 | ||
@@ -123,20 +132,42 @@ ENTRY(entry_SYSENTER_compat) | |||
123 | pushq $X86_EFLAGS_FIXED | 132 | pushq $X86_EFLAGS_FIXED |
124 | popfq | 133 | popfq |
125 | jmp .Lsysenter_flags_fixed | 134 | jmp .Lsysenter_flags_fixed |
135 | GLOBAL(__end_entry_SYSENTER_compat) | ||
126 | ENDPROC(entry_SYSENTER_compat) | 136 | ENDPROC(entry_SYSENTER_compat) |
127 | 137 | ||
128 | /* | 138 | /* |
129 | * 32-bit SYSCALL instruction entry. | 139 | * 32-bit SYSCALL entry. |
140 | * | ||
141 | * 32-bit system calls through the vDSO's __kernel_vsyscall enter here | ||
142 | * on 64-bit kernels running on AMD CPUs. | ||
143 | * | ||
144 | * The SYSCALL instruction, in principle, should *only* occur in the | ||
145 | * vDSO. In practice, it appears that this really is the case. | ||
146 | * As evidence: | ||
147 | * | ||
148 | * - The calling convention for SYSCALL has changed several times without | ||
149 | * anyone noticing. | ||
130 | * | 150 | * |
131 | * 32-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11, | 151 | * - Prior to the in-kernel X86_BUG_SYSRET_SS_ATTRS fixup, any |
132 | * then loads new ss, cs, and rip from previously programmed MSRs. | 152 | * user task that did SYSCALL without immediately reloading SS |
133 | * rflags gets masked by a value from another MSR (so CLD and CLAC | 153 | * would randomly crash. |
134 | * are not needed). SYSCALL does not save anything on the stack | ||
135 | * and does not change rsp. | ||
136 | * | 154 | * |
137 | * Note: rflags saving+masking-with-MSR happens only in Long mode | 155 | * - Most programmers do not directly target AMD CPUs, and the 32-bit |
156 | * SYSCALL instruction does not exist on Intel CPUs. Even on AMD | ||
157 | * CPUs, Linux disables the SYSCALL instruction on 32-bit kernels | ||
158 | * because the SYSCALL instruction in legacy/native 32-bit mode (as | ||
159 | * opposed to compat mode) is sufficiently poorly designed as to be | ||
160 | * essentially unusable. | ||
161 | * | ||
162 | * 32-bit SYSCALL saves RIP to RCX, clears RFLAGS.RF, then saves | ||
163 | * RFLAGS to R11, then loads new SS, CS, and RIP from previously | ||
164 | * programmed MSRs. RFLAGS gets masked by a value from another MSR | ||
165 | * (so CLD and CLAC are not needed). SYSCALL does not save anything on | ||
166 | * the stack and does not change RSP. | ||
167 | * | ||
168 | * Note: RFLAGS saving+masking-with-MSR happens only in Long mode | ||
138 | * (in legacy 32-bit mode, IF, RF and VM bits are cleared and that's it). | 169 | * (in legacy 32-bit mode, IF, RF and VM bits are cleared and that's it). |
139 | * Don't get confused: rflags saving+masking depends on Long Mode Active bit | 170 | * Don't get confused: RFLAGS saving+masking depends on Long Mode Active bit |
140 | * (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes | 171 | * (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes |
141 | * or target CS descriptor's L bit (SYSCALL does not read segment descriptors). | 172 | * or target CS descriptor's L bit (SYSCALL does not read segment descriptors). |
142 | * | 173 | * |
@@ -236,7 +267,21 @@ sysret32_from_system_call: | |||
236 | END(entry_SYSCALL_compat) | 267 | END(entry_SYSCALL_compat) |
237 | 268 | ||
238 | /* | 269 | /* |
239 | * Emulated IA32 system calls via int 0x80. | 270 | * 32-bit legacy system call entry. |
271 | * | ||
272 | * 32-bit x86 Linux system calls traditionally used the INT $0x80 | ||
273 | * instruction. INT $0x80 lands here. | ||
274 | * | ||
275 | * This entry point can be used by 32-bit and 64-bit programs to perform | ||
276 | * 32-bit system calls. Instances of INT $0x80 can be found inline in | ||
277 | * various programs and libraries. It is also used by the vDSO's | ||
278 | * __kernel_vsyscall fallback for hardware that doesn't support a faster | ||
279 | * entry method. Restarted 32-bit system calls also fall back to INT | ||
280 | * $0x80 regardless of what instruction was originally used to do the | ||
281 | * system call. | ||
282 | * | ||
283 | * This is considered a slow path. It is not used by most libc | ||
284 | * implementations on modern hardware except during process startup. | ||
240 | * | 285 | * |
241 | * Arguments: | 286 | * Arguments: |
242 | * eax system call number | 287 | * eax system call number |
@@ -245,17 +290,8 @@ END(entry_SYSCALL_compat) | |||
245 | * edx arg3 | 290 | * edx arg3 |
246 | * esi arg4 | 291 | * esi arg4 |
247 | * edi arg5 | 292 | * edi arg5 |
248 | * ebp arg6 (note: not saved in the stack frame, should not be touched) | 293 | * ebp arg6 |
249 | * | ||
250 | * Notes: | ||
251 | * Uses the same stack frame as the x86-64 version. | ||
252 | * All registers except eax must be saved (but ptrace may violate that). | ||
253 | * Arguments are zero extended. For system calls that want sign extension and | ||
254 | * take long arguments a wrapper is needed. Most calls can just be called | ||
255 | * directly. | ||
256 | * Assumes it is only called from user space and entered with interrupts off. | ||
257 | */ | 294 | */ |
258 | |||
259 | ENTRY(entry_INT80_compat) | 295 | ENTRY(entry_INT80_compat) |
260 | /* | 296 | /* |
261 | * Interrupts are off on entry. | 297 | * Interrupts are off on entry. |
@@ -300,7 +336,7 @@ ENTRY(entry_INT80_compat) | |||
300 | TRACE_IRQS_OFF | 336 | TRACE_IRQS_OFF |
301 | 337 | ||
302 | movq %rsp, %rdi | 338 | movq %rsp, %rdi |
303 | call do_syscall_32_irqs_off | 339 | call do_int80_syscall_32 |
304 | .Lsyscall_32_done: | 340 | .Lsyscall_32_done: |
305 | 341 | ||
306 | /* Go back to user mode. */ | 342 | /* Go back to user mode. */ |
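The register convention documented above (eax = syscall number, arguments in ebx..ebp) can be exercised directly from userspace. A hedged illustration of the ABI, not kernel code: a 32-bit getpid() via INT $0x80 (i386 syscall number 20, which happens to take no arguments); build with -m32.

#include <stdio.h>

int main(void)
{
	long pid;

	asm volatile("int $0x80"
		     : "=a" (pid)   /* return value comes back in eax */
		     : "0" (20)     /* eax = __NR_getpid on i386 */
		     : "memory");
	printf("pid = %ld\n", pid);
	return 0;
}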
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index 9a6649857106..8f895ee13a1c 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c | |||
@@ -6,17 +6,11 @@ | |||
6 | #include <asm/asm-offsets.h> | 6 | #include <asm/asm-offsets.h> |
7 | #include <asm/syscall.h> | 7 | #include <asm/syscall.h> |
8 | 8 | ||
9 | #ifdef CONFIG_IA32_EMULATION | 9 | #define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ; |
10 | #define SYM(sym, compat) compat | ||
11 | #else | ||
12 | #define SYM(sym, compat) sym | ||
13 | #endif | ||
14 | |||
15 | #define __SYSCALL_I386(nr, sym, compat) extern asmlinkage long SYM(sym, compat)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ; | ||
16 | #include <asm/syscalls_32.h> | 10 | #include <asm/syscalls_32.h> |
17 | #undef __SYSCALL_I386 | 11 | #undef __SYSCALL_I386 |
18 | 12 | ||
19 | #define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat), | 13 | #define __SYSCALL_I386(nr, sym, qual) [nr] = sym, |
20 | 14 | ||
21 | extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); | 15 | extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); |
22 | 16 | ||
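syscall_32.c uses the classic include-twice ("X macro") trick: asm/syscalls_32.h is pulled in once with __SYSCALL_I386 defined to emit declarations, then again with it redefined to emit designated initializers for the table. A self-contained miniature of the same pattern, with the syscall list inlined as a macro instead of re-including a header, and stub handlers made up for illustration:

#include <stdio.h>

#define MY_SYSCALLS(X)  X(0, sys_zero) X(1, sys_one)

/* Pass 1: emit one declaration per table entry. */
#define DECLARE(nr, sym) long sym(void);
MY_SYSCALLS(DECLARE)
#undef DECLARE

/* Pass 2: emit one designated initializer per entry. */
#define ENTRY(nr, sym) [nr] = sym,
static long (*const table[])(void) = { MY_SYSCALLS(ENTRY) };
#undef ENTRY

long sys_zero(void) { return 100; }
long sys_one(void)  { return 200; }

int main(void)
{
	printf("%ld %ld\n", table[0](), table[1]());
	return 0;
}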
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index 41283d22be7a..9dbc5abb6162 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c | |||
@@ -6,19 +6,14 @@ | |||
6 | #include <asm/asm-offsets.h> | 6 | #include <asm/asm-offsets.h> |
7 | #include <asm/syscall.h> | 7 | #include <asm/syscall.h> |
8 | 8 | ||
9 | #define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat) | 9 | #define __SYSCALL_64_QUAL_(sym) sym |
10 | #define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym | ||
10 | 11 | ||
11 | #ifdef CONFIG_X86_X32_ABI | 12 | #define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); |
12 | # define __SYSCALL_X32(nr, sym, compat) __SYSCALL_64(nr, sym, compat) | ||
13 | #else | ||
14 | # define __SYSCALL_X32(nr, sym, compat) /* nothing */ | ||
15 | #endif | ||
16 | |||
17 | #define __SYSCALL_64(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ; | ||
18 | #include <asm/syscalls_64.h> | 13 | #include <asm/syscalls_64.h> |
19 | #undef __SYSCALL_64 | 14 | #undef __SYSCALL_64 |
20 | 15 | ||
21 | #define __SYSCALL_64(nr, sym, compat) [nr] = sym, | 16 | #define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym), |
22 | 17 | ||
23 | extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); | 18 | extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); |
24 | 19 | ||
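The __SYSCALL_64_QUAL_##qual paste is what routes /ptregs-qualified table entries through a prefixed stub. An illustrative trace of the two macros defined above, using entries from the table below:

/*
 *   __SYSCALL_64(56, sys_clone, ptregs)
 *     -> [56] = __SYSCALL_64_QUAL_ptregs(sys_clone),
 *     -> [56] = ptregs_sys_clone,
 *
 *   __SYSCALL_64(12, sys_brk, )
 *     -> [12] = __SYSCALL_64_QUAL_(sys_brk),
 *     -> [12] = sys_brk,
 *
 * i.e. entries marked /ptregs are dispatched via a ptregs_-prefixed
 * stub, while unqualified entries go into the table directly.
 */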
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index dc1040a50bdc..2e5b565adacc 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl | |||
@@ -21,7 +21,7 @@ | |||
21 | 12 common brk sys_brk | 21 | 12 common brk sys_brk |
22 | 13 64 rt_sigaction sys_rt_sigaction | 22 | 13 64 rt_sigaction sys_rt_sigaction |
23 | 14 common rt_sigprocmask sys_rt_sigprocmask | 23 | 14 common rt_sigprocmask sys_rt_sigprocmask |
24 | 15 64 rt_sigreturn stub_rt_sigreturn | 24 | 15 64 rt_sigreturn sys_rt_sigreturn/ptregs |
25 | 16 64 ioctl sys_ioctl | 25 | 16 64 ioctl sys_ioctl |
26 | 17 common pread64 sys_pread64 | 26 | 17 common pread64 sys_pread64 |
27 | 18 common pwrite64 sys_pwrite64 | 27 | 18 common pwrite64 sys_pwrite64 |
@@ -62,10 +62,10 @@ | |||
62 | 53 common socketpair sys_socketpair | 62 | 53 common socketpair sys_socketpair |
63 | 54 64 setsockopt sys_setsockopt | 63 | 54 64 setsockopt sys_setsockopt |
64 | 55 64 getsockopt sys_getsockopt | 64 | 55 64 getsockopt sys_getsockopt |
65 | 56 common clone stub_clone | 65 | 56 common clone sys_clone/ptregs |
66 | 57 common fork stub_fork | 66 | 57 common fork sys_fork/ptregs |
67 | 58 common vfork stub_vfork | 67 | 58 common vfork sys_vfork/ptregs |
68 | 59 64 execve stub_execve | 68 | 59 64 execve sys_execve/ptregs |
69 | 60 common exit sys_exit | 69 | 60 common exit sys_exit |
70 | 61 common wait4 sys_wait4 | 70 | 61 common wait4 sys_wait4 |
71 | 62 common kill sys_kill | 71 | 62 common kill sys_kill |
@@ -178,7 +178,7 @@ | |||
178 | 169 common reboot sys_reboot | 178 | 169 common reboot sys_reboot |
179 | 170 common sethostname sys_sethostname | 179 | 170 common sethostname sys_sethostname |
180 | 171 common setdomainname sys_setdomainname | 180 | 171 common setdomainname sys_setdomainname |
181 | 172 common iopl sys_iopl | 181 | 172 common iopl sys_iopl/ptregs |
182 | 173 common ioperm sys_ioperm | 182 | 173 common ioperm sys_ioperm |
183 | 174 64 create_module | 183 | 174 64 create_module |
184 | 175 common init_module sys_init_module | 184 | 175 common init_module sys_init_module |
@@ -328,7 +328,7 @@ | |||
328 | 319 common memfd_create sys_memfd_create | 328 | 319 common memfd_create sys_memfd_create |
329 | 320 common kexec_file_load sys_kexec_file_load | 329 | 320 common kexec_file_load sys_kexec_file_load |
330 | 321 common bpf sys_bpf | 330 | 321 common bpf sys_bpf |
331 | 322 64 execveat stub_execveat | 331 | 322 64 execveat sys_execveat/ptregs |
332 | 323 common userfaultfd sys_userfaultfd | 332 | 323 common userfaultfd sys_userfaultfd |
333 | 324 common membarrier sys_membarrier | 333 | 324 common membarrier sys_membarrier |
334 | 325 common mlock2 sys_mlock2 | 334 | 325 common mlock2 sys_mlock2 |
@@ -339,14 +339,14 @@ | |||
339 | # for native 64-bit operation. | 339 | # for native 64-bit operation. |
340 | # | 340 | # |
341 | 512 x32 rt_sigaction compat_sys_rt_sigaction | 341 | 512 x32 rt_sigaction compat_sys_rt_sigaction |
342 | 513 x32 rt_sigreturn stub_x32_rt_sigreturn | 342 | 513 x32 rt_sigreturn sys32_x32_rt_sigreturn |
343 | 514 x32 ioctl compat_sys_ioctl | 343 | 514 x32 ioctl compat_sys_ioctl |
344 | 515 x32 readv compat_sys_readv | 344 | 515 x32 readv compat_sys_readv |
345 | 516 x32 writev compat_sys_writev | 345 | 516 x32 writev compat_sys_writev |
346 | 517 x32 recvfrom compat_sys_recvfrom | 346 | 517 x32 recvfrom compat_sys_recvfrom |
347 | 518 x32 sendmsg compat_sys_sendmsg | 347 | 518 x32 sendmsg compat_sys_sendmsg |
348 | 519 x32 recvmsg compat_sys_recvmsg | 348 | 519 x32 recvmsg compat_sys_recvmsg |
349 | 520 x32 execve stub_x32_execve | 349 | 520 x32 execve compat_sys_execve/ptregs |
350 | 521 x32 ptrace compat_sys_ptrace | 350 | 521 x32 ptrace compat_sys_ptrace |
351 | 522 x32 rt_sigpending compat_sys_rt_sigpending | 351 | 522 x32 rt_sigpending compat_sys_rt_sigpending |
352 | 523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait | 352 | 523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait |
@@ -371,4 +371,4 @@ | |||
371 | 542 x32 getsockopt compat_sys_getsockopt | 371 | 542 x32 getsockopt compat_sys_getsockopt |
372 | 543 x32 io_setup compat_sys_io_setup | 372 | 543 x32 io_setup compat_sys_io_setup |
373 | 544 x32 io_submit compat_sys_io_submit | 373 | 544 x32 io_submit compat_sys_io_submit |
374 | 545 x32 execveat stub_x32_execveat | 374 | 545 x32 execveat compat_sys_execveat/ptregs |
diff --git a/arch/x86/entry/syscalls/syscalltbl.sh b/arch/x86/entry/syscalls/syscalltbl.sh index 0e7f8ec071e7..cd3d3015d7df 100644 --- a/arch/x86/entry/syscalls/syscalltbl.sh +++ b/arch/x86/entry/syscalls/syscalltbl.sh | |||
@@ -3,13 +3,63 @@ | |||
3 | in="$1" | 3 | in="$1" |
4 | out="$2" | 4 | out="$2" |
5 | 5 | ||
6 | syscall_macro() { | ||
7 | abi="$1" | ||
8 | nr="$2" | ||
9 | entry="$3" | ||
10 | |||
11 | # Entry can be either just a function name or "function/qualifier" | ||
12 | real_entry="${entry%%/*}" | ||
13 | qualifier="${entry:${#real_entry}}" # Strip the function name | ||
14 | qualifier="${qualifier:1}" # Strip the slash, if any | ||
15 | |||
16 | echo "__SYSCALL_${abi}($nr, $real_entry, $qualifier)" | ||
17 | } | ||
18 | |||
19 | emit() { | ||
20 | abi="$1" | ||
21 | nr="$2" | ||
22 | entry="$3" | ||
23 | compat="$4" | ||
24 | |||
25 | if [ "$abi" == "64" -a -n "$compat" ]; then | ||
26 | echo "a compat entry for a 64-bit syscall makes no sense" >&2 | ||
27 | exit 1 | ||
28 | fi | ||
29 | |||
30 | if [ -z "$compat" ]; then | ||
31 | if [ -n "$entry" ]; then | ||
32 | syscall_macro "$abi" "$nr" "$entry" | ||
33 | fi | ||
34 | else | ||
35 | echo "#ifdef CONFIG_X86_32" | ||
36 | if [ -n "$entry" ]; then | ||
37 | syscall_macro "$abi" "$nr" "$entry" | ||
38 | fi | ||
39 | echo "#else" | ||
40 | syscall_macro "$abi" "$nr" "$compat" | ||
41 | echo "#endif" | ||
42 | fi | ||
43 | } | ||
44 | |||
6 | grep '^[0-9]' "$in" | sort -n | ( | 45 | grep '^[0-9]' "$in" | sort -n | ( |
7 | while read nr abi name entry compat; do | 46 | while read nr abi name entry compat; do |
8 | abi=`echo "$abi" | tr '[a-z]' '[A-Z]'` | 47 | abi=`echo "$abi" | tr '[a-z]' '[A-Z]'` |
9 | if [ -n "$compat" ]; then | 48 | if [ "$abi" == "COMMON" -o "$abi" == "64" ]; then |
10 | echo "__SYSCALL_${abi}($nr, $entry, $compat)" | 49 | # COMMON is the same as 64, except that we don't expect X32 |
11 | elif [ -n "$entry" ]; then | 50 | # programs to use it. Our expectation has nothing to do with |
12 | echo "__SYSCALL_${abi}($nr, $entry, $entry)" | 51 | # any generated code, so treat them the same. |
52 | emit 64 "$nr" "$entry" "$compat" | ||
53 | elif [ "$abi" == "X32" ]; then | ||
54 | # X32 is equivalent to 64 on an X32-compatible kernel. | ||
55 | echo "#ifdef CONFIG_X86_X32_ABI" | ||
56 | emit 64 "$nr" "$entry" "$compat" | ||
57 | echo "#endif" | ||
58 | elif [ "$abi" == "I386" ]; then | ||
59 | emit "$abi" "$nr" "$entry" "$compat" | ||
60 | else | ||
61 | echo "Unknown abi $abi" >&2 | ||
62 | exit 1 | ||
13 | fi | 63 | fi |
14 | done | 64 | done |
15 | ) > "$out" | 65 | ) > "$out" |
diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h index 3f69326ed545..63a03bb91497 100644 --- a/arch/x86/entry/vdso/vdso2c.h +++ b/arch/x86/entry/vdso/vdso2c.h | |||
@@ -150,16 +150,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, | |||
150 | } | 150 | } |
151 | fprintf(outfile, "\n};\n\n"); | 151 | fprintf(outfile, "\n};\n\n"); |
152 | 152 | ||
153 | fprintf(outfile, "static struct page *pages[%lu];\n\n", | ||
154 | mapping_size / 4096); | ||
155 | |||
156 | fprintf(outfile, "const struct vdso_image %s = {\n", name); | 153 | fprintf(outfile, "const struct vdso_image %s = {\n", name); |
157 | fprintf(outfile, "\t.data = raw_data,\n"); | 154 | fprintf(outfile, "\t.data = raw_data,\n"); |
158 | fprintf(outfile, "\t.size = %lu,\n", mapping_size); | 155 | fprintf(outfile, "\t.size = %lu,\n", mapping_size); |
159 | fprintf(outfile, "\t.text_mapping = {\n"); | ||
160 | fprintf(outfile, "\t\t.name = \"[vdso]\",\n"); | ||
161 | fprintf(outfile, "\t\t.pages = pages,\n"); | ||
162 | fprintf(outfile, "\t},\n"); | ||
163 | if (alt_sec) { | 156 | if (alt_sec) { |
164 | fprintf(outfile, "\t.alt = %lu,\n", | 157 | fprintf(outfile, "\t.alt = %lu,\n", |
165 | (unsigned long)GET_LE(&alt_sec->sh_offset)); | 158 | (unsigned long)GET_LE(&alt_sec->sh_offset)); |
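After this change the generator no longer emits a static pages[] array or a text_mapping initializer. Roughly what vdso2c now produces for an image, reconstructed from the fprintf() calls above with hypothetical sizes and symbol values:

/*
 *   const struct vdso_image vdso_image_64 = {
 *           .data = raw_data,
 *           .size = 8192,
 *           .alt = 3145,
 *           ...
 *   };
 *
 * The "[vdso]" name and page backing now live in the kernel's
 * vm_special_mapping (see the vma.c changes below) rather than in
 * generated per-image data.
 */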
diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c index 08a317a9ae4b..7853b53959cd 100644 --- a/arch/x86/entry/vdso/vdso32-setup.c +++ b/arch/x86/entry/vdso/vdso32-setup.c | |||
@@ -11,7 +11,6 @@ | |||
11 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
12 | #include <linux/mm_types.h> | 12 | #include <linux/mm_types.h> |
13 | 13 | ||
14 | #include <asm/cpufeature.h> | ||
15 | #include <asm/processor.h> | 14 | #include <asm/processor.h> |
16 | #include <asm/vdso.h> | 15 | #include <asm/vdso.h> |
17 | 16 | ||
diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S index 3a1d9297074b..0109ac6cb79c 100644 --- a/arch/x86/entry/vdso/vdso32/system_call.S +++ b/arch/x86/entry/vdso/vdso32/system_call.S | |||
@@ -3,7 +3,7 @@ | |||
3 | */ | 3 | */ |
4 | 4 | ||
5 | #include <asm/dwarf2.h> | 5 | #include <asm/dwarf2.h> |
6 | #include <asm/cpufeature.h> | 6 | #include <asm/cpufeatures.h> |
7 | #include <asm/alternative-asm.h> | 7 | #include <asm/alternative-asm.h> |
8 | 8 | ||
9 | /* | 9 | /* |
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index b8f69e264ac4..10f704584922 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <asm/page.h> | 20 | #include <asm/page.h> |
21 | #include <asm/hpet.h> | 21 | #include <asm/hpet.h> |
22 | #include <asm/desc.h> | 22 | #include <asm/desc.h> |
23 | #include <asm/cpufeature.h> | ||
23 | 24 | ||
24 | #if defined(CONFIG_X86_64) | 25 | #if defined(CONFIG_X86_64) |
25 | unsigned int __read_mostly vdso64_enabled = 1; | 26 | unsigned int __read_mostly vdso64_enabled = 1; |
@@ -27,13 +28,7 @@ unsigned int __read_mostly vdso64_enabled = 1; | |||
27 | 28 | ||
28 | void __init init_vdso_image(const struct vdso_image *image) | 29 | void __init init_vdso_image(const struct vdso_image *image) |
29 | { | 30 | { |
30 | int i; | ||
31 | int npages = (image->size) / PAGE_SIZE; | ||
32 | |||
33 | BUG_ON(image->size % PAGE_SIZE != 0); | 31 | BUG_ON(image->size % PAGE_SIZE != 0); |
34 | for (i = 0; i < npages; i++) | ||
35 | image->text_mapping.pages[i] = | ||
36 | virt_to_page(image->data + i*PAGE_SIZE); | ||
37 | 32 | ||
38 | apply_alternatives((struct alt_instr *)(image->data + image->alt), | 33 | apply_alternatives((struct alt_instr *)(image->data + image->alt), |
39 | (struct alt_instr *)(image->data + image->alt + | 34 | (struct alt_instr *)(image->data + image->alt + |
@@ -90,18 +85,87 @@ static unsigned long vdso_addr(unsigned long start, unsigned len) | |||
90 | #endif | 85 | #endif |
91 | } | 86 | } |
92 | 87 | ||
88 | static int vdso_fault(const struct vm_special_mapping *sm, | ||
89 | struct vm_area_struct *vma, struct vm_fault *vmf) | ||
90 | { | ||
91 | const struct vdso_image *image = vma->vm_mm->context.vdso_image; | ||
92 | |||
93 | if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size) | ||
94 | return VM_FAULT_SIGBUS; | ||
95 | |||
96 | vmf->page = virt_to_page(image->data + (vmf->pgoff << PAGE_SHIFT)); | ||
97 | get_page(vmf->page); | ||
98 | return 0; | ||
99 | } | ||
100 | |||
101 | static const struct vm_special_mapping text_mapping = { | ||
102 | .name = "[vdso]", | ||
103 | .fault = vdso_fault, | ||
104 | }; | ||
105 | |||
106 | static int vvar_fault(const struct vm_special_mapping *sm, | ||
107 | struct vm_area_struct *vma, struct vm_fault *vmf) | ||
108 | { | ||
109 | const struct vdso_image *image = vma->vm_mm->context.vdso_image; | ||
110 | long sym_offset; | ||
111 | int ret = -EFAULT; | ||
112 | |||
113 | if (!image) | ||
114 | return VM_FAULT_SIGBUS; | ||
115 | |||
116 | sym_offset = (long)(vmf->pgoff << PAGE_SHIFT) + | ||
117 | image->sym_vvar_start; | ||
118 | |||
119 | /* | ||
120 | * Sanity check: a symbol offset of zero means that the page | ||
121 | * does not exist for this vdso image, not that the page is at | ||
122 | * offset zero relative to the text mapping. This should be | ||
123 | * impossible here, because sym_offset should only be zero for | ||
124 | * the page past the end of the vvar mapping. | ||
125 | */ | ||
126 | if (sym_offset == 0) | ||
127 | return VM_FAULT_SIGBUS; | ||
128 | |||
129 | if (sym_offset == image->sym_vvar_page) { | ||
130 | ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, | ||
131 | __pa_symbol(&__vvar_page) >> PAGE_SHIFT); | ||
132 | } else if (sym_offset == image->sym_hpet_page) { | ||
133 | #ifdef CONFIG_HPET_TIMER | ||
134 | if (hpet_address && vclock_was_used(VCLOCK_HPET)) { | ||
135 | ret = vm_insert_pfn_prot( | ||
136 | vma, | ||
137 | (unsigned long)vmf->virtual_address, | ||
138 | hpet_address >> PAGE_SHIFT, | ||
139 | pgprot_noncached(PAGE_READONLY)); | ||
140 | } | ||
141 | #endif | ||
142 | } else if (sym_offset == image->sym_pvclock_page) { | ||
143 | struct pvclock_vsyscall_time_info *pvti = | ||
144 | pvclock_pvti_cpu0_va(); | ||
145 | if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) { | ||
146 | ret = vm_insert_pfn( | ||
147 | vma, | ||
148 | (unsigned long)vmf->virtual_address, | ||
149 | __pa(pvti) >> PAGE_SHIFT); | ||
150 | } | ||
151 | } | ||
152 | |||
153 | if (ret == 0 || ret == -EBUSY) | ||
154 | return VM_FAULT_NOPAGE; | ||
155 | |||
156 | return VM_FAULT_SIGBUS; | ||
157 | } | ||
158 | |||
93 | static int map_vdso(const struct vdso_image *image, bool calculate_addr) | 159 | static int map_vdso(const struct vdso_image *image, bool calculate_addr) |
94 | { | 160 | { |
95 | struct mm_struct *mm = current->mm; | 161 | struct mm_struct *mm = current->mm; |
96 | struct vm_area_struct *vma; | 162 | struct vm_area_struct *vma; |
97 | unsigned long addr, text_start; | 163 | unsigned long addr, text_start; |
98 | int ret = 0; | 164 | int ret = 0; |
99 | static struct page *no_pages[] = {NULL}; | 165 | static const struct vm_special_mapping vvar_mapping = { |
100 | static struct vm_special_mapping vvar_mapping = { | ||
101 | .name = "[vvar]", | 166 | .name = "[vvar]", |
102 | .pages = no_pages, | 167 | .fault = vvar_fault, |
103 | }; | 168 | }; |
104 | struct pvclock_vsyscall_time_info *pvti; | ||
105 | 169 | ||
106 | if (calculate_addr) { | 170 | if (calculate_addr) { |
107 | addr = vdso_addr(current->mm->start_stack, | 171 | addr = vdso_addr(current->mm->start_stack, |
@@ -121,6 +185,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) | |||
121 | 185 | ||
122 | text_start = addr - image->sym_vvar_start; | 186 | text_start = addr - image->sym_vvar_start; |
123 | current->mm->context.vdso = (void __user *)text_start; | 187 | current->mm->context.vdso = (void __user *)text_start; |
188 | current->mm->context.vdso_image = image; | ||
124 | 189 | ||
125 | /* | 190 | /* |
126 | * MAYWRITE to allow gdb to COW and set breakpoints | 191 | * MAYWRITE to allow gdb to COW and set breakpoints |
@@ -130,7 +195,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) | |||
130 | image->size, | 195 | image->size, |
131 | VM_READ|VM_EXEC| | 196 | VM_READ|VM_EXEC| |
132 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, | 197 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, |
133 | &image->text_mapping); | 198 | &text_mapping); |
134 | 199 | ||
135 | if (IS_ERR(vma)) { | 200 | if (IS_ERR(vma)) { |
136 | ret = PTR_ERR(vma); | 201 | ret = PTR_ERR(vma); |
@@ -140,7 +205,8 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) | |||
140 | vma = _install_special_mapping(mm, | 205 | vma = _install_special_mapping(mm, |
141 | addr, | 206 | addr, |
142 | -image->sym_vvar_start, | 207 | -image->sym_vvar_start, |
143 | VM_READ|VM_MAYREAD, | 208 | VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP| |
209 | VM_PFNMAP, | ||
144 | &vvar_mapping); | 210 | &vvar_mapping); |
145 | 211 | ||
146 | if (IS_ERR(vma)) { | 212 | if (IS_ERR(vma)) { |
@@ -148,41 +214,6 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) | |||
148 | goto up_fail; | 214 | goto up_fail; |
149 | } | 215 | } |
150 | 216 | ||
151 | if (image->sym_vvar_page) | ||
152 | ret = remap_pfn_range(vma, | ||
153 | text_start + image->sym_vvar_page, | ||
154 | __pa_symbol(&__vvar_page) >> PAGE_SHIFT, | ||
155 | PAGE_SIZE, | ||
156 | PAGE_READONLY); | ||
157 | |||
158 | if (ret) | ||
159 | goto up_fail; | ||
160 | |||
161 | #ifdef CONFIG_HPET_TIMER | ||
162 | if (hpet_address && image->sym_hpet_page) { | ||
163 | ret = io_remap_pfn_range(vma, | ||
164 | text_start + image->sym_hpet_page, | ||
165 | hpet_address >> PAGE_SHIFT, | ||
166 | PAGE_SIZE, | ||
167 | pgprot_noncached(PAGE_READONLY)); | ||
168 | |||
169 | if (ret) | ||
170 | goto up_fail; | ||
171 | } | ||
172 | #endif | ||
173 | |||
174 | pvti = pvclock_pvti_cpu0_va(); | ||
175 | if (pvti && image->sym_pvclock_page) { | ||
176 | ret = remap_pfn_range(vma, | ||
177 | text_start + image->sym_pvclock_page, | ||
178 | __pa(pvti) >> PAGE_SHIFT, | ||
179 | PAGE_SIZE, | ||
180 | PAGE_READONLY); | ||
181 | |||
182 | if (ret) | ||
183 | goto up_fail; | ||
184 | } | ||
185 | |||
186 | up_fail: | 217 | up_fail: |
187 | if (ret) | 218 | if (ret) |
188 | current->mm->context.vdso = NULL; | 219 | current->mm->context.vdso = NULL; |
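In the new fault-based scheme the vvar area sits below the vDSO text, so the vvar symbols are negative offsets relative to text_start, and each page is resolved on demand rather than remapped up front. A worked example of the vvar_fault() offset math, with made-up layout numbers:

/*
 *   image->sym_vvar_start = -3 * PAGE_SIZE;   // vvar area below text
 *   image->sym_vvar_page  = -3 * PAGE_SIZE;
 *   image->sym_hpet_page  = -2 * PAGE_SIZE;
 *
 * A fault on the first page of the [vvar] VMA has vmf->pgoff == 0, so:
 *
 *   sym_offset = (0 << PAGE_SHIFT) + (-3 * PAGE_SIZE) == sym_vvar_page
 *
 * and the handler inserts the PFN of __vvar_page; pgoff 1 resolves to
 * the HPET page the same way. Pages are thus materialized lazily at
 * fault time instead of eagerly at mmap time.
 */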
@@ -254,7 +285,7 @@ static void vgetcpu_cpu_init(void *arg) | |||
254 | #ifdef CONFIG_NUMA | 285 | #ifdef CONFIG_NUMA |
255 | node = cpu_to_node(cpu); | 286 | node = cpu_to_node(cpu); |
256 | #endif | 287 | #endif |
257 | if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP)) | 288 | if (static_cpu_has(X86_FEATURE_RDTSCP)) |
258 | write_rdtscp_aux((node << 12) | cpu); | 289 | write_rdtscp_aux((node << 12) | cpu); |
259 | 290 | ||
260 | /* | 291 | /* |
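vgetcpu_cpu_init() packs both values into the TSC_AUX MSR as (node << 12) | cpu, and RDTSCP hands that back in ECX. A small userspace sketch of how the encoding unpacks (requires a CPU with RDTSCP; the field split matches the write above):

#include <stdio.h>

int main(void)
{
	unsigned int lo, hi, aux;

	asm volatile("rdtscp" : "=a" (lo), "=d" (hi), "=c" (aux));
	printf("cpu=%u node=%u\n", aux & 0xfff, aux >> 12);
	return 0;
}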
diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c index 51e330416995..0fb3a104ac62 100644 --- a/arch/x86/entry/vsyscall/vsyscall_gtod.c +++ b/arch/x86/entry/vsyscall/vsyscall_gtod.c | |||
@@ -16,6 +16,8 @@ | |||
16 | #include <asm/vgtod.h> | 16 | #include <asm/vgtod.h> |
17 | #include <asm/vvar.h> | 17 | #include <asm/vvar.h> |
18 | 18 | ||
19 | int vclocks_used __read_mostly; | ||
20 | |||
19 | DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data); | 21 | DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data); |
20 | 22 | ||
21 | void update_vsyscall_tz(void) | 23 | void update_vsyscall_tz(void) |
@@ -26,12 +28,17 @@ void update_vsyscall_tz(void) | |||
26 | 28 | ||
27 | void update_vsyscall(struct timekeeper *tk) | 29 | void update_vsyscall(struct timekeeper *tk) |
28 | { | 30 | { |
31 | int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; | ||
29 | struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data; | 32 | struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data; |
30 | 33 | ||
34 | /* Mark the new vclock used. */ | ||
35 | BUILD_BUG_ON(VCLOCK_MAX >= 32); | ||
36 | WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode)); | ||
37 | |||
31 | gtod_write_begin(vdata); | 38 | gtod_write_begin(vdata); |
32 | 39 | ||
33 | /* copy vsyscall data */ | 40 | /* copy vsyscall data */ |
34 | vdata->vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; | 41 | vdata->vclock_mode = vclock_mode; |
35 | vdata->cycle_last = tk->tkr_mono.cycle_last; | 42 | vdata->cycle_last = tk->tkr_mono.cycle_last; |
36 | vdata->mask = tk->tkr_mono.mask; | 43 | vdata->mask = tk->tkr_mono.mask; |
37 | vdata->mult = tk->tkr_mono.mult; | 44 | vdata->mult = tk->tkr_mono.mult; |
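update_vsyscall() records every vclock mode that has ever been active in the vclocks_used bitmask; the vvar_fault() path seen earlier consults it via vclock_was_used() before exposing the HPET or pvclock pages. A minimal sketch of such a helper, matching the bitmask written above (the real one lives in a header not shown in this diff); the BUILD_BUG_ON(VCLOCK_MAX >= 32) guards the shift:

static inline bool vclock_was_used(int vclock)
{
	return READ_ONCE(vclocks_used) & (1 << vclock);
}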
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 7bfc85bbb8ff..99afb665a004 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h | |||
@@ -152,12 +152,6 @@ static inline int alternatives_text_reserved(void *start, void *end) | |||
152 | ".popsection" | 152 | ".popsection" |
153 | 153 | ||
154 | /* | 154 | /* |
155 | * This must be included *after* the definition of ALTERNATIVE due to | ||
156 | * <asm/arch_hweight.h> | ||
157 | */ | ||
158 | #include <asm/cpufeature.h> | ||
159 | |||
160 | /* | ||
161 | * Alternative instructions for different CPU types or capabilities. | 155 | * Alternative instructions for different CPU types or capabilities. |
162 | * | 156 | * |
163 | * This allows to use optimized instructions even on generic binary | 157 | * This allows to use optimized instructions even on generic binary |
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index c80f6b6f3da2..0899cfc8dfe8 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h | |||
@@ -6,7 +6,6 @@ | |||
6 | 6 | ||
7 | #include <asm/alternative.h> | 7 | #include <asm/alternative.h> |
8 | #include <asm/cpufeature.h> | 8 | #include <asm/cpufeature.h> |
9 | #include <asm/processor.h> | ||
10 | #include <asm/apicdef.h> | 9 | #include <asm/apicdef.h> |
11 | #include <linux/atomic.h> | 10 | #include <linux/atomic.h> |
12 | #include <asm/fixmap.h> | 11 | #include <asm/fixmap.h> |
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h index 259a7c1ef709..02e799fa43d1 100644 --- a/arch/x86/include/asm/arch_hweight.h +++ b/arch/x86/include/asm/arch_hweight.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef _ASM_X86_HWEIGHT_H | 1 | #ifndef _ASM_X86_HWEIGHT_H |
2 | #define _ASM_X86_HWEIGHT_H | 2 | #define _ASM_X86_HWEIGHT_H |
3 | 3 | ||
4 | #include <asm/cpufeatures.h> | ||
5 | |||
4 | #ifdef CONFIG_64BIT | 6 | #ifdef CONFIG_64BIT |
5 | /* popcnt %edi, %eax -- redundant REX prefix for alignment */ | 7 | /* popcnt %edi, %eax -- redundant REX prefix for alignment */ |
6 | #define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7" | 8 | #define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7" |
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index cfe3b954d5e4..7766d1cf096e 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h | |||
@@ -91,7 +91,7 @@ set_bit(long nr, volatile unsigned long *addr) | |||
91 | * If it's called on the same region of memory simultaneously, the effect | 91 | * If it's called on the same region of memory simultaneously, the effect |
92 | * may be that only one operation succeeds. | 92 | * may be that only one operation succeeds. |
93 | */ | 93 | */ |
94 | static inline void __set_bit(long nr, volatile unsigned long *addr) | 94 | static __always_inline void __set_bit(long nr, volatile unsigned long *addr) |
95 | { | 95 | { |
96 | asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory"); | 96 | asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory"); |
97 | } | 97 | } |
@@ -128,13 +128,13 @@ clear_bit(long nr, volatile unsigned long *addr) | |||
128 | * clear_bit() is atomic and implies release semantics before the memory | 128 | * clear_bit() is atomic and implies release semantics before the memory |
129 | * operation. It can be used for an unlock. | 129 | * operation. It can be used for an unlock. |
130 | */ | 130 | */ |
131 | static inline void clear_bit_unlock(long nr, volatile unsigned long *addr) | 131 | static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *addr) |
132 | { | 132 | { |
133 | barrier(); | 133 | barrier(); |
134 | clear_bit(nr, addr); | 134 | clear_bit(nr, addr); |
135 | } | 135 | } |
136 | 136 | ||
137 | static inline void __clear_bit(long nr, volatile unsigned long *addr) | 137 | static __always_inline void __clear_bit(long nr, volatile unsigned long *addr) |
138 | { | 138 | { |
139 | asm volatile("btr %1,%0" : ADDR : "Ir" (nr)); | 139 | asm volatile("btr %1,%0" : ADDR : "Ir" (nr)); |
140 | } | 140 | } |
@@ -151,7 +151,7 @@ static inline void __clear_bit(long nr, volatile unsigned long *addr) | |||
151 | * No memory barrier is required here, because x86 cannot reorder stores past | 151 | * No memory barrier is required here, because x86 cannot reorder stores past |
152 | * older loads. Same principle as spin_unlock. | 152 | * older loads. Same principle as spin_unlock. |
153 | */ | 153 | */ |
154 | static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr) | 154 | static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr) |
155 | { | 155 | { |
156 | barrier(); | 156 | barrier(); |
157 | __clear_bit(nr, addr); | 157 | __clear_bit(nr, addr); |
@@ -166,7 +166,7 @@ static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr) | |||
166 | * If it's called on the same region of memory simultaneously, the effect | 166 | * If it's called on the same region of memory simultaneously, the effect |
167 | * may be that only one operation succeeds. | 167 | * may be that only one operation succeeds. |
168 | */ | 168 | */ |
169 | static inline void __change_bit(long nr, volatile unsigned long *addr) | 169 | static __always_inline void __change_bit(long nr, volatile unsigned long *addr) |
170 | { | 170 | { |
171 | asm volatile("btc %1,%0" : ADDR : "Ir" (nr)); | 171 | asm volatile("btc %1,%0" : ADDR : "Ir" (nr)); |
172 | } | 172 | } |
@@ -180,7 +180,7 @@ static inline void __change_bit(long nr, volatile unsigned long *addr) | |||
180 | * Note that @nr may be almost arbitrarily large; this function is not | 180 | * Note that @nr may be almost arbitrarily large; this function is not |
181 | * restricted to acting on a single-word quantity. | 181 | * restricted to acting on a single-word quantity. |
182 | */ | 182 | */ |
183 | static inline void change_bit(long nr, volatile unsigned long *addr) | 183 | static __always_inline void change_bit(long nr, volatile unsigned long *addr) |
184 | { | 184 | { |
185 | if (IS_IMMEDIATE(nr)) { | 185 | if (IS_IMMEDIATE(nr)) { |
186 | asm volatile(LOCK_PREFIX "xorb %1,%0" | 186 | asm volatile(LOCK_PREFIX "xorb %1,%0" |
@@ -201,7 +201,7 @@ static inline void change_bit(long nr, volatile unsigned long *addr) | |||
201 | * This operation is atomic and cannot be reordered. | 201 | * This operation is atomic and cannot be reordered. |
202 | * It also implies a memory barrier. | 202 | * It also implies a memory barrier. |
203 | */ | 203 | */ |
204 | static inline int test_and_set_bit(long nr, volatile unsigned long *addr) | 204 | static __always_inline int test_and_set_bit(long nr, volatile unsigned long *addr) |
205 | { | 205 | { |
206 | GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c"); | 206 | GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c"); |
207 | } | 207 | } |
@@ -228,7 +228,7 @@ test_and_set_bit_lock(long nr, volatile unsigned long *addr) | |||
228 | * If two examples of this operation race, one can appear to succeed | 228 | * If two examples of this operation race, one can appear to succeed |
229 | * but actually fail. You must protect multiple accesses with a lock. | 229 | * but actually fail. You must protect multiple accesses with a lock. |
230 | */ | 230 | */ |
231 | static inline int __test_and_set_bit(long nr, volatile unsigned long *addr) | 231 | static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *addr) |
232 | { | 232 | { |
233 | int oldbit; | 233 | int oldbit; |
234 | 234 | ||
@@ -247,7 +247,7 @@ static inline int __test_and_set_bit(long nr, volatile unsigned long *addr) | |||
247 | * This operation is atomic and cannot be reordered. | 247 | * This operation is atomic and cannot be reordered. |
248 | * It also implies a memory barrier. | 248 | * It also implies a memory barrier. |
249 | */ | 249 | */ |
250 | static inline int test_and_clear_bit(long nr, volatile unsigned long *addr) | 250 | static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *addr) |
251 | { | 251 | { |
252 | GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c"); | 252 | GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c"); |
253 | } | 253 | } |
@@ -268,7 +268,7 @@ static inline int test_and_clear_bit(long nr, volatile unsigned long *addr) | |||
268 | * accessed from a hypervisor on the same CPU if running in a VM: don't change | 268 | * accessed from a hypervisor on the same CPU if running in a VM: don't change |
269 | * this without also updating arch/x86/kernel/kvm.c | 269 | * this without also updating arch/x86/kernel/kvm.c |
270 | */ | 270 | */ |
271 | static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr) | 271 | static __always_inline int __test_and_clear_bit(long nr, volatile unsigned long *addr) |
272 | { | 272 | { |
273 | int oldbit; | 273 | int oldbit; |
274 | 274 | ||
@@ -280,7 +280,7 @@ static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr) | |||
280 | } | 280 | } |
281 | 281 | ||
282 | /* WARNING: non atomic and it can be reordered! */ | 282 | /* WARNING: non atomic and it can be reordered! */ |
283 | static inline int __test_and_change_bit(long nr, volatile unsigned long *addr) | 283 | static __always_inline int __test_and_change_bit(long nr, volatile unsigned long *addr) |
284 | { | 284 | { |
285 | int oldbit; | 285 | int oldbit; |
286 | 286 | ||
@@ -300,7 +300,7 @@ static inline int __test_and_change_bit(long nr, volatile unsigned long *addr) | |||
300 | * This operation is atomic and cannot be reordered. | 300 | * This operation is atomic and cannot be reordered. |
301 | * It also implies a memory barrier. | 301 | * It also implies a memory barrier. |
302 | */ | 302 | */ |
303 | static inline int test_and_change_bit(long nr, volatile unsigned long *addr) | 303 | static __always_inline int test_and_change_bit(long nr, volatile unsigned long *addr) |
304 | { | 304 | { |
305 | GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c"); | 305 | GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c"); |
306 | } | 306 | } |
@@ -311,7 +311,7 @@ static __always_inline int constant_test_bit(long nr, const volatile unsigned lo | |||
311 | (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; | 311 | (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; |
312 | } | 312 | } |
313 | 313 | ||
314 | static inline int variable_test_bit(long nr, volatile const unsigned long *addr) | 314 | static __always_inline int variable_test_bit(long nr, volatile const unsigned long *addr) |
315 | { | 315 | { |
316 | int oldbit; | 316 | int oldbit; |
317 | 317 | ||
@@ -343,7 +343,7 @@ static int test_bit(int nr, const volatile unsigned long *addr); | |||
343 | * | 343 | * |
344 | * Undefined if no bit exists, so code should check against 0 first. | 344 | * Undefined if no bit exists, so code should check against 0 first. |
345 | */ | 345 | */ |
346 | static inline unsigned long __ffs(unsigned long word) | 346 | static __always_inline unsigned long __ffs(unsigned long word) |
347 | { | 347 | { |
348 | asm("rep; bsf %1,%0" | 348 | asm("rep; bsf %1,%0" |
349 | : "=r" (word) | 349 | : "=r" (word) |
@@ -357,7 +357,7 @@ static inline unsigned long __ffs(unsigned long word) | |||
357 | * | 357 | * |
358 | * Undefined if no zero exists, so code should check against ~0UL first. | 358 | * Undefined if no zero exists, so code should check against ~0UL first. |
359 | */ | 359 | */ |
360 | static inline unsigned long ffz(unsigned long word) | 360 | static __always_inline unsigned long ffz(unsigned long word) |
361 | { | 361 | { |
362 | asm("rep; bsf %1,%0" | 362 | asm("rep; bsf %1,%0" |
363 | : "=r" (word) | 363 | : "=r" (word) |
@@ -371,7 +371,7 @@ static inline unsigned long ffz(unsigned long word) | |||
371 | * | 371 | * |
372 | * Undefined if no set bit exists, so code should check against 0 first. | 372 | * Undefined if no set bit exists, so code should check against 0 first. |
373 | */ | 373 | */ |
374 | static inline unsigned long __fls(unsigned long word) | 374 | static __always_inline unsigned long __fls(unsigned long word) |
375 | { | 375 | { |
376 | asm("bsr %1,%0" | 376 | asm("bsr %1,%0" |
377 | : "=r" (word) | 377 | : "=r" (word) |
@@ -393,7 +393,7 @@ static inline unsigned long __fls(unsigned long word) | |||
393 | * set bit if value is nonzero. The first (least significant) bit | 393 | * set bit if value is nonzero. The first (least significant) bit |
394 | * is at position 1. | 394 | * is at position 1. |
395 | */ | 395 | */ |
396 | static inline int ffs(int x) | 396 | static __always_inline int ffs(int x) |
397 | { | 397 | { |
398 | int r; | 398 | int r; |
399 | 399 | ||
@@ -434,7 +434,7 @@ static inline int ffs(int x) | |||
434 | * set bit if value is nonzero. The last (most significant) bit is | 434 | * set bit if value is nonzero. The last (most significant) bit is |
435 | * at position 32. | 435 | * at position 32. |
436 | */ | 436 | */ |
437 | static inline int fls(int x) | 437 | static __always_inline int fls(int x) |
438 | { | 438 | { |
439 | int r; | 439 | int r; |
440 | 440 | ||
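The atomic/non-atomic split documented in the comments above is easiest to see in use. A hedged kernel-style sketch of the acquire/release pairing, using the test_and_set_bit_lock()/clear_bit_unlock() signatures shown in this file ("flags" and BUSY_BIT are made up for illustration):

static unsigned long flags;
#define BUSY_BIT 0

static bool try_claim(void)
{
	/* atomic RMW with acquire semantics: safe against racing claimers */
	return !test_and_set_bit_lock(BUSY_BIT, &flags);
}

static void release(void)
{
	/* release semantics pair with the acquire in try_claim() */
	clear_bit_unlock(BUSY_BIT, &flags);
}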
diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h index eda81dc0f4ae..d194266acb28 100644 --- a/arch/x86/include/asm/clocksource.h +++ b/arch/x86/include/asm/clocksource.h | |||
@@ -3,10 +3,11 @@ | |||
3 | #ifndef _ASM_X86_CLOCKSOURCE_H | 3 | #ifndef _ASM_X86_CLOCKSOURCE_H |
4 | #define _ASM_X86_CLOCKSOURCE_H | 4 | #define _ASM_X86_CLOCKSOURCE_H |
5 | 5 | ||
6 | #define VCLOCK_NONE 0 /* No vDSO clock available. */ | 6 | #define VCLOCK_NONE 0 /* No vDSO clock available. */ |
7 | #define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */ | 7 | #define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */ |
8 | #define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */ | 8 | #define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */ |
9 | #define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */ | 9 | #define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */ |
10 | #define VCLOCK_MAX 3 | ||
10 | 11 | ||
11 | struct arch_clocksource_data { | 12 | struct arch_clocksource_data { |
12 | int vclock_mode; | 13 | int vclock_mode; |
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index ad19841eddfe..9733361fed6f 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define ASM_X86_CMPXCHG_H | 2 | #define ASM_X86_CMPXCHG_H |
3 | 3 | ||
4 | #include <linux/compiler.h> | 4 | #include <linux/compiler.h> |
5 | #include <asm/cpufeatures.h> | ||
5 | #include <asm/alternative.h> /* Provides LOCK_PREFIX */ | 6 | #include <asm/alternative.h> /* Provides LOCK_PREFIX */ |
6 | 7 | ||
7 | /* | 8 | /* |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 7ad8c9464297..68e4e8258b84 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
@@ -1,288 +1,7 @@ | |||
1 | /* | ||
2 | * Defines x86 CPU feature bits | ||
3 | */ | ||
4 | #ifndef _ASM_X86_CPUFEATURE_H | 1 | #ifndef _ASM_X86_CPUFEATURE_H |
5 | #define _ASM_X86_CPUFEATURE_H | 2 | #define _ASM_X86_CPUFEATURE_H |
6 | 3 | ||
7 | #ifndef _ASM_X86_REQUIRED_FEATURES_H | 4 | #include <asm/processor.h> |
8 | #include <asm/required-features.h> | ||
9 | #endif | ||
10 | |||
11 | #ifndef _ASM_X86_DISABLED_FEATURES_H | ||
12 | #include <asm/disabled-features.h> | ||
13 | #endif | ||
14 | |||
15 | #define NCAPINTS 16 /* N 32-bit words worth of info */ | ||
16 | #define NBUGINTS 1 /* N 32-bit bug flags */ | ||
17 | |||
18 | /* | ||
19 | * Note: If the comment begins with a quoted string, that string is used | ||
20 | * in /proc/cpuinfo instead of the macro name. If the string is "", | ||
21 | * this feature bit is not displayed in /proc/cpuinfo at all. | ||
22 | */ | ||
23 | |||
24 | /* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ | ||
25 | #define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ | ||
26 | #define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ | ||
27 | #define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ | ||
28 | #define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ | ||
29 | #define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ | ||
30 | #define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ | ||
31 | #define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ | ||
32 | #define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ | ||
33 | #define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ | ||
34 | #define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ | ||
35 | #define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ | ||
36 | #define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ | ||
37 | #define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ | ||
38 | #define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ | ||
39 | #define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */ | ||
40 | /* (plus FCMOVcc, FCOMI with FPU) */ | ||
41 | #define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ | ||
42 | #define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ | ||
43 | #define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ | ||
44 | #define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ | ||
45 | #define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ | ||
46 | #define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ | ||
47 | #define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ | ||
48 | #define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ | ||
49 | #define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ | ||
50 | #define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ | ||
51 | #define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ | ||
52 | #define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ | ||
53 | #define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ | ||
54 | #define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ | ||
55 | #define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */ | ||
56 | |||
57 | /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ | ||
58 | /* Don't duplicate feature flags which are redundant with Intel! */ | ||
59 | #define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ | ||
60 | #define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */ | ||
61 | #define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ | ||
62 | #define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ | ||
63 | #define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ | ||
64 | #define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ | ||
65 | #define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ | ||
66 | #define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */ | ||
67 | #define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */ | ||
68 | #define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */ | ||
69 | |||
70 | /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ | ||
71 | #define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ | ||
72 | #define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ | ||
73 | #define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ | ||
74 | |||
75 | /* Other features, Linux-defined mapping, word 3 */ | ||
76 | /* This range is used for feature bits which conflict or are synthesized */ | ||
77 | #define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ | ||
78 | #define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ | ||
79 | #define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ | ||
80 | #define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ | ||
81 | /* cpu types for specific tunings: */ | ||
82 | #define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ | ||
83 | #define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ | ||
84 | #define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ | ||
85 | #define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ | ||
86 | #define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ | ||
87 | #define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */ | ||
88 | /* free, was #define X86_FEATURE_FXSAVE_LEAK ( 3*32+10) * "" FXSAVE leaks FOP/FIP/FOP */ | ||
89 | #define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ | ||
90 | #define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ | ||
91 | #define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ | ||
92 | #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */ | ||
93 | #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */ | ||
94 | #define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */ | ||
95 | #define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */ | ||
96 | #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */ | ||
97 | /* free, was #define X86_FEATURE_11AP ( 3*32+19) * "" Bad local APIC aka 11AP */ | ||
98 | #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ | ||
99 | #define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ | ||
100 | #define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ | ||
101 | #define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ | ||
102 | #define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ | ||
103 | /* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */ | ||
104 | #define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ | ||
105 | #define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ | ||
106 | #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ | ||
107 | #define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */ | ||
108 | #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ | ||
109 | |||
110 | /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ | ||
111 | #define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ | ||
112 | #define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ | ||
113 | #define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ | ||
114 | #define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */ | ||
115 | #define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */ | ||
116 | #define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ | ||
117 | #define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */ | ||
118 | #define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ | ||
119 | #define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ | ||
120 | #define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ | ||
121 | #define X86_FEATURE_CID ( 4*32+10) /* Context ID */ | ||
122 | #define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ | ||
123 | #define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ | ||
124 | #define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */ | ||
125 | #define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ | ||
126 | #define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */ | ||
127 | #define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ | ||
128 | #define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ | ||
129 | #define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ | ||
130 | #define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ | ||
131 | #define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */ | ||
132 | #define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ | ||
133 | #define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ | ||
134 | #define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */ | ||
135 | #define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ | ||
136 | #define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ | ||
137 | #define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */ | ||
138 | #define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ | ||
139 | #define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */ | ||
140 | #define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */ | ||
141 | #define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ | ||
142 | |||
143 | /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ | ||
144 | #define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ | ||
145 | #define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ | ||
146 | #define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ | ||
147 | #define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ | ||
148 | #define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ | ||
149 | #define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ | ||
150 | #define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ | ||
151 | #define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ | ||
152 | #define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ | ||
153 | #define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ | ||
154 | |||
155 | /* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ | ||
156 | #define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ | ||
157 | #define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ | ||
158 | #define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */ | ||
159 | #define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ | ||
160 | #define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ | ||
161 | #define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ | ||
162 | #define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ | ||
163 | #define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ | ||
164 | #define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ | ||
165 | #define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ | ||
166 | #define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ | ||
167 | #define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ | ||
168 | #define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ | ||
169 | #define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ | ||
170 | #define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ | ||
171 | #define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ | ||
172 | #define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */ | ||
173 | #define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ | ||
174 | #define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */ | ||
175 | #define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */ | ||
176 | #define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */ | ||
177 | #define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ | ||
178 | #define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ | ||
179 | #define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */ | ||
180 | #define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */ | ||
181 | |||
182 | /* | ||
183 | * Auxiliary flags: Linux defined - For features scattered in various | ||
184 | * CPUID levels like 0x6, 0xA etc, word 7. | ||
185 | * | ||
186 | * Reuse free bits when adding new feature flags! | ||
187 | */ | ||
188 | |||
189 | #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ | ||
190 | #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ | ||
191 | |||
192 | #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ | ||
193 | #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ | ||
194 | |||
195 | #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ | ||
196 | |||
197 | /* Virtualization flags: Linux defined, word 8 */ | ||
198 | #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ | ||
199 | #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ | ||
200 | #define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */ | ||
201 | #define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ | ||
202 | #define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ | ||
203 | |||
204 | #define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ | ||
205 | #define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ | ||
206 | |||
207 | |||
208 | /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ | ||
209 | #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ | ||
210 | #define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */ | ||
211 | #define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ | ||
212 | #define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ | ||
213 | #define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ | ||
214 | #define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ | ||
215 | #define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ | ||
216 | #define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */ | ||
217 | #define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ | ||
218 | #define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ | ||
219 | #define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ | ||
220 | #define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ | ||
221 | #define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ | ||
222 | #define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ | ||
223 | #define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ | ||
224 | #define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ | ||
225 | #define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */ | ||
226 | #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ | ||
227 | #define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ | ||
228 | #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ | ||
229 | #define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ | ||
230 | #define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ | ||
231 | #define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ | ||
232 | |||
233 | /* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */ | ||
234 | #define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */ | ||
235 | #define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */ | ||
236 | #define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */ | ||
237 | #define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */ | ||
238 | |||
239 | /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */ | ||
240 | #define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ | ||
241 | |||
242 | /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */ | ||
243 | #define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */ | ||
244 | |||
245 | /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ | ||
246 | #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ | ||
247 | |||
248 | /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ | ||
249 | #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ | ||
250 | #define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ | ||
251 | #define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ | ||
252 | #define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ | ||
253 | #define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ | ||
254 | #define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ | ||
255 | #define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ | ||
256 | #define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ | ||
257 | #define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ | ||
258 | #define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ | ||
259 | |||
260 | /* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */ | ||
261 | #define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ | ||
262 | #define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ | ||
263 | #define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ | ||
264 | #define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ | ||
265 | #define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ | ||
266 | #define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ | ||
267 | #define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ | ||
268 | #define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ | ||
269 | #define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ | ||
270 | #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ | ||
271 | |||
272 | /* | ||
273 | * BUG word(s) | ||
274 | */ | ||
275 | #define X86_BUG(x) (NCAPINTS*32 + (x)) | ||
276 | |||
277 | #define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ | ||
278 | #define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ | ||
279 | #define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ | ||
280 | #define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ | ||
281 | #define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ | ||
282 | #define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ | ||
283 | #define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FDP */ | ||
284 | #define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ | ||
285 | #define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ | ||
286 | 5 | ||
287 | #if defined(__KERNEL__) && !defined(__ASSEMBLY__) | 6 | #if defined(__KERNEL__) && !defined(__ASSEMBLY__) |
288 | 7 | ||
@@ -369,8 +88,7 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; | |||
369 | * is not relevant. | 88 | * is not relevant. |
370 | */ | 89 | */ |
371 | #define cpu_feature_enabled(bit) \ | 90 | #define cpu_feature_enabled(bit) \ |
372 | (__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : \ | 91 | (__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : static_cpu_has(bit)) |
373 | cpu_has(&boot_cpu_data, bit)) | ||
374 | 92 | ||
375 | #define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) | 93 | #define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) |
376 | 94 | ||
@@ -406,106 +124,19 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; | |||
406 | #define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE) | 124 | #define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE) |
407 | #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) | 125 | #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) |
408 | /* | 126 | /* |
409 | * Do not add any more of those clumsy macros - use static_cpu_has_safe() for | 127 | * Do not add any more of those clumsy macros - use static_cpu_has() for |
410 | * fast paths and boot_cpu_has() otherwise! | 128 | * fast paths and boot_cpu_has() otherwise! |
411 | */ | 129 | */ |
412 | 130 | ||
413 | #if __GNUC__ >= 4 && defined(CONFIG_X86_FAST_FEATURE_TESTS) | 131 | #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS) |
414 | extern void warn_pre_alternatives(void); | ||
415 | extern bool __static_cpu_has_safe(u16 bit); | ||
416 | |||
417 | /* | 132 | /* |
418 | * Static testing of CPU features. Used the same as boot_cpu_has(). | 133 | * Static testing of CPU features. Used the same as boot_cpu_has(). |
419 | * These are only valid after alternatives have run, but will statically | 134 | * These will statically patch the target code for additional |
420 | * patch the target code for additional performance. | 135 | * performance. |
421 | */ | 136 | */ |
422 | static __always_inline __pure bool __static_cpu_has(u16 bit) | 137 | static __always_inline __pure bool _static_cpu_has(u16 bit) |
423 | { | ||
424 | #ifdef CC_HAVE_ASM_GOTO | ||
425 | |||
426 | #ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS | ||
427 | |||
428 | /* | ||
429 | * Catch too early usage of this before alternatives | ||
430 | * have run. | ||
431 | */ | ||
432 | asm_volatile_goto("1: jmp %l[t_warn]\n" | ||
433 | "2:\n" | ||
434 | ".section .altinstructions,\"a\"\n" | ||
435 | " .long 1b - .\n" | ||
436 | " .long 0\n" /* no replacement */ | ||
437 | " .word %P0\n" /* 1: do replace */ | ||
438 | " .byte 2b - 1b\n" /* source len */ | ||
439 | " .byte 0\n" /* replacement len */ | ||
440 | " .byte 0\n" /* pad len */ | ||
441 | ".previous\n" | ||
442 | /* skipping size check since replacement size = 0 */ | ||
443 | : : "i" (X86_FEATURE_ALWAYS) : : t_warn); | ||
444 | |||
445 | #endif | ||
446 | |||
447 | asm_volatile_goto("1: jmp %l[t_no]\n" | ||
448 | "2:\n" | ||
449 | ".section .altinstructions,\"a\"\n" | ||
450 | " .long 1b - .\n" | ||
451 | " .long 0\n" /* no replacement */ | ||
452 | " .word %P0\n" /* feature bit */ | ||
453 | " .byte 2b - 1b\n" /* source len */ | ||
454 | " .byte 0\n" /* replacement len */ | ||
455 | " .byte 0\n" /* pad len */ | ||
456 | ".previous\n" | ||
457 | /* skipping size check since replacement size = 0 */ | ||
458 | : : "i" (bit) : : t_no); | ||
459 | return true; | ||
460 | t_no: | ||
461 | return false; | ||
462 | |||
463 | #ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS | ||
464 | t_warn: | ||
465 | warn_pre_alternatives(); | ||
466 | return false; | ||
467 | #endif | ||
468 | |||
469 | #else /* CC_HAVE_ASM_GOTO */ | ||
470 | |||
471 | u8 flag; | ||
472 | /* Open-coded due to __stringify() in ALTERNATIVE() */ | ||
473 | asm volatile("1: movb $0,%0\n" | ||
474 | "2:\n" | ||
475 | ".section .altinstructions,\"a\"\n" | ||
476 | " .long 1b - .\n" | ||
477 | " .long 3f - .\n" | ||
478 | " .word %P1\n" /* feature bit */ | ||
479 | " .byte 2b - 1b\n" /* source len */ | ||
480 | " .byte 4f - 3f\n" /* replacement len */ | ||
481 | " .byte 0\n" /* pad len */ | ||
482 | ".previous\n" | ||
483 | ".section .discard,\"aw\",@progbits\n" | ||
484 | " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ | ||
485 | ".previous\n" | ||
486 | ".section .altinstr_replacement,\"ax\"\n" | ||
487 | "3: movb $1,%0\n" | ||
488 | "4:\n" | ||
489 | ".previous\n" | ||
490 | : "=qm" (flag) : "i" (bit)); | ||
491 | return flag; | ||
492 | |||
493 | #endif /* CC_HAVE_ASM_GOTO */ | ||
494 | } | ||
495 | |||
496 | #define static_cpu_has(bit) \ | ||
497 | ( \ | ||
498 | __builtin_constant_p(boot_cpu_has(bit)) ? \ | ||
499 | boot_cpu_has(bit) : \ | ||
500 | __builtin_constant_p(bit) ? \ | ||
501 | __static_cpu_has(bit) : \ | ||
502 | boot_cpu_has(bit) \ | ||
503 | ) | ||
504 | |||
505 | static __always_inline __pure bool _static_cpu_has_safe(u16 bit) | ||
506 | { | 138 | { |
507 | #ifdef CC_HAVE_ASM_GOTO | 139 | asm_volatile_goto("1: jmp 6f\n" |
508 | asm_volatile_goto("1: jmp %l[t_dynamic]\n" | ||
509 | "2:\n" | 140 | "2:\n" |
510 | ".skip -(((5f-4f) - (2b-1b)) > 0) * " | 141 | ".skip -(((5f-4f) - (2b-1b)) > 0) * " |
511 | "((5f-4f) - (2b-1b)),0x90\n" | 142 | "((5f-4f) - (2b-1b)),0x90\n" |
@@ -530,66 +161,34 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit) | |||
530 | " .byte 0\n" /* repl len */ | 161 | " .byte 0\n" /* repl len */ |
531 | " .byte 0\n" /* pad len */ | 162 | " .byte 0\n" /* pad len */ |
532 | ".previous\n" | 163 | ".previous\n" |
533 | : : "i" (bit), "i" (X86_FEATURE_ALWAYS) | 164 | ".section .altinstr_aux,\"ax\"\n" |
534 | : : t_dynamic, t_no); | 165 | "6:\n" |
166 | " testb %[bitnum],%[cap_byte]\n" | ||
167 | " jnz %l[t_yes]\n" | ||
168 | " jmp %l[t_no]\n" | ||
169 | ".previous\n" | ||
170 | : : "i" (bit), "i" (X86_FEATURE_ALWAYS), | ||
171 | [bitnum] "i" (1 << (bit & 7)), | ||
172 | [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3]) | ||
173 | : : t_yes, t_no); | ||
174 | t_yes: | ||
535 | return true; | 175 | return true; |
536 | t_no: | 176 | t_no: |
537 | return false; | 177 | return false; |
538 | t_dynamic: | ||
539 | return __static_cpu_has_safe(bit); | ||
540 | #else | ||
541 | u8 flag; | ||
542 | /* Open-coded due to __stringify() in ALTERNATIVE() */ | ||
543 | asm volatile("1: movb $2,%0\n" | ||
544 | "2:\n" | ||
545 | ".section .altinstructions,\"a\"\n" | ||
546 | " .long 1b - .\n" /* src offset */ | ||
547 | " .long 3f - .\n" /* repl offset */ | ||
548 | " .word %P2\n" /* always replace */ | ||
549 | " .byte 2b - 1b\n" /* source len */ | ||
550 | " .byte 4f - 3f\n" /* replacement len */ | ||
551 | " .byte 0\n" /* pad len */ | ||
552 | ".previous\n" | ||
553 | ".section .discard,\"aw\",@progbits\n" | ||
554 | " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ | ||
555 | ".previous\n" | ||
556 | ".section .altinstr_replacement,\"ax\"\n" | ||
557 | "3: movb $0,%0\n" | ||
558 | "4:\n" | ||
559 | ".previous\n" | ||
560 | ".section .altinstructions,\"a\"\n" | ||
561 | " .long 1b - .\n" /* src offset */ | ||
562 | " .long 5f - .\n" /* repl offset */ | ||
563 | " .word %P1\n" /* feature bit */ | ||
564 | " .byte 4b - 3b\n" /* src len */ | ||
565 | " .byte 6f - 5f\n" /* repl len */ | ||
566 | " .byte 0\n" /* pad len */ | ||
567 | ".previous\n" | ||
568 | ".section .discard,\"aw\",@progbits\n" | ||
569 | " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */ | ||
570 | ".previous\n" | ||
571 | ".section .altinstr_replacement,\"ax\"\n" | ||
572 | "5: movb $1,%0\n" | ||
573 | "6:\n" | ||
574 | ".previous\n" | ||
575 | : "=qm" (flag) | ||
576 | : "i" (bit), "i" (X86_FEATURE_ALWAYS)); | ||
577 | return (flag == 2 ? __static_cpu_has_safe(bit) : flag); | ||
578 | #endif /* CC_HAVE_ASM_GOTO */ | ||
579 | } | 178 | } |
580 | 179 | ||
581 | #define static_cpu_has_safe(bit) \ | 180 | #define static_cpu_has(bit) \ |
582 | ( \ | 181 | ( \ |
583 | __builtin_constant_p(boot_cpu_has(bit)) ? \ | 182 | __builtin_constant_p(boot_cpu_has(bit)) ? \ |
584 | boot_cpu_has(bit) : \ | 183 | boot_cpu_has(bit) : \ |
585 | _static_cpu_has_safe(bit) \ | 184 | _static_cpu_has(bit) \ |
586 | ) | 185 | ) |
587 | #else | 186 | #else |
588 | /* | 187 | /* |
589 | * gcc 3.x is too stupid to do the static test; fall back to dynamic. | 188 | * Fall back to dynamic for gcc versions which don't support asm goto. Should be |
189 | * a minority now anyway. | ||
590 | */ | 190 | */ |
591 | #define static_cpu_has(bit) boot_cpu_has(bit) | 191 | #define static_cpu_has(bit) boot_cpu_has(bit) |
592 | #define static_cpu_has_safe(bit) boot_cpu_has(bit) | ||
593 | #endif | 192 | #endif |
594 | 193 | ||
595 | #define cpu_has_bug(c, bit) cpu_has(c, (bit)) | 194 | #define cpu_has_bug(c, bit) cpu_has(c, (bit)) |
@@ -597,7 +196,6 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit) | |||
597 | #define clear_cpu_bug(c, bit) clear_cpu_cap(c, (bit)) | 196 | #define clear_cpu_bug(c, bit) clear_cpu_cap(c, (bit)) |
598 | 197 | ||
599 | #define static_cpu_has_bug(bit) static_cpu_has((bit)) | 198 | #define static_cpu_has_bug(bit) static_cpu_has((bit)) |
600 | #define static_cpu_has_bug_safe(bit) static_cpu_has_safe((bit)) | ||
601 | #define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit)) | 199 | #define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit)) |
602 | 200 | ||
603 | #define MAX_CPU_FEATURES (NCAPINTS * 32) | 201 | #define MAX_CPU_FEATURES (NCAPINTS * 32) |
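The rewritten _static_cpu_has() above drops the C fallback helper: until alternatives patch the jump away, execution lands in .altinstr_aux, where a single TESTB against boot_cpu_data.x86_capability decides the branch. A standalone C sketch of that byte/bit arithmetic (illustrative only, not kernel code; the function name is invented):

	#include <stdbool.h>
	#include <stdint.h>

	/*
	 * Mirrors the testb %[bitnum],%[cap_byte] operands above:
	 * byte index = bit >> 3, mask = 1 << (bit & 7). For
	 * X86_FEATURE_XSAVE (4*32+26 = 154) that is byte 19, bit 2.
	 */
	static bool cap_test_bit(const uint32_t *x86_capability, unsigned int bit)
	{
		const uint8_t *bytes = (const uint8_t *)x86_capability;

		return bytes[bit >> 3] & (1u << (bit & 7));
	}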
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h new file mode 100644 index 000000000000..074b7604bd51 --- /dev/null +++ b/arch/x86/include/asm/cpufeatures.h | |||
@@ -0,0 +1,300 @@ | |||
1 | #ifndef _ASM_X86_CPUFEATURES_H | ||
2 | #define _ASM_X86_CPUFEATURES_H | ||
3 | |||
4 | #ifndef _ASM_X86_REQUIRED_FEATURES_H | ||
5 | #include <asm/required-features.h> | ||
6 | #endif | ||
7 | |||
8 | #ifndef _ASM_X86_DISABLED_FEATURES_H | ||
9 | #include <asm/disabled-features.h> | ||
10 | #endif | ||
11 | |||
12 | /* | ||
13 | * Defines x86 CPU feature bits | ||
14 | */ | ||
15 | #define NCAPINTS 16 /* N 32-bit words worth of info */ | ||
16 | #define NBUGINTS 1 /* N 32-bit bug flags */ | ||
17 | |||
18 | /* | ||
19 | * Note: If the comment begins with a quoted string, that string is used | ||
20 | * in /proc/cpuinfo instead of the macro name. If the string is "", | ||
21 | * this feature bit is not displayed in /proc/cpuinfo at all. | ||
22 | */ | ||
23 | |||
24 | /* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ | ||
25 | #define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ | ||
26 | #define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ | ||
27 | #define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ | ||
28 | #define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ | ||
29 | #define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ | ||
30 | #define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ | ||
31 | #define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ | ||
32 | #define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ | ||
33 | #define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ | ||
34 | #define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ | ||
35 | #define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ | ||
36 | #define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ | ||
37 | #define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ | ||
38 | #define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ | ||
39 | #define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */ | ||
40 | /* (plus FCMOVcc, FCOMI with FPU) */ | ||
41 | #define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ | ||
42 | #define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ | ||
43 | #define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ | ||
44 | #define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ | ||
45 | #define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ | ||
46 | #define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ | ||
47 | #define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ | ||
48 | #define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ | ||
49 | #define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ | ||
50 | #define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ | ||
51 | #define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ | ||
52 | #define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ | ||
53 | #define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ | ||
54 | #define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ | ||
55 | #define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */ | ||
56 | |||
57 | /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ | ||
58 | /* Don't duplicate feature flags which are redundant with Intel! */ | ||
59 | #define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ | ||
60 | #define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */ | ||
61 | #define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ | ||
62 | #define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ | ||
63 | #define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ | ||
64 | #define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ | ||
65 | #define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ | ||
66 | #define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */ | ||
67 | #define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */ | ||
68 | #define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */ | ||
69 | |||
70 | /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ | ||
71 | #define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ | ||
72 | #define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ | ||
73 | #define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ | ||
74 | |||
75 | /* Other features, Linux-defined mapping, word 3 */ | ||
76 | /* This range is used for feature bits which conflict or are synthesized */ | ||
77 | #define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ | ||
78 | #define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ | ||
79 | #define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ | ||
80 | #define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ | ||
81 | /* cpu types for specific tunings: */ | ||
82 | #define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ | ||
83 | #define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ | ||
84 | #define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ | ||
85 | #define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ | ||
86 | #define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ | ||
87 | #define X86_FEATURE_UP ( 3*32+ 9) /* SMP kernel running on UP */ | ||
88 | #define X86_FEATURE_ART ( 3*32+10) /* Platform has always running timer (ART) */ | ||
89 | #define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ | ||
90 | #define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ | ||
91 | #define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ | ||
92 | #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */ | ||
93 | #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */ | ||
94 | #define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */ | ||
95 | #define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" MFENCE synchronizes RDTSC */ | ||
96 | #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ | ||
97 | /* free, was #define X86_FEATURE_11AP ( 3*32+19) * "" Bad local APIC aka 11AP */ | ||
98 | #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ | ||
99 | #define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ | ||
100 | #define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ | ||
101 | #define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ | ||
102 | #define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ | ||
103 | /* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */ | ||
104 | #define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ | ||
105 | #define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ | ||
106 | #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ | ||
107 | #define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non-lazy FPU restore */ | ||
108 | #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ | ||
109 | #define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */ | ||
110 | |||
111 | /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ | ||
112 | #define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ | ||
113 | #define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ | ||
114 | #define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ | ||
115 | #define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */ | ||
116 | #define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */ | ||
117 | #define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ | ||
118 | #define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */ | ||
119 | #define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ | ||
120 | #define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ | ||
121 | #define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ | ||
122 | #define X86_FEATURE_CID ( 4*32+10) /* Context ID */ | ||
123 | #define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ | ||
124 | #define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ | ||
125 | #define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */ | ||
126 | #define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ | ||
127 | #define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */ | ||
128 | #define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ | ||
129 | #define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ | ||
130 | #define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ | ||
131 | #define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ | ||
132 | #define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */ | ||
133 | #define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ | ||
134 | #define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ | ||
135 | #define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* TSC deadline timer */ | ||
136 | #define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ | ||
137 | #define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ | ||
138 | #define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */ | ||
139 | #define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ | ||
140 | #define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */ | ||
141 | #define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */ | ||
142 | #define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ | ||
143 | |||
144 | /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ | ||
145 | #define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ | ||
146 | #define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ | ||
147 | #define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ | ||
148 | #define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ | ||
149 | #define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ | ||
150 | #define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ | ||
151 | #define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ | ||
152 | #define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ | ||
153 | #define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ | ||
154 | #define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ | ||
155 | |||
156 | /* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ | ||
157 | #define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ | ||
158 | #define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If set, Hyper-Threading is not valid */ | ||
159 | #define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */ | ||
160 | #define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ | ||
161 | #define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ | ||
162 | #define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ | ||
163 | #define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ | ||
164 | #define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ | ||
165 | #define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ | ||
166 | #define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ | ||
167 | #define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ | ||
168 | #define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ | ||
169 | #define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ | ||
170 | #define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ | ||
171 | #define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ | ||
172 | #define X86_FEATURE_FMA4 ( 6*32+16) /* 4-operand MAC instructions */ | ||
173 | #define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */ | ||
174 | #define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ | ||
175 | #define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */ | ||
176 | #define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leaves */ | ||
177 | #define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */ | ||
178 | #define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ | ||
179 | #define X86_FEATURE_BPEXT ( 6*32+26) /* data breakpoint extension */ | ||
180 | #define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */ | ||
181 | #define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */ | ||
182 | |||
183 | /* | ||
184 | * Auxiliary flags: Linux defined - For features scattered in various | ||
185 | * CPUID levels like 0x6, 0xA etc, word 7. | ||
186 | * | ||
187 | * Reuse free bits when adding new feature flags! | ||
188 | */ | ||
189 | |||
190 | #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ | ||
191 | #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ | ||
192 | |||
193 | #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ | ||
194 | #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ | ||
195 | |||
196 | #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ | ||
197 | |||
198 | /* Virtualization flags: Linux defined, word 8 */ | ||
199 | #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ | ||
200 | #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ | ||
201 | #define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */ | ||
202 | #define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ | ||
203 | #define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ | ||
204 | |||
205 | #define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ | ||
206 | #define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ | ||
207 | |||
208 | |||
209 | /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ | ||
210 | #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ | ||
211 | #define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */ | ||
212 | #define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ | ||
213 | #define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ | ||
214 | #define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ | ||
215 | #define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ | ||
216 | #define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ | ||
217 | #define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */ | ||
218 | #define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ | ||
219 | #define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ | ||
220 | #define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ | ||
221 | #define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ | ||
222 | #define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ | ||
223 | #define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */ | ||
224 | #define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ | ||
225 | #define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ | ||
226 | #define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ | ||
227 | #define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */ | ||
228 | #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ | ||
229 | #define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ | ||
230 | #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ | ||
231 | #define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ | ||
232 | #define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ | ||
233 | #define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ | ||
234 | #define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */ | ||
235 | #define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */ | ||
236 | |||
237 | /* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */ | ||
238 | #define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */ | ||
239 | #define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */ | ||
240 | #define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */ | ||
241 | #define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */ | ||
242 | |||
243 | /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */ | ||
244 | #define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ | ||
245 | |||
246 | /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */ | ||
247 | #define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */ | ||
248 | |||
249 | /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ | ||
250 | #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ | ||
251 | |||
252 | /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ | ||
253 | #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ | ||
254 | #define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ | ||
255 | #define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ | ||
256 | #define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ | ||
257 | #define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ | ||
258 | #define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ | ||
259 | #define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ | ||
260 | #define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ | ||
261 | #define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ | ||
262 | #define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ | ||
263 | |||
264 | /* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */ | ||
265 | #define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ | ||
266 | #define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ | ||
267 | #define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ | ||
268 | #define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ | ||
269 | #define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ | ||
270 | #define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ | ||
271 | #define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ | ||
272 | #define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ | ||
273 | #define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ | ||
274 | #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ | ||
275 | #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ | ||
276 | |||
277 | /* | ||
278 | * BUG word(s) | ||
279 | */ | ||
280 | #define X86_BUG(x) (NCAPINTS*32 + (x)) | ||
281 | |||
282 | #define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ | ||
283 | #define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ | ||
284 | #define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ | ||
285 | #define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ | ||
286 | #define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ | ||
287 | #define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ | ||
288 | #define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FDP */ | ||
289 | #define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ | ||
290 | #define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ | ||
291 | |||
292 | #ifdef CONFIG_X86_32 | ||
293 | /* | ||
294 | * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional | ||
295 | * to avoid confusion. | ||
296 | */ | ||
297 | #define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ | ||
298 | #endif | ||
299 | |||
300 | #endif /* _ASM_X86_CPUFEATURES_H */ | ||
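Every X86_FEATURE_* value in the new header encodes (word * 32 + bit), and X86_BUG() numbers simply continue past the NCAPINTS capability words. A decoding sketch (helper names are invented, not part of the patch):

	#define NCAPINTS 16			/* as defined above */

	static inline unsigned int feature_word(unsigned int feature)
	{
		return feature / 32;		/* index into x86_capability[] */
	}

	static inline unsigned int feature_bit(unsigned int feature)
	{
		return feature % 32;		/* bit within that word */
	}

	/*
	 * X86_FEATURE_AVX2 = 9*32+5  -> word 9, bit 5.
	 * X86_BUG(0)       = 16*32+0 -> first bit of the NBUGINTS bug words.
	 */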
diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h index 278441f39856..eb5deb42484d 100644 --- a/arch/x86/include/asm/desc_defs.h +++ b/arch/x86/include/asm/desc_defs.h | |||
@@ -98,4 +98,27 @@ struct desc_ptr { | |||
98 | 98 | ||
99 | #endif /* !__ASSEMBLY__ */ | 99 | #endif /* !__ASSEMBLY__ */ |
100 | 100 | ||
101 | /* Access rights as returned by LAR */ | ||
102 | #define AR_TYPE_RODATA (0 * (1 << 9)) | ||
103 | #define AR_TYPE_RWDATA (1 * (1 << 9)) | ||
104 | #define AR_TYPE_RODATA_EXPDOWN (2 * (1 << 9)) | ||
105 | #define AR_TYPE_RWDATA_EXPDOWN (3 * (1 << 9)) | ||
106 | #define AR_TYPE_XOCODE (4 * (1 << 9)) | ||
107 | #define AR_TYPE_XRCODE (5 * (1 << 9)) | ||
108 | #define AR_TYPE_XOCODE_CONF (6 * (1 << 9)) | ||
109 | #define AR_TYPE_XRCODE_CONF (7 * (1 << 9)) | ||
110 | #define AR_TYPE_MASK (7 * (1 << 9)) | ||
111 | |||
112 | #define AR_DPL0 (0 * (1 << 13)) | ||
113 | #define AR_DPL3 (3 * (1 << 13)) | ||
114 | #define AR_DPL_MASK (3 * (1 << 13)) | ||
115 | |||
116 | #define AR_A (1 << 8) /* "Accessed" */ | ||
117 | #define AR_S (1 << 12) /* If clear, "System" segment */ | ||
118 | #define AR_P (1 << 15) /* "Present" */ | ||
119 | #define AR_AVL (1 << 20) /* "AVaiLable" (no HW effect) */ | ||
120 | #define AR_L (1 << 21) /* "Long mode" for code segments */ | ||
121 | #define AR_DB (1 << 22) /* D/B, effect depends on type */ | ||
122 | #define AR_G (1 << 23) /* "Granularity" (limit in pages) */ | ||
123 | |||
101 | #endif /* _ASM_X86_DESC_DEFS_H */ | 124 | #endif /* _ASM_X86_DESC_DEFS_H */ |
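The new AR_* constants describe the access-rights word that LAR returns. One way to use them, sketched with an invented helper (not part of the patch): code segments are the descriptor types with the top bit of the type field set, i.e. AR_TYPE_XOCODE and up.

	/* Sketch: is this a present, non-system code segment? */
	static inline int ar_is_code_segment(unsigned int ar)
	{
		return (ar & AR_P) &&
		       (ar & AR_S) &&
		       (ar & AR_TYPE_MASK) >= AR_TYPE_XOCODE;
	}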
diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h index 535192f6bfad..3c69fed215c5 100644 --- a/arch/x86/include/asm/dmi.h +++ b/arch/x86/include/asm/dmi.h | |||
@@ -15,7 +15,7 @@ static __always_inline __init void *dmi_alloc(unsigned len) | |||
15 | /* Use early IO mappings for DMI because it's initialized early */ | 15 | /* Use early IO mappings for DMI because it's initialized early */ |
16 | #define dmi_early_remap early_ioremap | 16 | #define dmi_early_remap early_ioremap |
17 | #define dmi_early_unmap early_iounmap | 17 | #define dmi_early_unmap early_iounmap |
18 | #define dmi_remap ioremap | 18 | #define dmi_remap ioremap_cache |
19 | #define dmi_unmap iounmap | 19 | #define dmi_unmap iounmap |
20 | 20 | ||
21 | #endif /* _ASM_X86_DMI_H */ | 21 | #endif /* _ASM_X86_DMI_H */ |
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 6d7d0e52ed5a..8554f960e21b 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h | |||
@@ -138,7 +138,7 @@ extern void reserve_top_address(unsigned long reserve); | |||
138 | extern int fixmaps_set; | 138 | extern int fixmaps_set; |
139 | 139 | ||
140 | extern pte_t *kmap_pte; | 140 | extern pte_t *kmap_pte; |
141 | extern pgprot_t kmap_prot; | 141 | #define kmap_prot PAGE_KERNEL |
142 | extern pte_t *pkmap_page_table; | 142 | extern pte_t *pkmap_page_table; |
143 | 143 | ||
144 | void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); | 144 | void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); |
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 0fd440df63f1..a2124343edf5 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <asm/user.h> | 17 | #include <asm/user.h> |
18 | #include <asm/fpu/api.h> | 18 | #include <asm/fpu/api.h> |
19 | #include <asm/fpu/xstate.h> | 19 | #include <asm/fpu/xstate.h> |
20 | #include <asm/cpufeature.h> | ||
20 | 21 | ||
21 | /* | 22 | /* |
22 | * High level FPU state handling functions: | 23 | * High level FPU state handling functions: |
@@ -58,22 +59,22 @@ extern u64 fpu__get_supported_xfeatures_mask(void); | |||
58 | */ | 59 | */ |
59 | static __always_inline __pure bool use_eager_fpu(void) | 60 | static __always_inline __pure bool use_eager_fpu(void) |
60 | { | 61 | { |
61 | return static_cpu_has_safe(X86_FEATURE_EAGER_FPU); | 62 | return static_cpu_has(X86_FEATURE_EAGER_FPU); |
62 | } | 63 | } |
63 | 64 | ||
64 | static __always_inline __pure bool use_xsaveopt(void) | 65 | static __always_inline __pure bool use_xsaveopt(void) |
65 | { | 66 | { |
66 | return static_cpu_has_safe(X86_FEATURE_XSAVEOPT); | 67 | return static_cpu_has(X86_FEATURE_XSAVEOPT); |
67 | } | 68 | } |
68 | 69 | ||
69 | static __always_inline __pure bool use_xsave(void) | 70 | static __always_inline __pure bool use_xsave(void) |
70 | { | 71 | { |
71 | return static_cpu_has_safe(X86_FEATURE_XSAVE); | 72 | return static_cpu_has(X86_FEATURE_XSAVE); |
72 | } | 73 | } |
73 | 74 | ||
74 | static __always_inline __pure bool use_fxsr(void) | 75 | static __always_inline __pure bool use_fxsr(void) |
75 | { | 76 | { |
76 | return static_cpu_has_safe(X86_FEATURE_FXSR); | 77 | return static_cpu_has(X86_FEATURE_FXSR); |
77 | } | 78 | } |
78 | 79 | ||
79 | /* | 80 | /* |
@@ -300,7 +301,7 @@ static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate) | |||
300 | 301 | ||
301 | WARN_ON(system_state != SYSTEM_BOOTING); | 302 | WARN_ON(system_state != SYSTEM_BOOTING); |
302 | 303 | ||
303 | if (static_cpu_has_safe(X86_FEATURE_XSAVES)) | 304 | if (static_cpu_has(X86_FEATURE_XSAVES)) |
304 | XSTATE_OP(XSAVES, xstate, lmask, hmask, err); | 305 | XSTATE_OP(XSAVES, xstate, lmask, hmask, err); |
305 | else | 306 | else |
306 | XSTATE_OP(XSAVE, xstate, lmask, hmask, err); | 307 | XSTATE_OP(XSAVE, xstate, lmask, hmask, err); |
@@ -322,7 +323,7 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate) | |||
322 | 323 | ||
323 | WARN_ON(system_state != SYSTEM_BOOTING); | 324 | WARN_ON(system_state != SYSTEM_BOOTING); |
324 | 325 | ||
325 | if (static_cpu_has_safe(X86_FEATURE_XSAVES)) | 326 | if (static_cpu_has(X86_FEATURE_XSAVES)) |
326 | XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); | 327 | XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); |
327 | else | 328 | else |
328 | XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); | 329 | XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); |
@@ -460,7 +461,7 @@ static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate) | |||
460 | * pending. Clear the x87 state here by setting it to fixed values. | 461 | * pending. Clear the x87 state here by setting it to fixed values. |
461 | * "m" is a random variable that should be in L1. | 462 | * "m" is a random variable that should be in L1. |
462 | */ | 463 | */ |
463 | if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) { | 464 | if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) { |
464 | asm volatile( | 465 | asm volatile( |
465 | "fnclex\n\t" | 466 | "fnclex\n\t" |
466 | "emms\n\t" | 467 | "emms\n\t" |
@@ -589,7 +590,8 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) | |||
589 | * If the task has used the math, pre-load the FPU on xsave processors | 590 | * If the task has used the math, pre-load the FPU on xsave processors |
590 | * or if the past 5 consecutive context-switches used math. | 591 | * or if the past 5 consecutive context-switches used math. |
591 | */ | 592 | */ |
592 | fpu.preload = new_fpu->fpstate_active && | 593 | fpu.preload = static_cpu_has(X86_FEATURE_FPU) && |
594 | new_fpu->fpstate_active && | ||
593 | (use_eager_fpu() || new_fpu->counter > 5); | 595 | (use_eager_fpu() || new_fpu->counter > 5); |
594 | 596 | ||
595 | if (old_fpu->fpregs_active) { | 597 | if (old_fpu->fpregs_active) { |
diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h index 793179cf8e21..6e4d170726b7 100644 --- a/arch/x86/include/asm/frame.h +++ b/arch/x86/include/asm/frame.h | |||
@@ -1,23 +1,44 @@ | |||
1 | #ifdef __ASSEMBLY__ | 1 | #ifndef _ASM_X86_FRAME_H |
2 | #define _ASM_X86_FRAME_H | ||
2 | 3 | ||
3 | #include <asm/asm.h> | 4 | #include <asm/asm.h> |
4 | 5 | ||
5 | /* The annotation hides the frame from the unwinder and makes it look | 6 | /* |
6 | like an ordinary ebp save/restore. This avoids some special cases for | 7 | * These are stack frame creation macros. They should be used by every
7 | frame pointer later */ | 8 | * callable non-leaf asm function to make kernel stack traces more reliable. |
9 | */ | ||
10 | |||
8 | #ifdef CONFIG_FRAME_POINTER | 11 | #ifdef CONFIG_FRAME_POINTER |
9 | .macro FRAME | 12 | |
10 | __ASM_SIZE(push,) %__ASM_REG(bp) | 13 | #ifdef __ASSEMBLY__ |
11 | __ASM_SIZE(mov) %__ASM_REG(sp), %__ASM_REG(bp) | 14 | |
12 | .endm | 15 | .macro FRAME_BEGIN |
13 | .macro ENDFRAME | 16 | push %_ASM_BP |
14 | __ASM_SIZE(pop,) %__ASM_REG(bp) | 17 | _ASM_MOV %_ASM_SP, %_ASM_BP |
15 | .endm | 18 | .endm |
16 | #else | 19 | |
17 | .macro FRAME | 20 | .macro FRAME_END |
18 | .endm | 21 | pop %_ASM_BP |
19 | .macro ENDFRAME | 22 | .endm |
20 | .endm | 23 | |
21 | #endif | 24 | #else /* !__ASSEMBLY__ */ |
22 | 25 | ||
23 | #endif /* __ASSEMBLY__ */ | 26 | #define FRAME_BEGIN \ |
27 | "push %" _ASM_BP "\n" \ | ||
28 | _ASM_MOV "%" _ASM_SP ", %" _ASM_BP "\n" | ||
29 | |||
30 | #define FRAME_END "pop %" _ASM_BP "\n" | ||
31 | |||
32 | #endif /* __ASSEMBLY__ */ | ||
33 | |||
34 | #define FRAME_OFFSET __ASM_SEL(4, 8) | ||
35 | |||
36 | #else /* !CONFIG_FRAME_POINTER */ | ||
37 | |||
38 | #define FRAME_BEGIN | ||
39 | #define FRAME_END | ||
40 | #define FRAME_OFFSET 0 | ||
41 | |||
42 | #endif /* CONFIG_FRAME_POINTER */ | ||
43 | |||
44 | #endif /* _ASM_X86_FRAME_H */ | ||
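FRAME_BEGIN/FRAME_END give a callable non-leaf asm function the conventional rbp save/restore when CONFIG_FRAME_POINTER=y and compile to nothing otherwise. A usage sketch for a .S file (the function and helper names are invented; ENTRY/ENDPROC come from <asm/linkage.h>):

	#include <asm/frame.h>

	ENTRY(example_asm_func)
		FRAME_BEGIN		/* push %_ASM_BP; mov %_ASM_SP, %_ASM_BP */
		call	example_c_helper
		FRAME_END		/* pop %_ASM_BP */
		ret
	ENDPROC(example_asm_func)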
diff --git a/arch/x86/include/asm/imr.h b/arch/x86/include/asm/imr.h index cd2ce4068441..ebea2c9d2cdc 100644 --- a/arch/x86/include/asm/imr.h +++ b/arch/x86/include/asm/imr.h | |||
@@ -53,7 +53,7 @@ | |||
53 | #define IMR_MASK (IMR_ALIGN - 1) | 53 | #define IMR_MASK (IMR_ALIGN - 1) |
54 | 54 | ||
55 | int imr_add_range(phys_addr_t base, size_t size, | 55 | int imr_add_range(phys_addr_t base, size_t size, |
56 | unsigned int rmask, unsigned int wmask, bool lock); | 56 | unsigned int rmask, unsigned int wmask); |
57 | 57 | ||
58 | int imr_remove_range(phys_addr_t base, size_t size); | 58 | int imr_remove_range(phys_addr_t base, size_t size); |
59 | 59 | ||
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h index cfc9a0d2d07c..a4fe16e42b7b 100644 --- a/arch/x86/include/asm/ipi.h +++ b/arch/x86/include/asm/ipi.h | |||
@@ -57,67 +57,13 @@ static inline void __xapic_wait_icr_idle(void) | |||
57 | cpu_relax(); | 57 | cpu_relax(); |
58 | } | 58 | } |
59 | 59 | ||
60 | static inline void | 60 | void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest); |
61 | __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest) | ||
62 | { | ||
63 | /* | ||
64 | * Subtle. In the case of the 'never do double writes' workaround | ||
65 | * we have to lock out interrupts to be safe. As we don't care | ||
66 | * of the value read we use an atomic rmw access to avoid costly | ||
67 | * cli/sti. Otherwise we use an even cheaper single atomic write | ||
68 | * to the APIC. | ||
69 | */ | ||
70 | unsigned int cfg; | ||
71 | |||
72 | /* | ||
73 | * Wait for idle. | ||
74 | */ | ||
75 | __xapic_wait_icr_idle(); | ||
76 | |||
77 | /* | ||
78 | * No need to touch the target chip field | ||
79 | */ | ||
80 | cfg = __prepare_ICR(shortcut, vector, dest); | ||
81 | |||
82 | /* | ||
83 | * Send the IPI. The write to APIC_ICR fires this off. | ||
84 | */ | ||
85 | native_apic_mem_write(APIC_ICR, cfg); | ||
86 | } | ||
87 | 61 | ||
88 | /* | 62 | /* |
89 | * This is used to send an IPI with no shorthand notation (the destination is | 63 | * This is used to send an IPI with no shorthand notation (the destination is |
90 | * specified in bits 56 to 63 of the ICR). | 64 | * specified in bits 56 to 63 of the ICR). |
91 | */ | 65 | */ |
92 | static inline void | 66 | void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest); |
93 | __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest) | ||
94 | { | ||
95 | unsigned long cfg; | ||
96 | |||
97 | /* | ||
98 | * Wait for idle. | ||
99 | */ | ||
100 | if (unlikely(vector == NMI_VECTOR)) | ||
101 | safe_apic_wait_icr_idle(); | ||
102 | else | ||
103 | __xapic_wait_icr_idle(); | ||
104 | |||
105 | /* | ||
106 | * prepare target chip field | ||
107 | */ | ||
108 | cfg = __prepare_ICR2(mask); | ||
109 | native_apic_mem_write(APIC_ICR2, cfg); | ||
110 | |||
111 | /* | ||
112 | * program the ICR | ||
113 | */ | ||
114 | cfg = __prepare_ICR(0, vector, dest); | ||
115 | |||
116 | /* | ||
117 | * Send the IPI. The write to APIC_ICR fires this off. | ||
118 | */ | ||
119 | native_apic_mem_write(APIC_ICR, cfg); | ||
120 | } | ||
121 | 67 | ||
122 | extern void default_send_IPI_single(int cpu, int vector); | 68 | extern void default_send_IPI_single(int cpu, int vector); |
123 | extern void default_send_IPI_single_phys(int cpu, int vector); | 69 | extern void default_send_IPI_single_phys(int cpu, int vector); |
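The two IPI helpers above move out of line, but the packing they rely on is unchanged: as the retained comment says, the destination sits in bits 56 to 63 of the 64-bit ICR, i.e. the top byte of the APIC_ICR2 register. A sketch of that shift (illustrative; the helper name is invented, and in the kernel __prepare_ICR2() is believed to do this via SET_APIC_DEST_FIELD()):

	/* Bits 56-63 of the ICR are bits 24-31 of APIC_ICR2. */
	static inline unsigned int icr2_dest_field(unsigned int mask)
	{
		return mask << 24;
	}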
diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h index 78162f8e248b..d0afb05c84fc 100644 --- a/arch/x86/include/asm/irq_work.h +++ b/arch/x86/include/asm/irq_work.h | |||
@@ -1,7 +1,7 @@ | |||
1 | #ifndef _ASM_IRQ_WORK_H | 1 | #ifndef _ASM_IRQ_WORK_H |
2 | #define _ASM_IRQ_WORK_H | 2 | #define _ASM_IRQ_WORK_H |
3 | 3 | ||
4 | #include <asm/processor.h> | 4 | #include <asm/cpufeature.h> |
5 | 5 | ||
6 | static inline bool arch_irq_work_has_interrupt(void) | 6 | static inline bool arch_irq_work_has_interrupt(void) |
7 | { | 7 | { |
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index cfff34172be0..92b6f651fa4f 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -135,6 +135,7 @@ struct mca_config { | |||
135 | bool ignore_ce; | 135 | bool ignore_ce; |
136 | bool disabled; | 136 | bool disabled; |
137 | bool ser; | 137 | bool ser; |
138 | bool recovery; | ||
138 | bool bios_cmci_threshold; | 139 | bool bios_cmci_threshold; |
139 | u8 banks; | 140 | u8 banks; |
140 | s8 bootlog; | 141 | s8 bootlog; |
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 1e1b07a5a738..9d3a96c4da78 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h | |||
@@ -3,6 +3,7 @@ | |||
3 | 3 | ||
4 | #include <asm/cpu.h> | 4 | #include <asm/cpu.h> |
5 | #include <linux/earlycpio.h> | 5 | #include <linux/earlycpio.h> |
6 | #include <linux/initrd.h> | ||
6 | 7 | ||
7 | #define native_rdmsr(msr, val1, val2) \ | 8 | #define native_rdmsr(msr, val1, val2) \ |
8 | do { \ | 9 | do { \ |
@@ -143,4 +144,29 @@ static inline void reload_early_microcode(void) { } | |||
143 | static inline bool | 144 | static inline bool |
144 | get_builtin_firmware(struct cpio_data *cd, const char *name) { return false; } | 145 | get_builtin_firmware(struct cpio_data *cd, const char *name) { return false; } |
145 | #endif | 146 | #endif |
147 | |||
148 | static inline unsigned long get_initrd_start(void) | ||
149 | { | ||
150 | #ifdef CONFIG_BLK_DEV_INITRD | ||
151 | return initrd_start; | ||
152 | #else | ||
153 | return 0; | ||
154 | #endif | ||
155 | } | ||
156 | |||
157 | static inline unsigned long get_initrd_start_addr(void) | ||
158 | { | ||
159 | #ifdef CONFIG_BLK_DEV_INITRD | ||
160 | #ifdef CONFIG_X86_32 | ||
161 | unsigned long *initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start); | ||
162 | |||
163 | return (unsigned long)__pa_nodebug(*initrd_start_p); | ||
164 | #else | ||
165 | return get_initrd_start(); | ||
166 | #endif | ||
167 | #else /* CONFIG_BLK_DEV_INITRD */ | ||
168 | return 0; | ||
169 | #endif | ||
170 | } | ||
171 | |||
146 | #endif /* _ASM_X86_MICROCODE_H */ | 172 | #endif /* _ASM_X86_MICROCODE_H */ |
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h index 8559b0102ea1..603417f8dd6c 100644 --- a/arch/x86/include/asm/microcode_intel.h +++ b/arch/x86/include/asm/microcode_intel.h | |||
@@ -40,7 +40,6 @@ struct extended_sigtable { | |||
40 | #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) | 40 | #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) |
41 | #define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) | 41 | #define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) |
42 | #define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) | 42 | #define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) |
43 | #define DWSIZE (sizeof(u32)) | ||
44 | 43 | ||
45 | #define get_totalsize(mc) \ | 44 | #define get_totalsize(mc) \ |
46 | (((struct microcode_intel *)mc)->hdr.datasize ? \ | 45 | (((struct microcode_intel *)mc)->hdr.datasize ? \ |
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 55234d5e7160..1ea0baef1175 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h | |||
@@ -19,7 +19,8 @@ typedef struct { | |||
19 | #endif | 19 | #endif |
20 | 20 | ||
21 | struct mutex lock; | 21 | struct mutex lock; |
22 | void __user *vdso; | 22 | void __user *vdso; /* vdso base address */ |
23 | const struct vdso_image *vdso_image; /* vdso image in use */ | ||
23 | 24 | ||
24 | atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */ | 25 | atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */ |
25 | } mm_context_t; | 26 | } mm_context_t; |
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index b05402ef3b84..984ab75bf621 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -1,7 +1,12 @@ | |||
1 | #ifndef _ASM_X86_MSR_INDEX_H | 1 | #ifndef _ASM_X86_MSR_INDEX_H |
2 | #define _ASM_X86_MSR_INDEX_H | 2 | #define _ASM_X86_MSR_INDEX_H |
3 | 3 | ||
4 | /* CPU model specific register (MSR) numbers */ | 4 | /* |
5 | * CPU model specific register (MSR) numbers. | ||
6 | * | ||
7 | * Do not add new entries to this file unless the definitions are shared | ||
8 | * between multiple compilation units. | ||
9 | */ | ||
5 | 10 | ||
6 | /* x86-64 specific MSRs */ | 11 | /* x86-64 specific MSRs */ |
7 | #define MSR_EFER 0xc0000080 /* extended feature register */ | 12 | #define MSR_EFER 0xc0000080 /* extended feature register */ |
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index c70689b5e5aa..0deeb2d26df7 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h | |||
@@ -3,6 +3,8 @@ | |||
3 | 3 | ||
4 | #include <linux/sched.h> | 4 | #include <linux/sched.h> |
5 | 5 | ||
6 | #include <asm/cpufeature.h> | ||
7 | |||
6 | #define MWAIT_SUBSTATE_MASK 0xf | 8 | #define MWAIT_SUBSTATE_MASK 0xf |
7 | #define MWAIT_CSTATE_MASK 0xf | 9 | #define MWAIT_CSTATE_MASK 0xf |
8 | #define MWAIT_SUBSTATE_SIZE 4 | 10 | #define MWAIT_SUBSTATE_SIZE 4 |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 813384ef811a..983738ac014c 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -13,7 +13,7 @@ struct vm86; | |||
13 | #include <asm/types.h> | 13 | #include <asm/types.h> |
14 | #include <uapi/asm/sigcontext.h> | 14 | #include <uapi/asm/sigcontext.h> |
15 | #include <asm/current.h> | 15 | #include <asm/current.h> |
16 | #include <asm/cpufeature.h> | 16 | #include <asm/cpufeatures.h> |
17 | #include <asm/page.h> | 17 | #include <asm/page.h> |
18 | #include <asm/pgtable_types.h> | 18 | #include <asm/pgtable_types.h> |
19 | #include <asm/percpu.h> | 19 | #include <asm/percpu.h> |
@@ -24,7 +24,6 @@ struct vm86; | |||
24 | #include <asm/fpu/types.h> | 24 | #include <asm/fpu/types.h> |
25 | 25 | ||
26 | #include <linux/personality.h> | 26 | #include <linux/personality.h> |
27 | #include <linux/cpumask.h> | ||
28 | #include <linux/cache.h> | 27 | #include <linux/cache.h> |
29 | #include <linux/threads.h> | 28 | #include <linux/threads.h> |
30 | #include <linux/math64.h> | 29 | #include <linux/math64.h> |
@@ -300,10 +299,13 @@ struct tss_struct { | |||
300 | */ | 299 | */ |
301 | unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; | 300 | unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; |
302 | 301 | ||
302 | #ifdef CONFIG_X86_32 | ||
303 | /* | 303 | /* |
304 | * Space for the temporary SYSENTER stack: | 304 | * Space for the temporary SYSENTER stack. |
305 | */ | 305 | */ |
306 | unsigned long SYSENTER_stack_canary; | ||
306 | unsigned long SYSENTER_stack[64]; | 307 | unsigned long SYSENTER_stack[64]; |
308 | #endif | ||
307 | 309 | ||
308 | } ____cacheline_aligned; | 310 | } ____cacheline_aligned; |
309 | 311 | ||
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index a4a77286cb1d..9b9b30b19441 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h | |||
@@ -7,12 +7,23 @@ | |||
7 | 7 | ||
8 | void syscall_init(void); | 8 | void syscall_init(void); |
9 | 9 | ||
10 | #ifdef CONFIG_X86_64 | ||
10 | void entry_SYSCALL_64(void); | 11 | void entry_SYSCALL_64(void); |
11 | void entry_SYSCALL_compat(void); | 12 | #endif |
13 | |||
14 | #ifdef CONFIG_X86_32 | ||
12 | void entry_INT80_32(void); | 15 | void entry_INT80_32(void); |
13 | void entry_INT80_compat(void); | ||
14 | void entry_SYSENTER_32(void); | 16 | void entry_SYSENTER_32(void); |
17 | void __begin_SYSENTER_singlestep_region(void); | ||
18 | void __end_SYSENTER_singlestep_region(void); | ||
19 | #endif | ||
20 | |||
21 | #ifdef CONFIG_IA32_EMULATION | ||
15 | void entry_SYSENTER_compat(void); | 22 | void entry_SYSENTER_compat(void); |
23 | void __end_entry_SYSENTER_compat(void); | ||
24 | void entry_SYSCALL_compat(void); | ||
25 | void entry_INT80_compat(void); | ||
26 | #endif | ||
16 | 27 | ||
17 | void x86_configure_nx(void); | 28 | void x86_configure_nx(void); |
18 | void x86_report_nx(void); | 29 | void x86_report_nx(void); |
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h index 89db46752a8f..452c88b8ad06 100644 --- a/arch/x86/include/asm/sighandling.h +++ b/arch/x86/include/asm/sighandling.h | |||
@@ -13,7 +13,6 @@ | |||
13 | X86_EFLAGS_CF | X86_EFLAGS_RF) | 13 | X86_EFLAGS_CF | X86_EFLAGS_RF) |
14 | 14 | ||
15 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where); | 15 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where); |
16 | int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc); | ||
17 | int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, | 16 | int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, |
18 | struct pt_regs *regs, unsigned long mask); | 17 | struct pt_regs *regs, unsigned long mask); |
19 | 18 | ||
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index ba665ebd17bb..db333300bd4b 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h | |||
@@ -15,7 +15,7 @@ | |||
15 | 15 | ||
16 | #include <linux/stringify.h> | 16 | #include <linux/stringify.h> |
17 | #include <asm/nops.h> | 17 | #include <asm/nops.h> |
18 | #include <asm/cpufeature.h> | 18 | #include <asm/cpufeatures.h> |
19 | 19 | ||
20 | /* "Raw" instruction opcodes */ | 20 | /* "Raw" instruction opcodes */ |
21 | #define __ASM_CLAC .byte 0x0f,0x01,0xca | 21 | #define __ASM_CLAC .byte 0x0f,0x01,0xca |
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index dfcf0727623b..20a3de5cb3b0 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h | |||
@@ -16,7 +16,6 @@ | |||
16 | #endif | 16 | #endif |
17 | #include <asm/thread_info.h> | 17 | #include <asm/thread_info.h> |
18 | #include <asm/cpumask.h> | 18 | #include <asm/cpumask.h> |
19 | #include <asm/cpufeature.h> | ||
20 | 19 | ||
21 | extern int smp_num_siblings; | 20 | extern int smp_num_siblings; |
22 | extern unsigned int num_processors; | 21 | extern unsigned int num_processors; |
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index c7b551028740..82866697fcf1 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
@@ -49,7 +49,7 @@ | |||
49 | */ | 49 | */ |
50 | #ifndef __ASSEMBLY__ | 50 | #ifndef __ASSEMBLY__ |
51 | struct task_struct; | 51 | struct task_struct; |
52 | #include <asm/processor.h> | 52 | #include <asm/cpufeature.h> |
53 | #include <linux/atomic.h> | 53 | #include <linux/atomic.h> |
54 | 54 | ||
55 | struct thread_info { | 55 | struct thread_info { |
@@ -134,10 +134,13 @@ struct thread_info { | |||
134 | #define _TIF_ADDR32 (1 << TIF_ADDR32) | 134 | #define _TIF_ADDR32 (1 << TIF_ADDR32) |
135 | #define _TIF_X32 (1 << TIF_X32) | 135 | #define _TIF_X32 (1 << TIF_X32) |
136 | 136 | ||
137 | /* work to do in syscall_trace_enter() */ | 137 | /* |
138 | * work to do in syscall_trace_enter(). Also includes TIF_NOHZ for | ||
139 | * enter_from_user_mode() | ||
140 | */ | ||
138 | #define _TIF_WORK_SYSCALL_ENTRY \ | 141 | #define _TIF_WORK_SYSCALL_ENTRY \ |
139 | (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \ | 142 | (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \ |
140 | _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \ | 143 | _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \ |
141 | _TIF_NOHZ) | 144 | _TIF_NOHZ) |
142 | 145 | ||
143 | /* work to do on any return to user space */ | 146 | /* work to do on any return to user space */ |
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 6df2029405a3..c24b4224d439 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h | |||
@@ -5,8 +5,57 @@ | |||
5 | #include <linux/sched.h> | 5 | #include <linux/sched.h> |
6 | 6 | ||
7 | #include <asm/processor.h> | 7 | #include <asm/processor.h> |
8 | #include <asm/cpufeature.h> | ||
8 | #include <asm/special_insns.h> | 9 | #include <asm/special_insns.h> |
9 | 10 | ||
11 | static inline void __invpcid(unsigned long pcid, unsigned long addr, | ||
12 | unsigned long type) | ||
13 | { | ||
14 | struct { u64 d[2]; } desc = { { pcid, addr } }; | ||
15 | |||
16 | /* | ||
17 | * The memory clobber is because the whole point is to invalidate | ||
18 | * stale TLB entries and, especially if we're flushing global | ||
19 | * mappings, we don't want the compiler to reorder any subsequent | ||
20 | * memory accesses before the TLB flush. | ||
21 | * | ||
22 | * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and | ||
23 | * invpcid (%rcx), %rax in long mode. | ||
24 | */ | ||
25 | asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01" | ||
26 | : : "m" (desc), "a" (type), "c" (&desc) : "memory"); | ||
27 | } | ||
28 | |||
29 | #define INVPCID_TYPE_INDIV_ADDR 0 | ||
30 | #define INVPCID_TYPE_SINGLE_CTXT 1 | ||
31 | #define INVPCID_TYPE_ALL_INCL_GLOBAL 2 | ||
32 | #define INVPCID_TYPE_ALL_NON_GLOBAL 3 | ||
33 | |||
34 | /* Flush all mappings for a given pcid and addr, not including globals. */ | ||
35 | static inline void invpcid_flush_one(unsigned long pcid, | ||
36 | unsigned long addr) | ||
37 | { | ||
38 | __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR); | ||
39 | } | ||
40 | |||
41 | /* Flush all mappings for a given PCID, not including globals. */ | ||
42 | static inline void invpcid_flush_single_context(unsigned long pcid) | ||
43 | { | ||
44 | __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT); | ||
45 | } | ||
46 | |||
47 | /* Flush all mappings, including globals, for all PCIDs. */ | ||
48 | static inline void invpcid_flush_all(void) | ||
49 | { | ||
50 | __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL); | ||
51 | } | ||
52 | |||
53 | /* Flush all mappings for all PCIDs except globals. */ | ||
54 | static inline void invpcid_flush_all_nonglobals(void) | ||
55 | { | ||
56 | __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL); | ||
57 | } | ||
58 | |||
10 | #ifdef CONFIG_PARAVIRT | 59 | #ifdef CONFIG_PARAVIRT |
11 | #include <asm/paravirt.h> | 60 | #include <asm/paravirt.h> |
12 | #else | 61 | #else |
@@ -104,6 +153,15 @@ static inline void __native_flush_tlb_global(void) | |||
104 | { | 153 | { |
105 | unsigned long flags; | 154 | unsigned long flags; |
106 | 155 | ||
156 | if (static_cpu_has(X86_FEATURE_INVPCID)) { | ||
157 | /* | ||
158 | * Using INVPCID is considerably faster than a pair of writes | ||
159 | * to CR4 sandwiched inside an IRQ flag save/restore. | ||
160 | */ | ||
161 | invpcid_flush_all(); | ||
162 | return; | ||
163 | } | ||
164 | |||
107 | /* | 165 | /* |
108 | * Read-modify-write to CR4 - protect it from preemption and | 166 | * Read-modify-write to CR4 - protect it from preemption and |
109 | * from interrupts. (Use the raw variant because this code can | 167 | * from interrupts. (Use the raw variant because this code can |
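A minimal sketch of how the new INVPCID helpers compose from a caller's perspective (the caller is hypothetical, and PCID 0 is used because this kernel does not yet assign non-zero PCIDs; __flush_tlb_one() is the existing invlpg-based primitive):

    /* Hypothetical caller: flush one mapping, preferring INVPCID. */
    static void flush_one_kernel_addr(unsigned long addr)
    {
            if (static_cpu_has(X86_FEATURE_INVPCID))
                    invpcid_flush_one(0 /* pcid */, addr);
            else
                    __flush_tlb_one(addr);
    }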
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 6d7c5479bcea..174c4212780a 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h | |||
@@ -29,6 +29,8 @@ static inline cycles_t get_cycles(void) | |||
29 | return rdtsc(); | 29 | return rdtsc(); |
30 | } | 30 | } |
31 | 31 | ||
32 | extern struct system_counterval_t convert_art_to_tsc(cycle_t art); | ||
33 | |||
32 | extern void tsc_init(void); | 34 | extern void tsc_init(void); |
33 | extern void mark_tsc_unstable(char *reason); | 35 | extern void mark_tsc_unstable(char *reason); |
34 | extern int unsynchronized_tsc(void); | 36 | extern int unsynchronized_tsc(void); |
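convert_art_to_tsc() relates ART (the Always Running Timer) captures to the TSC timescale. Per the Intel SDM (not visible in this hunk), the two counters are tied by a fixed ratio, roughly TSC = ART * (CPUID.15H:EBX / CPUID.15H:EAX) + offset; consumers such as PTP cross-timestamping use this helper rather than reimplementing the arithmetic.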
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index b89c34c4019b..307698688fa1 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h | |||
@@ -8,7 +8,7 @@ | |||
8 | #include <linux/errno.h> | 8 | #include <linux/errno.h> |
9 | #include <linux/lockdep.h> | 9 | #include <linux/lockdep.h> |
10 | #include <asm/alternative.h> | 10 | #include <asm/alternative.h> |
11 | #include <asm/cpufeature.h> | 11 | #include <asm/cpufeatures.h> |
12 | #include <asm/page.h> | 12 | #include <asm/page.h> |
13 | 13 | ||
14 | /* | 14 | /* |
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index deabaf9759b6..43dc55be524e 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h | |||
@@ -13,9 +13,6 @@ struct vdso_image { | |||
13 | void *data; | 13 | void *data; |
14 | unsigned long size; /* Always a multiple of PAGE_SIZE */ | 14 | unsigned long size; /* Always a multiple of PAGE_SIZE */ |
15 | 15 | ||
16 | /* text_mapping.pages is big enough for data/size page pointers */ | ||
17 | struct vm_special_mapping text_mapping; | ||
18 | |||
19 | unsigned long alt, alt_len; | 16 | unsigned long alt, alt_len; |
20 | 17 | ||
21 | long sym_vvar_start; /* Negative offset to the vvar area */ | 18 | long sym_vvar_start; /* Negative offset to the vvar area */ |
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index f556c4843aa1..e728699db774 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h | |||
@@ -37,6 +37,12 @@ struct vsyscall_gtod_data { | |||
37 | }; | 37 | }; |
38 | extern struct vsyscall_gtod_data vsyscall_gtod_data; | 38 | extern struct vsyscall_gtod_data vsyscall_gtod_data; |
39 | 39 | ||
40 | extern int vclocks_used; | ||
41 | static inline bool vclock_was_used(int vclock) | ||
42 | { | ||
43 | return READ_ONCE(vclocks_used) & (1 << vclock); | ||
44 | } | ||
45 | |||
40 | static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s) | 46 | static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s) |
41 | { | 47 | { |
42 | unsigned ret; | 48 | unsigned ret; |
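vclocks_used is a bitmask indexed by vclock mode, so vclock_was_used() is a single load-and-test. A hedged sketch of the corresponding producer side (the real update site is elsewhere in this series; treat this as illustrative only):

    /* Illustrative: record that the TSC vclock has been enabled once. */
    WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << VCLOCK_TSC));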
diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h index d485232f1e9f..62d4111c1c54 100644 --- a/arch/x86/include/uapi/asm/sigcontext.h +++ b/arch/x86/include/uapi/asm/sigcontext.h | |||
@@ -256,7 +256,7 @@ struct sigcontext_64 { | |||
256 | __u16 cs; | 256 | __u16 cs; |
257 | __u16 gs; | 257 | __u16 gs; |
258 | __u16 fs; | 258 | __u16 fs; |
259 | __u16 __pad0; | 259 | __u16 ss; |
260 | __u64 err; | 260 | __u64 err; |
261 | __u64 trapno; | 261 | __u64 trapno; |
262 | __u64 oldmask; | 262 | __u64 oldmask; |
@@ -341,9 +341,37 @@ struct sigcontext { | |||
341 | __u64 rip; | 341 | __u64 rip; |
342 | __u64 eflags; /* RFLAGS */ | 342 | __u64 eflags; /* RFLAGS */ |
343 | __u16 cs; | 343 | __u16 cs; |
344 | |||
345 | /* | ||
346 | * Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"), | ||
347 | * Linux saved and restored fs and gs in these slots. This | ||
348 | * was counterproductive, as fsbase and gsbase were never | ||
349 | * saved, so arch_prctl was presumably unreliable. | ||
350 | * | ||
351 | * These slots should never be reused without extreme caution: | ||
352 | * | ||
353 | * - Some DOSEMU versions stash fs and gs in these slots manually, | ||
354 | * thus overwriting anything the kernel expects to be preserved | ||
355 | * in these slots. | ||
356 | * | ||
357 | * - If these slots are ever needed for any other purpose, | ||
358 | * there is some risk that very old 64-bit binaries could get | ||
359 | * confused. I doubt that many such binaries still work, | ||
360 | * though, since the same patch in 2.5.64 also removed the | ||
361 | * 64-bit set_thread_area syscall, so it appears that there | ||
362 | * is no TLS API beyond modify_ldt that works in both pre- | ||
363 | * and post-2.5.64 kernels. | ||
364 | * | ||
365 | * If the kernel ever adds explicit fs, gs, fsbase, and gsbase | ||
366 | * save/restore, it will most likely need to be opt-in and use | ||
367 | * different context slots. | ||
368 | */ | ||
344 | __u16 gs; | 369 | __u16 gs; |
345 | __u16 fs; | 370 | __u16 fs; |
346 | __u16 __pad0; | 371 | union { |
372 | __u16 ss; /* If UC_SIGCONTEXT_SS */ | ||
373 | __u16 __pad0; /* Alias name for old (!UC_SIGCONTEXT_SS) user-space */ | ||
374 | }; | ||
347 | __u64 err; | 375 | __u64 err; |
348 | __u64 trapno; | 376 | __u64 trapno; |
349 | __u64 oldmask; | 377 | __u64 oldmask; |
diff --git a/arch/x86/include/uapi/asm/ucontext.h b/arch/x86/include/uapi/asm/ucontext.h index b7c29c8017f2..e3d1ec90616e 100644 --- a/arch/x86/include/uapi/asm/ucontext.h +++ b/arch/x86/include/uapi/asm/ucontext.h | |||
@@ -1,11 +1,54 @@ | |||
1 | #ifndef _ASM_X86_UCONTEXT_H | 1 | #ifndef _ASM_X86_UCONTEXT_H |
2 | #define _ASM_X86_UCONTEXT_H | 2 | #define _ASM_X86_UCONTEXT_H |
3 | 3 | ||
4 | #define UC_FP_XSTATE 0x1 /* indicates the presence of extended state | 4 | /* |
5 | * information in the memory layout pointed | 5 | * Indicates the presence of extended state information in the memory |
6 | * by the fpstate pointer in the ucontext's | 6 | * layout pointed by the fpstate pointer in the ucontext's sigcontext |
7 | * sigcontext struct (uc_mcontext). | 7 | * struct (uc_mcontext). |
8 | */ | 8 | */ |
9 | #define UC_FP_XSTATE 0x1 | ||
10 | |||
11 | #ifdef __x86_64__ | ||
12 | /* | ||
13 | * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on | ||
14 | * kernels that save SS in the sigcontext. All kernels that set | ||
15 | * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp | ||
16 | * regardless of SS (i.e. they implement espfix). | ||
17 | * | ||
18 | * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS | ||
19 | * when delivering a signal that came from 64-bit code. | ||
20 | * | ||
21 | * Sigreturn restores SS as follows: | ||
22 | * | ||
23 | * if (saved SS is valid || UC_STRICT_RESTORE_SS is set || | ||
24 | * saved CS is not 64-bit) | ||
25 | * new SS = saved SS (will fail IRET and signal if invalid) | ||
26 | * else | ||
27 | * new SS = a flat 32-bit data segment | ||
28 | * | ||
29 | * This behavior serves three purposes: | ||
30 | * | ||
31 | * - Legacy programs that construct a 64-bit sigcontext from scratch | ||
32 | * with zero or garbage in the SS slot (e.g. old CRIU) and call | ||
33 | * sigreturn will still work. | ||
34 | * | ||
35 | * - Old DOSEMU versions sometimes catch a signal from a segmented | ||
36 | * context, delete the old SS segment (with modify_ldt), and change | ||
37 | * the saved CS to a 64-bit segment. These DOSEMU versions expect | ||
38 | * sigreturn to send them back to 64-bit mode without killing them, | ||
39 | * despite the fact that the SS selector when the signal was raised is | ||
40 | * no longer valid. UC_STRICT_RESTORE_SS will be clear, so the kernel | ||
41 | * will fix up SS for these DOSEMU versions. | ||
42 | * | ||
43 | * - Old and new programs that catch a signal and return without | ||
44 | * modifying the saved context will end up in exactly the state they | ||
45 | * started in, even if they were running in a segmented context when | ||
46 | * the signal was raised. Old kernels would lose track of the | ||
47 | * previous SS value. | ||
48 | */ | ||
49 | #define UC_SIGCONTEXT_SS 0x2 | ||
50 | #define UC_STRICT_RESTORE_SS 0x4 | ||
51 | #endif | ||
9 | 52 | ||
10 | #include <asm-generic/ucontext.h> | 53 | #include <asm-generic/ucontext.h> |
11 | 54 | ||
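The sigreturn SS rules quoted in the comment above, rendered as C for clarity. ss_is_valid() and cs_is_64bit() are hypothetical stand-ins for the kernel's real descriptor checks; only the branch structure is taken from the comment:

    static u16 ss_to_restore(u16 saved_ss, u16 saved_cs, unsigned long uc_flags)
    {
            if (ss_is_valid(saved_ss) ||
                (uc_flags & UC_STRICT_RESTORE_SS) ||
                !cs_is_64bit(saved_cs))
                    return saved_ss;        /* IRET faults and signals if bogus */
            return __USER_DS;               /* flat 32-bit data segment */
    }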
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 9968f30cca3e..76f89e2b245a 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c | |||
@@ -53,7 +53,7 @@ void flat_init_apic_ldr(void) | |||
53 | apic_write(APIC_LDR, val); | 53 | apic_write(APIC_LDR, val); |
54 | } | 54 | } |
55 | 55 | ||
56 | static inline void _flat_send_IPI_mask(unsigned long mask, int vector) | 56 | static void _flat_send_IPI_mask(unsigned long mask, int vector) |
57 | { | 57 | { |
58 | unsigned long flags; | 58 | unsigned long flags; |
59 | 59 | ||
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index c80c02c6ec49..ab5c2c685a3c 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c | |||
@@ -30,7 +30,7 @@ static unsigned int numachip1_get_apic_id(unsigned long x) | |||
30 | unsigned long value; | 30 | unsigned long value; |
31 | unsigned int id = (x >> 24) & 0xff; | 31 | unsigned int id = (x >> 24) & 0xff; |
32 | 32 | ||
33 | if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) { | 33 | if (static_cpu_has(X86_FEATURE_NODEID_MSR)) { |
34 | rdmsrl(MSR_FAM10H_NODE_ID, value); | 34 | rdmsrl(MSR_FAM10H_NODE_ID, value); |
35 | id |= (value << 2) & 0xff00; | 35 | id |= (value << 2) & 0xff00; |
36 | } | 36 | } |
@@ -178,7 +178,7 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node) | |||
178 | this_cpu_write(cpu_llc_id, node); | 178 | this_cpu_write(cpu_llc_id, node); |
179 | 179 | ||
180 | /* Account for nodes per socket in multi-core-module processors */ | 180 | /* Account for nodes per socket in multi-core-module processors */ |
181 | if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) { | 181 | if (static_cpu_has(X86_FEATURE_NODEID_MSR)) { |
182 | rdmsrl(MSR_FAM10H_NODE_ID, val); | 182 | rdmsrl(MSR_FAM10H_NODE_ID, val); |
183 | nodes = ((val >> 3) & 7) + 1; | 183 | nodes = ((val >> 3) & 7) + 1; |
184 | } | 184 | } |
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index eb45fc9b6124..28bde88b0085 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c | |||
@@ -18,6 +18,66 @@ | |||
18 | #include <asm/proto.h> | 18 | #include <asm/proto.h> |
19 | #include <asm/ipi.h> | 19 | #include <asm/ipi.h> |
20 | 20 | ||
21 | void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest) | ||
22 | { | ||
23 | /* | ||
24 | * Subtle. In the case of the 'never do double writes' workaround | ||
25 | * we have to lock out interrupts to be safe. As we don't care | ||
26 | * about the value read we use an atomic rmw access to avoid costly | ||
27 | * cli/sti. Otherwise we use an even cheaper single atomic write | ||
28 | * to the APIC. | ||
29 | */ | ||
30 | unsigned int cfg; | ||
31 | |||
32 | /* | ||
33 | * Wait for idle. | ||
34 | */ | ||
35 | __xapic_wait_icr_idle(); | ||
36 | |||
37 | /* | ||
38 | * No need to touch the target chip field | ||
39 | */ | ||
40 | cfg = __prepare_ICR(shortcut, vector, dest); | ||
41 | |||
42 | /* | ||
43 | * Send the IPI. The write to APIC_ICR fires this off. | ||
44 | */ | ||
45 | native_apic_mem_write(APIC_ICR, cfg); | ||
46 | } | ||
47 | |||
48 | /* | ||
49 | * This is used to send an IPI with no shorthand notation (the destination is | ||
50 | * specified in bits 56 to 63 of the ICR). | ||
51 | */ | ||
52 | void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest) | ||
53 | { | ||
54 | unsigned long cfg; | ||
55 | |||
56 | /* | ||
57 | * Wait for idle. | ||
58 | */ | ||
59 | if (unlikely(vector == NMI_VECTOR)) | ||
60 | safe_apic_wait_icr_idle(); | ||
61 | else | ||
62 | __xapic_wait_icr_idle(); | ||
63 | |||
64 | /* | ||
65 | * prepare target chip field | ||
66 | */ | ||
67 | cfg = __prepare_ICR2(mask); | ||
68 | native_apic_mem_write(APIC_ICR2, cfg); | ||
69 | |||
70 | /* | ||
71 | * program the ICR | ||
72 | */ | ||
73 | cfg = __prepare_ICR(0, vector, dest); | ||
74 | |||
75 | /* | ||
76 | * Send the IPI. The write to APIC_ICR fires this off. | ||
77 | */ | ||
78 | native_apic_mem_write(APIC_ICR, cfg); | ||
79 | } | ||
80 | |||
21 | void default_send_IPI_single_phys(int cpu, int vector) | 81 | void default_send_IPI_single_phys(int cpu, int vector) |
22 | { | 82 | { |
23 | unsigned long flags; | 83 | unsigned long flags; |
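For orientation, a hedged example of a call site for the shortcut helper added above (the call site itself is hypothetical; APIC_DEST_ALLBUT and apic->dest_logical are the standard shorthand constants):

    /* Hypothetical: send 'vector' to every CPU except the sender. */
    __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, apic->dest_logical);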
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 84a7524b202c..5c042466f274 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c | |||
@@ -59,7 +59,6 @@ void common(void) { | |||
59 | 59 | ||
60 | #ifdef CONFIG_PARAVIRT | 60 | #ifdef CONFIG_PARAVIRT |
61 | BLANK(); | 61 | BLANK(); |
62 | OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); | ||
63 | OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops); | 62 | OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops); |
64 | OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops); | 63 | OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops); |
65 | OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); | 64 | OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); |
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 6ce39025f467..ecdc1d217dc0 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c | |||
@@ -7,7 +7,7 @@ | |||
7 | #include <linux/lguest.h> | 7 | #include <linux/lguest.h> |
8 | #include "../../../drivers/lguest/lg.h" | 8 | #include "../../../drivers/lguest/lg.h" |
9 | 9 | ||
10 | #define __SYSCALL_I386(nr, sym, compat) [nr] = 1, | 10 | #define __SYSCALL_I386(nr, sym, qual) [nr] = 1, |
11 | static char syscalls[] = { | 11 | static char syscalls[] = { |
12 | #include <asm/syscalls_32.h> | 12 | #include <asm/syscalls_32.h> |
13 | }; | 13 | }; |
@@ -52,6 +52,11 @@ void foo(void) | |||
52 | DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - | 52 | DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - |
53 | offsetofend(struct tss_struct, SYSENTER_stack)); | 53 | offsetofend(struct tss_struct, SYSENTER_stack)); |
54 | 54 | ||
55 | /* Offset from cpu_tss to SYSENTER_stack */ | ||
56 | OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack); | ||
57 | /* Size of SYSENTER_stack */ | ||
58 | DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack)); | ||
59 | |||
55 | #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) | 60 | #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) |
56 | BLANK(); | 61 | BLANK(); |
57 | OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); | 62 | OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); |
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index f2edafb5f24e..d875f97d4e0b 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c | |||
@@ -4,17 +4,11 @@ | |||
4 | 4 | ||
5 | #include <asm/ia32.h> | 5 | #include <asm/ia32.h> |
6 | 6 | ||
7 | #define __SYSCALL_64(nr, sym, compat) [nr] = 1, | 7 | #define __SYSCALL_64(nr, sym, qual) [nr] = 1, |
8 | #define __SYSCALL_COMMON(nr, sym, compat) [nr] = 1, | ||
9 | #ifdef CONFIG_X86_X32_ABI | ||
10 | # define __SYSCALL_X32(nr, sym, compat) [nr] = 1, | ||
11 | #else | ||
12 | # define __SYSCALL_X32(nr, sym, compat) /* nothing */ | ||
13 | #endif | ||
14 | static char syscalls_64[] = { | 8 | static char syscalls_64[] = { |
15 | #include <asm/syscalls_64.h> | 9 | #include <asm/syscalls_64.h> |
16 | }; | 10 | }; |
17 | #define __SYSCALL_I386(nr, sym, compat) [nr] = 1, | 11 | #define __SYSCALL_I386(nr, sym, qual) [nr] = 1, |
18 | static char syscalls_ia32[] = { | 12 | static char syscalls_ia32[] = { |
19 | #include <asm/syscalls_32.h> | 13 | #include <asm/syscalls_32.h> |
20 | }; | 14 | }; |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 7a60424d63fa..0d373d7affc8 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -42,7 +42,7 @@ ifdef CONFIG_X86_FEATURE_NAMES | |||
42 | quiet_cmd_mkcapflags = MKCAP $@ | 42 | quiet_cmd_mkcapflags = MKCAP $@ |
43 | cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@ | 43 | cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@ |
44 | 44 | ||
45 | cpufeature = $(src)/../../include/asm/cpufeature.h | 45 | cpufeature = $(src)/../../include/asm/cpufeatures.h |
46 | 46 | ||
47 | targets += capflags.c | 47 | targets += capflags.c |
48 | $(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE | 48 | $(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE |
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index ce197bb7c129..1661d8ec9280 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c | |||
@@ -1,7 +1,7 @@ | |||
1 | #include <linux/bitops.h> | 1 | #include <linux/bitops.h> |
2 | #include <linux/kernel.h> | 2 | #include <linux/kernel.h> |
3 | 3 | ||
4 | #include <asm/processor.h> | 4 | #include <asm/cpufeature.h> |
5 | #include <asm/e820.h> | 5 | #include <asm/e820.h> |
6 | #include <asm/mtrr.h> | 6 | #include <asm/mtrr.h> |
7 | #include <asm/msr.h> | 7 | #include <asm/msr.h> |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 81cf716f6f97..249461f95851 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -162,6 +162,22 @@ static int __init x86_mpx_setup(char *s) | |||
162 | } | 162 | } |
163 | __setup("nompx", x86_mpx_setup); | 163 | __setup("nompx", x86_mpx_setup); |
164 | 164 | ||
165 | static int __init x86_noinvpcid_setup(char *s) | ||
166 | { | ||
167 | /* noinvpcid doesn't accept parameters */ | ||
168 | if (s) | ||
169 | return -EINVAL; | ||
170 | |||
171 | /* do not emit a message if the feature is not present */ | ||
172 | if (!boot_cpu_has(X86_FEATURE_INVPCID)) | ||
173 | return 0; | ||
174 | |||
175 | setup_clear_cpu_cap(X86_FEATURE_INVPCID); | ||
176 | pr_info("noinvpcid: INVPCID feature disabled\n"); | ||
177 | return 0; | ||
178 | } | ||
179 | early_param("noinvpcid", x86_noinvpcid_setup); | ||
180 | |||
165 | #ifdef CONFIG_X86_32 | 181 | #ifdef CONFIG_X86_32 |
166 | static int cachesize_override = -1; | 182 | static int cachesize_override = -1; |
167 | static int disable_x86_serial_nr = 1; | 183 | static int disable_x86_serial_nr = 1; |
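Usage note for the handler above: only the bare flag is accepted; "noinvpcid=<anything>" returns -EINVAL. An illustrative (assumed GRUB-style) boot entry:

    linux /boot/vmlinuz root=/dev/sda1 ro noinvpcid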
@@ -801,6 +817,31 @@ static void detect_nopl(struct cpuinfo_x86 *c) | |||
801 | #else | 817 | #else |
802 | set_cpu_cap(c, X86_FEATURE_NOPL); | 818 | set_cpu_cap(c, X86_FEATURE_NOPL); |
803 | #endif | 819 | #endif |
820 | |||
821 | /* | ||
822 | * ESPFIX is a strange bug. All real CPUs have it. Paravirt | ||
823 | * systems that run Linux at CPL > 0 may or may not have the | ||
824 | * issue, but, even if they have the issue, there's absolutely | ||
825 | * nothing we can do about it because we can't use the real IRET | ||
826 | * instruction. | ||
827 | * | ||
828 | * NB: For the time being, only 32-bit kernels support | ||
829 | * X86_BUG_ESPFIX as such. 64-bit kernels directly choose | ||
830 | * whether to apply espfix using paravirt hooks. If any | ||
831 | * non-paravirt system ever shows up that does *not* have the | ||
832 | * ESPFIX issue, we can change this. | ||
833 | */ | ||
834 | #ifdef CONFIG_X86_32 | ||
835 | #ifdef CONFIG_PARAVIRT | ||
836 | do { | ||
837 | extern void native_iret(void); | ||
838 | if (pv_cpu_ops.iret == native_iret) | ||
839 | set_cpu_bug(c, X86_BUG_ESPFIX); | ||
840 | } while (0); | ||
841 | #else | ||
842 | set_cpu_bug(c, X86_BUG_ESPFIX); | ||
843 | #endif | ||
844 | #endif | ||
804 | } | 845 | } |
805 | 846 | ||
806 | static void generic_identify(struct cpuinfo_x86 *c) | 847 | static void generic_identify(struct cpuinfo_x86 *c) |
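One non-obvious idiom in the ESPFIX hunk above: the do { ... } while (0) block never loops. It exists only to open a scope, so the local extern declaration of native_iret() stays confined to the paravirt check instead of leaking into the rest of detect_nopl().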
@@ -1475,20 +1516,6 @@ void cpu_init(void) | |||
1475 | } | 1516 | } |
1476 | #endif | 1517 | #endif |
1477 | 1518 | ||
1478 | #ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS | ||
1479 | void warn_pre_alternatives(void) | ||
1480 | { | ||
1481 | WARN(1, "You're using static_cpu_has before alternatives have run!\n"); | ||
1482 | } | ||
1483 | EXPORT_SYMBOL_GPL(warn_pre_alternatives); | ||
1484 | #endif | ||
1485 | |||
1486 | inline bool __static_cpu_has_safe(u16 bit) | ||
1487 | { | ||
1488 | return boot_cpu_has(bit); | ||
1489 | } | ||
1490 | EXPORT_SYMBOL_GPL(__static_cpu_has_safe); | ||
1491 | |||
1492 | static void bsp_resume(void) | 1519 | static void bsp_resume(void) |
1493 | { | 1520 | { |
1494 | if (this_cpu->c_bsp_resume) | 1521 | if (this_cpu->c_bsp_resume) |
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 187bb583d0df..6adef9cac23e 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/timer.h> | 8 | #include <linux/timer.h> |
9 | #include <asm/pci-direct.h> | 9 | #include <asm/pci-direct.h> |
10 | #include <asm/tsc.h> | 10 | #include <asm/tsc.h> |
11 | #include <asm/cpufeature.h> | ||
11 | 12 | ||
12 | #include "cpu.h" | 13 | #include "cpu.h" |
13 | 14 | ||
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 38766c2b5b00..1f7fdb91a818 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -8,7 +8,7 @@ | |||
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/uaccess.h> | 9 | #include <linux/uaccess.h> |
10 | 10 | ||
11 | #include <asm/processor.h> | 11 | #include <asm/cpufeature.h> |
12 | #include <asm/pgtable.h> | 12 | #include <asm/pgtable.h> |
13 | #include <asm/msr.h> | 13 | #include <asm/msr.h> |
14 | #include <asm/bugs.h> | 14 | #include <asm/bugs.h> |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 6ed779efff26..de6626c18e42 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -14,7 +14,7 @@ | |||
14 | #include <linux/sysfs.h> | 14 | #include <linux/sysfs.h> |
15 | #include <linux/pci.h> | 15 | #include <linux/pci.h> |
16 | 16 | ||
17 | #include <asm/processor.h> | 17 | #include <asm/cpufeature.h> |
18 | #include <asm/amd_nb.h> | 18 | #include <asm/amd_nb.h> |
19 | #include <asm/smp.h> | 19 | #include <asm/smp.h> |
20 | 20 | ||
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c index afa9f0d487ea..fbb5e90557a5 100644 --- a/arch/x86/kernel/cpu/match.c +++ b/arch/x86/kernel/cpu/match.c | |||
@@ -1,5 +1,5 @@ | |||
1 | #include <asm/cpu_device_id.h> | 1 | #include <asm/cpu_device_id.h> |
2 | #include <asm/processor.h> | 2 | #include <asm/cpufeature.h> |
3 | #include <linux/cpu.h> | 3 | #include <linux/cpu.h> |
4 | #include <linux/module.h> | 4 | #include <linux/module.h> |
5 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 524f2a8492d7..f0c921b03e42 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -1578,6 +1578,17 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) | |||
1578 | 1578 | ||
1579 | if (c->x86 == 6 && c->x86_model == 45) | 1579 | if (c->x86 == 6 && c->x86_model == 45) |
1580 | quirk_no_way_out = quirk_sandybridge_ifu; | 1580 | quirk_no_way_out = quirk_sandybridge_ifu; |
1581 | /* | ||
1582 | * MCG_CAP.MCG_SER_P is necessary but not sufficient to know | ||
1583 | * whether this processor will actually generate recoverable | ||
1584 | * machine checks. Check to see if this is an E7 model Xeon. | ||
1585 | * We can't do a model number check because E5 and E7 use the | ||
1586 | * same model number. E5 doesn't support recovery, E7 does. | ||
1587 | */ | ||
1588 | if (mca_cfg.recovery || (mca_cfg.ser && | ||
1589 | !strncmp(c->x86_model_id, | ||
1590 | "Intel(R) Xeon(R) CPU E7-", 24))) | ||
1591 | set_cpu_cap(c, X86_FEATURE_MCE_RECOVERY); | ||
1581 | } | 1592 | } |
1582 | if (cfg->monarch_timeout < 0) | 1593 | if (cfg->monarch_timeout < 0) |
1583 | cfg->monarch_timeout = 0; | 1594 | cfg->monarch_timeout = 0; |
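For reference, a matching model string looks like "Intel(R) Xeon(R) CPU E7-8880 v3 @ 2.30GHz" (the SKU is an illustrative example). The 24-character prefix comparison deliberately stops right after "E7-", so every E7 SKU matches while E5 parts, which share the same model number, do not.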
@@ -2030,6 +2041,8 @@ static int __init mcheck_enable(char *str) | |||
2030 | cfg->bootlog = (str[0] == 'b'); | 2041 | cfg->bootlog = (str[0] == 'b'); |
2031 | else if (!strcmp(str, "bios_cmci_threshold")) | 2042 | else if (!strcmp(str, "bios_cmci_threshold")) |
2032 | cfg->bios_cmci_threshold = true; | 2043 | cfg->bios_cmci_threshold = true; |
2044 | else if (!strcmp(str, "recovery")) | ||
2045 | cfg->recovery = true; | ||
2033 | else if (isdigit(str[0])) { | 2046 | else if (isdigit(str[0])) { |
2034 | if (get_option(&str, &cfg->tolerant) == 2) | 2047 | if (get_option(&str, &cfg->tolerant) == 2) |
2035 | get_option(&str, &(cfg->monarch_timeout)); | 2048 | get_option(&str, &(cfg->monarch_timeout)); |
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 75d3aab5f7b2..8581963894c7 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c | |||
@@ -431,10 +431,6 @@ int __init save_microcode_in_initrd_amd(void) | |||
431 | else | 431 | else |
432 | container = cont_va; | 432 | container = cont_va; |
433 | 433 | ||
434 | if (ucode_new_rev) | ||
435 | pr_info("microcode: updated early to new patch_level=0x%08x\n", | ||
436 | ucode_new_rev); | ||
437 | |||
438 | eax = cpuid_eax(0x00000001); | 434 | eax = cpuid_eax(0x00000001); |
439 | eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); | 435 | eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); |
440 | 436 | ||
@@ -469,8 +465,7 @@ void reload_ucode_amd(void) | |||
469 | if (mc && rev < mc->hdr.patch_id) { | 465 | if (mc && rev < mc->hdr.patch_id) { |
470 | if (!__apply_microcode_amd(mc)) { | 466 | if (!__apply_microcode_amd(mc)) { |
471 | ucode_new_rev = mc->hdr.patch_id; | 467 | ucode_new_rev = mc->hdr.patch_id; |
472 | pr_info("microcode: reload patch_level=0x%08x\n", | 468 | pr_info("reload patch_level=0x%08x\n", ucode_new_rev); |
473 | ucode_new_rev); | ||
474 | } | 469 | } |
475 | } | 470 | } |
476 | } | 471 | } |
@@ -793,15 +788,13 @@ static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover) | |||
793 | return -EINVAL; | 788 | return -EINVAL; |
794 | } | 789 | } |
795 | 790 | ||
796 | patch->data = kzalloc(patch_size, GFP_KERNEL); | 791 | patch->data = kmemdup(fw + SECTION_HDR_SIZE, patch_size, GFP_KERNEL); |
797 | if (!patch->data) { | 792 | if (!patch->data) { |
798 | pr_err("Patch data allocation failure.\n"); | 793 | pr_err("Patch data allocation failure.\n"); |
799 | kfree(patch); | 794 | kfree(patch); |
800 | return -EINVAL; | 795 | return -EINVAL; |
801 | } | 796 | } |
802 | 797 | ||
803 | /* All looks ok, copy patch... */ | ||
804 | memcpy(patch->data, fw + SECTION_HDR_SIZE, patch_size); | ||
805 | INIT_LIST_HEAD(&patch->plist); | 798 | INIT_LIST_HEAD(&patch->plist); |
806 | patch->patch_id = mc_hdr->patch_id; | 799 | patch->patch_id = mc_hdr->patch_id; |
807 | patch->equiv_cpu = proc_id; | 800 | patch->equiv_cpu = proc_id; |
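The kmemdup() conversion above is behavior-preserving: the old kzalloc()'s zeroing was redundant because the memcpy() covered all patch_size bytes anyway. For readers unfamiliar with the helper, kmemdup() is essentially (simplified from mm/util.c):

    void *kmemdup(const void *src, size_t len, gfp_t gfp)
    {
            void *p = kmalloc_track_caller(len, gfp);

            if (p)
                    memcpy(p, src, len);
            return p;
    }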
@@ -957,6 +950,10 @@ struct microcode_ops * __init init_amd_microcode(void) | |||
957 | return NULL; | 950 | return NULL; |
958 | } | 951 | } |
959 | 952 | ||
953 | if (ucode_new_rev) | ||
954 | pr_info_once("microcode updated early to new patch_level=0x%08x\n", | ||
955 | ucode_new_rev); | ||
956 | |||
960 | return &microcode_amd_ops; | 957 |
961 | } | 958 | } |
962 | 959 | ||
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index faec7120c508..ac360bfbbdb6 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c | |||
@@ -43,16 +43,8 @@ | |||
43 | #define MICROCODE_VERSION "2.01" | 43 | #define MICROCODE_VERSION "2.01" |
44 | 44 | ||
45 | static struct microcode_ops *microcode_ops; | 45 | static struct microcode_ops *microcode_ops; |
46 | |||
47 | static bool dis_ucode_ldr; | 46 | static bool dis_ucode_ldr; |
48 | 47 | ||
49 | static int __init disable_loader(char *str) | ||
50 | { | ||
51 | dis_ucode_ldr = true; | ||
52 | return 1; | ||
53 | } | ||
54 | __setup("dis_ucode_ldr", disable_loader); | ||
55 | |||
56 | /* | 48 | /* |
57 | * Synchronization. | 49 | * Synchronization. |
58 | * | 50 | * |
@@ -81,15 +73,16 @@ struct cpu_info_ctx { | |||
81 | 73 | ||
82 | static bool __init check_loader_disabled_bsp(void) | 74 | static bool __init check_loader_disabled_bsp(void) |
83 | { | 75 | { |
76 | static const char *__dis_opt_str = "dis_ucode_ldr"; | ||
77 | |||
84 | #ifdef CONFIG_X86_32 | 78 | #ifdef CONFIG_X86_32 |
85 | const char *cmdline = (const char *)__pa_nodebug(boot_command_line); | 79 | const char *cmdline = (const char *)__pa_nodebug(boot_command_line); |
86 | const char *opt = "dis_ucode_ldr"; | 80 | const char *option = (const char *)__pa_nodebug(__dis_opt_str); |
87 | const char *option = (const char *)__pa_nodebug(opt); | ||
88 | bool *res = (bool *)__pa_nodebug(&dis_ucode_ldr); | 81 | bool *res = (bool *)__pa_nodebug(&dis_ucode_ldr); |
89 | 82 | ||
90 | #else /* CONFIG_X86_64 */ | 83 | #else /* CONFIG_X86_64 */ |
91 | const char *cmdline = boot_command_line; | 84 | const char *cmdline = boot_command_line; |
92 | const char *option = "dis_ucode_ldr"; | 85 | const char *option = __dis_opt_str; |
93 | bool *res = &dis_ucode_ldr; | 86 | bool *res = &dis_ucode_ldr; |
94 | #endif | 87 | #endif |
95 | 88 | ||
@@ -479,7 +472,7 @@ static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw) | |||
479 | enum ucode_state ustate; | 472 | enum ucode_state ustate; |
480 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 473 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
481 | 474 | ||
482 | if (uci && uci->valid) | 475 | if (uci->valid) |
483 | return UCODE_OK; | 476 | return UCODE_OK; |
484 | 477 | ||
485 | if (collect_cpu_info(cpu)) | 478 | if (collect_cpu_info(cpu)) |
@@ -630,7 +623,7 @@ int __init microcode_init(void) | |||
630 | struct cpuinfo_x86 *c = &boot_cpu_data; | 623 | struct cpuinfo_x86 *c = &boot_cpu_data; |
631 | int error; | 624 | int error; |
632 | 625 | ||
633 | if (paravirt_enabled() || dis_ucode_ldr) | 626 | if (dis_ucode_ldr) |
634 | return -EINVAL; | 627 | return -EINVAL; |
635 | 628 | ||
636 | if (c->x86_vendor == X86_VENDOR_INTEL) | 629 | if (c->x86_vendor == X86_VENDOR_INTEL) |
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index ee81c544ee0d..cbb3cf09b065 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c | |||
@@ -39,9 +39,15 @@ | |||
39 | #include <asm/setup.h> | 39 | #include <asm/setup.h> |
40 | #include <asm/msr.h> | 40 | #include <asm/msr.h> |
41 | 41 | ||
42 | static unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT]; | 42 | /* |
43 | * Temporary microcode blob pointer storage. We note here the pointers to | ||
44 | * microcode blobs we've got from whatever storage (detached initrd, builtin). | ||
45 | * Later on, we put those into final storage mc_saved_data.mc_saved. | ||
46 | */ | ||
47 | static unsigned long mc_tmp_ptrs[MAX_UCODE_COUNT]; | ||
48 | |||
43 | static struct mc_saved_data { | 49 | static struct mc_saved_data { |
44 | unsigned int mc_saved_count; | 50 | unsigned int num_saved; |
45 | struct microcode_intel **mc_saved; | 51 | struct microcode_intel **mc_saved; |
46 | } mc_saved_data; | 52 | } mc_saved_data; |
47 | 53 | ||
@@ -78,53 +84,50 @@ load_microcode_early(struct microcode_intel **saved, | |||
78 | } | 84 | } |
79 | 85 | ||
80 | static inline void | 86 | static inline void |
81 | copy_initrd_ptrs(struct microcode_intel **mc_saved, unsigned long *initrd, | 87 | copy_ptrs(struct microcode_intel **mc_saved, unsigned long *mc_ptrs, |
82 | unsigned long off, int num_saved) | 88 | unsigned long off, int num_saved) |
83 | { | 89 | { |
84 | int i; | 90 | int i; |
85 | 91 | ||
86 | for (i = 0; i < num_saved; i++) | 92 | for (i = 0; i < num_saved; i++) |
87 | mc_saved[i] = (struct microcode_intel *)(initrd[i] + off); | 93 | mc_saved[i] = (struct microcode_intel *)(mc_ptrs[i] + off); |
88 | } | 94 | } |
89 | 95 | ||
90 | #ifdef CONFIG_X86_32 | 96 | #ifdef CONFIG_X86_32 |
91 | static void | 97 | static void |
92 | microcode_phys(struct microcode_intel **mc_saved_tmp, | 98 | microcode_phys(struct microcode_intel **mc_saved_tmp, struct mc_saved_data *mcs) |
93 | struct mc_saved_data *mc_saved_data) | ||
94 | { | 99 | { |
95 | int i; | 100 | int i; |
96 | struct microcode_intel ***mc_saved; | 101 | struct microcode_intel ***mc_saved; |
97 | 102 | ||
98 | mc_saved = (struct microcode_intel ***) | 103 | mc_saved = (struct microcode_intel ***)__pa_nodebug(&mcs->mc_saved); |
99 | __pa_nodebug(&mc_saved_data->mc_saved); | 104 | |
100 | for (i = 0; i < mc_saved_data->mc_saved_count; i++) { | 105 | for (i = 0; i < mcs->num_saved; i++) { |
101 | struct microcode_intel *p; | 106 | struct microcode_intel *p; |
102 | 107 | ||
103 | p = *(struct microcode_intel **) | 108 | p = *(struct microcode_intel **)__pa_nodebug(mcs->mc_saved + i); |
104 | __pa_nodebug(mc_saved_data->mc_saved + i); | ||
105 | mc_saved_tmp[i] = (struct microcode_intel *)__pa_nodebug(p); | 109 | mc_saved_tmp[i] = (struct microcode_intel *)__pa_nodebug(p); |
106 | } | 110 | } |
107 | } | 111 | } |
108 | #endif | 112 | #endif |
109 | 113 | ||
110 | static enum ucode_state | 114 | static enum ucode_state |
111 | load_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd, | 115 | load_microcode(struct mc_saved_data *mcs, unsigned long *mc_ptrs, |
112 | unsigned long initrd_start, struct ucode_cpu_info *uci) | 116 | unsigned long offset, struct ucode_cpu_info *uci) |
113 | { | 117 | { |
114 | struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; | 118 | struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; |
115 | unsigned int count = mc_saved_data->mc_saved_count; | 119 | unsigned int count = mcs->num_saved; |
116 | 120 | ||
117 | if (!mc_saved_data->mc_saved) { | 121 | if (!mcs->mc_saved) { |
118 | copy_initrd_ptrs(mc_saved_tmp, initrd, initrd_start, count); | 122 | copy_ptrs(mc_saved_tmp, mc_ptrs, offset, count); |
119 | 123 | ||
120 | return load_microcode_early(mc_saved_tmp, count, uci); | 124 | return load_microcode_early(mc_saved_tmp, count, uci); |
121 | } else { | 125 | } else { |
122 | #ifdef CONFIG_X86_32 | 126 | #ifdef CONFIG_X86_32 |
123 | microcode_phys(mc_saved_tmp, mc_saved_data); | 127 | microcode_phys(mc_saved_tmp, mcs); |
124 | return load_microcode_early(mc_saved_tmp, count, uci); | 128 | return load_microcode_early(mc_saved_tmp, count, uci); |
125 | #else | 129 | #else |
126 | return load_microcode_early(mc_saved_data->mc_saved, | 130 | return load_microcode_early(mcs->mc_saved, count, uci); |
127 | count, uci); | ||
128 | #endif | 131 | #endif |
129 | } | 132 | } |
130 | } | 133 | } |
@@ -175,25 +178,25 @@ matching_model_microcode(struct microcode_header_intel *mc_header, | |||
175 | } | 178 | } |
176 | 179 | ||
177 | static int | 180 | static int |
178 | save_microcode(struct mc_saved_data *mc_saved_data, | 181 | save_microcode(struct mc_saved_data *mcs, |
179 | struct microcode_intel **mc_saved_src, | 182 | struct microcode_intel **mc_saved_src, |
180 | unsigned int mc_saved_count) | 183 | unsigned int num_saved) |
181 | { | 184 | { |
182 | int i, j; | 185 | int i, j; |
183 | struct microcode_intel **saved_ptr; | 186 | struct microcode_intel **saved_ptr; |
184 | int ret; | 187 | int ret; |
185 | 188 | ||
186 | if (!mc_saved_count) | 189 | if (!num_saved) |
187 | return -EINVAL; | 190 | return -EINVAL; |
188 | 191 | ||
189 | /* | 192 | /* |
190 | * Copy new microcode data. | 193 | * Copy new microcode data. |
191 | */ | 194 | */ |
192 | saved_ptr = kcalloc(mc_saved_count, sizeof(struct microcode_intel *), GFP_KERNEL); | 195 | saved_ptr = kcalloc(num_saved, sizeof(struct microcode_intel *), GFP_KERNEL); |
193 | if (!saved_ptr) | 196 | if (!saved_ptr) |
194 | return -ENOMEM; | 197 | return -ENOMEM; |
195 | 198 | ||
196 | for (i = 0; i < mc_saved_count; i++) { | 199 | for (i = 0; i < num_saved; i++) { |
197 | struct microcode_header_intel *mc_hdr; | 200 | struct microcode_header_intel *mc_hdr; |
198 | struct microcode_intel *mc; | 201 | struct microcode_intel *mc; |
199 | unsigned long size; | 202 | unsigned long size; |
@@ -207,20 +210,18 @@ save_microcode(struct mc_saved_data *mc_saved_data, | |||
207 | mc_hdr = &mc->hdr; | 210 | mc_hdr = &mc->hdr; |
208 | size = get_totalsize(mc_hdr); | 211 | size = get_totalsize(mc_hdr); |
209 | 212 | ||
210 | saved_ptr[i] = kmalloc(size, GFP_KERNEL); | 213 | saved_ptr[i] = kmemdup(mc, size, GFP_KERNEL); |
211 | if (!saved_ptr[i]) { | 214 | if (!saved_ptr[i]) { |
212 | ret = -ENOMEM; | 215 | ret = -ENOMEM; |
213 | goto err; | 216 | goto err; |
214 | } | 217 | } |
215 | |||
216 | memcpy(saved_ptr[i], mc, size); | ||
217 | } | 218 | } |
218 | 219 | ||
219 | /* | 220 | /* |
220 | * Point to newly saved microcode. | 221 | * Point to newly saved microcode. |
221 | */ | 222 | */ |
222 | mc_saved_data->mc_saved = saved_ptr; | 223 | mcs->mc_saved = saved_ptr; |
223 | mc_saved_data->mc_saved_count = mc_saved_count; | 224 | mcs->num_saved = num_saved; |
224 | 225 | ||
225 | return 0; | 226 | return 0; |
226 | 227 | ||
@@ -284,22 +285,20 @@ static unsigned int _save_mc(struct microcode_intel **mc_saved, | |||
284 | * BSP can stay in the platform. | 285 | * BSP can stay in the platform. |
285 | */ | 286 | */ |
286 | static enum ucode_state __init | 287 | static enum ucode_state __init |
287 | get_matching_model_microcode(int cpu, unsigned long start, | 288 | get_matching_model_microcode(unsigned long start, void *data, size_t size, |
288 | void *data, size_t size, | 289 | struct mc_saved_data *mcs, unsigned long *mc_ptrs, |
289 | struct mc_saved_data *mc_saved_data, | ||
290 | unsigned long *mc_saved_in_initrd, | ||
291 | struct ucode_cpu_info *uci) | 290 | struct ucode_cpu_info *uci) |
292 | { | 291 | { |
293 | u8 *ucode_ptr = data; | 292 | struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; |
294 | unsigned int leftover = size; | 293 | struct microcode_header_intel *mc_header; |
294 | unsigned int num_saved = mcs->num_saved; | ||
295 | enum ucode_state state = UCODE_OK; | 295 | enum ucode_state state = UCODE_OK; |
296 | unsigned int leftover = size; | ||
297 | u8 *ucode_ptr = data; | ||
296 | unsigned int mc_size; | 298 | unsigned int mc_size; |
297 | struct microcode_header_intel *mc_header; | ||
298 | struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; | ||
299 | unsigned int mc_saved_count = mc_saved_data->mc_saved_count; | ||
300 | int i; | 299 | int i; |
301 | 300 | ||
302 | while (leftover && mc_saved_count < ARRAY_SIZE(mc_saved_tmp)) { | 301 | while (leftover && num_saved < ARRAY_SIZE(mc_saved_tmp)) { |
303 | 302 | ||
304 | if (leftover < sizeof(mc_header)) | 303 | if (leftover < sizeof(mc_header)) |
305 | break; | 304 | break; |
@@ -318,32 +317,31 @@ get_matching_model_microcode(int cpu, unsigned long start, | |||
318 | * the platform, we need to find and save microcode patches | 317 | * the platform, we need to find and save microcode patches |
319 | * with the same family and model as the BSP. | 318 | * with the same family and model as the BSP. |
320 | */ | 319 | */ |
321 | if (matching_model_microcode(mc_header, uci->cpu_sig.sig) != | 320 | if (matching_model_microcode(mc_header, uci->cpu_sig.sig) != UCODE_OK) { |
322 | UCODE_OK) { | ||
323 | ucode_ptr += mc_size; | 321 | ucode_ptr += mc_size; |
324 | continue; | 322 | continue; |
325 | } | 323 | } |
326 | 324 | ||
327 | mc_saved_count = _save_mc(mc_saved_tmp, ucode_ptr, mc_saved_count); | 325 | num_saved = _save_mc(mc_saved_tmp, ucode_ptr, num_saved); |
328 | 326 | ||
329 | ucode_ptr += mc_size; | 327 | ucode_ptr += mc_size; |
330 | } | 328 | } |
331 | 329 | ||
332 | if (leftover) { | 330 | if (leftover) { |
333 | state = UCODE_ERROR; | 331 | state = UCODE_ERROR; |
334 | goto out; | 332 | return state; |
335 | } | 333 | } |
336 | 334 | ||
337 | if (mc_saved_count == 0) { | 335 | if (!num_saved) { |
338 | state = UCODE_NFOUND; | 336 | state = UCODE_NFOUND; |
339 | goto out; | 337 | return state; |
340 | } | 338 | } |
341 | 339 | ||
342 | for (i = 0; i < mc_saved_count; i++) | 340 | for (i = 0; i < num_saved; i++) |
343 | mc_saved_in_initrd[i] = (unsigned long)mc_saved_tmp[i] - start; | 341 | mc_ptrs[i] = (unsigned long)mc_saved_tmp[i] - start; |
342 | |||
343 | mcs->num_saved = num_saved; | ||
344 | 344 | ||
345 | mc_saved_data->mc_saved_count = mc_saved_count; | ||
346 | out: | ||
347 | return state; | 345 | return state; |
348 | } | 346 | } |
349 | 347 | ||
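Note the subtraction at the end of get_matching_model_microcode(): mc_ptrs[] deliberately stores offsets from the image base rather than raw pointers, because the initrd's address changes between this early scan (physical addresses, pre-paging on 32-bit) and later use. save_microcode_in_initrd_intel() further down rebuilds real pointers by adding the then-current base via copy_ptrs().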
@@ -373,7 +371,7 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci) | |||
373 | native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); | 371 | native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); |
374 | csig.pf = 1 << ((val[1] >> 18) & 7); | 372 | csig.pf = 1 << ((val[1] >> 18) & 7); |
375 | } | 373 | } |
376 | native_wrmsr(MSR_IA32_UCODE_REV, 0, 0); | 374 | native_wrmsrl(MSR_IA32_UCODE_REV, 0); |
377 | 375 | ||
378 | /* As documented in the SDM: Do a CPUID 1 here */ | 376 | /* As documented in the SDM: Do a CPUID 1 here */ |
379 | sync_core(); | 377 | sync_core(); |
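The native_wrmsr() -> native_wrmsrl() change is cosmetic here: wrmsrl takes one 64-bit value instead of explicit lo/hi halves, and with a zero value the two forms program identical MSR state:

    native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);   /* lo = 0, hi = 0 */
    native_wrmsrl(MSR_IA32_UCODE_REV, 0);     /* same effect, single u64 */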
@@ -396,11 +394,11 @@ static void show_saved_mc(void) | |||
396 | unsigned int sig, pf, rev, total_size, data_size, date; | 394 | unsigned int sig, pf, rev, total_size, data_size, date; |
397 | struct ucode_cpu_info uci; | 395 | struct ucode_cpu_info uci; |
398 | 396 | ||
399 | if (mc_saved_data.mc_saved_count == 0) { | 397 | if (!mc_saved_data.num_saved) { |
400 | pr_debug("no microcode data saved.\n"); | 398 | pr_debug("no microcode data saved.\n"); |
401 | return; | 399 | return; |
402 | } | 400 | } |
403 | pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count); | 401 | pr_debug("Total microcode saved: %d\n", mc_saved_data.num_saved); |
404 | 402 | ||
405 | collect_cpu_info_early(&uci); | 403 | collect_cpu_info_early(&uci); |
406 | 404 | ||
@@ -409,7 +407,7 @@ static void show_saved_mc(void) | |||
409 | rev = uci.cpu_sig.rev; | 407 | rev = uci.cpu_sig.rev; |
410 | pr_debug("CPU: sig=0x%x, pf=0x%x, rev=0x%x\n", sig, pf, rev); | 408 | pr_debug("CPU: sig=0x%x, pf=0x%x, rev=0x%x\n", sig, pf, rev); |
411 | 409 | ||
412 | for (i = 0; i < mc_saved_data.mc_saved_count; i++) { | 410 | for (i = 0; i < mc_saved_data.num_saved; i++) { |
413 | struct microcode_header_intel *mc_saved_header; | 411 | struct microcode_header_intel *mc_saved_header; |
414 | struct extended_sigtable *ext_header; | 412 | struct extended_sigtable *ext_header; |
415 | int ext_sigcount; | 413 | int ext_sigcount; |
@@ -465,7 +463,7 @@ int save_mc_for_early(u8 *mc) | |||
465 | { | 463 | { |
466 | struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; | 464 | struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; |
467 | unsigned int mc_saved_count_init; | 465 | unsigned int mc_saved_count_init; |
468 | unsigned int mc_saved_count; | 466 | unsigned int num_saved; |
469 | struct microcode_intel **mc_saved; | 467 | struct microcode_intel **mc_saved; |
470 | int ret = 0; | 468 | int ret = 0; |
471 | int i; | 469 | int i; |
@@ -476,23 +474,23 @@ int save_mc_for_early(u8 *mc) | |||
476 | */ | 474 | */ |
477 | mutex_lock(&x86_cpu_microcode_mutex); | 475 | mutex_lock(&x86_cpu_microcode_mutex); |
478 | 476 | ||
479 | mc_saved_count_init = mc_saved_data.mc_saved_count; | 477 | mc_saved_count_init = mc_saved_data.num_saved; |
480 | mc_saved_count = mc_saved_data.mc_saved_count; | 478 | num_saved = mc_saved_data.num_saved; |
481 | mc_saved = mc_saved_data.mc_saved; | 479 | mc_saved = mc_saved_data.mc_saved; |
482 | 480 | ||
483 | if (mc_saved && mc_saved_count) | 481 | if (mc_saved && num_saved) |
484 | memcpy(mc_saved_tmp, mc_saved, | 482 | memcpy(mc_saved_tmp, mc_saved, |
485 | mc_saved_count * sizeof(struct microcode_intel *)); | 483 | num_saved * sizeof(struct microcode_intel *)); |
486 | /* | 484 | /* |
487 | * Save the microcode patch mc in mc_save_tmp structure if it's a newer | 485 | * Save the microcode patch mc in mc_save_tmp structure if it's a newer |
488 | * version. | 486 | * version. |
489 | */ | 487 | */ |
490 | mc_saved_count = _save_mc(mc_saved_tmp, mc, mc_saved_count); | 488 | num_saved = _save_mc(mc_saved_tmp, mc, num_saved); |
491 | 489 | ||
492 | /* | 490 | /* |
493 | * Save the mc_save_tmp in global mc_saved_data. | 491 | * Save the mc_save_tmp in global mc_saved_data. |
494 | */ | 492 | */ |
495 | ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count); | 493 | ret = save_microcode(&mc_saved_data, mc_saved_tmp, num_saved); |
496 | if (ret) { | 494 | if (ret) { |
497 | pr_err("Cannot save microcode patch.\n"); | 495 | pr_err("Cannot save microcode patch.\n"); |
498 | goto out; | 496 | goto out; |
@@ -536,7 +534,7 @@ static bool __init load_builtin_intel_microcode(struct cpio_data *cp) | |||
536 | 534 | ||
537 | static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin"; | 535 | static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin"; |
538 | static __init enum ucode_state | 536 | static __init enum ucode_state |
539 | scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd, | 537 | scan_microcode(struct mc_saved_data *mcs, unsigned long *mc_ptrs, |
540 | unsigned long start, unsigned long size, | 538 | unsigned long start, unsigned long size, |
541 | struct ucode_cpu_info *uci) | 539 | struct ucode_cpu_info *uci) |
542 | { | 540 | { |
@@ -551,14 +549,18 @@ scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd, | |||
551 | cd.data = NULL; | 549 | cd.data = NULL; |
552 | cd.size = 0; | 550 | cd.size = 0; |
553 | 551 | ||
554 | cd = find_cpio_data(p, (void *)start, size, &offset); | 552 | /* try built-in microcode if no initrd */ |
555 | if (!cd.data) { | 553 | if (!size) { |
556 | if (!load_builtin_intel_microcode(&cd)) | 554 | if (!load_builtin_intel_microcode(&cd)) |
557 | return UCODE_ERROR; | 555 | return UCODE_ERROR; |
556 | } else { | ||
557 | cd = find_cpio_data(p, (void *)start, size, &offset); | ||
558 | if (!cd.data) | ||
559 | return UCODE_ERROR; | ||
558 | } | 560 | } |
559 | 561 | ||
560 | return get_matching_model_microcode(0, start, cd.data, cd.size, | 562 | return get_matching_model_microcode(start, cd.data, cd.size, |
561 | mc_saved_data, initrd, uci); | 563 | mcs, mc_ptrs, uci); |
562 | } | 564 | } |
563 | 565 | ||
564 | /* | 566 | /* |
@@ -567,14 +569,11 @@ scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd, | |||
567 | static void | 569 | static void |
568 | print_ucode_info(struct ucode_cpu_info *uci, unsigned int date) | 570 | print_ucode_info(struct ucode_cpu_info *uci, unsigned int date) |
569 | { | 571 | { |
570 | int cpu = smp_processor_id(); | 572 | pr_info_once("microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n", |
571 | 573 | uci->cpu_sig.rev, | |
572 | pr_info("CPU%d microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n", | 574 | date & 0xffff, |
573 | cpu, | 575 | date >> 24, |
574 | uci->cpu_sig.rev, | 576 | (date >> 16) & 0xff); |
575 | date & 0xffff, | ||
576 | date >> 24, | ||
577 | (date >> 16) & 0xff); | ||
578 | } | 577 | } |
579 | 578 | ||
580 | #ifdef CONFIG_X86_32 | 579 | #ifdef CONFIG_X86_32 |
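Decoding aside: the microcode header date is BCD-packed into a u32 (month in bits 31-24, day in bits 23-16, year in bits 15-0), which is why the format string prints hex fields. For example, date = 0x03102016 comes out as "2016-03-10" through the %04x-%02x-%02x format above.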
@@ -603,19 +602,19 @@ void show_ucode_info_early(void) | |||
603 | */ | 602 | */ |
604 | static void print_ucode(struct ucode_cpu_info *uci) | 603 | static void print_ucode(struct ucode_cpu_info *uci) |
605 | { | 604 | { |
606 | struct microcode_intel *mc_intel; | 605 | struct microcode_intel *mc; |
607 | int *delay_ucode_info_p; | 606 | int *delay_ucode_info_p; |
608 | int *current_mc_date_p; | 607 | int *current_mc_date_p; |
609 | 608 | ||
610 | mc_intel = uci->mc; | 609 | mc = uci->mc; |
611 | if (mc_intel == NULL) | 610 | if (!mc) |
612 | return; | 611 | return; |
613 | 612 | ||
614 | delay_ucode_info_p = (int *)__pa_nodebug(&delay_ucode_info); | 613 | delay_ucode_info_p = (int *)__pa_nodebug(&delay_ucode_info); |
615 | current_mc_date_p = (int *)__pa_nodebug(&current_mc_date); | 614 |
616 | 615 | ||
617 | *delay_ucode_info_p = 1; | 616 | *delay_ucode_info_p = 1; |
618 | *current_mc_date_p = mc_intel->hdr.date; | 617 | *current_mc_date_p = mc->hdr.date; |
619 | } | 618 | } |
620 | #else | 619 | #else |
621 | 620 | ||
@@ -630,37 +629,35 @@ static inline void flush_tlb_early(void) | |||
630 | 629 | ||
631 | static inline void print_ucode(struct ucode_cpu_info *uci) | 630 | static inline void print_ucode(struct ucode_cpu_info *uci) |
632 | { | 631 | { |
633 | struct microcode_intel *mc_intel; | 632 | struct microcode_intel *mc; |
634 | 633 | ||
635 | mc_intel = uci->mc; | 634 | mc = uci->mc; |
636 | if (mc_intel == NULL) | 635 | if (!mc) |
637 | return; | 636 | return; |
638 | 637 | ||
639 | print_ucode_info(uci, mc_intel->hdr.date); | 638 | print_ucode_info(uci, mc->hdr.date); |
640 | } | 639 | } |
641 | #endif | 640 | #endif |
642 | 641 | ||
643 | static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) | 642 | static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) |
644 | { | 643 | { |
645 | struct microcode_intel *mc_intel; | 644 | struct microcode_intel *mc; |
646 | unsigned int val[2]; | 645 | unsigned int val[2]; |
647 | 646 | ||
648 | mc_intel = uci->mc; | 647 | mc = uci->mc; |
649 | if (mc_intel == NULL) | 648 | if (!mc) |
650 | return 0; | 649 | return 0; |
651 | 650 | ||
652 | /* write microcode via MSR 0x79 */ | 651 | /* write microcode via MSR 0x79 */ |
653 | native_wrmsr(MSR_IA32_UCODE_WRITE, | 652 | native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); |
654 | (unsigned long) mc_intel->bits, | 653 | native_wrmsrl(MSR_IA32_UCODE_REV, 0); |
655 | (unsigned long) mc_intel->bits >> 16 >> 16); | ||
656 | native_wrmsr(MSR_IA32_UCODE_REV, 0, 0); | ||
657 | 654 | ||
658 | /* As documented in the SDM: Do a CPUID 1 here */ | 655 | /* As documented in the SDM: Do a CPUID 1 here */ |
659 | sync_core(); | 656 | sync_core(); |
660 | 657 | ||
661 | /* get the current revision from MSR 0x8B */ | 658 | /* get the current revision from MSR 0x8B */ |
662 | native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); | 659 | native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); |
663 | if (val[1] != mc_intel->hdr.rev) | 660 | if (val[1] != mc->hdr.rev) |
664 | return -1; | 661 | return -1; |
665 | 662 | ||
666 | #ifdef CONFIG_X86_64 | 663 | #ifdef CONFIG_X86_64 |
@@ -672,25 +669,26 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) | |||
672 | if (early) | 669 | if (early) |
673 | print_ucode(uci); | 670 | print_ucode(uci); |
674 | else | 671 | else |
675 | print_ucode_info(uci, mc_intel->hdr.date); | 672 | print_ucode_info(uci, mc->hdr.date); |
676 | 673 | ||
677 | return 0; | 674 | return 0; |
678 | } | 675 | } |
679 | 676 | ||
680 | /* | 677 | /* |
681 | * This function converts microcode patch offsets previously stored in | 678 | * This function converts microcode patch offsets previously stored in |
682 | * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data. | 679 | * mc_tmp_ptrs to pointers and stores the pointers in mc_saved_data. |
683 | */ | 680 | */ |
684 | int __init save_microcode_in_initrd_intel(void) | 681 | int __init save_microcode_in_initrd_intel(void) |
685 | { | 682 | { |
686 | unsigned int count = mc_saved_data.mc_saved_count; | 683 | unsigned int count = mc_saved_data.num_saved; |
687 | struct microcode_intel *mc_saved[MAX_UCODE_COUNT]; | 684 | struct microcode_intel *mc_saved[MAX_UCODE_COUNT]; |
688 | int ret = 0; | 685 | int ret = 0; |
689 | 686 | ||
690 | if (count == 0) | 687 | if (!count) |
691 | return ret; | 688 | return ret; |
692 | 689 | ||
693 | copy_initrd_ptrs(mc_saved, mc_saved_in_initrd, initrd_start, count); | 690 | copy_ptrs(mc_saved, mc_tmp_ptrs, get_initrd_start(), count); |
691 | |||
694 | ret = save_microcode(&mc_saved_data, mc_saved, count); | 692 | ret = save_microcode(&mc_saved_data, mc_saved, count); |
695 | if (ret) | 693 | if (ret) |
696 | pr_err("Cannot save microcode patches from initrd.\n"); | 694 | pr_err("Cannot save microcode patches from initrd.\n"); |
@@ -701,8 +699,7 @@ int __init save_microcode_in_initrd_intel(void) | |||
701 | } | 699 | } |
702 | 700 | ||
703 | static void __init | 701 | static void __init |
704 | _load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data, | 702 | _load_ucode_intel_bsp(struct mc_saved_data *mcs, unsigned long *mc_ptrs, |
705 | unsigned long *initrd, | ||
706 | unsigned long start, unsigned long size) | 703 | unsigned long start, unsigned long size) |
707 | { | 704 | { |
708 | struct ucode_cpu_info uci; | 705 | struct ucode_cpu_info uci; |
@@ -710,11 +707,11 @@ _load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data, | |||
710 | 707 | ||
711 | collect_cpu_info_early(&uci); | 708 | collect_cpu_info_early(&uci); |
712 | 709 | ||
713 | ret = scan_microcode(mc_saved_data, initrd, start, size, &uci); | 710 | ret = scan_microcode(mcs, mc_ptrs, start, size, &uci); |
714 | if (ret != UCODE_OK) | 711 | if (ret != UCODE_OK) |
715 | return; | 712 | return; |
716 | 713 | ||
717 | ret = load_microcode(mc_saved_data, initrd, start, &uci); | 714 | ret = load_microcode(mcs, mc_ptrs, start, &uci); |
718 | if (ret != UCODE_OK) | 715 | if (ret != UCODE_OK) |
719 | return; | 716 | return; |
720 | 717 | ||
@@ -728,53 +725,49 @@ void __init load_ucode_intel_bsp(void) | |||
728 | struct boot_params *p; | 725 | struct boot_params *p; |
729 | 726 | ||
730 | p = (struct boot_params *)__pa_nodebug(&boot_params); | 727 | p = (struct boot_params *)__pa_nodebug(&boot_params); |
731 | start = p->hdr.ramdisk_image; | ||
732 | size = p->hdr.ramdisk_size; | 728 | size = p->hdr.ramdisk_size; |
733 | 729 | ||
734 | _load_ucode_intel_bsp( | 730 | /* |
735 | (struct mc_saved_data *)__pa_nodebug(&mc_saved_data), | 731 | * Set start only if we have an initrd image. We cannot use initrd_start |
736 | (unsigned long *)__pa_nodebug(&mc_saved_in_initrd), | 732 | * because it is not set that early yet. |
737 | start, size); | 733 | */ |
734 | start = (size ? p->hdr.ramdisk_image : 0); | ||
735 | |||
736 | _load_ucode_intel_bsp((struct mc_saved_data *)__pa_nodebug(&mc_saved_data), | ||
737 | (unsigned long *)__pa_nodebug(&mc_tmp_ptrs), | ||
738 | start, size); | ||
738 | #else | 739 | #else |
739 | start = boot_params.hdr.ramdisk_image + PAGE_OFFSET; | ||
740 | size = boot_params.hdr.ramdisk_size; | 740 | size = boot_params.hdr.ramdisk_size; |
741 | start = (size ? boot_params.hdr.ramdisk_image + PAGE_OFFSET : 0); | ||
741 | 742 | ||
742 | _load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd, start, size); | 743 | _load_ucode_intel_bsp(&mc_saved_data, mc_tmp_ptrs, start, size); |
743 | #endif | 744 | #endif |
744 | } | 745 | } |
745 | 746 | ||
746 | void load_ucode_intel_ap(void) | 747 | void load_ucode_intel_ap(void) |
747 | { | 748 | { |
748 | struct mc_saved_data *mc_saved_data_p; | 749 | unsigned long *mcs_tmp_p; |
750 | struct mc_saved_data *mcs_p; | ||
749 | struct ucode_cpu_info uci; | 751 | struct ucode_cpu_info uci; |
750 | unsigned long *mc_saved_in_initrd_p; | ||
751 | unsigned long initrd_start_addr; | ||
752 | enum ucode_state ret; | 752 | enum ucode_state ret; |
753 | #ifdef CONFIG_X86_32 | 753 | #ifdef CONFIG_X86_32 |
754 | unsigned long *initrd_start_p; | ||
755 | 754 | ||
756 | mc_saved_in_initrd_p = | 755 | mcs_tmp_p = (unsigned long *)__pa_nodebug(mc_tmp_ptrs); |
757 | (unsigned long *)__pa_nodebug(mc_saved_in_initrd); | 756 | mcs_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data); |
758 | mc_saved_data_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data); | ||
759 | initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start); | ||
760 | initrd_start_addr = (unsigned long)__pa_nodebug(*initrd_start_p); | ||
761 | #else | 757 | #else |
762 | mc_saved_data_p = &mc_saved_data; | 758 | mcs_tmp_p = mc_tmp_ptrs; |
763 | mc_saved_in_initrd_p = mc_saved_in_initrd; | 759 | mcs_p = &mc_saved_data; |
764 | initrd_start_addr = initrd_start; | ||
765 | #endif | 760 | #endif |
766 | 761 | ||
767 | /* | 762 | /* |
768 | * If there is no valid ucode previously saved in memory, no need to | 763 | * If there is no valid ucode previously saved in memory, no need to |
769 | * update ucode on this AP. | 764 | * update ucode on this AP. |
770 | */ | 765 | */ |
771 | if (mc_saved_data_p->mc_saved_count == 0) | 766 | if (!mcs_p->num_saved) |
772 | return; | 767 | return; |
773 | 768 | ||
774 | collect_cpu_info_early(&uci); | 769 | collect_cpu_info_early(&uci); |
775 | ret = load_microcode(mc_saved_data_p, mc_saved_in_initrd_p, | 770 | ret = load_microcode(mcs_p, mcs_tmp_p, get_initrd_start_addr(), &uci); |
776 | initrd_start_addr, &uci); | ||
777 | |||
778 | if (ret != UCODE_OK) | 771 | if (ret != UCODE_OK) |
779 | return; | 772 | return; |
780 | 773 | ||
@@ -786,13 +779,13 @@ void reload_ucode_intel(void) | |||
786 | struct ucode_cpu_info uci; | 779 | struct ucode_cpu_info uci; |
787 | enum ucode_state ret; | 780 | enum ucode_state ret; |
788 | 781 | ||
789 | if (!mc_saved_data.mc_saved_count) | 782 | if (!mc_saved_data.num_saved) |
790 | return; | 783 | return; |
791 | 784 | ||
792 | collect_cpu_info_early(&uci); | 785 | collect_cpu_info_early(&uci); |
793 | 786 | ||
794 | ret = load_microcode_early(mc_saved_data.mc_saved, | 787 | ret = load_microcode_early(mc_saved_data.mc_saved, |
795 | mc_saved_data.mc_saved_count, &uci); | 788 | mc_saved_data.num_saved, &uci); |
796 | if (ret != UCODE_OK) | 789 | if (ret != UCODE_OK) |
797 | return; | 790 | return; |
798 | 791 | ||
@@ -825,7 +818,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) | |||
825 | * return 0 - no update found | 818 | * return 0 - no update found |
826 | * return 1 - found update | 819 | * return 1 - found update |
827 | */ | 820 | */ |
828 | static int get_matching_mc(struct microcode_intel *mc_intel, int cpu) | 821 | static int get_matching_mc(struct microcode_intel *mc, int cpu) |
829 | { | 822 | { |
830 | struct cpu_signature cpu_sig; | 823 | struct cpu_signature cpu_sig; |
831 | unsigned int csig, cpf, crev; | 824 | unsigned int csig, cpf, crev; |
@@ -836,39 +829,36 @@ static int get_matching_mc(struct microcode_intel *mc_intel, int cpu) | |||
836 | cpf = cpu_sig.pf; | 829 | cpf = cpu_sig.pf; |
837 | crev = cpu_sig.rev; | 830 | crev = cpu_sig.rev; |
838 | 831 | ||
839 | return has_newer_microcode(mc_intel, csig, cpf, crev); | 832 | return has_newer_microcode(mc, csig, cpf, crev); |
840 | } | 833 | } |
841 | 834 | ||
842 | static int apply_microcode_intel(int cpu) | 835 | static int apply_microcode_intel(int cpu) |
843 | { | 836 | { |
844 | struct microcode_intel *mc_intel; | 837 | struct microcode_intel *mc; |
845 | struct ucode_cpu_info *uci; | 838 | struct ucode_cpu_info *uci; |
839 | struct cpuinfo_x86 *c; | ||
846 | unsigned int val[2]; | 840 | unsigned int val[2]; |
847 | int cpu_num = raw_smp_processor_id(); | ||
848 | struct cpuinfo_x86 *c = &cpu_data(cpu_num); | ||
849 | |||
850 | uci = ucode_cpu_info + cpu; | ||
851 | mc_intel = uci->mc; | ||
852 | 841 | ||
853 | /* We should bind the task to the CPU */ | 842 | /* We should bind the task to the CPU */ |
854 | BUG_ON(cpu_num != cpu); | 843 | if (WARN_ON(raw_smp_processor_id() != cpu)) |
844 | return -1; | ||
855 | 845 | ||
856 | if (mc_intel == NULL) | 846 | uci = ucode_cpu_info + cpu; |
847 | mc = uci->mc; | ||
848 | if (!mc) | ||
857 | return 0; | 849 | return 0; |
858 | 850 | ||
859 | /* | 851 | /* |
860 | * Microcode on this CPU could have been updated earlier. Only apply the | 852 | * Microcode on this CPU could have been updated earlier. Only apply the
861 | * microcode patch in mc_intel when it is newer than the one on this | 853 | * microcode patch in mc when it is newer than the one on this |
862 | * CPU. | 854 | * CPU. |
863 | */ | 855 | */ |
864 | if (get_matching_mc(mc_intel, cpu) == 0) | 856 | if (!get_matching_mc(mc, cpu)) |
865 | return 0; | 857 | return 0; |
866 | 858 | ||
867 | /* write microcode via MSR 0x79 */ | 859 | /* write microcode via MSR 0x79 */ |
868 | wrmsr(MSR_IA32_UCODE_WRITE, | 860 | wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); |
869 | (unsigned long) mc_intel->bits, | 861 | wrmsrl(MSR_IA32_UCODE_REV, 0); |
870 | (unsigned long) mc_intel->bits >> 16 >> 16); | ||
871 | wrmsr(MSR_IA32_UCODE_REV, 0, 0); | ||
872 | 862 | ||
873 | /* As documented in the SDM: Do a CPUID 1 here */ | 863 | /* As documented in the SDM: Do a CPUID 1 here */ |
874 | sync_core(); | 864 | sync_core(); |
@@ -876,16 +866,19 @@ static int apply_microcode_intel(int cpu) | |||
876 | /* get the current revision from MSR 0x8B */ | 866 | /* get the current revision from MSR 0x8B */ |
877 | rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); | 867 | rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); |
878 | 868 | ||
879 | if (val[1] != mc_intel->hdr.rev) { | 869 | if (val[1] != mc->hdr.rev) { |
880 | pr_err("CPU%d update to revision 0x%x failed\n", | 870 | pr_err("CPU%d update to revision 0x%x failed\n", |
881 | cpu_num, mc_intel->hdr.rev); | 871 | cpu, mc->hdr.rev); |
882 | return -1; | 872 | return -1; |
883 | } | 873 | } |
874 | |||
884 | pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x\n", | 875 | pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x\n", |
885 | cpu_num, val[1], | 876 | cpu, val[1], |
886 | mc_intel->hdr.date & 0xffff, | 877 | mc->hdr.date & 0xffff, |
887 | mc_intel->hdr.date >> 24, | 878 | mc->hdr.date >> 24, |
888 | (mc_intel->hdr.date >> 16) & 0xff); | 879 | (mc->hdr.date >> 16) & 0xff); |
880 | |||
881 | c = &cpu_data(cpu); | ||
889 | 882 | ||
890 | uci->cpu_sig.rev = val[1]; | 883 | uci->cpu_sig.rev = val[1]; |
891 | c->microcode = val[1]; | 884 | c->microcode = val[1]; |
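
A note on the wrmsr()-to-wrmsrl() change above: it collapses the old low/high register split into a single 64-bit write. A minimal user-space sketch (plain C, no privileged MSR access; the address value is made up) of why the old code shifted by 16 twice instead of by 32:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t addr = 0x12345678abcdef00ULL;	/* hypothetical ucode address */
	uint32_t lo = (uint32_t)addr;
	/*
	 * On a 32-bit kernel, (unsigned long)addr is only 32 bits wide and
	 * a single ">> 32" would be undefined behavior; ">> 16 >> 16" is
	 * defined and yields 0, the correct high half there. wrmsrl()
	 * hides this detail behind a genuine 64-bit argument.
	 */
	uint32_t hi = (uint32_t)(addr >> 16 >> 16);

	printf("lo=%#x hi=%#x ok=%d\n", lo, hi,
	       (((uint64_t)hi << 32) | lo) == addr);
	return 0;
}
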
diff --git a/arch/x86/kernel/cpu/microcode/intel_lib.c b/arch/x86/kernel/cpu/microcode/intel_lib.c index b96896bcbdaf..2ce1a7dc45b7 100644 --- a/arch/x86/kernel/cpu/microcode/intel_lib.c +++ b/arch/x86/kernel/cpu/microcode/intel_lib.c | |||
@@ -49,7 +49,7 @@ int microcode_sanity_check(void *mc, int print_err) | |||
49 | unsigned long total_size, data_size, ext_table_size; | 49 | unsigned long total_size, data_size, ext_table_size; |
50 | struct microcode_header_intel *mc_header = mc; | 50 | struct microcode_header_intel *mc_header = mc; |
51 | struct extended_sigtable *ext_header = NULL; | 51 | struct extended_sigtable *ext_header = NULL; |
52 | int sum, orig_sum, ext_sigcount = 0, i; | 52 | u32 sum, orig_sum, ext_sigcount = 0, i; |
53 | struct extended_signature *ext_sig; | 53 | struct extended_signature *ext_sig; |
54 | 54 | ||
55 | total_size = get_totalsize(mc_header); | 55 | total_size = get_totalsize(mc_header); |
@@ -57,69 +57,85 @@ int microcode_sanity_check(void *mc, int print_err) | |||
57 | 57 | ||
58 | if (data_size + MC_HEADER_SIZE > total_size) { | 58 | if (data_size + MC_HEADER_SIZE > total_size) { |
59 | if (print_err) | 59 | if (print_err) |
60 | pr_err("error! Bad data size in microcode data file\n"); | 60 | pr_err("Error: bad microcode data file size.\n"); |
61 | return -EINVAL; | 61 | return -EINVAL; |
62 | } | 62 | } |
63 | 63 | ||
64 | if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { | 64 | if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { |
65 | if (print_err) | 65 | if (print_err) |
66 | pr_err("error! Unknown microcode update format\n"); | 66 | pr_err("Error: invalid/unknown microcode update format.\n"); |
67 | return -EINVAL; | 67 | return -EINVAL; |
68 | } | 68 | } |
69 | |||
69 | ext_table_size = total_size - (MC_HEADER_SIZE + data_size); | 70 | ext_table_size = total_size - (MC_HEADER_SIZE + data_size); |
70 | if (ext_table_size) { | 71 | if (ext_table_size) { |
72 | u32 ext_table_sum = 0; | ||
73 | u32 *ext_tablep; | ||
74 | |||
71 | if ((ext_table_size < EXT_HEADER_SIZE) | 75 | if ((ext_table_size < EXT_HEADER_SIZE) |
72 | || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { | 76 | || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { |
73 | if (print_err) | 77 | if (print_err) |
74 | pr_err("error! Small exttable size in microcode data file\n"); | 78 | pr_err("Error: truncated extended signature table.\n"); |
75 | return -EINVAL; | 79 | return -EINVAL; |
76 | } | 80 | } |
81 | |||
77 | ext_header = mc + MC_HEADER_SIZE + data_size; | 82 | ext_header = mc + MC_HEADER_SIZE + data_size; |
78 | if (ext_table_size != exttable_size(ext_header)) { | 83 | if (ext_table_size != exttable_size(ext_header)) { |
79 | if (print_err) | 84 | if (print_err) |
80 | pr_err("error! Bad exttable size in microcode data file\n"); | 85 | pr_err("Error: extended signature table size mismatch.\n"); |
81 | return -EFAULT; | 86 | return -EFAULT; |
82 | } | 87 | } |
88 | |||
83 | ext_sigcount = ext_header->count; | 89 | ext_sigcount = ext_header->count; |
84 | } | ||
85 | 90 | ||
86 | /* check extended table checksum */ | 91 | /* |
87 | if (ext_table_size) { | 92 | * Check extended table checksum: the sum of all dwords that |
88 | int ext_table_sum = 0; | 93 | * comprise a valid table must be 0. |
89 | int *ext_tablep = (int *)ext_header; | 94 | */ |
95 | ext_tablep = (u32 *)ext_header; | ||
90 | 96 | ||
91 | i = ext_table_size / DWSIZE; | 97 | i = ext_table_size / sizeof(u32); |
92 | while (i--) | 98 | while (i--) |
93 | ext_table_sum += ext_tablep[i]; | 99 | ext_table_sum += ext_tablep[i]; |
100 | |||
94 | if (ext_table_sum) { | 101 | if (ext_table_sum) { |
95 | if (print_err) | 102 | if (print_err) |
96 | pr_warn("aborting, bad extended signature table checksum\n"); | 103 | pr_warn("Bad extended signature table checksum, aborting.\n"); |
97 | return -EINVAL; | 104 | return -EINVAL; |
98 | } | 105 | } |
99 | } | 106 | } |
100 | 107 | ||
101 | /* calculate the checksum */ | 108 | /* |
109 | * Calculate the checksum of update data and header. The checksum of | ||
110 | * valid update data and header including the extended signature table | ||
111 | * must be 0. | ||
112 | */ | ||
102 | orig_sum = 0; | 113 | orig_sum = 0; |
103 | i = (MC_HEADER_SIZE + data_size) / DWSIZE; | 114 | i = (MC_HEADER_SIZE + data_size) / sizeof(u32); |
104 | while (i--) | 115 | while (i--) |
105 | orig_sum += ((int *)mc)[i]; | 116 | orig_sum += ((u32 *)mc)[i]; |
117 | |||
106 | if (orig_sum) { | 118 | if (orig_sum) { |
107 | if (print_err) | 119 | if (print_err) |
108 | pr_err("aborting, bad checksum\n"); | 120 | pr_err("Bad microcode data checksum, aborting.\n"); |
109 | return -EINVAL; | 121 | return -EINVAL; |
110 | } | 122 | } |
123 | |||
111 | if (!ext_table_size) | 124 | if (!ext_table_size) |
112 | return 0; | 125 | return 0; |
113 | /* check extended signature checksum */ | 126 | |
127 | /* | ||
128 | * Check extended signature checksum: 0 => valid. | ||
129 | */ | ||
114 | for (i = 0; i < ext_sigcount; i++) { | 130 | for (i = 0; i < ext_sigcount; i++) { |
115 | ext_sig = (void *)ext_header + EXT_HEADER_SIZE + | 131 | ext_sig = (void *)ext_header + EXT_HEADER_SIZE + |
116 | EXT_SIGNATURE_SIZE * i; | 132 | EXT_SIGNATURE_SIZE * i; |
117 | sum = orig_sum | 133 | |
118 | - (mc_header->sig + mc_header->pf + mc_header->cksum) | 134 | sum = (mc_header->sig + mc_header->pf + mc_header->cksum) - |
119 | + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); | 135 | (ext_sig->sig + ext_sig->pf + ext_sig->cksum); |
120 | if (sum) { | 136 | if (sum) { |
121 | if (print_err) | 137 | if (print_err) |
122 | pr_err("aborting, bad checksum\n"); | 138 | pr_err("Bad extended signature checksum, aborting.\n"); |
123 | return -EINVAL; | 139 | return -EINVAL; |
124 | } | 140 | } |
125 | } | 141 | } |
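
The sanity-check rewrite above also switches the accumulators from int to u32, which matters: the dword sums rely on modulo-2^32 wraparound, well defined for unsigned types but undefined on signed overflow. A stand-alone sketch of the rule being enforced (hypothetical buffer, not a real microcode image):

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

static int words_sum_to_zero(const void *buf, size_t bytes)
{
	const uint32_t *p = buf;
	uint32_t sum = 0;

	for (size_t i = 0; i < bytes / sizeof(uint32_t); i++)
		sum += p[i];	/* wraps mod 2^32, well defined for u32 */

	return sum == 0;
}

int main(void)
{
	uint32_t img[4] = { 0xdeadbeef, 0x12345678, 0x9abcdef0, 0 };

	/* A producer picks the checksum word so the total wraps to 0. */
	img[3] = -(img[0] + img[1] + img[2]);

	printf("valid=%d\n", words_sum_to_zero(img, sizeof(img)));
	return 0;
}
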
diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh index 3f20710a5b23..6988c74409a8 100644 --- a/arch/x86/kernel/cpu/mkcapflags.sh +++ b/arch/x86/kernel/cpu/mkcapflags.sh | |||
@@ -1,6 +1,6 @@ | |||
1 | #!/bin/sh | 1 | #!/bin/sh |
2 | # | 2 | # |
3 | # Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeature.h | 3 | # Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeatures.h |
4 | # | 4 | # |
5 | 5 | ||
6 | IN=$1 | 6 | IN=$1 |
@@ -49,8 +49,8 @@ dump_array() | |||
49 | trap 'rm "$OUT"' EXIT | 49 | trap 'rm "$OUT"' EXIT |
50 | 50 | ||
51 | ( | 51 | ( |
52 | echo "#ifndef _ASM_X86_CPUFEATURE_H" | 52 | echo "#ifndef _ASM_X86_CPUFEATURES_H" |
53 | echo "#include <asm/cpufeature.h>" | 53 | echo "#include <asm/cpufeatures.h>" |
54 | echo "#endif" | 54 | echo "#endif" |
55 | echo "" | 55 | echo "" |
56 | 56 | ||
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index ba80d68f683e..10f8d4796240 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -47,7 +47,7 @@ | |||
47 | #include <linux/smp.h> | 47 | #include <linux/smp.h> |
48 | #include <linux/syscore_ops.h> | 48 | #include <linux/syscore_ops.h> |
49 | 49 | ||
50 | #include <asm/processor.h> | 50 | #include <asm/cpufeature.h> |
51 | #include <asm/e820.h> | 51 | #include <asm/e820.h> |
52 | #include <asm/mtrr.h> | 52 | #include <asm/mtrr.h> |
53 | #include <asm/msr.h> | 53 | #include <asm/msr.h> |
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index e3b4d1841175..34178564be2a 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c | |||
@@ -1,6 +1,6 @@ | |||
1 | #include <linux/kernel.h> | 1 | #include <linux/kernel.h> |
2 | #include <linux/mm.h> | 2 | #include <linux/mm.h> |
3 | #include <asm/processor.h> | 3 | #include <asm/cpufeature.h> |
4 | #include <asm/msr.h> | 4 | #include <asm/msr.h> |
5 | #include "cpu.h" | 5 | #include "cpu.h" |
6 | 6 | ||
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 837365f10912..621b501f8935 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <asm/e820.h> | 24 | #include <asm/e820.h> |
25 | #include <asm/proto.h> | 25 | #include <asm/proto.h> |
26 | #include <asm/setup.h> | 26 | #include <asm/setup.h> |
27 | #include <asm/cpufeature.h> | ||
27 | 28 | ||
28 | /* | 29 | /* |
29 | * The e820 map is the map that gets modified e.g. with command line parameters | 30 | * The e820 map is the map that gets modified e.g. with command line parameters |
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index d5804adfa6da..0b1b9abd4d5f 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c | |||
@@ -114,6 +114,10 @@ void __kernel_fpu_begin(void) | |||
114 | kernel_fpu_disable(); | 114 | kernel_fpu_disable(); |
115 | 115 | ||
116 | if (fpu->fpregs_active) { | 116 | if (fpu->fpregs_active) { |
117 | /* | ||
118 | * Ignore return value -- we don't care if reg state | ||
119 | * is clobbered. | ||
120 | */ | ||
117 | copy_fpregs_to_fpstate(fpu); | 121 | copy_fpregs_to_fpstate(fpu); |
118 | } else { | 122 | } else { |
119 | this_cpu_write(fpu_fpregs_owner_ctx, NULL); | 123 | this_cpu_write(fpu_fpregs_owner_ctx, NULL); |
@@ -189,8 +193,12 @@ void fpu__save(struct fpu *fpu) | |||
189 | 193 | ||
190 | preempt_disable(); | 194 | preempt_disable(); |
191 | if (fpu->fpregs_active) { | 195 | if (fpu->fpregs_active) { |
192 | if (!copy_fpregs_to_fpstate(fpu)) | 196 | if (!copy_fpregs_to_fpstate(fpu)) { |
193 | fpregs_deactivate(fpu); | 197 | if (use_eager_fpu()) |
198 | copy_kernel_to_fpregs(&fpu->state); | ||
199 | else | ||
200 | fpregs_deactivate(fpu); | ||
201 | } | ||
194 | } | 202 | } |
195 | preempt_enable(); | 203 | preempt_enable(); |
196 | } | 204 | } |
@@ -223,14 +231,15 @@ void fpstate_init(union fpregs_state *state) | |||
223 | } | 231 | } |
224 | EXPORT_SYMBOL_GPL(fpstate_init); | 232 | EXPORT_SYMBOL_GPL(fpstate_init); |
225 | 233 | ||
226 | /* | 234 | int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) |
227 | * Copy the current task's FPU state to a new task's FPU context. | ||
228 | * | ||
229 | * In both the 'eager' and the 'lazy' case we save hardware registers | ||
230 | * directly to the destination buffer. | ||
231 | */ | ||
232 | static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) | ||
233 | { | 235 | { |
236 | dst_fpu->counter = 0; | ||
237 | dst_fpu->fpregs_active = 0; | ||
238 | dst_fpu->last_cpu = -1; | ||
239 | |||
240 | if (!src_fpu->fpstate_active || !cpu_has_fpu) | ||
241 | return 0; | ||
242 | |||
234 | WARN_ON_FPU(src_fpu != ¤t->thread.fpu); | 243 | WARN_ON_FPU(src_fpu != ¤t->thread.fpu); |
235 | 244 | ||
236 | /* | 245 | /* |
@@ -243,10 +252,9 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) | |||
243 | /* | 252 | /* |
244 | * Save current FPU registers directly into the child | 253 | * Save current FPU registers directly into the child |
245 | * FPU context, without any memory-to-memory copying. | 254 | * FPU context, without any memory-to-memory copying. |
246 | * | 255 | * In lazy mode, if the FPU context isn't loaded into |
247 | * If the FPU context got destroyed in the process (FNSAVE | 256 | * fpregs, CR0.TS will be set and do_device_not_available |
248 | * done on old CPUs) then copy it back into the source | 257 | * will load the FPU context. |
249 | * context and mark the current task for lazy restore. | ||
250 | * | 258 | * |
251 | * We have to do all this with preemption disabled, | 259 | * We have to do all this with preemption disabled, |
252 | * mostly because of the FNSAVE case, because in that | 260 | * mostly because of the FNSAVE case, because in that |
@@ -259,19 +267,13 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) | |||
259 | preempt_disable(); | 267 | preempt_disable(); |
260 | if (!copy_fpregs_to_fpstate(dst_fpu)) { | 268 | if (!copy_fpregs_to_fpstate(dst_fpu)) { |
261 | memcpy(&src_fpu->state, &dst_fpu->state, xstate_size); | 269 | memcpy(&src_fpu->state, &dst_fpu->state, xstate_size); |
262 | fpregs_deactivate(src_fpu); | 270 | |
271 | if (use_eager_fpu()) | ||
272 | copy_kernel_to_fpregs(&src_fpu->state); | ||
273 | else | ||
274 | fpregs_deactivate(src_fpu); | ||
263 | } | 275 | } |
264 | preempt_enable(); | 276 | preempt_enable(); |
265 | } | ||
266 | |||
267 | int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) | ||
268 | { | ||
269 | dst_fpu->counter = 0; | ||
270 | dst_fpu->fpregs_active = 0; | ||
271 | dst_fpu->last_cpu = -1; | ||
272 | |||
273 | if (src_fpu->fpstate_active && cpu_has_fpu) | ||
274 | fpu_copy(dst_fpu, src_fpu); | ||
275 | 277 | ||
276 | return 0; | 278 | return 0; |
277 | } | 279 | } |
@@ -425,7 +427,7 @@ void fpu__clear(struct fpu *fpu) | |||
425 | { | 427 | { |
426 | WARN_ON_FPU(fpu != ¤t->thread.fpu); /* Almost certainly an anomaly */ | 428 | WARN_ON_FPU(fpu != ¤t->thread.fpu); /* Almost certainly an anomaly */ |
427 | 429 | ||
428 | if (!use_eager_fpu()) { | 430 | if (!use_eager_fpu() || !static_cpu_has(X86_FEATURE_FPU)) { |
429 | /* FPU state will be reallocated lazily at the first use. */ | 431 | /* FPU state will be reallocated lazily at the first use. */ |
430 | fpu__drop(fpu); | 432 | fpu__drop(fpu); |
431 | } else { | 433 | } else { |
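
Both fpu__save() and fpu__copy() now share the same pattern: if the save clobbered the live registers (the FNSAVE case), eager mode must reload them immediately rather than leave them deactivated. A control-flow sketch with stubbed-out primitives (the struct and helpers below are stand-ins, not the kernel's):

#include <stdbool.h>
#include <stdio.h>

struct fpu { int state; };	/* stand-in for union fpregs_state */

/* Stubs: pretend the save used FNSAVE and destroyed the live registers. */
static bool copy_fpregs_to_fpstate(struct fpu *f) { (void)f; return false; }
static bool use_eager_fpu(void) { return true; }
static void copy_kernel_to_fpregs(int *s) { (void)s; puts("reload regs"); }
static void fpregs_deactivate(struct fpu *f) { (void)f; puts("deactivate"); }

static void save_fpu(struct fpu *fpu)
{
	if (!copy_fpregs_to_fpstate(fpu)) {
		/* Registers were clobbered by the save. */
		if (use_eager_fpu())
			copy_kernel_to_fpregs(&fpu->state); /* keep regs live */
		else
			fpregs_deactivate(fpu); /* lazy: restore on next trap */
	}
}

int main(void)
{
	struct fpu f = { 0 };
	save_fpu(&f);
	return 0;
}
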
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index bd08fb77073d..54c86fffbf9f 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c | |||
@@ -262,7 +262,10 @@ static void __init fpu__init_system_xstate_size_legacy(void) | |||
262 | * not only saved the restores along the way, but we also have the | 262 | * not only saved the restores along the way, but we also have the |
263 | * FPU ready to be used for the original task. | 263 | * FPU ready to be used for the original task. |
264 | * | 264 | * |
265 | * 'eager' switching is used on modern CPUs, there we switch the FPU | 265 | * 'lazy' is deprecated because it's almost never a performance win |
266 | * and it's much more complicated than 'eager'. | ||
267 | * | ||
268 | * 'eager' switching is by default on all CPUs, there we switch the FPU | ||
266 | * state during every context switch, regardless of whether the task | 269 | * state during every context switch, regardless of whether the task |
267 | * has used FPU instructions in that time slice or not. This is done | 270 | * has used FPU instructions in that time slice or not. This is done |
268 | * because modern FPU context saving instructions are able to optimize | 271 | * because modern FPU context saving instructions are able to optimize |
@@ -273,7 +276,7 @@ static void __init fpu__init_system_xstate_size_legacy(void) | |||
273 | * to use 'eager' restores, if we detect that a task is using the FPU | 276 | * to use 'eager' restores, if we detect that a task is using the FPU |
274 | * frequently. See the fpu->counter logic in fpu/internal.h for that. ] | 277 | * frequently. See the fpu->counter logic in fpu/internal.h for that. ] |
275 | */ | 278 | */ |
276 | static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; | 279 | static enum { ENABLE, DISABLE } eagerfpu = ENABLE; |
277 | 280 | ||
278 | /* | 281 | /* |
279 | * Find supported xfeatures based on cpu features and command-line input. | 282 | * Find supported xfeatures based on cpu features and command-line input. |
@@ -344,15 +347,9 @@ static void __init fpu__init_system_ctx_switch(void) | |||
344 | */ | 347 | */ |
345 | static void __init fpu__init_parse_early_param(void) | 348 | static void __init fpu__init_parse_early_param(void) |
346 | { | 349 | { |
347 | /* | ||
348 | * No need to check "eagerfpu=auto" again, since it is the | ||
349 | * initial default. | ||
350 | */ | ||
351 | if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) { | 350 | if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) { |
352 | eagerfpu = DISABLE; | 351 | eagerfpu = DISABLE; |
353 | fpu__clear_eager_fpu_features(); | 352 | fpu__clear_eager_fpu_features(); |
354 | } else if (cmdline_find_option_bool(boot_command_line, "eagerfpu=on")) { | ||
355 | eagerfpu = ENABLE; | ||
356 | } | 353 | } |
357 | 354 | ||
358 | if (cmdline_find_option_bool(boot_command_line, "no387")) | 355 | if (cmdline_find_option_bool(boot_command_line, "no387")) |
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index d425cda5ae6d..6e8354f5a593 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c | |||
@@ -51,6 +51,9 @@ void fpu__xstate_clear_all_cpu_caps(void) | |||
51 | setup_clear_cpu_cap(X86_FEATURE_AVX512PF); | 51 | setup_clear_cpu_cap(X86_FEATURE_AVX512PF); |
52 | setup_clear_cpu_cap(X86_FEATURE_AVX512ER); | 52 | setup_clear_cpu_cap(X86_FEATURE_AVX512ER); |
53 | setup_clear_cpu_cap(X86_FEATURE_AVX512CD); | 53 | setup_clear_cpu_cap(X86_FEATURE_AVX512CD); |
54 | setup_clear_cpu_cap(X86_FEATURE_AVX512DQ); | ||
55 | setup_clear_cpu_cap(X86_FEATURE_AVX512BW); | ||
56 | setup_clear_cpu_cap(X86_FEATURE_AVX512VL); | ||
54 | setup_clear_cpu_cap(X86_FEATURE_MPX); | 57 | setup_clear_cpu_cap(X86_FEATURE_MPX); |
55 | setup_clear_cpu_cap(X86_FEATURE_XGETBV1); | 58 | setup_clear_cpu_cap(X86_FEATURE_XGETBV1); |
56 | } | 59 | } |
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 05c9e3f5b6d7..702547ce33c9 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -697,9 +697,8 @@ static inline void tramp_free(void *tramp) { } | |||
697 | #endif | 697 | #endif |
698 | 698 | ||
699 | /* Defined as markers to the end of the ftrace default trampolines */ | 699 | /* Defined as markers to the end of the ftrace default trampolines */ |
700 | extern void ftrace_caller_end(void); | ||
701 | extern void ftrace_regs_caller_end(void); | 700 | extern void ftrace_regs_caller_end(void); |
702 | extern void ftrace_return(void); | 701 | extern void ftrace_epilogue(void); |
703 | extern void ftrace_caller_op_ptr(void); | 702 | extern void ftrace_caller_op_ptr(void); |
704 | extern void ftrace_regs_caller_op_ptr(void); | 703 | extern void ftrace_regs_caller_op_ptr(void); |
705 | 704 | ||
@@ -746,7 +745,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) | |||
746 | op_offset = (unsigned long)ftrace_regs_caller_op_ptr; | 745 | op_offset = (unsigned long)ftrace_regs_caller_op_ptr; |
747 | } else { | 746 | } else { |
748 | start_offset = (unsigned long)ftrace_caller; | 747 | start_offset = (unsigned long)ftrace_caller; |
749 | end_offset = (unsigned long)ftrace_caller_end; | 748 | end_offset = (unsigned long)ftrace_epilogue; |
750 | op_offset = (unsigned long)ftrace_caller_op_ptr; | 749 | op_offset = (unsigned long)ftrace_caller_op_ptr; |
751 | } | 750 | } |
752 | 751 | ||
@@ -754,7 +753,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) | |||
754 | 753 | ||
755 | /* | 754 | /* |
756 | * Allocate enough size to store the ftrace_caller code, | 755 | * Allocate enough size to store the ftrace_caller code, |
757 | * the jmp to ftrace_return, as well as the address of | 756 | * the jmp to ftrace_epilogue, as well as the address of |
758 | * the ftrace_ops this trampoline is used for. | 757 | * the ftrace_ops this trampoline is used for. |
759 | */ | 758 | */ |
760 | trampoline = alloc_tramp(size + MCOUNT_INSN_SIZE + sizeof(void *)); | 759 | trampoline = alloc_tramp(size + MCOUNT_INSN_SIZE + sizeof(void *)); |
@@ -772,8 +771,8 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) | |||
772 | 771 | ||
773 | ip = (unsigned long)trampoline + size; | 772 | ip = (unsigned long)trampoline + size; |
774 | 773 | ||
775 | /* The trampoline ends with a jmp to ftrace_return */ | 774 | /* The trampoline ends with a jmp to ftrace_epilogue */ |
776 | jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_return); | 775 | jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_epilogue); |
777 | memcpy(trampoline + size, jmp, MCOUNT_INSN_SIZE); | 776 | memcpy(trampoline + size, jmp, MCOUNT_INSN_SIZE); |
778 | 777 | ||
779 | /* | 778 | /* |
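
The create_trampoline() changes are renames, but the allocation math they touch is worth spelling out: the buffer holds the copied ftrace_caller body, one jmp back to ftrace_epilogue, and the owning ftrace_ops pointer. A sketch with made-up offsets (the kernel takes them from the linker symbols):

#include <stdint.h>
#include <stdio.h>

#define MCOUNT_INSN_SIZE 5	/* one rel32 jmp on x86 */

int main(void)
{
	/* Hypothetical addresses standing in for ftrace_caller and
	 * ftrace_epilogue. */
	unsigned long start_offset = 0x1000;
	unsigned long end_offset   = 0x10a0;

	unsigned long size  = end_offset - start_offset; /* body to copy */
	unsigned long alloc = size + MCOUNT_INSN_SIZE	 /* + jmp back   */
			      + sizeof(void *);		 /* + ops ptr    */

	printf("copy %lu bytes, allocate %lu\n", size, alloc);
	return 0;
}
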
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 2c0f3407bd1f..1f4422d5c8d0 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -40,13 +40,8 @@ pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX); | |||
40 | /* Wipe all early page tables except for the kernel symbol map */ | 40 | /* Wipe all early page tables except for the kernel symbol map */ |
41 | static void __init reset_early_page_tables(void) | 41 | static void __init reset_early_page_tables(void) |
42 | { | 42 | { |
43 | unsigned long i; | 43 | memset(early_level4_pgt, 0, sizeof(pgd_t)*(PTRS_PER_PGD-1)); |
44 | |||
45 | for (i = 0; i < PTRS_PER_PGD-1; i++) | ||
46 | early_level4_pgt[i].pgd = 0; | ||
47 | |||
48 | next_early_pgt = 0; | 44 | next_early_pgt = 0; |
49 | |||
50 | write_cr3(__pa_nodebug(early_level4_pgt)); | 45 | write_cr3(__pa_nodebug(early_level4_pgt)); |
51 | } | 46 | } |
52 | 47 | ||
@@ -54,7 +49,6 @@ static void __init reset_early_page_tables(void) | |||
54 | int __init early_make_pgtable(unsigned long address) | 49 | int __init early_make_pgtable(unsigned long address) |
55 | { | 50 | { |
56 | unsigned long physaddr = address - __PAGE_OFFSET; | 51 | unsigned long physaddr = address - __PAGE_OFFSET; |
57 | unsigned long i; | ||
58 | pgdval_t pgd, *pgd_p; | 52 | pgdval_t pgd, *pgd_p; |
59 | pudval_t pud, *pud_p; | 53 | pudval_t pud, *pud_p; |
60 | pmdval_t pmd, *pmd_p; | 54 | pmdval_t pmd, *pmd_p; |
@@ -81,8 +75,7 @@ again: | |||
81 | } | 75 | } |
82 | 76 | ||
83 | pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++]; | 77 | pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++]; |
84 | for (i = 0; i < PTRS_PER_PUD; i++) | 78 | memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD); |
85 | pud_p[i] = 0; | ||
86 | *pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE; | 79 | *pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE; |
87 | } | 80 | } |
88 | pud_p += pud_index(address); | 81 | pud_p += pud_index(address); |
@@ -97,8 +90,7 @@ again: | |||
97 | } | 90 | } |
98 | 91 | ||
99 | pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++]; | 92 | pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++]; |
100 | for (i = 0; i < PTRS_PER_PMD; i++) | 93 | memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD); |
101 | pmd_p[i] = 0; | ||
102 | *pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE; | 94 | *pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE; |
103 | } | 95 | } |
104 | pmd = (physaddr & PMD_MASK) + early_pmd_flags; | 96 | pmd = (physaddr & PMD_MASK) + early_pmd_flags; |
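
The loops removed from head64.c and their memset() replacements are byte-for-byte equivalent; for reset_early_page_tables(), both clear the first PTRS_PER_PGD-1 entries and leave the last slot -- the kernel symbol map -- untouched. A minimal model:

#include <string.h>
#include <stdint.h>
#include <assert.h>

#define PTRS_PER_PGD 512
typedef struct { uint64_t pgd; } pgd_t;

static pgd_t early_level4_pgt[PTRS_PER_PGD];

int main(void)
{
	early_level4_pgt[0].pgd = 0x1000;
	early_level4_pgt[PTRS_PER_PGD - 1].pgd = 0x2000; /* symbol map */

	memset(early_level4_pgt, 0, sizeof(pgd_t) * (PTRS_PER_PGD - 1));

	assert(early_level4_pgt[0].pgd == 0);
	assert(early_level4_pgt[PTRS_PER_PGD - 1].pgd == 0x2000);
	return 0;
}
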
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 6bc9ae24b6d2..54cdbd2003fe 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -19,7 +19,7 @@ | |||
19 | #include <asm/setup.h> | 19 | #include <asm/setup.h> |
20 | #include <asm/processor-flags.h> | 20 | #include <asm/processor-flags.h> |
21 | #include <asm/msr-index.h> | 21 | #include <asm/msr-index.h> |
22 | #include <asm/cpufeature.h> | 22 | #include <asm/cpufeatures.h> |
23 | #include <asm/percpu.h> | 23 | #include <asm/percpu.h> |
24 | #include <asm/nops.h> | 24 | #include <asm/nops.h> |
25 | #include <asm/bootparam.h> | 25 | #include <asm/bootparam.h> |
@@ -389,6 +389,12 @@ default_entry: | |||
389 | /* Make changes effective */ | 389 | /* Make changes effective */ |
390 | wrmsr | 390 | wrmsr |
391 | 391 | ||
392 | /* | ||
393 | * And make sure that all the mappings we set up have NX set from | ||
394 | * the beginning. | ||
395 | */ | ||
396 | orl $(1 << (_PAGE_BIT_NX - 32)), pa(__supported_pte_mask + 4) | ||
397 | |||
392 | enable_paging: | 398 | enable_paging: |
393 | 399 | ||
394 | /* | 400 | /* |
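
The orl added to head_32.S sets bit 63 of the 64-bit __supported_pte_mask using a 32-bit operation on its high dword (hence pa(__supported_pte_mask + 4)). The same arithmetic in C, modeling the mask as the two dwords PAE paging sees:

#include <stdint.h>
#include <stdio.h>

#define _PAGE_BIT_NX 63

int main(void)
{
	uint32_t mask[2] = { 0xffffffff, 0 };	/* low dword, high dword */

	mask[1] |= 1u << (_PAGE_BIT_NX - 32);	/* bit 63 of the 64-bit mask */

	printf("high dword = %#x\n", mask[1]);	/* 0x80000000 */
	return 0;
}
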
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index ffdc0e860390..22fbf9df61bb 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -38,7 +38,6 @@ | |||
38 | #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) | 38 | #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) |
39 | 39 | ||
40 | L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET) | 40 | L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET) |
41 | L3_PAGE_OFFSET = pud_index(__PAGE_OFFSET) | ||
42 | L4_START_KERNEL = pgd_index(__START_KERNEL_map) | 41 | L4_START_KERNEL = pgd_index(__START_KERNEL_map) |
43 | L3_START_KERNEL = pud_index(__START_KERNEL_map) | 42 | L3_START_KERNEL = pud_index(__START_KERNEL_map) |
44 | 43 | ||
@@ -76,9 +75,7 @@ startup_64: | |||
76 | subq $_text - __START_KERNEL_map, %rbp | 75 | subq $_text - __START_KERNEL_map, %rbp |
77 | 76 | ||
78 | /* Is the address not 2M aligned? */ | 77 | /* Is the address not 2M aligned? */ |
79 | movq %rbp, %rax | 78 | testl $~PMD_PAGE_MASK, %ebp |
80 | andl $~PMD_PAGE_MASK, %eax | ||
81 | testl %eax, %eax | ||
82 | jnz bad_address | 79 | jnz bad_address |
83 | 80 | ||
84 | /* | 81 | /* |
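
The head_64.S change replaces a copy-mask-test sequence with a single testl against the low bits of %rbp. In C terms, with PMD_PAGE_MASK covering one 2 MiB page:

#include <stdint.h>
#include <stdio.h>

#define PMD_PAGE_SIZE (1UL << 21)		/* 2 MiB */
#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE - 1))

static int is_2m_aligned(uint64_t phys)
{
	/* Nonzero low bits mean not 2M aligned; same test as the new testl. */
	return (phys & ~PMD_PAGE_MASK) == 0;
}

int main(void)
{
	printf("%d %d\n", is_2m_aligned(0x200000), is_2m_aligned(0x200800));
	return 0;
}
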
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index b8e6ff5cd5d0..be0ebbb6d1d1 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/pm.h> | 12 | #include <linux/pm.h> |
13 | #include <linux/io.h> | 13 | #include <linux/io.h> |
14 | 14 | ||
15 | #include <asm/cpufeature.h> | ||
15 | #include <asm/irqdomain.h> | 16 | #include <asm/irqdomain.h> |
16 | #include <asm/fixmap.h> | 17 | #include <asm/fixmap.h> |
17 | #include <asm/hpet.h> | 18 | #include <asm/hpet.h> |
diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index 87e1762e2bca..ed48a9f465f8 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S | |||
@@ -168,12 +168,14 @@ GLOBAL(ftrace_call) | |||
168 | restore_mcount_regs | 168 | restore_mcount_regs |
169 | 169 | ||
170 | /* | 170 | /* |
171 | * The copied trampoline must call ftrace_return as it | 171 | * The copied trampoline must call ftrace_epilogue as it |
172 | * still may need to call the function graph tracer. | 172 | * still may need to call the function graph tracer. |
173 | * | ||
174 | * The code up to this label is copied into trampolines so | ||
175 | * think twice before adding any new code or changing the | ||
176 | * layout here. | ||
173 | */ | 177 | */ |
174 | GLOBAL(ftrace_caller_end) | 178 | GLOBAL(ftrace_epilogue) |
175 | |||
176 | GLOBAL(ftrace_return) | ||
177 | 179 | ||
178 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 180 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
179 | GLOBAL(ftrace_graph_call) | 181 | GLOBAL(ftrace_graph_call) |
@@ -244,14 +246,14 @@ GLOBAL(ftrace_regs_call) | |||
244 | popfq | 246 | popfq |
245 | 247 | ||
246 | /* | 248 | /* |
247 | * As this jmp to ftrace_return can be a short jump | 249 | * As this jmp to ftrace_epilogue can be a short jump |
248 | * it must not be copied into the trampoline. | 250 | * it must not be copied into the trampoline. |
249 | * The trampoline will add the code to jump | 251 | * The trampoline will add the code to jump |
250 | * to the return. | 252 | * to the return. |
251 | */ | 253 | */ |
252 | GLOBAL(ftrace_regs_caller_end) | 254 | GLOBAL(ftrace_regs_caller_end) |
253 | 255 | ||
254 | jmp ftrace_return | 256 | jmp ftrace_epilogue |
255 | 257 | ||
256 | END(ftrace_regs_caller) | 258 | END(ftrace_regs_caller) |
257 | 259 | ||
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 64f9616f93f1..7f3550acde1b 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
@@ -40,7 +40,7 @@ | |||
40 | #include <linux/uaccess.h> | 40 | #include <linux/uaccess.h> |
41 | #include <linux/gfp.h> | 41 | #include <linux/gfp.h> |
42 | 42 | ||
43 | #include <asm/processor.h> | 43 | #include <asm/cpufeature.h> |
44 | #include <asm/msr.h> | 44 | #include <asm/msr.h> |
45 | 45 | ||
46 | static struct class *msr_class; | 46 | static struct class *msr_class; |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 9decee2bfdbe..2915d54e9dd5 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -57,6 +57,9 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { | |||
57 | */ | 57 | */ |
58 | .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, | 58 | .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, |
59 | #endif | 59 | #endif |
60 | #ifdef CONFIG_X86_32 | ||
61 | .SYSENTER_stack_canary = STACK_END_MAGIC, | ||
62 | #endif | ||
60 | }; | 63 | }; |
61 | EXPORT_PER_CPU_SYMBOL(cpu_tss); | 64 | EXPORT_PER_CPU_SYMBOL(cpu_tss); |
62 | 65 | ||
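
Seeding SYSENTER_stack_canary with STACK_END_MAGIC gives later code a cheap overrun check: if anything scribbles past the SYSENTER stack, the canary changes. A toy version (the struct and the check are illustrative, not the kernel's TSS layout):

#include <assert.h>

#define STACK_END_MAGIC 0x57AC6E9DUL

struct tss_like {
	unsigned long SYSENTER_stack_canary;
	/* ... the SYSENTER stack would sit adjacent to it ... */
};

static void check_canary(const struct tss_like *t)
{
	assert(t->SYSENTER_stack_canary == STACK_END_MAGIC);
}

int main(void)
{
	struct tss_like t = { .SYSENTER_stack_canary = STACK_END_MAGIC };
	check_canary(&t);
	return 0;
}
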
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index cb6282c3638f..548ddf7d6fd2 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -61,7 +61,38 @@ | |||
61 | regs->seg = GET_SEG(seg) | 3; \ | 61 | regs->seg = GET_SEG(seg) | 3; \ |
62 | } while (0) | 62 | } while (0) |
63 | 63 | ||
64 | int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) | 64 | #ifdef CONFIG_X86_64 |
65 | /* | ||
66 | * If regs->ss will cause an IRET fault, change it. Otherwise leave it | ||
67 | * alone. Using this generally makes no sense unless | ||
68 | * user_64bit_mode(regs) would return true. | ||
69 | */ | ||
70 | static void force_valid_ss(struct pt_regs *regs) | ||
71 | { | ||
72 | u32 ar; | ||
73 | asm volatile ("lar %[old_ss], %[ar]\n\t" | ||
74 | "jz 1f\n\t" /* If invalid: */ | ||
75 | "xorl %[ar], %[ar]\n\t" /* set ar = 0 */ | ||
76 | "1:" | ||
77 | : [ar] "=r" (ar) | ||
78 | : [old_ss] "rm" ((u16)regs->ss)); | ||
79 | |||
80 | /* | ||
81 | * For a valid 64-bit user context, we need DPL 3, type | ||
82 | * read-write data or read-write exp-down data, and S and P | ||
83 | * set. We can't use VERW because VERW doesn't check the | ||
84 | * P bit. | ||
85 | */ | ||
86 | ar &= AR_DPL_MASK | AR_S | AR_P | AR_TYPE_MASK; | ||
87 | if (ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA) && | ||
88 | ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA_EXPDOWN)) | ||
89 | regs->ss = __USER_DS; | ||
90 | } | ||
91 | #endif | ||
92 | |||
93 | static int restore_sigcontext(struct pt_regs *regs, | ||
94 | struct sigcontext __user *sc, | ||
95 | unsigned long uc_flags) | ||
65 | { | 96 | { |
66 | unsigned long buf_val; | 97 | unsigned long buf_val; |
67 | void __user *buf; | 98 | void __user *buf; |
@@ -94,15 +125,18 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) | |||
94 | COPY(r15); | 125 | COPY(r15); |
95 | #endif /* CONFIG_X86_64 */ | 126 | #endif /* CONFIG_X86_64 */ |
96 | 127 | ||
97 | #ifdef CONFIG_X86_32 | ||
98 | COPY_SEG_CPL3(cs); | 128 | COPY_SEG_CPL3(cs); |
99 | COPY_SEG_CPL3(ss); | 129 | COPY_SEG_CPL3(ss); |
100 | #else /* !CONFIG_X86_32 */ | 130 | |
101 | /* Kernel saves and restores only the CS segment register on signals, | 131 | #ifdef CONFIG_X86_64 |
102 | * which is the bare minimum needed to allow mixed 32/64-bit code. | 132 | /* |
103 | * App's signal handler can save/restore other segments if needed. */ | 133 | * Fix up SS if needed for the benefit of old DOSEMU and |
104 | COPY_SEG_CPL3(cs); | 134 | * CRIU. |
105 | #endif /* CONFIG_X86_32 */ | 135 | */ |
136 | if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) && | ||
137 | user_64bit_mode(regs))) | ||
138 | force_valid_ss(regs); | ||
139 | #endif | ||
106 | 140 | ||
107 | get_user_ex(tmpflags, &sc->flags); | 141 | get_user_ex(tmpflags, &sc->flags); |
108 | regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); | 142 | regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); |
@@ -165,6 +199,7 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, | |||
165 | put_user_ex(regs->cs, &sc->cs); | 199 | put_user_ex(regs->cs, &sc->cs); |
166 | put_user_ex(0, &sc->gs); | 200 | put_user_ex(0, &sc->gs); |
167 | put_user_ex(0, &sc->fs); | 201 | put_user_ex(0, &sc->fs); |
202 | put_user_ex(regs->ss, &sc->ss); | ||
168 | #endif /* CONFIG_X86_32 */ | 203 | #endif /* CONFIG_X86_32 */ |
169 | 204 | ||
170 | put_user_ex(fpstate, &sc->fpstate); | 205 | put_user_ex(fpstate, &sc->fpstate); |
@@ -403,6 +438,21 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, | |||
403 | return 0; | 438 | return 0; |
404 | } | 439 | } |
405 | #else /* !CONFIG_X86_32 */ | 440 | #else /* !CONFIG_X86_32 */ |
441 | static unsigned long frame_uc_flags(struct pt_regs *regs) | ||
442 | { | ||
443 | unsigned long flags; | ||
444 | |||
445 | if (cpu_has_xsave) | ||
446 | flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS; | ||
447 | else | ||
448 | flags = UC_SIGCONTEXT_SS; | ||
449 | |||
450 | if (likely(user_64bit_mode(regs))) | ||
451 | flags |= UC_STRICT_RESTORE_SS; | ||
452 | |||
453 | return flags; | ||
454 | } | ||
455 | |||
406 | static int __setup_rt_frame(int sig, struct ksignal *ksig, | 456 | static int __setup_rt_frame(int sig, struct ksignal *ksig, |
407 | sigset_t *set, struct pt_regs *regs) | 457 | sigset_t *set, struct pt_regs *regs) |
408 | { | 458 | { |
@@ -422,10 +472,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, | |||
422 | 472 | ||
423 | put_user_try { | 473 | put_user_try { |
424 | /* Create the ucontext. */ | 474 | /* Create the ucontext. */ |
425 | if (cpu_has_xsave) | 475 | put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags); |
426 | put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); | ||
427 | else | ||
428 | put_user_ex(0, &frame->uc.uc_flags); | ||
429 | put_user_ex(0, &frame->uc.uc_link); | 476 | put_user_ex(0, &frame->uc.uc_link); |
430 | save_altstack_ex(&frame->uc.uc_stack, regs->sp); | 477 | save_altstack_ex(&frame->uc.uc_stack, regs->sp); |
431 | 478 | ||
@@ -459,10 +506,28 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, | |||
459 | 506 | ||
460 | regs->sp = (unsigned long)frame; | 507 | regs->sp = (unsigned long)frame; |
461 | 508 | ||
462 | /* Set up the CS register to run signal handlers in 64-bit mode, | 509 | /* |
463 | even if the handler happens to be interrupting 32-bit code. */ | 510 | * Set up the CS and SS registers to run signal handlers in |
511 | * 64-bit mode, even if the handler happens to be interrupting | ||
512 | * 32-bit or 16-bit code. | ||
513 | * | ||
514 | * SS is subtle. In 64-bit mode, we don't need any particular | ||
515 | * SS descriptor, but we do need SS to be valid. It's possible | ||
516 | * that the old SS is entirely bogus -- this can happen if the | ||
517 | * signal we're trying to deliver is #GP or #SS caused by a bad | ||
518 | * SS value. We also have a compatibility issue here: DOSEMU | ||
519 | * relies on the contents of the SS register indicating the | ||
520 | * SS value at the time of the signal, even though that code in | ||
521 | * DOSEMU predates sigreturn's ability to restore SS. (DOSEMU | ||
522 | * avoids relying on sigreturn to restore SS; instead it uses | ||
523 | * a trampoline.) So we do our best: if the old SS was valid, | ||
524 | * we keep it. Otherwise we replace it. | ||
525 | */ | ||
464 | regs->cs = __USER_CS; | 526 | regs->cs = __USER_CS; |
465 | 527 | ||
528 | if (unlikely(regs->ss != __USER_DS)) | ||
529 | force_valid_ss(regs); | ||
530 | |||
466 | return 0; | 531 | return 0; |
467 | } | 532 | } |
468 | #endif /* CONFIG_X86_32 */ | 533 | #endif /* CONFIG_X86_32 */ |
@@ -489,10 +554,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig, | |||
489 | 554 | ||
490 | put_user_try { | 555 | put_user_try { |
491 | /* Create the ucontext. */ | 556 | /* Create the ucontext. */ |
492 | if (cpu_has_xsave) | 557 | put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags); |
493 | put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); | ||
494 | else | ||
495 | put_user_ex(0, &frame->uc.uc_flags); | ||
496 | put_user_ex(0, &frame->uc.uc_link); | 558 | put_user_ex(0, &frame->uc.uc_link); |
497 | compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp); | 559 | compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp); |
498 | put_user_ex(0, &frame->uc.uc__pad0); | 560 | put_user_ex(0, &frame->uc.uc__pad0); |
@@ -554,7 +616,11 @@ asmlinkage unsigned long sys_sigreturn(void) | |||
554 | 616 | ||
555 | set_current_blocked(&set); | 617 | set_current_blocked(&set); |
556 | 618 | ||
557 | if (restore_sigcontext(regs, &frame->sc)) | 619 | /* |
620 | * x86_32 has no uc_flags bits relevant to restore_sigcontext. | ||
621 | * Save a few cycles by skipping the __get_user. | ||
622 | */ | ||
623 | if (restore_sigcontext(regs, &frame->sc, 0)) | ||
558 | goto badframe; | 624 | goto badframe; |
559 | return regs->ax; | 625 | return regs->ax; |
560 | 626 | ||
@@ -570,16 +636,19 @@ asmlinkage long sys_rt_sigreturn(void) | |||
570 | struct pt_regs *regs = current_pt_regs(); | 636 | struct pt_regs *regs = current_pt_regs(); |
571 | struct rt_sigframe __user *frame; | 637 | struct rt_sigframe __user *frame; |
572 | sigset_t set; | 638 | sigset_t set; |
639 | unsigned long uc_flags; | ||
573 | 640 | ||
574 | frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); | 641 | frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); |
575 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | 642 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) |
576 | goto badframe; | 643 | goto badframe; |
577 | if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) | 644 | if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) |
578 | goto badframe; | 645 | goto badframe; |
646 | if (__get_user(uc_flags, &frame->uc.uc_flags)) | ||
647 | goto badframe; | ||
579 | 648 | ||
580 | set_current_blocked(&set); | 649 | set_current_blocked(&set); |
581 | 650 | ||
582 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) | 651 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags)) |
583 | goto badframe; | 652 | goto badframe; |
584 | 653 | ||
585 | if (restore_altstack(&frame->uc.uc_stack)) | 654 | if (restore_altstack(&frame->uc.uc_stack)) |
@@ -692,12 +761,15 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) | |||
692 | 761 | ||
693 | static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) | 762 | static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) |
694 | { | 763 | { |
695 | #if defined(CONFIG_X86_32) || !defined(CONFIG_X86_64) | 764 | #ifdef CONFIG_X86_64 |
765 | if (is_ia32_task()) | ||
766 | return __NR_ia32_restart_syscall; | ||
767 | #endif | ||
768 | #ifdef CONFIG_X86_X32_ABI | ||
769 | return __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT); | ||
770 | #else | ||
696 | return __NR_restart_syscall; | 771 | return __NR_restart_syscall; |
697 | #else /* !CONFIG_X86_32 && CONFIG_X86_64 */ | 772 | #endif |
698 | return test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : | ||
699 | __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT); | ||
700 | #endif /* CONFIG_X86_32 || !CONFIG_X86_64 */ | ||
701 | } | 773 | } |
702 | 774 | ||
703 | /* | 775 | /* |
@@ -763,6 +835,7 @@ asmlinkage long sys32_x32_rt_sigreturn(void) | |||
763 | struct pt_regs *regs = current_pt_regs(); | 835 | struct pt_regs *regs = current_pt_regs(); |
764 | struct rt_sigframe_x32 __user *frame; | 836 | struct rt_sigframe_x32 __user *frame; |
765 | sigset_t set; | 837 | sigset_t set; |
838 | unsigned long uc_flags; | ||
766 | 839 | ||
767 | frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8); | 840 | frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8); |
768 | 841 | ||
@@ -770,10 +843,12 @@ asmlinkage long sys32_x32_rt_sigreturn(void) | |||
770 | goto badframe; | 843 | goto badframe; |
771 | if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) | 844 | if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) |
772 | goto badframe; | 845 | goto badframe; |
846 | if (__get_user(uc_flags, &frame->uc.uc_flags)) | ||
847 | goto badframe; | ||
773 | 848 | ||
774 | set_current_blocked(&set); | 849 | set_current_blocked(&set); |
775 | 850 | ||
776 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) | 851 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags)) |
777 | goto badframe; | 852 | goto badframe; |
778 | 853 | ||
779 | if (compat_restore_altstack(&frame->uc.uc_stack)) | 854 | if (compat_restore_altstack(&frame->uc.uc_stack)) |
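
The signal.c changes hinge on the new uc_flags policy: every 64-bit frame now records SS in the sigcontext (UC_SIGCONTEXT_SS), and frames delivered to genuine 64-bit code additionally request strict SS restore, while legacy consumers such as DOSEMU keep the lenient path. A condensed model of frame_uc_flags() (flag values here are illustrative; the real ones live in the uapi ucontext header):

#include <stdio.h>

#define UC_FP_XSTATE		0x1
#define UC_SIGCONTEXT_SS	0x2
#define UC_STRICT_RESTORE_SS	0x4

static unsigned long frame_uc_flags(int has_xsave, int user_64bit)
{
	unsigned long flags = UC_SIGCONTEXT_SS;	/* SS is always saved now */

	if (has_xsave)
		flags |= UC_FP_XSTATE;
	if (user_64bit)
		flags |= UC_STRICT_RESTORE_SS;	/* sigreturn must restore SS */

	return flags;
}

int main(void)
{
	printf("64-bit task: %#lx\n", frame_uc_flags(1, 1));
	printf("compat task: %#lx\n", frame_uc_flags(1, 0));
	return 0;
}
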
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 3bf1e0b5f827..643dbdccf4bc 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -256,7 +256,7 @@ static void notrace start_secondary(void *unused) | |||
256 | x86_cpuinit.setup_percpu_clockev(); | 256 | x86_cpuinit.setup_percpu_clockev(); |
257 | 257 | ||
258 | wmb(); | 258 | wmb(); |
259 | cpu_startup_entry(CPUHP_ONLINE); | 259 | cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); |
260 | } | 260 | } |
261 | 261 | ||
262 | int topology_update_package_map(unsigned int apicid, unsigned int cpu) | 262 | int topology_update_package_map(unsigned int apicid, unsigned int cpu) |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 211c11c7bba4..06cbe25861f1 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -83,30 +83,16 @@ gate_desc idt_table[NR_VECTORS] __page_aligned_bss; | |||
83 | DECLARE_BITMAP(used_vectors, NR_VECTORS); | 83 | DECLARE_BITMAP(used_vectors, NR_VECTORS); |
84 | EXPORT_SYMBOL_GPL(used_vectors); | 84 | EXPORT_SYMBOL_GPL(used_vectors); |
85 | 85 | ||
86 | static inline void conditional_sti(struct pt_regs *regs) | 86 | static inline void cond_local_irq_enable(struct pt_regs *regs) |
87 | { | 87 | { |
88 | if (regs->flags & X86_EFLAGS_IF) | 88 | if (regs->flags & X86_EFLAGS_IF) |
89 | local_irq_enable(); | 89 | local_irq_enable(); |
90 | } | 90 | } |
91 | 91 | ||
92 | static inline void preempt_conditional_sti(struct pt_regs *regs) | 92 | static inline void cond_local_irq_disable(struct pt_regs *regs) |
93 | { | ||
94 | preempt_count_inc(); | ||
95 | if (regs->flags & X86_EFLAGS_IF) | ||
96 | local_irq_enable(); | ||
97 | } | ||
98 | |||
99 | static inline void conditional_cli(struct pt_regs *regs) | ||
100 | { | ||
101 | if (regs->flags & X86_EFLAGS_IF) | ||
102 | local_irq_disable(); | ||
103 | } | ||
104 | |||
105 | static inline void preempt_conditional_cli(struct pt_regs *regs) | ||
106 | { | 93 | { |
107 | if (regs->flags & X86_EFLAGS_IF) | 94 | if (regs->flags & X86_EFLAGS_IF) |
108 | local_irq_disable(); | 95 | local_irq_disable(); |
109 | preempt_count_dec(); | ||
110 | } | 96 | } |
111 | 97 | ||
112 | void ist_enter(struct pt_regs *regs) | 98 | void ist_enter(struct pt_regs *regs) |
@@ -262,7 +248,6 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, | |||
262 | tsk->thread.error_code = error_code; | 248 | tsk->thread.error_code = error_code; |
263 | tsk->thread.trap_nr = trapnr; | 249 | tsk->thread.trap_nr = trapnr; |
264 | 250 | ||
265 | #ifdef CONFIG_X86_64 | ||
266 | if (show_unhandled_signals && unhandled_signal(tsk, signr) && | 251 | if (show_unhandled_signals && unhandled_signal(tsk, signr) && |
267 | printk_ratelimit()) { | 252 | printk_ratelimit()) { |
268 | pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx", | 253 | pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx", |
@@ -271,7 +256,6 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, | |||
271 | print_vma_addr(" in ", regs->ip); | 256 | print_vma_addr(" in ", regs->ip); |
272 | pr_cont("\n"); | 257 | pr_cont("\n"); |
273 | } | 258 | } |
274 | #endif | ||
275 | 259 | ||
276 | force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk); | 260 | force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk); |
277 | } | 261 | } |
@@ -286,7 +270,7 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str, | |||
286 | 270 | ||
287 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != | 271 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != |
288 | NOTIFY_STOP) { | 272 | NOTIFY_STOP) { |
289 | conditional_sti(regs); | 273 | cond_local_irq_enable(regs); |
290 | do_trap(trapnr, signr, str, regs, error_code, | 274 | do_trap(trapnr, signr, str, regs, error_code, |
291 | fill_trap_info(regs, signr, trapnr, &info)); | 275 | fill_trap_info(regs, signr, trapnr, &info)); |
292 | } | 276 | } |
@@ -368,7 +352,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) | |||
368 | if (notify_die(DIE_TRAP, "bounds", regs, error_code, | 352 | if (notify_die(DIE_TRAP, "bounds", regs, error_code, |
369 | X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP) | 353 | X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP) |
370 | return; | 354 | return; |
371 | conditional_sti(regs); | 355 | cond_local_irq_enable(regs); |
372 | 356 | ||
373 | if (!user_mode(regs)) | 357 | if (!user_mode(regs)) |
374 | die("bounds", regs, error_code); | 358 | die("bounds", regs, error_code); |
@@ -443,7 +427,7 @@ do_general_protection(struct pt_regs *regs, long error_code) | |||
443 | struct task_struct *tsk; | 427 | struct task_struct *tsk; |
444 | 428 | ||
445 | RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); | 429 | RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); |
446 | conditional_sti(regs); | 430 | cond_local_irq_enable(regs); |
447 | 431 | ||
448 | if (v8086_mode(regs)) { | 432 | if (v8086_mode(regs)) { |
449 | local_irq_enable(); | 433 | local_irq_enable(); |
@@ -517,9 +501,11 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) | |||
517 | * as we may switch to the interrupt stack. | 501 | * as we may switch to the interrupt stack. |
518 | */ | 502 | */ |
519 | debug_stack_usage_inc(); | 503 | debug_stack_usage_inc(); |
520 | preempt_conditional_sti(regs); | 504 | preempt_disable(); |
505 | cond_local_irq_enable(regs); | ||
521 | do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); | 506 | do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); |
522 | preempt_conditional_cli(regs); | 507 | cond_local_irq_disable(regs); |
508 | preempt_enable_no_resched(); | ||
523 | debug_stack_usage_dec(); | 509 | debug_stack_usage_dec(); |
524 | exit: | 510 | exit: |
525 | ist_exit(regs); | 511 | ist_exit(regs); |
@@ -571,6 +557,29 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) | |||
571 | NOKPROBE_SYMBOL(fixup_bad_iret); | 557 | NOKPROBE_SYMBOL(fixup_bad_iret); |
572 | #endif | 558 | #endif |
573 | 559 | ||
560 | static bool is_sysenter_singlestep(struct pt_regs *regs) | ||
561 | { | ||
562 | /* | ||
563 | * We don't try for precision here. If we're anywhere in the region of | ||
564 | * code that can be single-stepped in the SYSENTER entry path, then | ||
565 | * assume that this is a useless single-step trap due to SYSENTER | ||
566 | * being invoked with TF set. (We don't know in advance exactly | ||
567 | * which instructions will be hit because BTF could plausibly | ||
568 | * be set.) | ||
569 | */ | ||
570 | #ifdef CONFIG_X86_32 | ||
571 | return (regs->ip - (unsigned long)__begin_SYSENTER_singlestep_region) < | ||
572 | (unsigned long)__end_SYSENTER_singlestep_region - | ||
573 | (unsigned long)__begin_SYSENTER_singlestep_region; | ||
574 | #elif defined(CONFIG_IA32_EMULATION) | ||
575 | return (regs->ip - (unsigned long)entry_SYSENTER_compat) < | ||
576 | (unsigned long)__end_entry_SYSENTER_compat - | ||
577 | (unsigned long)entry_SYSENTER_compat; | ||
578 | #else | ||
579 | return false; | ||
580 | #endif | ||
581 | } | ||
582 | |||
574 | /* | 583 | /* |
575 | * Our handling of the processor debug registers is non-trivial. | 584 | * Our handling of the processor debug registers is non-trivial. |
576 | * We do not clear them on entry and exit from the kernel. Therefore | 585 | * We do not clear them on entry and exit from the kernel. Therefore |
@@ -605,11 +614,42 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) | |||
605 | ist_enter(regs); | 614 | ist_enter(regs); |
606 | 615 | ||
607 | get_debugreg(dr6, 6); | 616 | get_debugreg(dr6, 6); |
617 | /* | ||
618 | * The Intel SDM says: | ||
619 | * | ||
620 | * Certain debug exceptions may clear bits 0-3. The remaining | ||
621 | * contents of the DR6 register are never cleared by the | ||
622 | * processor. To avoid confusion in identifying debug | ||
623 | * exceptions, debug handlers should clear the register before | ||
624 | * returning to the interrupted task. | ||
625 | * | ||
626 | * Keep it simple: clear DR6 immediately. | ||
627 | */ | ||
628 | set_debugreg(0, 6); | ||
608 | 629 | ||
609 | /* Filter out all the reserved bits which are preset to 1 */ | 630 | /* Filter out all the reserved bits which are preset to 1 */ |
610 | dr6 &= ~DR6_RESERVED; | 631 | dr6 &= ~DR6_RESERVED; |
611 | 632 | ||
612 | /* | 633 | /* |
634 | * The SDM says "The processor clears the BTF flag when it | ||
635 | * generates a debug exception." Clear TIF_BLOCKSTEP to keep | ||
636 | * TIF_BLOCKSTEP in sync with the hardware BTF flag. | ||
637 | */ | ||
638 | clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP); | ||
639 | |||
640 | if (unlikely(!user_mode(regs) && (dr6 & DR_STEP) && | ||
641 | is_sysenter_singlestep(regs))) { | ||
642 | dr6 &= ~DR_STEP; | ||
643 | if (!dr6) | ||
644 | goto exit; | ||
645 | /* | ||
646 | * else we might have gotten a single-step trap and hit a | ||
647 | * watchpoint at the same time, in which case we should fall | ||
648 | * through and handle the watchpoint. | ||
649 | */ | ||
650 | } | ||
651 | |||
652 | /* | ||
613 | * If dr6 has no reason to give us about the origin of this trap, | 653 | * If dr6 has no reason to give us about the origin of this trap, |
614 | * then it's very likely the result of an icebp/int01 trap. | 654 | * then it's very likely the result of an icebp/int01 trap. |
615 | * User wants a sigtrap for that. | 655 | * User wants a sigtrap for that. |
@@ -617,18 +657,10 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) | |||
617 | if (!dr6 && user_mode(regs)) | 657 | if (!dr6 && user_mode(regs)) |
618 | user_icebp = 1; | 658 | user_icebp = 1; |
619 | 659 | ||
620 | /* Catch kmemcheck conditions first of all! */ | 660 | /* Catch kmemcheck conditions! */ |
621 | if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) | 661 | if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) |
622 | goto exit; | 662 | goto exit; |
623 | 663 | ||
624 | /* DR6 may or may not be cleared by the CPU */ | ||
625 | set_debugreg(0, 6); | ||
626 | |||
627 | /* | ||
628 | * The processor cleared BTF, so don't mark that we need it set. | ||
629 | */ | ||
630 | clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP); | ||
631 | |||
632 | /* Store the virtualized DR6 value */ | 664 | /* Store the virtualized DR6 value */ |
633 | tsk->thread.debugreg6 = dr6; | 665 | tsk->thread.debugreg6 = dr6; |
634 | 666 | ||
@@ -648,24 +680,25 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) | |||
648 | debug_stack_usage_inc(); | 680 | debug_stack_usage_inc(); |
649 | 681 | ||
650 | /* It's safe to allow irq's after DR6 has been saved */ | 682 | /* It's safe to allow irq's after DR6 has been saved */ |
651 | preempt_conditional_sti(regs); | 683 | preempt_disable(); |
684 | cond_local_irq_enable(regs); | ||
652 | 685 | ||
653 | if (v8086_mode(regs)) { | 686 | if (v8086_mode(regs)) { |
654 | handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, | 687 | handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, |
655 | X86_TRAP_DB); | 688 | X86_TRAP_DB); |
656 | preempt_conditional_cli(regs); | 689 | cond_local_irq_disable(regs); |
690 | preempt_enable_no_resched(); | ||
657 | debug_stack_usage_dec(); | 691 | debug_stack_usage_dec(); |
658 | goto exit; | 692 | goto exit; |
659 | } | 693 | } |
660 | 694 | ||
661 | /* | 695 | if (WARN_ON_ONCE((dr6 & DR_STEP) && !user_mode(regs))) { |
662 | * Single-stepping through system calls: ignore any exceptions in | 696 | /* |
663 | * kernel space, but re-enable TF when returning to user mode. | 697 | * Historical junk that used to handle SYSENTER single-stepping. |
664 | * | 698 | * This should be unreachable now. If we survive for a while |
665 | * We already checked v86 mode above, so we can check for kernel mode | 699 | * without anyone hitting this warning, we'll turn this into |
666 | * by just checking the CPL of CS. | 700 | * an oops. |
667 | */ | 701 | */ |
668 | if ((dr6 & DR_STEP) && !user_mode(regs)) { | ||
669 | tsk->thread.debugreg6 &= ~DR_STEP; | 702 | tsk->thread.debugreg6 &= ~DR_STEP; |
670 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); | 703 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); |
671 | regs->flags &= ~X86_EFLAGS_TF; | 704 | regs->flags &= ~X86_EFLAGS_TF; |
@@ -673,10 +706,19 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) | |||
673 | si_code = get_si_code(tsk->thread.debugreg6); | 706 | si_code = get_si_code(tsk->thread.debugreg6); |
674 | if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) | 707 | if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) |
675 | send_sigtrap(tsk, regs, error_code, si_code); | 708 | send_sigtrap(tsk, regs, error_code, si_code); |
676 | preempt_conditional_cli(regs); | 709 | cond_local_irq_disable(regs); |
710 | preempt_enable_no_resched(); | ||
677 | debug_stack_usage_dec(); | 711 | debug_stack_usage_dec(); |
678 | 712 | ||
679 | exit: | 713 | exit: |
714 | #if defined(CONFIG_X86_32) | ||
715 | /* | ||
716 | * This is the most likely code path that involves non-trivial use | ||
717 | * of the SYSENTER stack. Check that we haven't overrun it. | ||
718 | */ | ||
719 | WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC, | ||
720 | "Overran or corrupted SYSENTER stack\n"); | ||
721 | #endif | ||
680 | ist_exit(regs); | 722 | ist_exit(regs); |
681 | } | 723 | } |
682 | NOKPROBE_SYMBOL(do_debug); | 724 | NOKPROBE_SYMBOL(do_debug); |
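
The new warning at the end of do_debug() leans on the kernel's usual end-of-stack canary pattern: plant a magic word at the overflow end of the stack at init time, then assert it is intact on the path most likely to overrun. A hedged sketch of the pattern in isolation (the struct here is illustrative, not cpu_tss):

#include <assert.h>

#define STACK_END_MAGIC 0x57AC6E9DUL	/* same magic value the kernel uses */

struct demo_stack {
	unsigned long canary;	/* lowest address: an overrun hits this first */
	unsigned long slots[64];
};

static void demo_stack_init(struct demo_stack *s)
{
	s->canary = STACK_END_MAGIC;	/* planted once at init */
}

static void demo_stack_check(const struct demo_stack *s)
{
	/* Cheap corruption probe on the path most likely to overrun. */
	assert(s->canary == STACK_END_MAGIC);
}
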
@@ -696,7 +738,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) | |||
696 | 738 | ||
697 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP) | 739 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP) |
698 | return; | 740 | return; |
699 | conditional_sti(regs); | 741 | cond_local_irq_enable(regs); |
700 | 742 | ||
701 | if (!user_mode(regs)) { | 743 | if (!user_mode(regs)) { |
702 | if (!fixup_exception(regs, trapnr)) { | 744 | if (!fixup_exception(regs, trapnr)) { |
@@ -743,20 +785,19 @@ do_simd_coprocessor_error(struct pt_regs *regs, long error_code) | |||
743 | dotraplinkage void | 785 | dotraplinkage void |
744 | do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) | 786 | do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) |
745 | { | 787 | { |
746 | conditional_sti(regs); | 788 | cond_local_irq_enable(regs); |
747 | } | 789 | } |
748 | 790 | ||
749 | dotraplinkage void | 791 | dotraplinkage void |
750 | do_device_not_available(struct pt_regs *regs, long error_code) | 792 | do_device_not_available(struct pt_regs *regs, long error_code) |
751 | { | 793 | { |
752 | RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); | 794 | RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); |
753 | BUG_ON(use_eager_fpu()); | ||
754 | 795 | ||
755 | #ifdef CONFIG_MATH_EMULATION | 796 | #ifdef CONFIG_MATH_EMULATION |
756 | if (read_cr0() & X86_CR0_EM) { | 797 | if (!boot_cpu_has(X86_FEATURE_FPU) && (read_cr0() & X86_CR0_EM)) { |
757 | struct math_emu_info info = { }; | 798 | struct math_emu_info info = { }; |
758 | 799 | ||
759 | conditional_sti(regs); | 800 | cond_local_irq_enable(regs); |
760 | 801 | ||
761 | info.regs = regs; | 802 | info.regs = regs; |
762 | math_emulate(&info); | 803 | math_emulate(&info); |
@@ -765,7 +806,7 @@ do_device_not_available(struct pt_regs *regs, long error_code) | |||
765 | #endif | 806 | #endif |
766 | fpu__restore(¤t->thread.fpu); /* interrupts still off */ | 807 | fpu__restore(¤t->thread.fpu); /* interrupts still off */ |
767 | #ifdef CONFIG_X86_32 | 808 | #ifdef CONFIG_X86_32 |
768 | conditional_sti(regs); | 809 | cond_local_irq_enable(regs); |
769 | #endif | 810 | #endif |
770 | } | 811 | } |
771 | NOKPROBE_SYMBOL(do_device_not_available); | 812 | NOKPROBE_SYMBOL(do_device_not_available); |
@@ -868,7 +909,7 @@ void __init trap_init(void) | |||
868 | #endif | 909 | #endif |
869 | 910 | ||
870 | #ifdef CONFIG_X86_32 | 911 | #ifdef CONFIG_X86_32 |
871 | set_system_trap_gate(IA32_SYSCALL_VECTOR, entry_INT80_32); | 912 | set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_32); |
872 | set_bit(IA32_SYSCALL_VECTOR, used_vectors); | 913 | set_bit(IA32_SYSCALL_VECTOR, used_vectors); |
873 | #endif | 914 | #endif |
874 | 915 | ||
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 3d743da828d3..56380440d862 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -43,6 +43,11 @@ static DEFINE_STATIC_KEY_FALSE(__use_tsc); | |||
43 | 43 | ||
44 | int tsc_clocksource_reliable; | 44 | int tsc_clocksource_reliable; |
45 | 45 | ||
46 | static u32 art_to_tsc_numerator; | ||
47 | static u32 art_to_tsc_denominator; | ||
48 | static u64 art_to_tsc_offset; | ||
49 | struct clocksource *art_related_clocksource; | ||
50 | |||
46 | /* | 51 | /* |
47 | * Use a ring-buffer like data structure, where a writer advances the head by | 52 | * Use a ring-buffer like data structure, where a writer advances the head by |
48 | * writing a new data entry and a reader advances the tail when it observes a | 53 | * writing a new data entry and a reader advances the tail when it observes a |
@@ -964,6 +969,37 @@ core_initcall(cpufreq_tsc); | |||
964 | 969 | ||
965 | #endif /* CONFIG_CPU_FREQ */ | 970 | #endif /* CONFIG_CPU_FREQ */ |
966 | 971 | ||
972 | #define ART_CPUID_LEAF (0x15) | ||
973 | #define ART_MIN_DENOMINATOR (1) | ||
974 | |||
975 | |||
976 | /* | ||
977 | * If ART is present, detect the numerator:denominator ratio used to convert ART to TSC | ||
978 | */ | ||
979 | static void detect_art(void) | ||
980 | { | ||
981 | unsigned int unused[2]; | ||
982 | |||
983 | if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF) | ||
984 | return; | ||
985 | |||
986 | cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator, | ||
987 | &art_to_tsc_numerator, unused, unused+1); | ||
988 | |||
989 | /* Don't enable ART in a VM, non-stop TSC required */ | ||
990 | if (boot_cpu_has(X86_FEATURE_HYPERVISOR) || | ||
991 | !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) || | ||
992 | art_to_tsc_denominator < ART_MIN_DENOMINATOR) | ||
993 | return; | ||
994 | |||
995 | if (rdmsrl_safe(MSR_IA32_TSC_ADJUST, &art_to_tsc_offset)) | ||
996 | return; | ||
997 | |||
998 | /* Make this sticky over multiple CPU init calls */ | ||
999 | setup_force_cpu_cap(X86_FEATURE_ART); | ||
1000 | } | ||
1001 | |||
1002 | |||
967 | /* clocksource code */ | 1003 | /* clocksource code */ |
968 | 1004 | ||
969 | static struct clocksource clocksource_tsc; | 1005 | static struct clocksource clocksource_tsc; |
@@ -1071,6 +1107,25 @@ int unsynchronized_tsc(void) | |||
1071 | return 0; | 1107 | return 0; |
1072 | } | 1108 | } |
1073 | 1109 | ||
1110 | /* | ||
1111 | * Convert ART to TSC given numerator/denominator found in detect_art() | ||
1112 | */ | ||
1113 | struct system_counterval_t convert_art_to_tsc(cycle_t art) | ||
1114 | { | ||
1115 | u64 tmp, res, rem; | ||
1116 | |||
1117 | rem = do_div(art, art_to_tsc_denominator); | ||
1118 | |||
1119 | res = art * art_to_tsc_numerator; | ||
1120 | tmp = rem * art_to_tsc_numerator; | ||
1121 | |||
1122 | do_div(tmp, art_to_tsc_denominator); | ||
1123 | res += tmp + art_to_tsc_offset; | ||
1124 | |||
1125 | return (struct system_counterval_t) {.cs = art_related_clocksource, | ||
1126 | .cycles = res}; | ||
1127 | } | ||
1128 | EXPORT_SYMBOL(convert_art_to_tsc); | ||
1074 | 1129 | ||
1075 | static void tsc_refine_calibration_work(struct work_struct *work); | 1130 | static void tsc_refine_calibration_work(struct work_struct *work); |
1076 | static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work); | 1131 | static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work); |
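
convert_art_to_tsc() applies the CPUID.15H numerator/denominator pair without needing 128-bit arithmetic: it splits the ART value into quotient and remainder by the denominator before multiplying, so neither product can overflow prematurely. A user-space sketch of the same arithmetic, with assumed example values:

#include <stdint.h>
#include <stdio.h>

static uint64_t art_to_tsc(uint64_t art, uint32_t num, uint32_t den,
			   uint64_t offset)
{
	uint64_t q = art / den;
	uint64_t r = art % den;

	/* tsc = art * num / den + offset, split to limit intermediate size */
	return q * num + (r * num) / den + offset;
}

int main(void)
{
	/* Assumed values: denominator 2, numerator 168, zero offset. */
	printf("%llu\n",
	       (unsigned long long)art_to_tsc(1000000, 168, 2, 0)); /* 84000000 */
	return 0;
}
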
@@ -1142,6 +1197,8 @@ static void tsc_refine_calibration_work(struct work_struct *work) | |||
1142 | (unsigned long)tsc_khz % 1000); | 1197 | (unsigned long)tsc_khz % 1000); |
1143 | 1198 | ||
1144 | out: | 1199 | out: |
1200 | if (boot_cpu_has(X86_FEATURE_ART)) | ||
1201 | art_related_clocksource = &clocksource_tsc; | ||
1145 | clocksource_register_khz(&clocksource_tsc, tsc_khz); | 1202 | clocksource_register_khz(&clocksource_tsc, tsc_khz); |
1146 | } | 1203 | } |
1147 | 1204 | ||
@@ -1235,6 +1292,8 @@ void __init tsc_init(void) | |||
1235 | mark_tsc_unstable("TSCs unsynchronized"); | 1292 | mark_tsc_unstable("TSCs unsynchronized"); |
1236 | 1293 | ||
1237 | check_system_tsc_reliable(); | 1294 | check_system_tsc_reliable(); |
1295 | |||
1296 | detect_art(); | ||
1238 | } | 1297 | } |
1239 | 1298 | ||
1240 | #ifdef CONFIG_SMP | 1299 | #ifdef CONFIG_SMP |
@@ -1246,14 +1305,14 @@ void __init tsc_init(void) | |||
1246 | */ | 1305 | */ |
1247 | unsigned long calibrate_delay_is_known(void) | 1306 | unsigned long calibrate_delay_is_known(void) |
1248 | { | 1307 | { |
1249 | int i, cpu = smp_processor_id(); | 1308 | int sibling, cpu = smp_processor_id(); |
1250 | 1309 | ||
1251 | if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC)) | 1310 | if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC)) |
1252 | return 0; | 1311 | return 0; |
1253 | 1312 | ||
1254 | for_each_online_cpu(i) | 1313 | sibling = cpumask_any_but(topology_core_cpumask(cpu), cpu); |
1255 | if (cpu_data(i).phys_proc_id == cpu_data(cpu).phys_proc_id) | 1314 | if (sibling < nr_cpu_ids) |
1256 | return cpu_data(i).loops_per_jiffy; | 1315 | return cpu_data(sibling).loops_per_jiffy; |
1257 | return 0; | 1316 | return 0; |
1258 | } | 1317 | } |
1259 | #endif | 1318 | #endif |
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S index 07efb35ee4bc..014ea59aa153 100644 --- a/arch/x86/kernel/verify_cpu.S +++ b/arch/x86/kernel/verify_cpu.S | |||
@@ -30,7 +30,7 @@ | |||
30 | * appropriately. Either display a message or halt. | 30 | * appropriately. Either display a message or halt. |
31 | */ | 31 | */ |
32 | 32 | ||
33 | #include <asm/cpufeature.h> | 33 | #include <asm/cpufeatures.h> |
34 | #include <asm/msr-index.h> | 34 | #include <asm/msr-index.h> |
35 | 35 | ||
36 | verify_cpu: | 36 | verify_cpu: |
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index e574b8546518..3dce1ca0a653 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c | |||
@@ -362,7 +362,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) | |||
362 | /* make room for real-mode segments */ | 362 | /* make room for real-mode segments */ |
363 | tsk->thread.sp0 += 16; | 363 | tsk->thread.sp0 += 16; |
364 | 364 | ||
365 | if (static_cpu_has_safe(X86_FEATURE_SEP)) | 365 | if (static_cpu_has(X86_FEATURE_SEP)) |
366 | tsk->thread.sysenter_cs = 0; | 366 | tsk->thread.sysenter_cs = 0; |
367 | 367 | ||
368 | load_sp0(tss, &tsk->thread); | 368 | load_sp0(tss, &tsk->thread); |
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index fe133b710bef..5af9958cbdb6 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -192,6 +192,17 @@ SECTIONS | |||
192 | :init | 192 | :init |
193 | #endif | 193 | #endif |
194 | 194 | ||
195 | /* | ||
196 | * Section for code used exclusively before alternatives are run. All | ||
197 | * references to such code must be patched out by alternatives, normally | ||
198 | * by using X86_FEATURE_ALWAYS CPU feature bit. | ||
199 | * | ||
200 | * See static_cpu_has() for an example. | ||
201 | */ | ||
202 | .altinstr_aux : AT(ADDR(.altinstr_aux) - LOAD_OFFSET) { | ||
203 | *(.altinstr_aux) | ||
204 | } | ||
205 | |||
195 | INIT_DATA_SECTION(16) | 206 | INIT_DATA_SECTION(16) |
196 | 207 | ||
197 | .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { | 208 | .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { |
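
The comment block explains the intent of .altinstr_aux; as a generic illustration of how code ends up in such a section (plain GCC/clang section attributes, not the kernel's actual static_cpu_has() machinery):

#include <stdbool.h>

/*
 * Illustration only: the 'section' attribute drops this helper into
 * .altinstr_aux, where a linker-script rule like the one added above
 * collects it. The probe body is a stand-in.
 */
__attribute__((section(".altinstr_aux"), used))
static bool boot_time_probe(void)
{
	return false;	/* a pre-alternatives feature test would go here */
}
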
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index a2fe51b00cce..65be7cfaf947 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S | |||
@@ -1,5 +1,5 @@ | |||
1 | #include <linux/linkage.h> | 1 | #include <linux/linkage.h> |
2 | #include <asm/cpufeature.h> | 2 | #include <asm/cpufeatures.h> |
3 | #include <asm/alternative-asm.h> | 3 | #include <asm/alternative-asm.h> |
4 | 4 | ||
5 | /* | 5 | /* |
diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c index 422db000d727..5cc78bf57232 100644 --- a/arch/x86/lib/cmdline.c +++ b/arch/x86/lib/cmdline.c | |||
@@ -21,12 +21,16 @@ static inline int myisspace(u8 c) | |||
21 | * @option: option string to look for | 21 | * @option: option string to look for |
22 | * | 22 | * |
23 | * Returns the position of that @option (starts counting with 1) | 23 | * Returns the position of that @option (starts counting with 1) |
24 | * or 0 on not found. | 24 | * or 0 on not found. @option will only be found if it is found |
25 | * as an entire word in @cmdline. For instance, if @option="car" | ||
26 | * then a cmdline which contains "cart" will not match. | ||
25 | */ | 27 | */ |
26 | int cmdline_find_option_bool(const char *cmdline, const char *option) | 28 | static int |
29 | __cmdline_find_option_bool(const char *cmdline, int max_cmdline_size, | ||
30 | const char *option) | ||
27 | { | 31 | { |
28 | char c; | 32 | char c; |
29 | int len, pos = 0, wstart = 0; | 33 | int pos = 0, wstart = 0; |
30 | const char *opptr = NULL; | 34 | const char *opptr = NULL; |
31 | enum { | 35 | enum { |
32 | st_wordstart = 0, /* Start of word/after whitespace */ | 36 | st_wordstart = 0, /* Start of word/after whitespace */ |
@@ -37,11 +41,11 @@ int cmdline_find_option_bool(const char *cmdline, const char *option) | |||
37 | if (!cmdline) | 41 | if (!cmdline) |
38 | return -1; /* No command line */ | 42 | return -1; /* No command line */ |
39 | 43 | ||
40 | len = min_t(int, strlen(cmdline), COMMAND_LINE_SIZE); | 44 | /* |
41 | if (!len) | 45 | * This 'pos' check ensures we do not overrun |
42 | return 0; | 46 | * a non-NULL-terminated 'cmdline' |
43 | 47 | */ | |
44 | while (len--) { | 48 | while (pos < max_cmdline_size) { |
45 | c = *(char *)cmdline++; | 49 | c = *(char *)cmdline++; |
46 | pos++; | 50 | pos++; |
47 | 51 | ||
@@ -58,18 +62,35 @@ int cmdline_find_option_bool(const char *cmdline, const char *option) | |||
58 | /* fall through */ | 62 | /* fall through */ |
59 | 63 | ||
60 | case st_wordcmp: | 64 | case st_wordcmp: |
61 | if (!*opptr) | 65 | if (!*opptr) { |
66 | /* | ||
67 | * We matched all the way to the end of the | ||
68 | * option we were looking for. If the | ||
69 | * command-line has a space _or_ ends, then | ||
70 | * we matched! | ||
71 | */ | ||
62 | if (!c || myisspace(c)) | 72 | if (!c || myisspace(c)) |
63 | return wstart; | 73 | return wstart; |
64 | else | 74 | /* |
65 | state = st_wordskip; | 75 | * We hit the end of the option, but _not_ |
66 | else if (!c) | 76 | * the end of a word on the cmdline. Not |
77 | * a match. | ||
78 | */ | ||
79 | } else if (!c) { | ||
80 | /* | ||
81 | * Hit the NULL terminator on the end of | ||
82 | * cmdline. | ||
83 | */ | ||
67 | return 0; | 84 | return 0; |
68 | else if (c != *opptr++) | 85 | } else if (c == *opptr++) { |
69 | state = st_wordskip; | 86 | /* |
70 | else if (!len) /* last word and is matching */ | 87 | * We are currently matching, so continue |
71 | return wstart; | 88 | * to the next character on the cmdline. |
72 | break; | 89 | */ |
90 | break; | ||
91 | } | ||
92 | state = st_wordskip; | ||
93 | /* fall through */ | ||
73 | 94 | ||
74 | case st_wordskip: | 95 | case st_wordskip: |
75 | if (!c) | 96 | if (!c) |
@@ -82,3 +103,8 @@ int cmdline_find_option_bool(const char *cmdline, const char *option) | |||
82 | 103 | ||
83 | return 0; /* Buffer overrun */ | 104 | return 0; /* Buffer overrun */ |
84 | } | 105 | } |
106 | |||
107 | int cmdline_find_option_bool(const char *cmdline, const char *option) | ||
108 | { | ||
109 | return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option); | ||
110 | } | ||
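
The rewritten matcher enforces two properties: @option matches only as a whole whitespace-delimited word, and scanning never runs past max_cmdline_size even on an unterminated buffer. A simplified user-space stand-in (not the kernel's state machine) demonstrating the documented word-boundary contract:

#include <stdio.h>
#include <string.h>

static int find_option_bool(const char *cmdline, const char *option)
{
	int pos = 1;
	const char *p = cmdline;

	if (!cmdline)
		return -1;			/* no command line */

	while (*p) {
		size_t wlen = strcspn(p, " \t");

		if (wlen == strlen(option) && !strncmp(p, option, wlen))
			return pos;		/* 1-based position of the word */
		pos += (int)wlen;
		p += wlen;
		while (*p == ' ' || *p == '\t') {
			p++;
			pos++;
		}
	}
	return 0;				/* not found */
}

int main(void)
{
	printf("%d\n", find_option_bool("foo cart bar", "car"));	/* 0 */
	printf("%d\n", find_option_bool("foo car bar", "car"));		/* 5 */
	return 0;
}
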
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S index 009f98216b7e..24ef1c2104d4 100644 --- a/arch/x86/lib/copy_page_64.S +++ b/arch/x86/lib/copy_page_64.S | |||
@@ -1,7 +1,7 @@ | |||
1 | /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */ | 1 | /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */ |
2 | 2 | ||
3 | #include <linux/linkage.h> | 3 | #include <linux/linkage.h> |
4 | #include <asm/cpufeature.h> | 4 | #include <asm/cpufeatures.h> |
5 | #include <asm/alternative-asm.h> | 5 | #include <asm/alternative-asm.h> |
6 | 6 | ||
7 | /* | 7 | /* |
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 27f89c79a44b..2b0ef26da0bd 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S | |||
@@ -10,7 +10,7 @@ | |||
10 | #include <asm/current.h> | 10 | #include <asm/current.h> |
11 | #include <asm/asm-offsets.h> | 11 | #include <asm/asm-offsets.h> |
12 | #include <asm/thread_info.h> | 12 | #include <asm/thread_info.h> |
13 | #include <asm/cpufeature.h> | 13 | #include <asm/cpufeatures.h> |
14 | #include <asm/alternative-asm.h> | 14 | #include <asm/alternative-asm.h> |
15 | #include <asm/asm.h> | 15 | #include <asm/asm.h> |
16 | #include <asm/smap.h> | 16 | #include <asm/smap.h> |
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 7d37641ada5b..cbb8ee5830ff 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S | |||
@@ -1,7 +1,7 @@ | |||
1 | /* Copyright 2002 Andi Kleen */ | 1 | /* Copyright 2002 Andi Kleen */ |
2 | 2 | ||
3 | #include <linux/linkage.h> | 3 | #include <linux/linkage.h> |
4 | #include <asm/cpufeature.h> | 4 | #include <asm/cpufeatures.h> |
5 | #include <asm/alternative-asm.h> | 5 | #include <asm/alternative-asm.h> |
6 | 6 | ||
7 | /* | 7 | /* |
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index ca2afdd6d98e..90ce01bee00c 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S | |||
@@ -6,7 +6,7 @@ | |||
6 | * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com> | 6 | * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com> |
7 | */ | 7 | */ |
8 | #include <linux/linkage.h> | 8 | #include <linux/linkage.h> |
9 | #include <asm/cpufeature.h> | 9 | #include <asm/cpufeatures.h> |
10 | #include <asm/alternative-asm.h> | 10 | #include <asm/alternative-asm.h> |
11 | 11 | ||
12 | #undef memmove | 12 | #undef memmove |
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 2661fad05827..c9c81227ea37 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S | |||
@@ -1,7 +1,7 @@ | |||
1 | /* Copyright 2002 Andi Kleen, SuSE Labs */ | 1 | /* Copyright 2002 Andi Kleen, SuSE Labs */ |
2 | 2 | ||
3 | #include <linux/linkage.h> | 3 | #include <linux/linkage.h> |
4 | #include <asm/cpufeature.h> | 4 | #include <asm/cpufeatures.h> |
5 | #include <asm/alternative-asm.h> | 5 | #include <asm/alternative-asm.h> |
6 | 6 | ||
7 | .weak memset | 7 | .weak memset |
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 4a6f1d9b5106..99bfb192803f 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c | |||
@@ -358,20 +358,19 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr, | |||
358 | #define pgd_none(a) pud_none(__pud(pgd_val(a))) | 358 | #define pgd_none(a) pud_none(__pud(pgd_val(a))) |
359 | #endif | 359 | #endif |
360 | 360 | ||
361 | #ifdef CONFIG_X86_64 | ||
362 | static inline bool is_hypervisor_range(int idx) | 361 | static inline bool is_hypervisor_range(int idx) |
363 | { | 362 | { |
363 | #ifdef CONFIG_X86_64 | ||
364 | /* | 364 | /* |
365 | * ffff800000000000 - ffff87ffffffffff is reserved for | 365 | * ffff800000000000 - ffff87ffffffffff is reserved for |
366 | * the hypervisor. | 366 | * the hypervisor. |
367 | */ | 367 | */ |
368 | return paravirt_enabled() && | 368 | return (idx >= pgd_index(__PAGE_OFFSET) - 16) && |
369 | (idx >= pgd_index(__PAGE_OFFSET) - 16) && | 369 | (idx < pgd_index(__PAGE_OFFSET)); |
370 | (idx < pgd_index(__PAGE_OFFSET)); | ||
371 | } | ||
372 | #else | 370 | #else |
373 | static inline bool is_hypervisor_range(int idx) { return false; } | 371 | return false; |
374 | #endif | 372 | #endif |
373 | } | ||
375 | 374 | ||
376 | static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, | 375 | static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, |
377 | bool checkwx) | 376 | bool checkwx) |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 2ebfbaf61142..bd7a9b9e2e14 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -388,7 +388,6 @@ repeat: | |||
388 | } | 388 | } |
389 | 389 | ||
390 | pte_t *kmap_pte; | 390 | pte_t *kmap_pte; |
391 | pgprot_t kmap_prot; | ||
392 | 391 | ||
393 | static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr) | 392 | static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr) |
394 | { | 393 | { |
@@ -405,8 +404,6 @@ static void __init kmap_init(void) | |||
405 | */ | 404 | */ |
406 | kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); | 405 | kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); |
407 | kmap_pte = kmap_get_fixmap_pte(kmap_vstart); | 406 | kmap_pte = kmap_get_fixmap_pte(kmap_vstart); |
408 | |||
409 | kmap_prot = PAGE_KERNEL; | ||
410 | } | 407 | } |
411 | 408 | ||
412 | #ifdef CONFIG_HIGHMEM | 409 | #ifdef CONFIG_HIGHMEM |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index a40b755c67e3..214afda97911 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -53,6 +53,7 @@ | |||
53 | #include <asm/numa.h> | 53 | #include <asm/numa.h> |
54 | #include <asm/cacheflush.h> | 54 | #include <asm/cacheflush.h> |
55 | #include <asm/init.h> | 55 | #include <asm/init.h> |
56 | #include <asm/uv/uv.h> | ||
56 | #include <asm/setup.h> | 57 | #include <asm/setup.h> |
57 | 58 | ||
58 | #include "mm_internal.h" | 59 | #include "mm_internal.h" |
@@ -1203,26 +1204,13 @@ int kern_addr_valid(unsigned long addr) | |||
1203 | 1204 | ||
1204 | static unsigned long probe_memory_block_size(void) | 1205 | static unsigned long probe_memory_block_size(void) |
1205 | { | 1206 | { |
1206 | /* start from 2g */ | 1207 | unsigned long bz = MIN_MEMORY_BLOCK_SIZE; |
1207 | unsigned long bz = 1UL<<31; | ||
1208 | 1208 | ||
1209 | if (totalram_pages >= (64ULL << (30 - PAGE_SHIFT))) { | 1209 | /* if system is UV or has 64GB of RAM or more, use large blocks */ |
1210 | pr_info("Using 2GB memory block size for large-memory system\n"); | 1210 | if (is_uv_system() || ((max_pfn << PAGE_SHIFT) >= (64UL << 30))) |
1211 | return 2UL * 1024 * 1024 * 1024; | 1211 | bz = 2UL << 30; /* 2GB */ |
1212 | } | ||
1213 | |||
1214 | /* less than 64g installed */ | ||
1215 | if ((max_pfn << PAGE_SHIFT) < (16UL << 32)) | ||
1216 | return MIN_MEMORY_BLOCK_SIZE; | ||
1217 | |||
1218 | /* get the tail size */ | ||
1219 | while (bz > MIN_MEMORY_BLOCK_SIZE) { | ||
1220 | if (!((max_pfn << PAGE_SHIFT) & (bz - 1))) | ||
1221 | break; | ||
1222 | bz >>= 1; | ||
1223 | } | ||
1224 | 1212 | ||
1225 | printk(KERN_DEBUG "memory block size : %ldMB\n", bz >> 20); | 1213 | pr_info("x86/mm: Memory block size: %ldMB\n", bz >> 20); |
1226 | 1214 | ||
1227 | return bz; | 1215 | return bz; |
1228 | } | 1216 | } |
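
The new heuristic boils down to one shift comparison: max_pfn counts page frames, so shifting left by PAGE_SHIFT gives bytes, which are compared against 64 GiB expressed as 64UL << 30. A quick standalone check of that arithmetic (PAGE_SHIFT of 12 assumed, as with 4 KiB x86 pages):

#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	unsigned long max_pfn = 20UL << (30 - PAGE_SHIFT);	/* 20 GiB of RAM */

	/* Same comparison as probe_memory_block_size() above. */
	printf("use 2GB blocks: %d\n",
	       (max_pfn << PAGE_SHIFT) >= (64UL << 30));	/* prints 0 */
	return 0;
}
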
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index d470cf219a2d..1b1110fa0057 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c | |||
@@ -120,11 +120,22 @@ void __init kasan_init(void) | |||
120 | kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), | 120 | kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), |
121 | (void *)KASAN_SHADOW_END); | 121 | (void *)KASAN_SHADOW_END); |
122 | 122 | ||
123 | memset(kasan_zero_page, 0, PAGE_SIZE); | ||
124 | |||
125 | load_cr3(init_level4_pgt); | 123 | load_cr3(init_level4_pgt); |
126 | __flush_tlb_all(); | 124 | __flush_tlb_all(); |
127 | init_task.kasan_depth = 0; | ||
128 | 125 | ||
126 | /* | ||
127 | * kasan_zero_page has been used as early shadow memory, thus it may | ||
128 | * contain some garbage. Now we can clear and write protect it, since | ||
129 | * after the TLB flush no one should write to it. | ||
130 | */ | ||
131 | memset(kasan_zero_page, 0, PAGE_SIZE); | ||
132 | for (i = 0; i < PTRS_PER_PTE; i++) { | ||
133 | pte_t pte = __pte(__pa(kasan_zero_page) | __PAGE_KERNEL_RO); | ||
134 | set_pte(&kasan_zero_pte[i], pte); | ||
135 | } | ||
136 | /* Flush TLBs again to be sure that write protection applied. */ | ||
137 | __flush_tlb_all(); | ||
138 | |||
139 | init_task.kasan_depth = 0; | ||
129 | pr_info("KernelAddressSanitizer initialized\n"); | 140 | pr_info("KernelAddressSanitizer initialized\n"); |
130 | } | 141 | } |
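
The clear-then-write-protect ordering used on kasan_zero_page has a direct user-space analogue; a hedged sketch of the idea with mprotect() (an analogue, not the KASAN code itself):

#include <string.h>
#include <sys/mman.h>

/*
 * Scrub a page that may hold garbage, then make it read-only so any
 * later write faults. 'page' must be page-aligned, as mprotect()
 * requires, and 'size' a multiple of the page size.
 */
static void seal_zero_page(void *page, size_t size)
{
	memset(page, 0, size);
	mprotect(page, size, PROT_READ);
}
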
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 637ab34ed632..ddb2244b06a1 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c | |||
@@ -33,7 +33,7 @@ | |||
33 | struct kmmio_fault_page { | 33 | struct kmmio_fault_page { |
34 | struct list_head list; | 34 | struct list_head list; |
35 | struct kmmio_fault_page *release_next; | 35 | struct kmmio_fault_page *release_next; |
36 | unsigned long page; /* location of the fault page */ | 36 | unsigned long addr; /* the requested address */ |
37 | pteval_t old_presence; /* page presence prior to arming */ | 37 | pteval_t old_presence; /* page presence prior to arming */ |
38 | bool armed; | 38 | bool armed; |
39 | 39 | ||
@@ -70,9 +70,16 @@ unsigned int kmmio_count; | |||
70 | static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE]; | 70 | static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE]; |
71 | static LIST_HEAD(kmmio_probes); | 71 | static LIST_HEAD(kmmio_probes); |
72 | 72 | ||
73 | static struct list_head *kmmio_page_list(unsigned long page) | 73 | static struct list_head *kmmio_page_list(unsigned long addr) |
74 | { | 74 | { |
75 | return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)]; | 75 | unsigned int l; |
76 | pte_t *pte = lookup_address(addr, &l); | ||
77 | |||
78 | if (!pte) | ||
79 | return NULL; | ||
80 | addr &= page_level_mask(l); | ||
81 | |||
82 | return &kmmio_page_table[hash_long(addr, KMMIO_PAGE_HASH_BITS)]; | ||
76 | } | 83 | } |
77 | 84 | ||
78 | /* Accessed per-cpu */ | 85 | /* Accessed per-cpu */ |
@@ -98,15 +105,19 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr) | |||
98 | } | 105 | } |
99 | 106 | ||
100 | /* You must be holding RCU read lock. */ | 107 | /* You must be holding RCU read lock. */ |
101 | static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) | 108 | static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long addr) |
102 | { | 109 | { |
103 | struct list_head *head; | 110 | struct list_head *head; |
104 | struct kmmio_fault_page *f; | 111 | struct kmmio_fault_page *f; |
112 | unsigned int l; | ||
113 | pte_t *pte = lookup_address(addr, &l); | ||
105 | 114 | ||
106 | page &= PAGE_MASK; | 115 | if (!pte) |
107 | head = kmmio_page_list(page); | 116 | return NULL; |
117 | addr &= page_level_mask(l); | ||
118 | head = kmmio_page_list(addr); | ||
108 | list_for_each_entry_rcu(f, head, list) { | 119 | list_for_each_entry_rcu(f, head, list) { |
109 | if (f->page == page) | 120 | if (f->addr == addr) |
110 | return f; | 121 | return f; |
111 | } | 122 | } |
112 | return NULL; | 123 | return NULL; |
@@ -137,10 +148,10 @@ static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old) | |||
137 | static int clear_page_presence(struct kmmio_fault_page *f, bool clear) | 148 | static int clear_page_presence(struct kmmio_fault_page *f, bool clear) |
138 | { | 149 | { |
139 | unsigned int level; | 150 | unsigned int level; |
140 | pte_t *pte = lookup_address(f->page, &level); | 151 | pte_t *pte = lookup_address(f->addr, &level); |
141 | 152 | ||
142 | if (!pte) { | 153 | if (!pte) { |
143 | pr_err("no pte for page 0x%08lx\n", f->page); | 154 | pr_err("no pte for addr 0x%08lx\n", f->addr); |
144 | return -1; | 155 | return -1; |
145 | } | 156 | } |
146 | 157 | ||
@@ -156,7 +167,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear) | |||
156 | return -1; | 167 | return -1; |
157 | } | 168 | } |
158 | 169 | ||
159 | __flush_tlb_one(f->page); | 170 | __flush_tlb_one(f->addr); |
160 | return 0; | 171 | return 0; |
161 | } | 172 | } |
162 | 173 | ||
@@ -176,12 +187,12 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f) | |||
176 | int ret; | 187 | int ret; |
177 | WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n")); | 188 | WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n")); |
178 | if (f->armed) { | 189 | if (f->armed) { |
179 | pr_warning("double-arm: page 0x%08lx, ref %d, old %d\n", | 190 | pr_warning("double-arm: addr 0x%08lx, ref %d, old %d\n", |
180 | f->page, f->count, !!f->old_presence); | 191 | f->addr, f->count, !!f->old_presence); |
181 | } | 192 | } |
182 | ret = clear_page_presence(f, true); | 193 | ret = clear_page_presence(f, true); |
183 | WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming 0x%08lx failed.\n"), | 194 | WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming at 0x%08lx failed.\n"), |
184 | f->page); | 195 | f->addr); |
185 | f->armed = true; | 196 | f->armed = true; |
186 | return ret; | 197 | return ret; |
187 | } | 198 | } |
@@ -191,7 +202,7 @@ static void disarm_kmmio_fault_page(struct kmmio_fault_page *f) | |||
191 | { | 202 | { |
192 | int ret = clear_page_presence(f, false); | 203 | int ret = clear_page_presence(f, false); |
193 | WARN_ONCE(ret < 0, | 204 | WARN_ONCE(ret < 0, |
194 | KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page); | 205 | KERN_ERR "kmmio disarming at 0x%08lx failed.\n", f->addr); |
195 | f->armed = false; | 206 | f->armed = false; |
196 | } | 207 | } |
197 | 208 | ||
@@ -215,6 +226,12 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) | |||
215 | struct kmmio_context *ctx; | 226 | struct kmmio_context *ctx; |
216 | struct kmmio_fault_page *faultpage; | 227 | struct kmmio_fault_page *faultpage; |
217 | int ret = 0; /* default to fault not handled */ | 228 | int ret = 0; /* default to fault not handled */ |
229 | unsigned long page_base = addr; | ||
230 | unsigned int l; | ||
231 | pte_t *pte = lookup_address(addr, &l); | ||
232 | if (!pte) | ||
233 | return -EINVAL; | ||
234 | page_base &= page_level_mask(l); | ||
218 | 235 | ||
219 | /* | 236 | /* |
220 | * Preemption is now disabled to prevent process switch during | 237 | * Preemption is now disabled to prevent process switch during |
@@ -227,7 +244,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) | |||
227 | preempt_disable(); | 244 | preempt_disable(); |
228 | rcu_read_lock(); | 245 | rcu_read_lock(); |
229 | 246 | ||
230 | faultpage = get_kmmio_fault_page(addr); | 247 | faultpage = get_kmmio_fault_page(page_base); |
231 | if (!faultpage) { | 248 | if (!faultpage) { |
232 | /* | 249 | /* |
233 | * Either this page fault is not caused by kmmio, or | 250 | * Either this page fault is not caused by kmmio, or |
@@ -239,7 +256,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) | |||
239 | 256 | ||
240 | ctx = &get_cpu_var(kmmio_ctx); | 257 | ctx = &get_cpu_var(kmmio_ctx); |
241 | if (ctx->active) { | 258 | if (ctx->active) { |
242 | if (addr == ctx->addr) { | 259 | if (page_base == ctx->addr) { |
243 | /* | 260 | /* |
244 | * A second fault on the same page means some other | 261 | * A second fault on the same page means some other |
245 | * condition needs handling by do_page_fault(), the | 262 | * condition needs handling by do_page_fault(), the |
@@ -267,9 +284,9 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) | |||
267 | ctx->active++; | 284 | ctx->active++; |
268 | 285 | ||
269 | ctx->fpage = faultpage; | 286 | ctx->fpage = faultpage; |
270 | ctx->probe = get_kmmio_probe(addr); | 287 | ctx->probe = get_kmmio_probe(page_base); |
271 | ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); | 288 | ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); |
272 | ctx->addr = addr; | 289 | ctx->addr = page_base; |
273 | 290 | ||
274 | if (ctx->probe && ctx->probe->pre_handler) | 291 | if (ctx->probe && ctx->probe->pre_handler) |
275 | ctx->probe->pre_handler(ctx->probe, regs, addr); | 292 | ctx->probe->pre_handler(ctx->probe, regs, addr); |
@@ -354,12 +371,11 @@ out: | |||
354 | } | 371 | } |
355 | 372 | ||
356 | /* You must be holding kmmio_lock. */ | 373 | /* You must be holding kmmio_lock. */ |
357 | static int add_kmmio_fault_page(unsigned long page) | 374 | static int add_kmmio_fault_page(unsigned long addr) |
358 | { | 375 | { |
359 | struct kmmio_fault_page *f; | 376 | struct kmmio_fault_page *f; |
360 | 377 | ||
361 | page &= PAGE_MASK; | 378 | f = get_kmmio_fault_page(addr); |
362 | f = get_kmmio_fault_page(page); | ||
363 | if (f) { | 379 | if (f) { |
364 | if (!f->count) | 380 | if (!f->count) |
365 | arm_kmmio_fault_page(f); | 381 | arm_kmmio_fault_page(f); |
@@ -372,26 +388,25 @@ static int add_kmmio_fault_page(unsigned long page) | |||
372 | return -1; | 388 | return -1; |
373 | 389 | ||
374 | f->count = 1; | 390 | f->count = 1; |
375 | f->page = page; | 391 | f->addr = addr; |
376 | 392 | ||
377 | if (arm_kmmio_fault_page(f)) { | 393 | if (arm_kmmio_fault_page(f)) { |
378 | kfree(f); | 394 | kfree(f); |
379 | return -1; | 395 | return -1; |
380 | } | 396 | } |
381 | 397 | ||
382 | list_add_rcu(&f->list, kmmio_page_list(f->page)); | 398 | list_add_rcu(&f->list, kmmio_page_list(f->addr)); |
383 | 399 | ||
384 | return 0; | 400 | return 0; |
385 | } | 401 | } |
386 | 402 | ||
387 | /* You must be holding kmmio_lock. */ | 403 | /* You must be holding kmmio_lock. */ |
388 | static void release_kmmio_fault_page(unsigned long page, | 404 | static void release_kmmio_fault_page(unsigned long addr, |
389 | struct kmmio_fault_page **release_list) | 405 | struct kmmio_fault_page **release_list) |
390 | { | 406 | { |
391 | struct kmmio_fault_page *f; | 407 | struct kmmio_fault_page *f; |
392 | 408 | ||
393 | page &= PAGE_MASK; | 409 | f = get_kmmio_fault_page(addr); |
394 | f = get_kmmio_fault_page(page); | ||
395 | if (!f) | 410 | if (!f) |
396 | return; | 411 | return; |
397 | 412 | ||
@@ -420,18 +435,27 @@ int register_kmmio_probe(struct kmmio_probe *p) | |||
420 | int ret = 0; | 435 | int ret = 0; |
421 | unsigned long size = 0; | 436 | unsigned long size = 0; |
422 | const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); | 437 | const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); |
438 | unsigned int l; | ||
439 | pte_t *pte; | ||
423 | 440 | ||
424 | spin_lock_irqsave(&kmmio_lock, flags); | 441 | spin_lock_irqsave(&kmmio_lock, flags); |
425 | if (get_kmmio_probe(p->addr)) { | 442 | if (get_kmmio_probe(p->addr)) { |
426 | ret = -EEXIST; | 443 | ret = -EEXIST; |
427 | goto out; | 444 | goto out; |
428 | } | 445 | } |
446 | |||
447 | pte = lookup_address(p->addr, &l); | ||
448 | if (!pte) { | ||
449 | ret = -EINVAL; | ||
450 | goto out; | ||
451 | } | ||
452 | |||
429 | kmmio_count++; | 453 | kmmio_count++; |
430 | list_add_rcu(&p->list, &kmmio_probes); | 454 | list_add_rcu(&p->list, &kmmio_probes); |
431 | while (size < size_lim) { | 455 | while (size < size_lim) { |
432 | if (add_kmmio_fault_page(p->addr + size)) | 456 | if (add_kmmio_fault_page(p->addr + size)) |
433 | pr_err("Unable to set page fault.\n"); | 457 | pr_err("Unable to set page fault.\n"); |
434 | size += PAGE_SIZE; | 458 | size += page_level_size(l); |
435 | } | 459 | } |
436 | out: | 460 | out: |
437 | spin_unlock_irqrestore(&kmmio_lock, flags); | 461 | spin_unlock_irqrestore(&kmmio_lock, flags); |
@@ -506,11 +530,17 @@ void unregister_kmmio_probe(struct kmmio_probe *p) | |||
506 | const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); | 530 | const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); |
507 | struct kmmio_fault_page *release_list = NULL; | 531 | struct kmmio_fault_page *release_list = NULL; |
508 | struct kmmio_delayed_release *drelease; | 532 | struct kmmio_delayed_release *drelease; |
533 | unsigned int l; | ||
534 | pte_t *pte; | ||
535 | |||
536 | pte = lookup_address(p->addr, &l); | ||
537 | if (!pte) | ||
538 | return; | ||
509 | 539 | ||
510 | spin_lock_irqsave(&kmmio_lock, flags); | 540 | spin_lock_irqsave(&kmmio_lock, flags); |
511 | while (size < size_lim) { | 541 | while (size < size_lim) { |
512 | release_kmmio_fault_page(p->addr + size, &release_list); | 542 | release_kmmio_fault_page(p->addr + size, &release_list); |
513 | size += PAGE_SIZE; | 543 | size += page_level_size(l); |
514 | } | 544 | } |
515 | list_del_rcu(&p->list); | 545 | list_del_rcu(&p->list); |
516 | kmmio_count--; | 546 | kmmio_count--; |
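
A recurring move in these kmmio changes is replacing the fixed 'addr & PAGE_MASK' with a mask derived from the level of the page that actually maps the address, so probes on 2 MiB or 1 GiB mappings hash and compare against the true page base. A small sketch of that rounding (shift values illustrative):

#include <stdint.h>

/* page_level_mask() analogue: round an address down to the base of
 * whatever size of page maps it. */
static uint64_t page_base(uint64_t addr, unsigned int page_shift)
{
	return addr & ~((1ULL << page_shift) - 1);
}

/*
 * With a 2 MiB mapping (page_shift == 21), 0xffff880000201234 rounds
 * to 0xffff880000200000; a fixed 4 KiB mask would give
 * 0xffff880000201000 and the hash lookups would disagree.
 */
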
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 72bb52f93c3d..d2dc0438d654 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c | |||
@@ -94,18 +94,6 @@ static unsigned long mmap_base(unsigned long rnd) | |||
94 | } | 94 | } |
95 | 95 | ||
96 | /* | 96 | /* |
97 | * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64 | ||
98 | * does, but not when emulating X86_32 | ||
99 | */ | ||
100 | static unsigned long mmap_legacy_base(unsigned long rnd) | ||
101 | { | ||
102 | if (mmap_is_ia32()) | ||
103 | return TASK_UNMAPPED_BASE; | ||
104 | else | ||
105 | return TASK_UNMAPPED_BASE + rnd; | ||
106 | } | ||
107 | |||
108 | /* | ||
109 | * This function, called very early during the creation of a new | 97 | * This function, called very early during the creation of a new |
110 | * process VM image, sets up which VM layout function to use: | 98 | * process VM image, sets up which VM layout function to use: |
111 | */ | 99 | */ |
@@ -116,7 +104,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) | |||
116 | if (current->flags & PF_RANDOMIZE) | 104 | if (current->flags & PF_RANDOMIZE) |
117 | random_factor = arch_mmap_rnd(); | 105 | random_factor = arch_mmap_rnd(); |
118 | 106 | ||
119 | mm->mmap_legacy_base = mmap_legacy_base(random_factor); | 107 | mm->mmap_legacy_base = TASK_UNMAPPED_BASE + random_factor; |
120 | 108 | ||
121 | if (mmap_is_legacy()) { | 109 | if (mmap_is_legacy()) { |
122 | mm->mmap_base = mm->mmap_legacy_base; | 110 | mm->mmap_base = mm->mmap_legacy_base; |
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index d04f8094bc23..f70c1ff46125 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c | |||
@@ -465,46 +465,67 @@ static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) | |||
465 | return true; | 465 | return true; |
466 | } | 466 | } |
467 | 467 | ||
468 | /* | ||
469 | * Mark all currently memblock-reserved physical memory (which covers the | ||
470 | * kernel's own memory ranges) as hot-unswappable. | ||
471 | */ | ||
468 | static void __init numa_clear_kernel_node_hotplug(void) | 472 | static void __init numa_clear_kernel_node_hotplug(void) |
469 | { | 473 | { |
470 | int i, nid; | 474 | nodemask_t reserved_nodemask = NODE_MASK_NONE; |
471 | nodemask_t numa_kernel_nodes = NODE_MASK_NONE; | 475 | struct memblock_region *mb_region; |
472 | phys_addr_t start, end; | 476 | int i; |
473 | struct memblock_region *r; | ||
474 | 477 | ||
475 | /* | 478 | /* |
479 | * We have to do some preprocessing of memblock regions, to | ||
480 | * make them suitable for reservation. | ||
481 | * | ||
476 | * At this time, all memory regions reserved by memblock are | 482 | * At this time, all memory regions reserved by memblock are |
477 | * used by the kernel. Set the nid in memblock.reserved will | 483 | * used by the kernel, but those regions are not split up |
478 | * mark out all the nodes the kernel resides in. | 484 | * along node boundaries yet, and don't necessarily have their |
485 | * node ID set yet either. | ||
486 | * | ||
487 | * So iterate over all memory known to the x86 architecture, | ||
488 | * and use those ranges to set the nid in memblock.reserved. | ||
489 | * This will split up the memblock regions along node | ||
490 | * boundaries and will set the node IDs as well. | ||
479 | */ | 491 | */ |
480 | for (i = 0; i < numa_meminfo.nr_blks; i++) { | 492 | for (i = 0; i < numa_meminfo.nr_blks; i++) { |
481 | struct numa_memblk *mb = &numa_meminfo.blk[i]; | 493 | struct numa_memblk *mb = numa_meminfo.blk + i; |
494 | int ret; | ||
482 | 495 | ||
483 | memblock_set_node(mb->start, mb->end - mb->start, | 496 | ret = memblock_set_node(mb->start, mb->end - mb->start, &memblock.reserved, mb->nid); |
484 | &memblock.reserved, mb->nid); | 497 | WARN_ON_ONCE(ret); |
485 | } | 498 | } |
486 | 499 | ||
487 | /* | 500 | /* |
488 | * Mark all kernel nodes. | 501 | * Now go over all reserved memblock regions, to construct a |
502 | * node mask of all kernel reserved memory areas. | ||
489 | * | 503 | * |
490 | * When booting with mem=nn[kMG] or in a kdump kernel, numa_meminfo | 504 | * [ Note, when booting with mem=nn[kMG] or in a kdump kernel, |
491 | * may not include all the memblock.reserved memory ranges because | 505 | * numa_meminfo might not include all memblock.reserved |
492 | * trim_snb_memory() reserves specific pages for Sandy Bridge graphics. | 506 | * memory ranges, because quirks such as trim_snb_memory() |
507 | * reserve specific pages for Sandy Bridge graphics. ] | ||
493 | */ | 508 | */ |
494 | for_each_memblock(reserved, r) | 509 | for_each_memblock(reserved, mb_region) { |
495 | if (r->nid != MAX_NUMNODES) | 510 | if (mb_region->nid != MAX_NUMNODES) |
496 | node_set(r->nid, numa_kernel_nodes); | 511 | node_set(mb_region->nid, reserved_nodemask); |
512 | } | ||
497 | 513 | ||
498 | /* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */ | 514 | /* |
515 | * Finally, clear the MEMBLOCK_HOTPLUG flag for all memory | ||
516 | * belonging to the reserved node mask. | ||
517 | * | ||
518 | * Note that this will include memory regions that reside | ||
519 | * on nodes that contain kernel memory - entire nodes | ||
520 | * become hot-unpluggable: | ||
521 | */ | ||
499 | for (i = 0; i < numa_meminfo.nr_blks; i++) { | 522 | for (i = 0; i < numa_meminfo.nr_blks; i++) { |
500 | nid = numa_meminfo.blk[i].nid; | 523 | struct numa_memblk *mb = numa_meminfo.blk + i; |
501 | if (!node_isset(nid, numa_kernel_nodes)) | ||
502 | continue; | ||
503 | 524 | ||
504 | start = numa_meminfo.blk[i].start; | 525 | if (!node_isset(mb->nid, reserved_nodemask)) |
505 | end = numa_meminfo.blk[i].end; | 526 | continue; |
506 | 527 | ||
507 | memblock_clear_hotplug(start, end - start); | 528 | memblock_clear_hotplug(mb->start, mb->end - mb->start); |
508 | } | 529 | } |
509 | } | 530 | } |
510 | 531 | ||
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 1c37e650acac..007ebe2d8157 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -1128,8 +1128,10 @@ static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr, | |||
1128 | /* | 1128 | /* |
1129 | * Ignore all non primary paths. | 1129 | * Ignore all non primary paths. |
1130 | */ | 1130 | */ |
1131 | if (!primary) | 1131 | if (!primary) { |
1132 | cpa->numpages = 1; | ||
1132 | return 0; | 1133 | return 0; |
1134 | } | ||
1133 | 1135 | ||
1134 | /* | 1136 | /* |
1135 | * Ignore the NULL PTE for kernel identity mapping, as it is expected | 1137 | * Ignore the NULL PTE for kernel identity mapping, as it is expected |
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index f4ae536b0914..04e2e7144bee 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
@@ -943,7 +943,7 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, | |||
943 | return -EINVAL; | 943 | return -EINVAL; |
944 | } | 944 | } |
945 | 945 | ||
946 | *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) | | 946 | *prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) | |
947 | cachemode2protval(pcm)); | 947 | cachemode2protval(pcm)); |
948 | 948 | ||
949 | return 0; | 949 | return 0; |
@@ -959,7 +959,7 @@ int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, | |||
959 | 959 | ||
960 | /* Set prot based on lookup */ | 960 | /* Set prot based on lookup */ |
961 | pcm = lookup_memtype(pfn_t_to_phys(pfn)); | 961 | pcm = lookup_memtype(pfn_t_to_phys(pfn)); |
962 | *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) | | 962 | *prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) | |
963 | cachemode2protval(pcm)); | 963 | cachemode2protval(pcm)); |
964 | 964 | ||
965 | return 0; | 965 | return 0; |
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c index 92e2eacb3321..8bea84724a7d 100644 --- a/arch/x86/mm/setup_nx.c +++ b/arch/x86/mm/setup_nx.c | |||
@@ -4,6 +4,7 @@ | |||
4 | 4 | ||
5 | #include <asm/pgtable.h> | 5 | #include <asm/pgtable.h> |
6 | #include <asm/proto.h> | 6 | #include <asm/proto.h> |
7 | #include <asm/cpufeature.h> | ||
7 | 8 | ||
8 | static int disable_nx; | 9 | static int disable_nx; |
9 | 10 | ||
@@ -31,9 +32,8 @@ early_param("noexec", noexec_setup); | |||
31 | 32 | ||
32 | void x86_configure_nx(void) | 33 | void x86_configure_nx(void) |
33 | { | 34 | { |
34 | if (boot_cpu_has(X86_FEATURE_NX) && !disable_nx) | 35 | /* If disable_nx is set, clear NX on all new mappings going forward. */ |
35 | __supported_pte_mask |= _PAGE_NX; | 36 | if (disable_nx) |
36 | else | ||
37 | __supported_pte_mask &= ~_PAGE_NX; | 37 | __supported_pte_mask &= ~_PAGE_NX; |
38 | } | 38 | } |
39 | 39 | ||
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 50d86c0e9ba4..660a83c8287b 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c | |||
@@ -24,7 +24,6 @@ | |||
24 | #include <asm/nmi.h> | 24 | #include <asm/nmi.h> |
25 | #include <asm/apic.h> | 25 | #include <asm/apic.h> |
26 | #include <asm/processor.h> | 26 | #include <asm/processor.h> |
27 | #include <asm/cpufeature.h> | ||
28 | 27 | ||
29 | #include "op_x86_model.h" | 28 | #include "op_x86_model.h" |
30 | #include "op_counter.h" | 29 | #include "op_counter.h" |
diff --git a/arch/x86/platform/geode/alix.c b/arch/x86/platform/geode/alix.c index 76b6632d3143..1865c196f136 100644 --- a/arch/x86/platform/geode/alix.c +++ b/arch/x86/platform/geode/alix.c | |||
@@ -21,7 +21,7 @@ | |||
21 | #include <linux/init.h> | 21 | #include <linux/init.h> |
22 | #include <linux/io.h> | 22 | #include <linux/io.h> |
23 | #include <linux/string.h> | 23 | #include <linux/string.h> |
24 | #include <linux/module.h> | 24 | #include <linux/moduleparam.h> |
25 | #include <linux/leds.h> | 25 | #include <linux/leds.h> |
26 | #include <linux/platform_device.h> | 26 | #include <linux/platform_device.h> |
27 | #include <linux/gpio.h> | 27 | #include <linux/gpio.h> |
@@ -35,6 +35,11 @@ | |||
35 | #define BIOS_SIGNATURE_COREBOOT 0x500 | 35 | #define BIOS_SIGNATURE_COREBOOT 0x500 |
36 | #define BIOS_REGION_SIZE 0x10000 | 36 | #define BIOS_REGION_SIZE 0x10000 |
37 | 37 | ||
38 | /* | ||
39 | * This driver is not modular, but to keep backward compatibility | ||
40 | * with existing use cases, continuing with module_param is | ||
41 | * the easiest way forward. | ||
42 | */ | ||
38 | static bool force = 0; | 43 | static bool force = 0; |
39 | module_param(force, bool, 0444); | 44 | module_param(force, bool, 0444); |
40 | /* FIXME: Award bios is not automatically detected as Alix platform */ | 45 | /* FIXME: Award bios is not automatically detected as Alix platform */ |
@@ -192,9 +197,4 @@ static int __init alix_init(void) | |||
192 | 197 | ||
193 | return 0; | 198 | return 0; |
194 | } | 199 | } |
195 | 200 | device_initcall(alix_init); | |
196 | module_init(alix_init); | ||
197 | |||
198 | MODULE_AUTHOR("Ed Wildgoose <kernel@wildgooses.com>"); | ||
199 | MODULE_DESCRIPTION("PCEngines ALIX System Setup"); | ||
200 | MODULE_LICENSE("GPL"); | ||
diff --git a/arch/x86/platform/geode/geos.c b/arch/x86/platform/geode/geos.c index aa733fba2471..4fcdb91318a0 100644 --- a/arch/x86/platform/geode/geos.c +++ b/arch/x86/platform/geode/geos.c | |||
@@ -19,7 +19,6 @@ | |||
19 | #include <linux/init.h> | 19 | #include <linux/init.h> |
20 | #include <linux/io.h> | 20 | #include <linux/io.h> |
21 | #include <linux/string.h> | 21 | #include <linux/string.h> |
22 | #include <linux/module.h> | ||
23 | #include <linux/leds.h> | 22 | #include <linux/leds.h> |
24 | #include <linux/platform_device.h> | 23 | #include <linux/platform_device.h> |
25 | #include <linux/gpio.h> | 24 | #include <linux/gpio.h> |
@@ -120,9 +119,4 @@ static int __init geos_init(void) | |||
120 | 119 | ||
121 | return 0; | 120 | return 0; |
122 | } | 121 | } |
123 | 122 | device_initcall(geos_init); | |
124 | module_init(geos_init); | ||
125 | |||
126 | MODULE_AUTHOR("Philip Prindeville <philipp@redfish-solutions.com>"); | ||
127 | MODULE_DESCRIPTION("Traverse Technologies Geos System Setup"); | ||
128 | MODULE_LICENSE("GPL"); | ||
diff --git a/arch/x86/platform/geode/net5501.c b/arch/x86/platform/geode/net5501.c index 927e38c0089f..a2f6b982a729 100644 --- a/arch/x86/platform/geode/net5501.c +++ b/arch/x86/platform/geode/net5501.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include <linux/init.h> | 20 | #include <linux/init.h> |
21 | #include <linux/io.h> | 21 | #include <linux/io.h> |
22 | #include <linux/string.h> | 22 | #include <linux/string.h> |
23 | #include <linux/module.h> | ||
24 | #include <linux/leds.h> | 23 | #include <linux/leds.h> |
25 | #include <linux/platform_device.h> | 24 | #include <linux/platform_device.h> |
26 | #include <linux/gpio.h> | 25 | #include <linux/gpio.h> |
@@ -146,9 +145,4 @@ static int __init net5501_init(void) | |||
146 | 145 | ||
147 | return 0; | 146 | return 0; |
148 | } | 147 | } |
149 | 148 | device_initcall(net5501_init); | |
150 | module_init(net5501_init); | ||
151 | |||
152 | MODULE_AUTHOR("Philip Prindeville <philipp@redfish-solutions.com>"); | ||
153 | MODULE_DESCRIPTION("Soekris net5501 System Setup"); | ||
154 | MODULE_LICENSE("GPL"); | ||
diff --git a/arch/x86/platform/intel-mid/mfld.c b/arch/x86/platform/intel-mid/mfld.c index 23381d2174ae..1eb47b6298c2 100644 --- a/arch/x86/platform/intel-mid/mfld.c +++ b/arch/x86/platform/intel-mid/mfld.c | |||
@@ -52,10 +52,7 @@ static unsigned long __init mfld_calibrate_tsc(void) | |||
52 | /* mark tsc clocksource as reliable */ | 52 | /* mark tsc clocksource as reliable */ |
53 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE); | 53 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE); |
54 | 54 | ||
55 | if (fast_calibrate) | 55 | return fast_calibrate; |
56 | return fast_calibrate; | ||
57 | |||
58 | return 0; | ||
59 | } | 56 | } |
60 | 57 | ||
61 | static void __init penwell_arch_setup(void) | 58 | static void __init penwell_arch_setup(void) |
diff --git a/arch/x86/platform/intel-mid/mrfl.c b/arch/x86/platform/intel-mid/mrfl.c index aaca91753d32..bd1adc621781 100644 --- a/arch/x86/platform/intel-mid/mrfl.c +++ b/arch/x86/platform/intel-mid/mrfl.c | |||
@@ -81,10 +81,7 @@ static unsigned long __init tangier_calibrate_tsc(void) | |||
81 | /* mark tsc clocksource as reliable */ | 81 | /* mark tsc clocksource as reliable */ |
82 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE); | 82 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE); |
83 | 83 | ||
84 | if (fast_calibrate) | 84 | return fast_calibrate; |
85 | return fast_calibrate; | ||
86 | |||
87 | return 0; | ||
88 | } | 85 | } |
89 | 86 | ||
90 | static void __init tangier_arch_setup(void) | 87 | static void __init tangier_arch_setup(void) |
diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c index bfadcd0f4944..17d6d2296e4d 100644 --- a/arch/x86/platform/intel-quark/imr.c +++ b/arch/x86/platform/intel-quark/imr.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /** | 1 | /** |
2 | * imr.c | 2 | * imr.c -- Intel Isolated Memory Region driver |
3 | * | 3 | * |
4 | * Copyright(c) 2013 Intel Corporation. | 4 | * Copyright(c) 2013 Intel Corporation. |
5 | * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie> | 5 | * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie> |
@@ -31,7 +31,6 @@ | |||
31 | #include <linux/debugfs.h> | 31 | #include <linux/debugfs.h> |
32 | #include <linux/init.h> | 32 | #include <linux/init.h> |
33 | #include <linux/mm.h> | 33 | #include <linux/mm.h> |
34 | #include <linux/module.h> | ||
35 | #include <linux/types.h> | 34 | #include <linux/types.h> |
36 | 35 | ||
37 | struct imr_device { | 36 | struct imr_device { |
@@ -135,11 +134,9 @@ static int imr_read(struct imr_device *idev, u32 imr_id, struct imr_regs *imr) | |||
135 | * @idev: pointer to imr_device structure. | 134 | * @idev: pointer to imr_device structure. |
136 | * @imr_id: IMR entry to write. | 135 | * @imr_id: IMR entry to write. |
137 | * @imr: IMR structure representing address and access masks. | 136 | * @imr: IMR structure representing address and access masks. |
138 | * @lock: indicates if the IMR lock bit should be applied. | ||
139 | * @return: 0 on success or error code passed from mbi_iosf on failure. | 137 | * @return: 0 on success or error code passed from mbi_iosf on failure. |
140 | */ | 138 | */ |
141 | static int imr_write(struct imr_device *idev, u32 imr_id, | 139 | static int imr_write(struct imr_device *idev, u32 imr_id, struct imr_regs *imr) |
142 | struct imr_regs *imr, bool lock) | ||
143 | { | 140 | { |
144 | unsigned long flags; | 141 | unsigned long flags; |
145 | u32 reg = imr_id * IMR_NUM_REGS + idev->reg_base; | 142 | u32 reg = imr_id * IMR_NUM_REGS + idev->reg_base; |
@@ -163,15 +160,6 @@ static int imr_write(struct imr_device *idev, u32 imr_id, | |||
163 | if (ret) | 160 | if (ret) |
164 | goto failed; | 161 | goto failed; |
165 | 162 | ||
166 | /* Lock bit must be set separately to addr_lo address bits. */ | ||
167 | if (lock) { | ||
168 | imr->addr_lo |= IMR_LOCK; | ||
169 | ret = iosf_mbi_write(QRK_MBI_UNIT_MM, MBI_REG_WRITE, | ||
170 | reg - IMR_NUM_REGS, imr->addr_lo); | ||
171 | if (ret) | ||
172 | goto failed; | ||
173 | } | ||
174 | |||
175 | local_irq_restore(flags); | 163 | local_irq_restore(flags); |
176 | return 0; | 164 | return 0; |
177 | failed: | 165 | failed: |
@@ -270,17 +258,6 @@ static int imr_debugfs_register(struct imr_device *idev) | |||
270 | } | 258 | } |
271 | 259 | ||
272 | /** | 260 | /** |
273 | * imr_debugfs_unregister - unregister debugfs hooks. | ||
274 | * | ||
275 | * @idev: pointer to imr_device structure. | ||
276 | * @return: | ||
277 | */ | ||
278 | static void imr_debugfs_unregister(struct imr_device *idev) | ||
279 | { | ||
280 | debugfs_remove(idev->file); | ||
281 | } | ||
282 | |||
283 | /** | ||
284 | * imr_check_params - check passed address range for IMR alignment and non-zero size | 261 | * imr_check_params - check passed address range for IMR alignment and non-zero size |
285 | * | 262 | * |
286 | * @base: base address of intended IMR. | 263 | * @base: base address of intended IMR. |
@@ -334,11 +311,10 @@ static inline int imr_address_overlap(phys_addr_t addr, struct imr_regs *imr) | |||
334 | * @size: physical size of region in bytes must be aligned to 1KiB. | 311 | * @size: physical size of region in bytes must be aligned to 1KiB. |
335 | * @read_mask: read access mask. | 312 | * @read_mask: read access mask. |
336 | * @write_mask: write access mask. | 313 | * @write_mask: write access mask. |
337 | * @lock: indicates whether or not to permanently lock this region. | ||
338 | * @return: zero on success or negative value indicating error. | 314 | * @return: zero on success or negative value indicating error. |
339 | */ | 315 | */ |
340 | int imr_add_range(phys_addr_t base, size_t size, | 316 | int imr_add_range(phys_addr_t base, size_t size, |
341 | unsigned int rmask, unsigned int wmask, bool lock) | 317 | unsigned int rmask, unsigned int wmask) |
342 | { | 318 | { |
343 | phys_addr_t end; | 319 | phys_addr_t end; |
344 | unsigned int i; | 320 | unsigned int i; |
@@ -411,7 +387,7 @@ int imr_add_range(phys_addr_t base, size_t size, | |||
411 | imr.rmask = rmask; | 387 | imr.rmask = rmask; |
412 | imr.wmask = wmask; | 388 | imr.wmask = wmask; |
413 | 389 | ||
414 | ret = imr_write(idev, reg, &imr, lock); | 390 | ret = imr_write(idev, reg, &imr); |
415 | if (ret < 0) { | 391 | if (ret < 0) { |
416 | /* | 392 | /* |
417 | * In the highly unlikely event iosf_mbi_write failed | 393 | * In the highly unlikely event iosf_mbi_write failed |
@@ -422,7 +398,7 @@ int imr_add_range(phys_addr_t base, size_t size, | |||
422 | imr.addr_hi = 0; | 398 | imr.addr_hi = 0; |
423 | imr.rmask = IMR_READ_ACCESS_ALL; | 399 | imr.rmask = IMR_READ_ACCESS_ALL; |
424 | imr.wmask = IMR_WRITE_ACCESS_ALL; | 400 | imr.wmask = IMR_WRITE_ACCESS_ALL; |
425 | imr_write(idev, reg, &imr, false); | 401 | imr_write(idev, reg, &imr); |
426 | } | 402 | } |
427 | failed: | 403 | failed: |
428 | mutex_unlock(&idev->lock); | 404 | mutex_unlock(&idev->lock); |
@@ -518,7 +494,7 @@ static int __imr_remove_range(int reg, phys_addr_t base, size_t size) | |||
518 | imr.rmask = IMR_READ_ACCESS_ALL; | 494 | imr.rmask = IMR_READ_ACCESS_ALL; |
519 | imr.wmask = IMR_WRITE_ACCESS_ALL; | 495 | imr.wmask = IMR_WRITE_ACCESS_ALL; |
520 | 496 | ||
521 | ret = imr_write(idev, reg, &imr, false); | 497 | ret = imr_write(idev, reg, &imr); |
522 | 498 | ||
523 | failed: | 499 | failed: |
524 | mutex_unlock(&idev->lock); | 500 | mutex_unlock(&idev->lock); |
@@ -599,7 +575,7 @@ static void __init imr_fixup_memmap(struct imr_device *idev) | |||
599 | * We don't round up @size since it is already PAGE_SIZE aligned. | 575 | * We don't round up @size since it is already PAGE_SIZE aligned. |
600 | * See vmlinux.lds.S for details. | 576 | * See vmlinux.lds.S for details. |
601 | */ | 577 | */ |
602 | ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false); | 578 | ret = imr_add_range(base, size, IMR_CPU, IMR_CPU); |
603 | if (ret < 0) { | 579 | if (ret < 0) { |
604 | pr_err("unable to setup IMR for kernel: %zu KiB (%lx - %lx)\n", | 580 | pr_err("unable to setup IMR for kernel: %zu KiB (%lx - %lx)\n", |
605 | size / 1024, start, end); | 581 | size / 1024, start, end); |
@@ -614,7 +590,6 @@ static const struct x86_cpu_id imr_ids[] __initconst = { | |||
614 | { X86_VENDOR_INTEL, 5, 9 }, /* Intel Quark SoC X1000. */ | 590 | { X86_VENDOR_INTEL, 5, 9 }, /* Intel Quark SoC X1000. */ |
615 | {} | 591 | {} |
616 | }; | 592 | }; |
617 | MODULE_DEVICE_TABLE(x86cpu, imr_ids); | ||
618 | 593 | ||
619 | /** | 594 | /** |
620 | * imr_init - entry point for IMR driver. | 595 | * imr_init - entry point for IMR driver. |
@@ -640,22 +615,4 @@ static int __init imr_init(void) | |||
640 | imr_fixup_memmap(idev); | 615 | imr_fixup_memmap(idev); |
641 | return 0; | 616 | return 0; |
642 | } | 617 | } |
643 | 618 | device_initcall(imr_init); | |
644 | /** | ||
645 | * imr_exit - exit point for IMR code. | ||
646 | * | ||
647 | * Deregisters debugfs, leave IMR state as-is. | ||
648 | * | ||
649 | * return: | ||
650 | */ | ||
651 | static void __exit imr_exit(void) | ||
652 | { | ||
653 | imr_debugfs_unregister(&imr_dev); | ||
654 | } | ||
655 | |||
656 | module_init(imr_init); | ||
657 | module_exit(imr_exit); | ||
658 | |||
659 | MODULE_AUTHOR("Bryan O'Donoghue <pure.logic@nexus-software.ie>"); | ||
660 | MODULE_DESCRIPTION("Intel Isolated Memory Region driver"); | ||
661 | MODULE_LICENSE("Dual BSD/GPL"); | ||
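After this change, callers pass only the range and the access masks to imr_add_range(); the API no longer offers a way to request a locked IMR. A hedged usage sketch against the new four-argument signature (the base and size values here are hypothetical; the constants come from asm/imr.h):

    #include <asm/imr.h>
    #include <linux/init.h>
    #include <linux/types.h>

    static int __init example_imr_setup(void)
    {
            phys_addr_t base = 0x00800000;  /* hypothetical 1 KiB aligned base */
            size_t size = 0x00200000;       /* hypothetical 2 MiB region */
            int ret;

            /* New four-argument form: no trailing 'lock' flag. */
            ret = imr_add_range(base, size, IMR_CPU, IMR_CPU);
            if (ret < 0)
                    return ret;

            /* Tear the region down again when it is no longer needed. */
            return imr_remove_range(base, size);
    }
    device_initcall(example_imr_setup);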
diff --git a/arch/x86/platform/intel-quark/imr_selftest.c b/arch/x86/platform/intel-quark/imr_selftest.c index 278e4da4222f..f5bad40936ac 100644 --- a/arch/x86/platform/intel-quark/imr_selftest.c +++ b/arch/x86/platform/intel-quark/imr_selftest.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /** | 1 | /** |
2 | * imr_selftest.c | 2 | * imr_selftest.c -- Intel Isolated Memory Region self-test driver |
3 | * | 3 | * |
4 | * Copyright(c) 2013 Intel Corporation. | 4 | * Copyright(c) 2013 Intel Corporation. |
5 | * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie> | 5 | * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie> |
@@ -15,7 +15,6 @@ | |||
15 | #include <asm/imr.h> | 15 | #include <asm/imr.h> |
16 | #include <linux/init.h> | 16 | #include <linux/init.h> |
17 | #include <linux/mm.h> | 17 | #include <linux/mm.h> |
18 | #include <linux/module.h> | ||
19 | #include <linux/types.h> | 18 | #include <linux/types.h> |
20 | 19 | ||
21 | #define SELFTEST KBUILD_MODNAME ": " | 20 | #define SELFTEST KBUILD_MODNAME ": " |
@@ -61,30 +60,30 @@ static void __init imr_self_test(void) | |||
61 | int ret; | 60 | int ret; |
62 | 61 | ||
63 | /* Test zero zero. */ | 62 | /* Test zero zero. */ |
64 | ret = imr_add_range(0, 0, 0, 0, false); | 63 | ret = imr_add_range(0, 0, 0, 0); |
65 | imr_self_test_result(ret < 0, "zero sized IMR\n"); | 64 | imr_self_test_result(ret < 0, "zero sized IMR\n"); |
66 | 65 | ||
67 | /* Test exact overlap. */ | 66 | /* Test exact overlap. */ |
68 | ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false); | 67 | ret = imr_add_range(base, size, IMR_CPU, IMR_CPU); |
69 | imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size)); | 68 | imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size)); |
70 | 69 | ||
71 | /* Test overlap with base inside of existing. */ | 70 | /* Test overlap with base inside of existing. */ |
72 | base += size - IMR_ALIGN; | 71 | base += size - IMR_ALIGN; |
73 | ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false); | 72 | ret = imr_add_range(base, size, IMR_CPU, IMR_CPU); |
74 | imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size)); | 73 | imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size)); |
75 | 74 | ||
76 | /* Test overlap with end inside of existing. */ | 75 | /* Test overlap with end inside of existing. */ |
77 | base -= size + IMR_ALIGN * 2; | 76 | base -= size + IMR_ALIGN * 2; |
78 | ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false); | 77 | ret = imr_add_range(base, size, IMR_CPU, IMR_CPU); |
79 | imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size)); | 78 | imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size)); |
80 | 79 | ||
81 | /* Test that a 1 KiB IMR @ zero with read/write all will bomb out. */ | 80 | /* Test that a 1 KiB IMR @ zero with read/write all will bomb out. */ |
82 | ret = imr_add_range(0, IMR_ALIGN, IMR_READ_ACCESS_ALL, | 81 | ret = imr_add_range(0, IMR_ALIGN, IMR_READ_ACCESS_ALL, |
83 | IMR_WRITE_ACCESS_ALL, false); | 82 | IMR_WRITE_ACCESS_ALL); |
84 | imr_self_test_result(ret < 0, "1KiB IMR @ 0x00000000 - access-all\n"); | 83 | imr_self_test_result(ret < 0, "1KiB IMR @ 0x00000000 - access-all\n"); |
85 | 84 | ||
86 | /* Test that a 1 KiB IMR @ zero with CPU only will work. */ | 85 | /* Test that a 1 KiB IMR @ zero with CPU only will work. */ |
87 | ret = imr_add_range(0, IMR_ALIGN, IMR_CPU, IMR_CPU, false); | 86 | ret = imr_add_range(0, IMR_ALIGN, IMR_CPU, IMR_CPU); |
88 | imr_self_test_result(ret >= 0, "1KiB IMR @ 0x00000000 - cpu-access\n"); | 87 | imr_self_test_result(ret >= 0, "1KiB IMR @ 0x00000000 - cpu-access\n"); |
89 | if (ret >= 0) { | 88 | if (ret >= 0) { |
90 | ret = imr_remove_range(0, IMR_ALIGN); | 89 | ret = imr_remove_range(0, IMR_ALIGN); |
@@ -93,8 +92,7 @@ static void __init imr_self_test(void) | |||
93 | 92 | ||
94 | /* Test 2 KiB works. */ | 93 | /* Test 2 KiB works. */ |
95 | size = IMR_ALIGN * 2; | 94 | size = IMR_ALIGN * 2; |
96 | ret = imr_add_range(0, size, IMR_READ_ACCESS_ALL, | 95 | ret = imr_add_range(0, size, IMR_READ_ACCESS_ALL, IMR_WRITE_ACCESS_ALL); |
97 | IMR_WRITE_ACCESS_ALL, false); | ||
98 | imr_self_test_result(ret >= 0, "2KiB IMR @ 0x00000000\n"); | 96 | imr_self_test_result(ret >= 0, "2KiB IMR @ 0x00000000\n"); |
99 | if (ret >= 0) { | 97 | if (ret >= 0) { |
100 | ret = imr_remove_range(0, size); | 98 | ret = imr_remove_range(0, size); |
@@ -106,7 +104,6 @@ static const struct x86_cpu_id imr_ids[] __initconst = { | |||
106 | { X86_VENDOR_INTEL, 5, 9 }, /* Intel Quark SoC X1000. */ | 104 | { X86_VENDOR_INTEL, 5, 9 }, /* Intel Quark SoC X1000. */ |
107 | {} | 105 | {} |
108 | }; | 106 | }; |
109 | MODULE_DEVICE_TABLE(x86cpu, imr_ids); | ||
110 | 107 | ||
111 | /** | 108 | /** |
112 | * imr_self_test_init - entry point for IMR driver. | 109 | * imr_self_test_init - entry point for IMR driver. |
@@ -125,13 +122,4 @@ static int __init imr_self_test_init(void) | |||
125 | * | 122 | * |
126 | * return: | 123 | * return: |
127 | */ | 124 | */ |
128 | static void __exit imr_self_test_exit(void) | 125 | device_initcall(imr_self_test_init); |
129 | { | ||
130 | } | ||
131 | |||
132 | module_init(imr_self_test_init); | ||
133 | module_exit(imr_self_test_exit); | ||
134 | |||
135 | MODULE_AUTHOR("Bryan O'Donoghue <pure.logic@nexus-software.ie>"); | ||
136 | MODULE_DESCRIPTION("Intel Isolated Memory Region self-test driver"); | ||
137 | MODULE_LICENSE("Dual BSD/GPL"); | ||
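imr.c and imr_selftest.c get the same treatment: once a driver can only be built in, the module_init()/module_exit() pair, the empty exit stub, and the MODULE_* tags are dead weight, and device_initcall() states the built-in-only intent directly. A generic sketch of the conversion pattern, with foo standing in for any such driver:

    #include <linux/init.h>
    #include <linux/printk.h>

    /* Hypothetical built-in-only driver init; there is no exit path. */
    static int __init foo_init(void)
    {
            pr_info("foo: initialized\n");
            return 0;
    }

    /*
     * Modular form this replaces:
     *         module_init(foo_init);
     *         module_exit(foo_exit);   (foo_exit was an empty stub)
     *         MODULE_LICENSE("Dual BSD/GPL");
     *
     * Built-in-only form: one line, and <linux/module.h> can be dropped.
     */
    device_initcall(foo_init);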
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index 174781a404ff..00c319048d52 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h | |||
@@ -3,7 +3,7 @@ | |||
3 | 3 | ||
4 | #include <asm/asm.h> | 4 | #include <asm/asm.h> |
5 | #include <asm/segment.h> | 5 | #include <asm/segment.h> |
6 | #include <asm/cpufeature.h> | 6 | #include <asm/cpufeatures.h> |
7 | #include <asm/cmpxchg.h> | 7 | #include <asm/cmpxchg.h> |
8 | #include <asm/nops.h> | 8 | #include <asm/nops.h> |
9 | 9 | ||
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c index 439c0994b696..bfce503dffae 100644 --- a/arch/x86/um/sys_call_table_32.c +++ b/arch/x86/um/sys_call_table_32.c | |||
@@ -25,11 +25,11 @@ | |||
25 | 25 | ||
26 | #define old_mmap sys_old_mmap | 26 | #define old_mmap sys_old_mmap |
27 | 27 | ||
28 | #define __SYSCALL_I386(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ; | 28 | #define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ; |
29 | #include <asm/syscalls_32.h> | 29 | #include <asm/syscalls_32.h> |
30 | 30 | ||
31 | #undef __SYSCALL_I386 | 31 | #undef __SYSCALL_I386 |
32 | #define __SYSCALL_I386(nr, sym, compat) [ nr ] = sym, | 32 | #define __SYSCALL_I386(nr, sym, qual) [ nr ] = sym, |
33 | 33 | ||
34 | extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); | 34 | extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); |
35 | 35 | ||
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c index b74ea6c2c0e7..f306413d3eb6 100644 --- a/arch/x86/um/sys_call_table_64.c +++ b/arch/x86/um/sys_call_table_64.c | |||
@@ -35,14 +35,11 @@ | |||
35 | #define stub_execveat sys_execveat | 35 | #define stub_execveat sys_execveat |
36 | #define stub_rt_sigreturn sys_rt_sigreturn | 36 | #define stub_rt_sigreturn sys_rt_sigreturn |
37 | 37 | ||
38 | #define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat) | 38 | #define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ; |
39 | #define __SYSCALL_X32(nr, sym, compat) /* Not supported */ | ||
40 | |||
41 | #define __SYSCALL_64(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ; | ||
42 | #include <asm/syscalls_64.h> | 39 | #include <asm/syscalls_64.h> |
43 | 40 | ||
44 | #undef __SYSCALL_64 | 41 | #undef __SYSCALL_64 |
45 | #define __SYSCALL_64(nr, sym, compat) [ nr ] = sym, | 42 | #define __SYSCALL_64(nr, sym, qual) [ nr ] = sym, |
46 | 43 | ||
47 | extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); | 44 | extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); |
48 | 45 | ||
diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c index ce7e3607a870..470564bbd08e 100644 --- a/arch/x86/um/user-offsets.c +++ b/arch/x86/um/user-offsets.c | |||
@@ -9,14 +9,12 @@ | |||
9 | #include <asm/types.h> | 9 | #include <asm/types.h> |
10 | 10 | ||
11 | #ifdef __i386__ | 11 | #ifdef __i386__ |
12 | #define __SYSCALL_I386(nr, sym, compat) [nr] = 1, | 12 | #define __SYSCALL_I386(nr, sym, qual) [nr] = 1, |
13 | static char syscalls[] = { | 13 | static char syscalls[] = { |
14 | #include <asm/syscalls_32.h> | 14 | #include <asm/syscalls_32.h> |
15 | }; | 15 | }; |
16 | #else | 16 | #else |
17 | #define __SYSCALL_64(nr, sym, compat) [nr] = 1, | 17 | #define __SYSCALL_64(nr, sym, qual) [nr] = 1, |
18 | #define __SYSCALL_COMMON(nr, sym, compat) [nr] = 1, | ||
19 | #define __SYSCALL_X32(nr, sym, compat) /* Not supported */ | ||
20 | static char syscalls[] = { | 18 | static char syscalls[] = { |
21 | #include <asm/syscalls_64.h> | 19 | #include <asm/syscalls_64.h> |
22 | }; | 20 | }; |
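Both um syscall tables rely on the same X-macro trick: a generated header is expanded twice, once with __SYSCALL_* defined to emit declarations and once to emit designated initializers for the table. The miniature below compiles in userspace; it uses a local list macro instead of including a generated header twice, and the three fake syscalls are illustrative only:

    #include <stdio.h>

    /* Stand-in for a generated syscalls header: one X-macro line per call. */
    #define SYSCALL_LIST(X) \
            X(0, sys_read)  \
            X(1, sys_write) \
            X(2, sys_open)

    /* First expansion: declarations. */
    #define DECLARE(nr, sym) long sym(void);
    SYSCALL_LIST(DECLARE)
    #undef DECLARE

    /* Second expansion: designated-initializer table, as in the kernel. */
    #define ENTRY(nr, sym) [nr] = sym,
    static long (*const sys_call_table[])(void) = {
            SYSCALL_LIST(ENTRY)
    };
    #undef ENTRY

    long sys_read(void)  { return 10; }
    long sys_write(void) { return 11; }
    long sys_open(void)  { return 12; }

    int main(void)
    {
            printf("syscall 1 returns %ld\n", sys_call_table[1]()); /* 11 */
            return 0;
    }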
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 3f4ebf0261f2..3c6d17fd423a 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -112,7 +112,7 @@ asmlinkage __visible void cpu_bringup_and_idle(int cpu) | |||
112 | xen_pvh_secondary_vcpu_init(cpu); | 112 | xen_pvh_secondary_vcpu_init(cpu); |
113 | #endif | 113 | #endif |
114 | cpu_bringup(); | 114 | cpu_bringup(); |
115 | cpu_startup_entry(CPUHP_ONLINE); | 115 | cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); |
116 | } | 116 | } |
117 | 117 | ||
118 | static void xen_smp_intr_free(unsigned int cpu) | 118 | static void xen_smp_intr_free(unsigned int cpu) |
diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index 4d02e38514f5..fc4ad21a5ed4 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c | |||
@@ -157,7 +157,7 @@ void secondary_start_kernel(void) | |||
157 | 157 | ||
158 | complete(&cpu_running); | 158 | complete(&cpu_running); |
159 | 159 | ||
160 | cpu_startup_entry(CPUHP_ONLINE); | 160 | cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); |
161 | } | 161 | } |
162 | 162 | ||
163 | static void mx_cpu_start(void *p) | 163 | static void mx_cpu_start(void *p) |
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 33db7406c0e2..c346be650892 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig | |||
@@ -160,6 +160,7 @@ config CLKSRC_EFM32 | |||
160 | config CLKSRC_LPC32XX | 160 | config CLKSRC_LPC32XX |
161 | bool "Clocksource for LPC32XX" if COMPILE_TEST | 161 | bool "Clocksource for LPC32XX" if COMPILE_TEST |
162 | depends on GENERIC_CLOCKEVENTS && HAS_IOMEM | 162 | depends on GENERIC_CLOCKEVENTS && HAS_IOMEM |
163 | depends on ARM | ||
163 | select CLKSRC_MMIO | 164 | select CLKSRC_MMIO |
164 | select CLKSRC_OF | 165 | select CLKSRC_OF |
165 | help | 166 | help |
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index c64d543d64bf..f0dd9d42bc7b 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c | |||
@@ -32,6 +32,14 @@ | |||
32 | #define CNTTIDR 0x08 | 32 | #define CNTTIDR 0x08 |
33 | #define CNTTIDR_VIRT(n) (BIT(1) << ((n) * 4)) | 33 | #define CNTTIDR_VIRT(n) (BIT(1) << ((n) * 4)) |
34 | 34 | ||
35 | #define CNTACR(n) (0x40 + ((n) * 4)) | ||
36 | #define CNTACR_RPCT BIT(0) | ||
37 | #define CNTACR_RVCT BIT(1) | ||
38 | #define CNTACR_RFRQ BIT(2) | ||
39 | #define CNTACR_RVOFF BIT(3) | ||
40 | #define CNTACR_RWVT BIT(4) | ||
41 | #define CNTACR_RWPT BIT(5) | ||
42 | |||
35 | #define CNTVCT_LO 0x08 | 43 | #define CNTVCT_LO 0x08 |
36 | #define CNTVCT_HI 0x0c | 44 | #define CNTVCT_HI 0x0c |
37 | #define CNTFRQ 0x10 | 45 | #define CNTFRQ 0x10 |
@@ -266,10 +274,12 @@ static void __arch_timer_setup(unsigned type, | |||
266 | if (arch_timer_use_virtual) { | 274 | if (arch_timer_use_virtual) { |
267 | clk->irq = arch_timer_ppi[VIRT_PPI]; | 275 | clk->irq = arch_timer_ppi[VIRT_PPI]; |
268 | clk->set_state_shutdown = arch_timer_shutdown_virt; | 276 | clk->set_state_shutdown = arch_timer_shutdown_virt; |
277 | clk->set_state_oneshot_stopped = arch_timer_shutdown_virt; | ||
269 | clk->set_next_event = arch_timer_set_next_event_virt; | 278 | clk->set_next_event = arch_timer_set_next_event_virt; |
270 | } else { | 279 | } else { |
271 | clk->irq = arch_timer_ppi[PHYS_SECURE_PPI]; | 280 | clk->irq = arch_timer_ppi[PHYS_SECURE_PPI]; |
272 | clk->set_state_shutdown = arch_timer_shutdown_phys; | 281 | clk->set_state_shutdown = arch_timer_shutdown_phys; |
282 | clk->set_state_oneshot_stopped = arch_timer_shutdown_phys; | ||
273 | clk->set_next_event = arch_timer_set_next_event_phys; | 283 | clk->set_next_event = arch_timer_set_next_event_phys; |
274 | } | 284 | } |
275 | } else { | 285 | } else { |
@@ -279,10 +289,12 @@ static void __arch_timer_setup(unsigned type, | |||
279 | clk->cpumask = cpu_all_mask; | 289 | clk->cpumask = cpu_all_mask; |
280 | if (arch_timer_mem_use_virtual) { | 290 | if (arch_timer_mem_use_virtual) { |
281 | clk->set_state_shutdown = arch_timer_shutdown_virt_mem; | 291 | clk->set_state_shutdown = arch_timer_shutdown_virt_mem; |
292 | clk->set_state_oneshot_stopped = arch_timer_shutdown_virt_mem; | ||
282 | clk->set_next_event = | 293 | clk->set_next_event = |
283 | arch_timer_set_next_event_virt_mem; | 294 | arch_timer_set_next_event_virt_mem; |
284 | } else { | 295 | } else { |
285 | clk->set_state_shutdown = arch_timer_shutdown_phys_mem; | 296 | clk->set_state_shutdown = arch_timer_shutdown_phys_mem; |
297 | clk->set_state_oneshot_stopped = arch_timer_shutdown_phys_mem; | ||
286 | clk->set_next_event = | 298 | clk->set_next_event = |
287 | arch_timer_set_next_event_phys_mem; | 299 | arch_timer_set_next_event_phys_mem; |
288 | } | 300 | } |
@@ -757,7 +769,6 @@ static void __init arch_timer_mem_init(struct device_node *np) | |||
757 | } | 769 | } |
758 | 770 | ||
759 | cnttidr = readl_relaxed(cntctlbase + CNTTIDR); | 771 | cnttidr = readl_relaxed(cntctlbase + CNTTIDR); |
760 | iounmap(cntctlbase); | ||
761 | 772 | ||
762 | /* | 773 | /* |
763 | * Try to find a virtual capable frame. Otherwise fall back to a | 774 | * Try to find a virtual capable frame. Otherwise fall back to a |
@@ -765,20 +776,31 @@ static void __init arch_timer_mem_init(struct device_node *np) | |||
765 | */ | 776 | */ |
766 | for_each_available_child_of_node(np, frame) { | 777 | for_each_available_child_of_node(np, frame) { |
767 | int n; | 778 | int n; |
779 | u32 cntacr; | ||
768 | 780 | ||
769 | if (of_property_read_u32(frame, "frame-number", &n)) { | 781 | if (of_property_read_u32(frame, "frame-number", &n)) { |
770 | pr_err("arch_timer: Missing frame-number\n"); | 782 | pr_err("arch_timer: Missing frame-number\n"); |
771 | of_node_put(best_frame); | ||
772 | of_node_put(frame); | 783 | of_node_put(frame); |
773 | return; | 784 | goto out; |
774 | } | 785 | } |
775 | 786 | ||
776 | if (cnttidr & CNTTIDR_VIRT(n)) { | 787 | /* Try enabling everything, and see what sticks */ |
788 | cntacr = CNTACR_RFRQ | CNTACR_RWPT | CNTACR_RPCT | | ||
789 | CNTACR_RWVT | CNTACR_RVOFF | CNTACR_RVCT; | ||
790 | writel_relaxed(cntacr, cntctlbase + CNTACR(n)); | ||
791 | cntacr = readl_relaxed(cntctlbase + CNTACR(n)); | ||
792 | |||
793 | if ((cnttidr & CNTTIDR_VIRT(n)) && | ||
794 | !(~cntacr & (CNTACR_RWVT | CNTACR_RVCT))) { | ||
777 | of_node_put(best_frame); | 795 | of_node_put(best_frame); |
778 | best_frame = frame; | 796 | best_frame = frame; |
779 | arch_timer_mem_use_virtual = true; | 797 | arch_timer_mem_use_virtual = true; |
780 | break; | 798 | break; |
781 | } | 799 | } |
800 | |||
801 | if (~cntacr & (CNTACR_RWPT | CNTACR_RPCT)) | ||
802 | continue; | ||
803 | |||
782 | of_node_put(best_frame); | 804 | of_node_put(best_frame); |
783 | best_frame = of_node_get(frame); | 805 | best_frame = of_node_get(frame); |
784 | } | 806 | } |
@@ -786,24 +808,26 @@ static void __init arch_timer_mem_init(struct device_node *np) | |||
786 | base = arch_counter_base = of_iomap(best_frame, 0); | 808 | base = arch_counter_base = of_iomap(best_frame, 0); |
787 | if (!base) { | 809 | if (!base) { |
788 | pr_err("arch_timer: Can't map frame's registers\n"); | 810 | pr_err("arch_timer: Can't map frame's registers\n"); |
789 | of_node_put(best_frame); | 811 | goto out; |
790 | return; | ||
791 | } | 812 | } |
792 | 813 | ||
793 | if (arch_timer_mem_use_virtual) | 814 | if (arch_timer_mem_use_virtual) |
794 | irq = irq_of_parse_and_map(best_frame, 1); | 815 | irq = irq_of_parse_and_map(best_frame, 1); |
795 | else | 816 | else |
796 | irq = irq_of_parse_and_map(best_frame, 0); | 817 | irq = irq_of_parse_and_map(best_frame, 0); |
797 | of_node_put(best_frame); | 818 | |
798 | if (!irq) { | 819 | if (!irq) { |
799 | pr_err("arch_timer: Frame missing %s irq", | 820 | pr_err("arch_timer: Frame missing %s irq", |
800 | arch_timer_mem_use_virtual ? "virt" : "phys"); | 821 | arch_timer_mem_use_virtual ? "virt" : "phys"); |
801 | return; | 822 | goto out; |
802 | } | 823 | } |
803 | 824 | ||
804 | arch_timer_detect_rate(base, np); | 825 | arch_timer_detect_rate(base, np); |
805 | arch_timer_mem_register(base, irq); | 826 | arch_timer_mem_register(base, irq); |
806 | arch_timer_common_init(); | 827 | arch_timer_common_init(); |
828 | out: | ||
829 | iounmap(cntctlbase); | ||
830 | of_node_put(best_frame); | ||
807 | } | 831 | } |
808 | CLOCKSOURCE_OF_DECLARE(armv7_arch_timer_mem, "arm,armv7-timer-mem", | 832 | CLOCKSOURCE_OF_DECLARE(armv7_arch_timer_mem, "arm,armv7-timer-mem", |
809 | arch_timer_mem_init); | 833 | arch_timer_mem_init); |
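The frame-selection rework leans on CNTACR being probe-by-writeback: software requests every access right and reads back which bits the secure world allowed to stick. A reduced sketch of that probe, assuming an already ioremapped cntctlbase and the CNTACR bit layout introduced in the hunk above:

    #include <linux/bitops.h>
    #include <linux/io.h>
    #include <linux/types.h>

    #define CNTACR(n)       (0x40 + ((n) * 4))
    #define CNTACR_RPCT     BIT(0)
    #define CNTACR_RVCT     BIT(1)
    #define CNTACR_RWVT     BIT(4)
    #define CNTACR_RWPT     BIT(5)

    /* Returns true if frame @n grants full virtual timer access. */
    static bool frame_has_virt_access(void __iomem *cntctlbase, int n)
    {
            u32 cntacr = CNTACR_RPCT | CNTACR_RVCT | CNTACR_RWVT | CNTACR_RWPT;

            /* Request everything; the register only latches permitted bits. */
            writel_relaxed(cntacr, cntctlbase + CNTACR(n));
            cntacr = readl_relaxed(cntctlbase + CNTACR(n));

            /* Virtual use needs both the RWVT and RVCT bits to have stuck. */
            return !(~cntacr & (CNTACR_RWVT | CNTACR_RVCT));
    }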
diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c index d189d8cb69f7..9df0d1699d22 100644 --- a/drivers/clocksource/arm_global_timer.c +++ b/drivers/clocksource/arm_global_timer.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/clockchips.h> | 16 | #include <linux/clockchips.h> |
17 | #include <linux/cpu.h> | 17 | #include <linux/cpu.h> |
18 | #include <linux/clk.h> | 18 | #include <linux/clk.h> |
19 | #include <linux/delay.h> | ||
19 | #include <linux/err.h> | 20 | #include <linux/err.h> |
20 | #include <linux/io.h> | 21 | #include <linux/io.h> |
21 | #include <linux/of.h> | 22 | #include <linux/of.h> |
@@ -174,6 +175,7 @@ static int gt_clockevents_init(struct clock_event_device *clk) | |||
174 | clk->set_state_shutdown = gt_clockevent_shutdown; | 175 | clk->set_state_shutdown = gt_clockevent_shutdown; |
175 | clk->set_state_periodic = gt_clockevent_set_periodic; | 176 | clk->set_state_periodic = gt_clockevent_set_periodic; |
176 | clk->set_state_oneshot = gt_clockevent_shutdown; | 177 | clk->set_state_oneshot = gt_clockevent_shutdown; |
178 | clk->set_state_oneshot_stopped = gt_clockevent_shutdown; | ||
177 | clk->set_next_event = gt_clockevent_set_next_event; | 179 | clk->set_next_event = gt_clockevent_set_next_event; |
178 | clk->cpumask = cpumask_of(cpu); | 180 | clk->cpumask = cpumask_of(cpu); |
179 | clk->rating = 300; | 181 | clk->rating = 300; |
@@ -221,6 +223,21 @@ static u64 notrace gt_sched_clock_read(void) | |||
221 | } | 223 | } |
222 | #endif | 224 | #endif |
223 | 225 | ||
226 | static unsigned long gt_read_long(void) | ||
227 | { | ||
228 | return readl_relaxed(gt_base + GT_COUNTER0); | ||
229 | } | ||
230 | |||
231 | static struct delay_timer gt_delay_timer = { | ||
232 | .read_current_timer = gt_read_long, | ||
233 | }; | ||
234 | |||
235 | static void __init gt_delay_timer_init(void) | ||
236 | { | ||
237 | gt_delay_timer.freq = gt_clk_rate; | ||
238 | register_current_timer_delay(&gt_delay_timer); | ||
239 | } | ||
240 | |||
224 | static void __init gt_clocksource_init(void) | 241 | static void __init gt_clocksource_init(void) |
225 | { | 242 | { |
226 | writel(0, gt_base + GT_CONTROL); | 243 | writel(0, gt_base + GT_CONTROL); |
@@ -317,6 +334,7 @@ static void __init global_timer_of_register(struct device_node *np) | |||
317 | /* Immediately configure the timer on the boot CPU */ | 334 | /* Immediately configure the timer on the boot CPU */ |
318 | gt_clocksource_init(); | 335 | gt_clocksource_init(); |
319 | gt_clockevents_init(this_cpu_ptr(gt_evt)); | 336 | gt_clockevents_init(this_cpu_ptr(gt_evt)); |
337 | gt_delay_timer_init(); | ||
320 | 338 | ||
321 | return; | 339 | return; |
322 | 340 | ||
diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c index ff44082a0827..be09bc0b5e26 100644 --- a/drivers/clocksource/exynos_mct.c +++ b/drivers/clocksource/exynos_mct.c | |||
@@ -313,6 +313,7 @@ static struct clock_event_device mct_comp_device = { | |||
313 | .set_state_periodic = mct_set_state_periodic, | 313 | .set_state_periodic = mct_set_state_periodic, |
314 | .set_state_shutdown = mct_set_state_shutdown, | 314 | .set_state_shutdown = mct_set_state_shutdown, |
315 | .set_state_oneshot = mct_set_state_shutdown, | 315 | .set_state_oneshot = mct_set_state_shutdown, |
316 | .set_state_oneshot_stopped = mct_set_state_shutdown, | ||
316 | .tick_resume = mct_set_state_shutdown, | 317 | .tick_resume = mct_set_state_shutdown, |
317 | }; | 318 | }; |
318 | 319 | ||
@@ -452,6 +453,7 @@ static int exynos4_local_timer_setup(struct mct_clock_event_device *mevt) | |||
452 | evt->set_state_periodic = set_state_periodic; | 453 | evt->set_state_periodic = set_state_periodic; |
453 | evt->set_state_shutdown = set_state_shutdown; | 454 | evt->set_state_shutdown = set_state_shutdown; |
454 | evt->set_state_oneshot = set_state_shutdown; | 455 | evt->set_state_oneshot = set_state_shutdown; |
456 | evt->set_state_oneshot_stopped = set_state_shutdown; | ||
455 | evt->tick_resume = set_state_shutdown; | 457 | evt->tick_resume = set_state_shutdown; |
456 | evt->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; | 458 | evt->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; |
457 | evt->rating = 450; | 459 | evt->rating = 450; |
diff --git a/drivers/clocksource/rockchip_timer.c b/drivers/clocksource/rockchip_timer.c index 8c77a529d0d4..b991b288c803 100644 --- a/drivers/clocksource/rockchip_timer.c +++ b/drivers/clocksource/rockchip_timer.c | |||
@@ -122,23 +122,23 @@ static void __init rk_timer_init(struct device_node *np) | |||
122 | pclk = of_clk_get_by_name(np, "pclk"); | 122 | pclk = of_clk_get_by_name(np, "pclk"); |
123 | if (IS_ERR(pclk)) { | 123 | if (IS_ERR(pclk)) { |
124 | pr_err("Failed to get pclk for '%s'\n", TIMER_NAME); | 124 | pr_err("Failed to get pclk for '%s'\n", TIMER_NAME); |
125 | return; | 125 | goto out_unmap; |
126 | } | 126 | } |
127 | 127 | ||
128 | if (clk_prepare_enable(pclk)) { | 128 | if (clk_prepare_enable(pclk)) { |
129 | pr_err("Failed to enable pclk for '%s'\n", TIMER_NAME); | 129 | pr_err("Failed to enable pclk for '%s'\n", TIMER_NAME); |
130 | return; | 130 | goto out_unmap; |
131 | } | 131 | } |
132 | 132 | ||
133 | timer_clk = of_clk_get_by_name(np, "timer"); | 133 | timer_clk = of_clk_get_by_name(np, "timer"); |
134 | if (IS_ERR(timer_clk)) { | 134 | if (IS_ERR(timer_clk)) { |
135 | pr_err("Failed to get timer clock for '%s'\n", TIMER_NAME); | 135 | pr_err("Failed to get timer clock for '%s'\n", TIMER_NAME); |
136 | return; | 136 | goto out_timer_clk; |
137 | } | 137 | } |
138 | 138 | ||
139 | if (clk_prepare_enable(timer_clk)) { | 139 | if (clk_prepare_enable(timer_clk)) { |
140 | pr_err("Failed to enable timer clock\n"); | 140 | pr_err("Failed to enable timer clock\n"); |
141 | return; | 141 | goto out_timer_clk; |
142 | } | 142 | } |
143 | 143 | ||
144 | bc_timer.freq = clk_get_rate(timer_clk); | 144 | bc_timer.freq = clk_get_rate(timer_clk); |
@@ -146,7 +146,7 @@ static void __init rk_timer_init(struct device_node *np) | |||
146 | irq = irq_of_parse_and_map(np, 0); | 146 | irq = irq_of_parse_and_map(np, 0); |
147 | if (!irq) { | 147 | if (!irq) { |
148 | pr_err("Failed to map interrupts for '%s'\n", TIMER_NAME); | 148 | pr_err("Failed to map interrupts for '%s'\n", TIMER_NAME); |
149 | return; | 149 | goto out_irq; |
150 | } | 150 | } |
151 | 151 | ||
152 | ce->name = TIMER_NAME; | 152 | ce->name = TIMER_NAME; |
@@ -164,10 +164,19 @@ static void __init rk_timer_init(struct device_node *np) | |||
164 | ret = request_irq(irq, rk_timer_interrupt, IRQF_TIMER, TIMER_NAME, ce); | 164 | ret = request_irq(irq, rk_timer_interrupt, IRQF_TIMER, TIMER_NAME, ce); |
165 | if (ret) { | 165 | if (ret) { |
166 | pr_err("Failed to initialize '%s': %d\n", TIMER_NAME, ret); | 166 | pr_err("Failed to initialize '%s': %d\n", TIMER_NAME, ret); |
167 | return; | 167 | goto out_irq; |
168 | } | 168 | } |
169 | 169 | ||
170 | clockevents_config_and_register(ce, bc_timer.freq, 1, UINT_MAX); | 170 | clockevents_config_and_register(ce, bc_timer.freq, 1, UINT_MAX); |
171 | |||
172 | return; | ||
173 | |||
174 | out_irq: | ||
175 | clk_disable_unprepare(timer_clk); | ||
176 | out_timer_clk: | ||
177 | clk_disable_unprepare(pclk); | ||
178 | out_unmap: | ||
179 | iounmap(bc_timer.base); | ||
171 | } | 180 | } |
172 | 181 | ||
173 | CLOCKSOURCE_OF_DECLARE(rk_timer, "rockchip,rk3288-timer", rk_timer_init); | 182 | CLOCKSOURCE_OF_DECLARE(rk_timer, "rockchip,rk3288-timer", rk_timer_init); |
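The reworked rk_timer_init() is the standard goto-unwind idiom: each label releases exactly what was acquired before the failing step, so any early exit leaves no clock enabled and no mapping held. A skeletal sketch of the shape (function and parameter names hypothetical):

    #include <linux/clk.h>
    #include <linux/io.h>

    static int example_init(struct clk *pclk, struct clk *timer_clk,
                            void __iomem *base)
    {
            int ret;

            ret = clk_prepare_enable(pclk);
            if (ret)
                    goto out_unmap;

            ret = clk_prepare_enable(timer_clk);
            if (ret)
                    goto out_pclk;

            return 0;       /* success: keep everything live */

    out_pclk:
            clk_disable_unprepare(pclk);
    out_unmap:
            iounmap(base);
            return ret;
    }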
diff --git a/drivers/clocksource/time-lpc32xx.c b/drivers/clocksource/time-lpc32xx.c index 1316876b487a..daae61e8c820 100644 --- a/drivers/clocksource/time-lpc32xx.c +++ b/drivers/clocksource/time-lpc32xx.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/clk.h> | 18 | #include <linux/clk.h> |
19 | #include <linux/clockchips.h> | 19 | #include <linux/clockchips.h> |
20 | #include <linux/clocksource.h> | 20 | #include <linux/clocksource.h> |
21 | #include <linux/delay.h> | ||
21 | #include <linux/interrupt.h> | 22 | #include <linux/interrupt.h> |
22 | #include <linux/irq.h> | 23 | #include <linux/irq.h> |
23 | #include <linux/kernel.h> | 24 | #include <linux/kernel.h> |
@@ -43,6 +44,7 @@ | |||
43 | struct lpc32xx_clock_event_ddata { | 44 | struct lpc32xx_clock_event_ddata { |
44 | struct clock_event_device evtdev; | 45 | struct clock_event_device evtdev; |
45 | void __iomem *base; | 46 | void __iomem *base; |
47 | u32 ticks_per_jiffy; | ||
46 | }; | 48 | }; |
47 | 49 | ||
48 | /* Needed for the sched clock */ | 50 | /* Needed for the sched clock */ |
@@ -53,6 +55,15 @@ static u64 notrace lpc32xx_read_sched_clock(void) | |||
53 | return readl(clocksource_timer_counter); | 55 | return readl(clocksource_timer_counter); |
54 | } | 56 | } |
55 | 57 | ||
58 | static unsigned long lpc32xx_delay_timer_read(void) | ||
59 | { | ||
60 | return readl(clocksource_timer_counter); | ||
61 | } | ||
62 | |||
63 | static struct delay_timer lpc32xx_delay_timer = { | ||
64 | .read_current_timer = lpc32xx_delay_timer_read, | ||
65 | }; | ||
66 | |||
56 | static int lpc32xx_clkevt_next_event(unsigned long delta, | 67 | static int lpc32xx_clkevt_next_event(unsigned long delta, |
57 | struct clock_event_device *evtdev) | 68 | struct clock_event_device *evtdev) |
58 | { | 69 | { |
@@ -60,14 +71,13 @@ static int lpc32xx_clkevt_next_event(unsigned long delta, | |||
60 | container_of(evtdev, struct lpc32xx_clock_event_ddata, evtdev); | 71 | container_of(evtdev, struct lpc32xx_clock_event_ddata, evtdev); |
61 | 72 | ||
62 | /* | 73 | /* |
63 | * Place timer in reset and program the delta in the prescale | 74 | * Place timer in reset and program the delta in the match |
64 | * register (PR). When the prescale counter matches the value | 75 | * channel 0 (MR0). When the timer counter matches the value |
65 | * in PR the counter register is incremented and the compare | 76 | * in the MR0 register, the match will trigger an interrupt. |
66 | * match will trigger. After setup the timer is released from | 77 | * After setup the timer is released from reset and enabled. |
67 | * reset and enabled. | ||
68 | */ | 78 | */ |
69 | writel_relaxed(LPC32XX_TIMER_TCR_CRST, ddata->base + LPC32XX_TIMER_TCR); | 79 | writel_relaxed(LPC32XX_TIMER_TCR_CRST, ddata->base + LPC32XX_TIMER_TCR); |
70 | writel_relaxed(delta, ddata->base + LPC32XX_TIMER_PR); | 80 | writel_relaxed(delta, ddata->base + LPC32XX_TIMER_MR0); |
71 | writel_relaxed(LPC32XX_TIMER_TCR_CEN, ddata->base + LPC32XX_TIMER_TCR); | 81 | writel_relaxed(LPC32XX_TIMER_TCR_CEN, ddata->base + LPC32XX_TIMER_TCR); |
72 | 82 | ||
73 | return 0; | 83 | return 0; |
@@ -86,11 +96,39 @@ static int lpc32xx_clkevt_shutdown(struct clock_event_device *evtdev) | |||
86 | 96 | ||
87 | static int lpc32xx_clkevt_oneshot(struct clock_event_device *evtdev) | 97 | static int lpc32xx_clkevt_oneshot(struct clock_event_device *evtdev) |
88 | { | 98 | { |
99 | struct lpc32xx_clock_event_ddata *ddata = | ||
100 | container_of(evtdev, struct lpc32xx_clock_event_ddata, evtdev); | ||
101 | |||
89 | /* | 102 | /* |
90 | * When using oneshot, we must also disable the timer | 103 | * When using oneshot, we must also disable the timer |
91 | * to wait for the first call to set_next_event(). | 104 | * to wait for the first call to set_next_event(). |
92 | */ | 105 | */ |
93 | return lpc32xx_clkevt_shutdown(evtdev); | 106 | writel_relaxed(0, ddata->base + LPC32XX_TIMER_TCR); |
107 | |||
108 | /* Enable interrupt, reset on match and stop on match (MCR). */ | ||
109 | writel_relaxed(LPC32XX_TIMER_MCR_MR0I | LPC32XX_TIMER_MCR_MR0R | | ||
110 | LPC32XX_TIMER_MCR_MR0S, ddata->base + LPC32XX_TIMER_MCR); | ||
111 | return 0; | ||
112 | } | ||
113 | |||
114 | static int lpc32xx_clkevt_periodic(struct clock_event_device *evtdev) | ||
115 | { | ||
116 | struct lpc32xx_clock_event_ddata *ddata = | ||
117 | container_of(evtdev, struct lpc32xx_clock_event_ddata, evtdev); | ||
118 | |||
119 | /* Enable interrupt and reset on match. */ | ||
120 | writel_relaxed(LPC32XX_TIMER_MCR_MR0I | LPC32XX_TIMER_MCR_MR0R, | ||
121 | ddata->base + LPC32XX_TIMER_MCR); | ||
122 | |||
123 | /* | ||
124 | * Place timer in reset and program the delta in the match | ||
125 | * channel 0 (MR0). | ||
126 | */ | ||
127 | writel_relaxed(LPC32XX_TIMER_TCR_CRST, ddata->base + LPC32XX_TIMER_TCR); | ||
128 | writel_relaxed(ddata->ticks_per_jiffy, ddata->base + LPC32XX_TIMER_MR0); | ||
129 | writel_relaxed(LPC32XX_TIMER_TCR_CEN, ddata->base + LPC32XX_TIMER_TCR); | ||
130 | |||
131 | return 0; | ||
94 | } | 132 | } |
95 | 133 | ||
96 | static irqreturn_t lpc32xx_clock_event_handler(int irq, void *dev_id) | 134 | static irqreturn_t lpc32xx_clock_event_handler(int irq, void *dev_id) |
@@ -108,11 +146,13 @@ static irqreturn_t lpc32xx_clock_event_handler(int irq, void *dev_id) | |||
108 | static struct lpc32xx_clock_event_ddata lpc32xx_clk_event_ddata = { | 146 | static struct lpc32xx_clock_event_ddata lpc32xx_clk_event_ddata = { |
109 | .evtdev = { | 147 | .evtdev = { |
110 | .name = "lpc3220 clockevent", | 148 | .name = "lpc3220 clockevent", |
111 | .features = CLOCK_EVT_FEAT_ONESHOT, | 149 | .features = CLOCK_EVT_FEAT_ONESHOT | |
150 | CLOCK_EVT_FEAT_PERIODIC, | ||
112 | .rating = 300, | 151 | .rating = 300, |
113 | .set_next_event = lpc32xx_clkevt_next_event, | 152 | .set_next_event = lpc32xx_clkevt_next_event, |
114 | .set_state_shutdown = lpc32xx_clkevt_shutdown, | 153 | .set_state_shutdown = lpc32xx_clkevt_shutdown, |
115 | .set_state_oneshot = lpc32xx_clkevt_oneshot, | 154 | .set_state_oneshot = lpc32xx_clkevt_oneshot, |
155 | .set_state_periodic = lpc32xx_clkevt_periodic, | ||
116 | }, | 156 | }, |
117 | }; | 157 | }; |
118 | 158 | ||
@@ -162,6 +202,8 @@ static int __init lpc32xx_clocksource_init(struct device_node *np) | |||
162 | } | 202 | } |
163 | 203 | ||
164 | clocksource_timer_counter = base + LPC32XX_TIMER_TC; | 204 | clocksource_timer_counter = base + LPC32XX_TIMER_TC; |
205 | lpc32xx_delay_timer.freq = rate; | ||
206 | register_current_timer_delay(&lpc32xx_delay_timer); | ||
165 | sched_clock_register(lpc32xx_read_sched_clock, 32, rate); | 207 | sched_clock_register(lpc32xx_read_sched_clock, 32, rate); |
166 | 208 | ||
167 | return 0; | 209 | return 0; |
@@ -210,18 +252,16 @@ static int __init lpc32xx_clockevent_init(struct device_node *np) | |||
210 | 252 | ||
211 | /* | 253 | /* |
212 | * Disable timer and clear any pending interrupt (IR) on match | 254 | * Disable timer and clear any pending interrupt (IR) on match |
213 | * channel 0 (MR0). Configure a compare match value of 1 on MR0 | 255 | * channel 0 (MR0). Clear the prescaler as it's not used. |
214 | * and enable interrupt, reset on match and stop on match (MCR). | ||
215 | */ | 256 | */ |
216 | writel_relaxed(0, base + LPC32XX_TIMER_TCR); | 257 | writel_relaxed(0, base + LPC32XX_TIMER_TCR); |
258 | writel_relaxed(0, base + LPC32XX_TIMER_PR); | ||
217 | writel_relaxed(0, base + LPC32XX_TIMER_CTCR); | 259 | writel_relaxed(0, base + LPC32XX_TIMER_CTCR); |
218 | writel_relaxed(LPC32XX_TIMER_IR_MR0INT, base + LPC32XX_TIMER_IR); | 260 | writel_relaxed(LPC32XX_TIMER_IR_MR0INT, base + LPC32XX_TIMER_IR); |
219 | writel_relaxed(1, base + LPC32XX_TIMER_MR0); | ||
220 | writel_relaxed(LPC32XX_TIMER_MCR_MR0I | LPC32XX_TIMER_MCR_MR0R | | ||
221 | LPC32XX_TIMER_MCR_MR0S, base + LPC32XX_TIMER_MCR); | ||
222 | 261 | ||
223 | rate = clk_get_rate(clk); | 262 | rate = clk_get_rate(clk); |
224 | lpc32xx_clk_event_ddata.base = base; | 263 | lpc32xx_clk_event_ddata.base = base; |
264 | lpc32xx_clk_event_ddata.ticks_per_jiffy = DIV_ROUND_CLOSEST(rate, HZ); | ||
225 | clockevents_config_and_register(&lpc32xx_clk_event_ddata.evtdev, | 265 | clockevents_config_and_register(&lpc32xx_clk_event_ddata.evtdev, |
226 | rate, 1, -1); | 266 | rate, 1, -1); |
227 | 267 | ||
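Periodic mode works by programming MR0 with one jiffy's worth of timer ticks, computed once at init as DIV_ROUND_CLOSEST(rate, HZ) so the rounding error is at most half a tick. A small worked example of that computation (the clock rate and HZ values are illustrative):

    #include <stdio.h>

    /* Mirrors the kernel's DIV_ROUND_CLOSEST() for positive operands. */
    #define DIV_ROUND_CLOSEST(x, d) (((x) + ((d) / 2)) / (d))

    int main(void)
    {
            unsigned long rate = 13000000; /* hypothetical 13 MHz timer clock */
            unsigned int hz = 100;         /* hypothetical CONFIG_HZ */

            /* Ticks per jiffy interval; this is the value written to MR0. */
            unsigned long ticks = DIV_ROUND_CLOSEST(rate, hz);
            printf("ticks_per_jiffy = %lu\n", ticks); /* prints 130000 */
            return 0;
    }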
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index cd83d477e32d..3a4b39afc0ab 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c | |||
@@ -1431,7 +1431,7 @@ static int __init intel_pstate_init(void) | |||
1431 | if (!all_cpu_data) | 1431 | if (!all_cpu_data) |
1432 | return -ENOMEM; | 1432 | return -ENOMEM; |
1433 | 1433 | ||
1434 | if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) { | 1434 | if (static_cpu_has(X86_FEATURE_HWP) && !no_hwp) { |
1435 | pr_info("intel_pstate: HWP enabled\n"); | 1435 | pr_info("intel_pstate: HWP enabled\n"); |
1436 | hwp_active++; | 1436 | hwp_active++; |
1437 | } | 1437 | } |
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index fb50911b3940..7e8c441ff2de 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig | |||
@@ -60,6 +60,17 @@ config ARM_VIC_NR | |||
60 | The maximum number of VICs available in the system, for | 60 | The maximum number of VICs available in the system, for |
61 | power management. | 61 | power management. |
62 | 62 | ||
63 | config ARMADA_370_XP_IRQ | ||
64 | bool | ||
65 | select GENERIC_IRQ_CHIP | ||
66 | select PCI_MSI_IRQ_DOMAIN if PCI_MSI | ||
67 | |||
68 | config ALPINE_MSI | ||
69 | bool | ||
70 | depends on PCI && PCI_MSI | ||
71 | select GENERIC_IRQ_CHIP | ||
72 | select PCI_MSI_IRQ_DOMAIN | ||
73 | |||
63 | config ATMEL_AIC_IRQ | 74 | config ATMEL_AIC_IRQ |
64 | bool | 75 | bool |
65 | select GENERIC_IRQ_CHIP | 76 | select GENERIC_IRQ_CHIP |
@@ -78,6 +89,11 @@ config I8259 | |||
78 | bool | 89 | bool |
79 | select IRQ_DOMAIN | 90 | select IRQ_DOMAIN |
80 | 91 | ||
92 | config BCM6345_L1_IRQ | ||
93 | bool | ||
94 | select GENERIC_IRQ_CHIP | ||
95 | select IRQ_DOMAIN | ||
96 | |||
81 | config BCM7038_L1_IRQ | 97 | config BCM7038_L1_IRQ |
82 | bool | 98 | bool |
83 | select GENERIC_IRQ_CHIP | 99 | select GENERIC_IRQ_CHIP |
@@ -151,6 +167,11 @@ config ST_IRQCHIP | |||
151 | help | 167 | help |
152 | Enables SysCfg Controlled IRQs on STi based platforms. | 168 | Enables SysCfg Controlled IRQs on STi based platforms. |
153 | 169 | ||
170 | config TANGO_IRQ | ||
171 | bool | ||
172 | select IRQ_DOMAIN | ||
173 | select GENERIC_IRQ_CHIP | ||
174 | |||
154 | config TB10X_IRQC | 175 | config TB10X_IRQC |
155 | bool | 176 | bool |
156 | select IRQ_DOMAIN | 177 | select IRQ_DOMAIN |
@@ -160,6 +181,7 @@ config TS4800_IRQ | |||
160 | tristate "TS-4800 IRQ controller" | 181 | tristate "TS-4800 IRQ controller" |
161 | select IRQ_DOMAIN | 182 | select IRQ_DOMAIN |
162 | depends on HAS_IOMEM | 183 | depends on HAS_IOMEM |
184 | depends on SOC_IMX51 || COMPILE_TEST | ||
163 | help | 185 | help |
164 | Support for the TS-4800 FPGA IRQ controller | 186 | Support for the TS-4800 FPGA IRQ controller |
165 | 187 | ||
@@ -193,6 +215,8 @@ config KEYSTONE_IRQ | |||
193 | 215 | ||
194 | config MIPS_GIC | 216 | config MIPS_GIC |
195 | bool | 217 | bool |
218 | select GENERIC_IRQ_IPI | ||
219 | select IRQ_DOMAIN_HIERARCHY | ||
196 | select MIPS_CM | 220 | select MIPS_CM |
197 | 221 | ||
198 | config INGENIC_IRQ | 222 | config INGENIC_IRQ |
@@ -218,3 +242,7 @@ config IRQ_MXS | |||
218 | def_bool y if MACH_ASM9260 || ARCH_MXS | 242 | def_bool y if MACH_ASM9260 || ARCH_MXS |
219 | select IRQ_DOMAIN | 243 | select IRQ_DOMAIN |
220 | select STMP_DEVICE | 244 | select STMP_DEVICE |
245 | |||
246 | config MVEBU_ODMI | ||
247 | bool | ||
248 | select GENERIC_MSI_IRQ_DOMAIN | ||
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 18caacb60d58..b03cfcbbac6b 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile | |||
@@ -1,11 +1,13 @@ | |||
1 | obj-$(CONFIG_IRQCHIP) += irqchip.o | 1 | obj-$(CONFIG_IRQCHIP) += irqchip.o |
2 | 2 | ||
3 | obj-$(CONFIG_ALPINE_MSI) += irq-alpine-msi.o | ||
4 | obj-$(CONFIG_ATH79) += irq-ath79-cpu.o | ||
5 | obj-$(CONFIG_ATH79) += irq-ath79-misc.o | ||
3 | obj-$(CONFIG_ARCH_BCM2835) += irq-bcm2835.o | 6 | obj-$(CONFIG_ARCH_BCM2835) += irq-bcm2835.o |
4 | obj-$(CONFIG_ARCH_BCM2835) += irq-bcm2836.o | 7 | obj-$(CONFIG_ARCH_BCM2835) += irq-bcm2836.o |
5 | obj-$(CONFIG_ARCH_EXYNOS) += exynos-combiner.o | 8 | obj-$(CONFIG_ARCH_EXYNOS) += exynos-combiner.o |
6 | obj-$(CONFIG_ARCH_HIP04) += irq-hip04.o | 9 | obj-$(CONFIG_ARCH_HIP04) += irq-hip04.o |
7 | obj-$(CONFIG_ARCH_MMP) += irq-mmp.o | 10 | obj-$(CONFIG_ARCH_MMP) += irq-mmp.o |
8 | obj-$(CONFIG_ARCH_MVEBU) += irq-armada-370-xp.o | ||
9 | obj-$(CONFIG_IRQ_MXS) += irq-mxs.o | 11 | obj-$(CONFIG_IRQ_MXS) += irq-mxs.o |
10 | obj-$(CONFIG_ARCH_TEGRA) += irq-tegra.o | 12 | obj-$(CONFIG_ARCH_TEGRA) += irq-tegra.o |
11 | obj-$(CONFIG_ARCH_S3C24XX) += irq-s3c24xx.o | 13 | obj-$(CONFIG_ARCH_S3C24XX) += irq-s3c24xx.o |
@@ -28,6 +30,7 @@ obj-$(CONFIG_ARM_GIC_V3_ITS) += irq-gic-v3-its.o irq-gic-v3-its-pci-msi.o irq-g | |||
28 | obj-$(CONFIG_HISILICON_IRQ_MBIGEN) += irq-mbigen.o | 30 | obj-$(CONFIG_HISILICON_IRQ_MBIGEN) += irq-mbigen.o |
29 | obj-$(CONFIG_ARM_NVIC) += irq-nvic.o | 31 | obj-$(CONFIG_ARM_NVIC) += irq-nvic.o |
30 | obj-$(CONFIG_ARM_VIC) += irq-vic.o | 32 | obj-$(CONFIG_ARM_VIC) += irq-vic.o |
33 | obj-$(CONFIG_ARMADA_370_XP_IRQ) += irq-armada-370-xp.o | ||
31 | obj-$(CONFIG_ATMEL_AIC_IRQ) += irq-atmel-aic-common.o irq-atmel-aic.o | 34 | obj-$(CONFIG_ATMEL_AIC_IRQ) += irq-atmel-aic-common.o irq-atmel-aic.o |
32 | obj-$(CONFIG_ATMEL_AIC5_IRQ) += irq-atmel-aic-common.o irq-atmel-aic5.o | 35 | obj-$(CONFIG_ATMEL_AIC5_IRQ) += irq-atmel-aic-common.o irq-atmel-aic5.o |
33 | obj-$(CONFIG_I8259) += irq-i8259.o | 36 | obj-$(CONFIG_I8259) += irq-i8259.o |
@@ -40,12 +43,14 @@ obj-$(CONFIG_VERSATILE_FPGA_IRQ) += irq-versatile-fpga.o | |||
40 | obj-$(CONFIG_ARCH_NSPIRE) += irq-zevio.o | 43 | obj-$(CONFIG_ARCH_NSPIRE) += irq-zevio.o |
41 | obj-$(CONFIG_ARCH_VT8500) += irq-vt8500.o | 44 | obj-$(CONFIG_ARCH_VT8500) += irq-vt8500.o |
42 | obj-$(CONFIG_ST_IRQCHIP) += irq-st.o | 45 | obj-$(CONFIG_ST_IRQCHIP) += irq-st.o |
46 | obj-$(CONFIG_TANGO_IRQ) += irq-tango.o | ||
43 | obj-$(CONFIG_TB10X_IRQC) += irq-tb10x.o | 47 | obj-$(CONFIG_TB10X_IRQC) += irq-tb10x.o |
44 | obj-$(CONFIG_TS4800_IRQ) += irq-ts4800.o | 48 | obj-$(CONFIG_TS4800_IRQ) += irq-ts4800.o |
45 | obj-$(CONFIG_XTENSA) += irq-xtensa-pic.o | 49 | obj-$(CONFIG_XTENSA) += irq-xtensa-pic.o |
46 | obj-$(CONFIG_XTENSA_MX) += irq-xtensa-mx.o | 50 | obj-$(CONFIG_XTENSA_MX) += irq-xtensa-mx.o |
47 | obj-$(CONFIG_IRQ_CROSSBAR) += irq-crossbar.o | 51 | obj-$(CONFIG_IRQ_CROSSBAR) += irq-crossbar.o |
48 | obj-$(CONFIG_SOC_VF610) += irq-vf610-mscm-ir.o | 52 | obj-$(CONFIG_SOC_VF610) += irq-vf610-mscm-ir.o |
53 | obj-$(CONFIG_BCM6345_L1_IRQ) += irq-bcm6345-l1.o | ||
49 | obj-$(CONFIG_BCM7038_L1_IRQ) += irq-bcm7038-l1.o | 54 | obj-$(CONFIG_BCM7038_L1_IRQ) += irq-bcm7038-l1.o |
50 | obj-$(CONFIG_BCM7120_L2_IRQ) += irq-bcm7120-l2.o | 55 | obj-$(CONFIG_BCM7120_L2_IRQ) += irq-bcm7120-l2.o |
51 | obj-$(CONFIG_BRCMSTB_L2_IRQ) += irq-brcmstb-l2.o | 56 | obj-$(CONFIG_BRCMSTB_L2_IRQ) += irq-brcmstb-l2.o |
@@ -59,3 +64,4 @@ obj-$(CONFIG_ARCH_SA1100) += irq-sa11x0.o | |||
59 | obj-$(CONFIG_INGENIC_IRQ) += irq-ingenic.o | 64 | obj-$(CONFIG_INGENIC_IRQ) += irq-ingenic.o |
60 | obj-$(CONFIG_IMX_GPCV2) += irq-imx-gpcv2.o | 65 | obj-$(CONFIG_IMX_GPCV2) += irq-imx-gpcv2.o |
61 | obj-$(CONFIG_PIC32_EVIC) += irq-pic32-evic.o | 66 | obj-$(CONFIG_PIC32_EVIC) += irq-pic32-evic.o |
67 | obj-$(CONFIG_MVEBU_ODMI) += irq-mvebu-odmi.o | ||
diff --git a/drivers/irqchip/irq-alpine-msi.c b/drivers/irqchip/irq-alpine-msi.c new file mode 100644 index 000000000000..25384255b30f --- /dev/null +++ b/drivers/irqchip/irq-alpine-msi.c | |||
@@ -0,0 +1,293 @@ | |||
1 | /* | ||
2 | * Annapurna Labs MSIX support services | ||
3 | * | ||
4 | * Copyright (C) 2016, Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
5 | * | ||
6 | * Antoine Tenart <antoine.tenart@free-electrons.com> | ||
7 | * | ||
8 | * This file is licensed under the terms of the GNU General Public | ||
9 | * License version 2. This program is licensed "as is" without any | ||
10 | * warranty of any kind, whether express or implied. | ||
11 | */ | ||
12 | |||
13 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
14 | |||
15 | #include <linux/irqchip.h> | ||
16 | #include <linux/irqchip/arm-gic.h> | ||
17 | #include <linux/msi.h> | ||
18 | #include <linux/of.h> | ||
19 | #include <linux/of_address.h> | ||
20 | #include <linux/of_irq.h> | ||
21 | #include <linux/of_pci.h> | ||
22 | #include <linux/pci.h> | ||
23 | #include <linux/slab.h> | ||
24 | |||
25 | #include <asm/irq.h> | ||
26 | #include <asm-generic/msi.h> | ||
27 | |||
28 | /* MSIX message address format: local GIC target */ | ||
29 | #define ALPINE_MSIX_SPI_TARGET_CLUSTER0 BIT(16) | ||
30 | |||
31 | struct alpine_msix_data { | ||
32 | spinlock_t msi_map_lock; | ||
33 | phys_addr_t addr; | ||
34 | u32 spi_first; /* The SGI number at which MSIs start */ | ||
35 | u32 num_spis; /* The number of SGIs for MSIs */ | ||
36 | unsigned long *msi_map; | ||
37 | }; | ||
38 | |||
39 | static void alpine_msix_mask_msi_irq(struct irq_data *d) | ||
40 | { | ||
41 | pci_msi_mask_irq(d); | ||
42 | irq_chip_mask_parent(d); | ||
43 | } | ||
44 | |||
45 | static void alpine_msix_unmask_msi_irq(struct irq_data *d) | ||
46 | { | ||
47 | pci_msi_unmask_irq(d); | ||
48 | irq_chip_unmask_parent(d); | ||
49 | } | ||
50 | |||
51 | static struct irq_chip alpine_msix_irq_chip = { | ||
52 | .name = "MSIx", | ||
53 | .irq_mask = alpine_msix_mask_msi_irq, | ||
54 | .irq_unmask = alpine_msix_unmask_msi_irq, | ||
55 | .irq_eoi = irq_chip_eoi_parent, | ||
56 | .irq_set_affinity = irq_chip_set_affinity_parent, | ||
57 | }; | ||
58 | |||
59 | static int alpine_msix_allocate_sgi(struct alpine_msix_data *priv, int num_req) | ||
60 | { | ||
61 | int first; | ||
62 | |||
63 | spin_lock(&priv->msi_map_lock); | ||
64 | |||
65 | first = bitmap_find_next_zero_area(priv->msi_map, priv->num_spis, 0, | ||
66 | num_req, 0); | ||
67 | if (first >= priv->num_spis) { | ||
68 | spin_unlock(&priv->msi_map_lock); | ||
69 | return -ENOSPC; | ||
70 | } | ||
71 | |||
72 | bitmap_set(priv->msi_map, first, num_req); | ||
73 | |||
74 | spin_unlock(&priv->msi_map_lock); | ||
75 | |||
76 | return priv->spi_first + first; | ||
77 | } | ||
78 | |||
79 | static void alpine_msix_free_sgi(struct alpine_msix_data *priv, unsigned sgi, | ||
80 | int num_req) | ||
81 | { | ||
82 | int first = sgi - priv->spi_first; | ||
83 | |||
84 | spin_lock(&priv->msi_map_lock); | ||
85 | |||
86 | bitmap_clear(priv->msi_map, first, num_req); | ||
87 | |||
88 | spin_unlock(&priv->msi_map_lock); | ||
89 | } | ||
90 | |||
91 | static void alpine_msix_compose_msi_msg(struct irq_data *data, | ||
92 | struct msi_msg *msg) | ||
93 | { | ||
94 | struct alpine_msix_data *priv = irq_data_get_irq_chip_data(data); | ||
95 | phys_addr_t msg_addr = priv->addr; | ||
96 | |||
97 | msg_addr |= (data->hwirq << 3); | ||
98 | |||
99 | msg->address_hi = upper_32_bits(msg_addr); | ||
100 | msg->address_lo = lower_32_bits(msg_addr); | ||
101 | msg->data = 0; | ||
102 | } | ||
103 | |||
104 | static struct msi_domain_info alpine_msix_domain_info = { | ||
105 | .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | | ||
106 | MSI_FLAG_PCI_MSIX, | ||
107 | .chip = &alpine_msix_irq_chip, | ||
108 | }; | ||
109 | |||
110 | static struct irq_chip middle_irq_chip = { | ||
111 | .name = "alpine_msix_middle", | ||
112 | .irq_mask = irq_chip_mask_parent, | ||
113 | .irq_unmask = irq_chip_unmask_parent, | ||
114 | .irq_eoi = irq_chip_eoi_parent, | ||
115 | .irq_set_affinity = irq_chip_set_affinity_parent, | ||
116 | .irq_compose_msi_msg = alpine_msix_compose_msi_msg, | ||
117 | }; | ||
118 | |||
119 | static int alpine_msix_gic_domain_alloc(struct irq_domain *domain, | ||
120 | unsigned int virq, int sgi) | ||
121 | { | ||
122 | struct irq_fwspec fwspec; | ||
123 | struct irq_data *d; | ||
124 | int ret; | ||
125 | |||
126 | if (!is_of_node(domain->parent->fwnode)) | ||
127 | return -EINVAL; | ||
128 | |||
129 | fwspec.fwnode = domain->parent->fwnode; | ||
130 | fwspec.param_count = 3; | ||
131 | fwspec.param[0] = 0; | ||
132 | fwspec.param[1] = sgi; | ||
133 | fwspec.param[2] = IRQ_TYPE_EDGE_RISING; | ||
134 | |||
135 | ret = irq_domain_alloc_irqs_parent(domain, virq, 1, &fwspec); | ||
136 | if (ret) | ||
137 | return ret; | ||
138 | |||
139 | d = irq_domain_get_irq_data(domain->parent, virq); | ||
140 | d->chip->irq_set_type(d, IRQ_TYPE_EDGE_RISING); | ||
141 | |||
142 | return 0; | ||
143 | } | ||
144 | |||
145 | static int alpine_msix_middle_domain_alloc(struct irq_domain *domain, | ||
146 | unsigned int virq, | ||
147 | unsigned int nr_irqs, void *args) | ||
148 | { | ||
149 | struct alpine_msix_data *priv = domain->host_data; | ||
150 | int sgi, err, i; | ||
151 | |||
152 | sgi = alpine_msix_allocate_sgi(priv, nr_irqs); | ||
153 | if (sgi < 0) | ||
154 | return sgi; | ||
155 | |||
156 | for (i = 0; i < nr_irqs; i++) { | ||
157 | err = alpine_msix_gic_domain_alloc(domain, virq + i, sgi + i); | ||
158 | if (err) | ||
159 | goto err_sgi; | ||
160 | |||
161 | irq_domain_set_hwirq_and_chip(domain, virq + i, sgi + i, | ||
162 | &middle_irq_chip, priv); | ||
163 | } | ||
164 | |||
165 | return 0; | ||
166 | |||
167 | err_sgi: | ||
168 | while (--i >= 0) | ||
169 | irq_domain_free_irqs_parent(domain, virq, i); | ||
170 | alpine_msix_free_sgi(priv, sgi, nr_irqs); | ||
171 | return err; | ||
172 | } | ||
173 | |||
174 | static void alpine_msix_middle_domain_free(struct irq_domain *domain, | ||
175 | unsigned int virq, | ||
176 | unsigned int nr_irqs) | ||
177 | { | ||
178 | struct irq_data *d = irq_domain_get_irq_data(domain, virq); | ||
179 | struct alpine_msix_data *priv = irq_data_get_irq_chip_data(d); | ||
180 | |||
181 | irq_domain_free_irqs_parent(domain, virq, nr_irqs); | ||
182 | alpine_msix_free_sgi(priv, d->hwirq, nr_irqs); | ||
183 | } | ||
184 | |||
185 | static const struct irq_domain_ops alpine_msix_middle_domain_ops = { | ||
186 | .alloc = alpine_msix_middle_domain_alloc, | ||
187 | .free = alpine_msix_middle_domain_free, | ||
188 | }; | ||
189 | |||
190 | static int alpine_msix_init_domains(struct alpine_msix_data *priv, | ||
191 | struct device_node *node) | ||
192 | { | ||
193 | struct irq_domain *middle_domain, *msi_domain, *gic_domain; | ||
194 | struct device_node *gic_node; | ||
195 | |||
196 | gic_node = of_irq_find_parent(node); | ||
197 | if (!gic_node) { | ||
198 | pr_err("Failed to find the GIC node\n"); | ||
199 | return -ENODEV; | ||
200 | } | ||
201 | |||
202 | gic_domain = irq_find_host(gic_node); | ||
203 | if (!gic_domain) { | ||
204 | pr_err("Failed to find the GIC domain\n"); | ||
205 | return -ENXIO; | ||
206 | } | ||
207 | |||
208 | middle_domain = irq_domain_add_tree(NULL, | ||
209 | &alpine_msix_middle_domain_ops, | ||
210 | priv); | ||
211 | if (!middle_domain) { | ||
212 | pr_err("Failed to create the MSIX middle domain\n"); | ||
213 | return -ENOMEM; | ||
214 | } | ||
215 | |||
216 | middle_domain->parent = gic_domain; | ||
217 | |||
218 | msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(node), | ||
219 | &alpine_msix_domain_info, | ||
220 | middle_domain); | ||
221 | if (!msi_domain) { | ||
222 | pr_err("Failed to create MSI domain\n"); | ||
223 | irq_domain_remove(middle_domain); | ||
224 | return -ENOMEM; | ||
225 | } | ||
226 | |||
227 | return 0; | ||
228 | } | ||
229 | |||
230 | static int alpine_msix_init(struct device_node *node, | ||
231 | struct device_node *parent) | ||
232 | { | ||
233 | struct alpine_msix_data *priv; | ||
234 | struct resource res; | ||
235 | int ret; | ||
236 | |||
237 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); | ||
238 | if (!priv) | ||
239 | return -ENOMEM; | ||
240 | |||
241 | spin_lock_init(&priv->msi_map_lock); | ||
242 | |||
243 | ret = of_address_to_resource(node, 0, &res); | ||
244 | if (ret) { | ||
245 | pr_err("Failed to allocate resource\n"); | ||
246 | goto err_priv; | ||
247 | } | ||
248 | |||
249 | /* | ||
250 | * The 20 least significant bits of addr provide direct information | ||
251 | * regarding the interrupt destination. | ||
252 | * | ||
253 | * To select the primary GIC as the target GIC, bits [18:17] must be set | ||
254 | * to 0x0. In this case, bit 16 (SPI_TARGET_CLUSTER0) must be set. | ||
255 | */ | ||
256 | priv->addr = res.start & GENMASK_ULL(63, 20); | ||
257 | priv->addr |= ALPINE_MSIX_SPI_TARGET_CLUSTER0; | ||
258 | |||
259 | if (of_property_read_u32(node, "al,msi-base-spi", &priv->spi_first)) { | ||
260 | pr_err("Unable to parse MSI base\n"); | ||
261 | ret = -EINVAL; | ||
262 | goto err_priv; | ||
263 | } | ||
264 | |||
265 | if (of_property_read_u32(node, "al,msi-num-spis", &priv->num_spis)) { | ||
266 | pr_err("Unable to parse MSI numbers\n"); | ||
267 | ret = -EINVAL; | ||
268 | goto err_priv; | ||
269 | } | ||
270 | |||
271 | priv->msi_map = kzalloc(sizeof(*priv->msi_map) * BITS_TO_LONGS(priv->num_spis), | ||
272 | GFP_KERNEL); | ||
273 | if (!priv->msi_map) { | ||
274 | ret = -ENOMEM; | ||
275 | goto err_priv; | ||
276 | } | ||
277 | |||
278 | pr_debug("Registering %d msixs, starting at %d\n", | ||
279 | priv->num_spis, priv->spi_first); | ||
280 | |||
281 | ret = alpine_msix_init_domains(priv, node); | ||
282 | if (ret) | ||
283 | goto err_map; | ||
284 | |||
285 | return 0; | ||
286 | |||
287 | err_map: | ||
288 | kfree(priv->msi_map); | ||
289 | err_priv: | ||
290 | kfree(priv); | ||
291 | return ret; | ||
292 | } | ||
293 | IRQCHIP_DECLARE(alpine_msix, "al,alpine-msix", alpine_msix_init); | ||
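Putting the driver's address arithmetic together: the doorbell base keeps only bits [63:20] of the register resource, bit 16 selects cluster 0 of the primary GIC, and each vector gets a distinct doorbell by shifting its hardware number left by 3. A standalone sketch of the composition (the base address and hwirq are hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    /* Userspace mirror of the kernel's GENMASK_ULL(). */
    #define GENMASK_ULL(h, l) \
            (((~0ULL) << (l)) & (~0ULL >> (63 - (h))))
    #define SPI_TARGET_CLUSTER0 (1ULL << 16)

    int main(void)
    {
            uint64_t res_start = 0xfbe00000ULL; /* hypothetical reg base */
            unsigned int hwirq = 5;             /* hypothetical MSI slot */
            uint64_t addr, msg_addr;

            addr = (res_start & GENMASK_ULL(63, 20)) | SPI_TARGET_CLUSTER0;
            msg_addr = addr | ((uint64_t)hwirq << 3); /* per-vector doorbell */

            printf("doorbell for hwirq %u: 0x%llx\n", hwirq,
                   (unsigned long long)msg_addr); /* prints 0xfbe10028 */
            return 0;
    }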
diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index 3f3a8c3d2175..e7dc6cbda2a1 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c | |||
@@ -71,6 +71,7 @@ static u32 doorbell_mask_reg; | |||
71 | static int parent_irq; | 71 | static int parent_irq; |
72 | #ifdef CONFIG_PCI_MSI | 72 | #ifdef CONFIG_PCI_MSI |
73 | static struct irq_domain *armada_370_xp_msi_domain; | 73 | static struct irq_domain *armada_370_xp_msi_domain; |
74 | static struct irq_domain *armada_370_xp_msi_inner_domain; | ||
74 | static DECLARE_BITMAP(msi_used, PCI_MSI_DOORBELL_NR); | 75 | static DECLARE_BITMAP(msi_used, PCI_MSI_DOORBELL_NR); |
75 | static DEFINE_MUTEX(msi_used_lock); | 76 | static DEFINE_MUTEX(msi_used_lock); |
76 | static phys_addr_t msi_doorbell_addr; | 77 | static phys_addr_t msi_doorbell_addr; |
@@ -115,127 +116,102 @@ static void armada_370_xp_irq_unmask(struct irq_data *d) | |||
115 | 116 | ||
116 | #ifdef CONFIG_PCI_MSI | 117 | #ifdef CONFIG_PCI_MSI |
117 | 118 | ||
118 | static int armada_370_xp_alloc_msi(void) | 119 | static struct irq_chip armada_370_xp_msi_irq_chip = { |
119 | { | 120 | .name = "MPIC MSI", |
120 | int hwirq; | 121 | .irq_mask = pci_msi_mask_irq, |
122 | .irq_unmask = pci_msi_unmask_irq, | ||
123 | }; | ||
121 | 124 | ||
122 | mutex_lock(&msi_used_lock); | 125 | static struct msi_domain_info armada_370_xp_msi_domain_info = { |
123 | hwirq = find_first_zero_bit(&msi_used, PCI_MSI_DOORBELL_NR); | 126 | .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | |
124 | if (hwirq >= PCI_MSI_DOORBELL_NR) | 127 | MSI_FLAG_MULTI_PCI_MSI), |
125 | hwirq = -ENOSPC; | 128 | .chip = &armada_370_xp_msi_irq_chip, |
126 | else | 129 | }; |
127 | set_bit(hwirq, msi_used); | ||
128 | mutex_unlock(&msi_used_lock); | ||
129 | 130 | ||
130 | return hwirq; | 131 | static void armada_370_xp_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) |
132 | { | ||
133 | msg->address_lo = lower_32_bits(msi_doorbell_addr); | ||
134 | msg->address_hi = upper_32_bits(msi_doorbell_addr); | ||
135 | msg->data = 0xf00 | (data->hwirq + PCI_MSI_DOORBELL_START); | ||
131 | } | 136 | } |
132 | 137 | ||
133 | static void armada_370_xp_free_msi(int hwirq) | 138 | static int armada_370_xp_msi_set_affinity(struct irq_data *irq_data, |
139 | const struct cpumask *mask, bool force) | ||
134 | { | 140 | { |
135 | mutex_lock(&msi_used_lock); | 141 | return -EINVAL; |
136 | if (!test_bit(hwirq, msi_used)) | ||
137 | pr_err("trying to free unused MSI#%d\n", hwirq); | ||
138 | else | ||
139 | clear_bit(hwirq, msi_used); | ||
140 | mutex_unlock(&msi_used_lock); | ||
141 | } | 142 | } |
142 | 143 | ||
143 | static int armada_370_xp_setup_msi_irq(struct msi_controller *chip, | 144 | static struct irq_chip armada_370_xp_msi_bottom_irq_chip = { |
144 | struct pci_dev *pdev, | 145 | .name = "MPIC MSI", |
145 | struct msi_desc *desc) | 146 | .irq_compose_msi_msg = armada_370_xp_compose_msi_msg, |
146 | { | 147 | .irq_set_affinity = armada_370_xp_msi_set_affinity, |
147 | struct msi_msg msg; | 148 | }; |
148 | int virq, hwirq; | ||
149 | 149 | ||
150 | /* We support MSI, but not MSI-X */ | 150 | static int armada_370_xp_msi_alloc(struct irq_domain *domain, unsigned int virq, |
151 | if (desc->msi_attrib.is_msix) | 151 | unsigned int nr_irqs, void *args) |
152 | return -EINVAL; | 152 | { |
153 | int hwirq, i; | ||
153 | 154 | ||
154 | hwirq = armada_370_xp_alloc_msi(); | 155 | mutex_lock(&msi_used_lock); |
155 | if (hwirq < 0) | ||
156 | return hwirq; | ||
157 | 156 | ||
158 | virq = irq_create_mapping(armada_370_xp_msi_domain, hwirq); | 157 | hwirq = bitmap_find_next_zero_area(msi_used, PCI_MSI_DOORBELL_NR, |
159 | if (!virq) { | 158 | 0, nr_irqs, 0); |
160 | armada_370_xp_free_msi(hwirq); | 159 | if (hwirq >= PCI_MSI_DOORBELL_NR) { |
161 | return -EINVAL; | 160 | mutex_unlock(&msi_used_lock); |
161 | return -ENOSPC; | ||
162 | } | 162 | } |
163 | 163 | ||
164 | irq_set_msi_desc(virq, desc); | 164 | bitmap_set(msi_used, hwirq, nr_irqs); |
165 | 165 | mutex_unlock(&msi_used_lock); | |
166 | msg.address_lo = msi_doorbell_addr; | ||
167 | msg.address_hi = 0; | ||
168 | msg.data = 0xf00 | (hwirq + 16); | ||
169 | |||
170 | pci_write_msi_msg(virq, &msg); | ||
171 | return 0; | ||
172 | } | ||
173 | 166 | ||
174 | static void armada_370_xp_teardown_msi_irq(struct msi_controller *chip, | 167 | for (i = 0; i < nr_irqs; i++) { |
175 | unsigned int irq) | 168 | irq_domain_set_info(domain, virq + i, hwirq + i, |
176 | { | 169 | &armada_370_xp_msi_bottom_irq_chip, |
177 | struct irq_data *d = irq_get_irq_data(irq); | 170 | domain->host_data, handle_simple_irq, |
178 | unsigned long hwirq = d->hwirq; | 171 | NULL, NULL); |
172 | } | ||
179 | 173 | ||
180 | irq_dispose_mapping(irq); | 174 | return hwirq; |
181 | armada_370_xp_free_msi(hwirq); | ||
182 | } | 175 | } |
183 | 176 | ||
184 | static struct irq_chip armada_370_xp_msi_irq_chip = { | 177 | static void armada_370_xp_msi_free(struct irq_domain *domain, |
185 | .name = "armada_370_xp_msi_irq", | 178 | unsigned int virq, unsigned int nr_irqs) |
186 | .irq_enable = pci_msi_unmask_irq, | ||
187 | .irq_disable = pci_msi_mask_irq, | ||
188 | .irq_mask = pci_msi_mask_irq, | ||
189 | .irq_unmask = pci_msi_unmask_irq, | ||
190 | }; | ||
191 | |||
192 | static int armada_370_xp_msi_map(struct irq_domain *domain, unsigned int virq, | ||
193 | irq_hw_number_t hw) | ||
194 | { | 179 | { |
195 | irq_set_chip_and_handler(virq, &armada_370_xp_msi_irq_chip, | 180 | struct irq_data *d = irq_domain_get_irq_data(domain, virq); |
196 | handle_simple_irq); | ||
197 | 181 | ||
198 | return 0; | 182 | mutex_lock(&msi_used_lock); |
183 | bitmap_clear(msi_used, d->hwirq, nr_irqs); | ||
184 | mutex_unlock(&msi_used_lock); | ||
199 | } | 185 | } |
200 | 186 | ||
201 | static const struct irq_domain_ops armada_370_xp_msi_irq_ops = { | 187 | static const struct irq_domain_ops armada_370_xp_msi_domain_ops = { |
202 | .map = armada_370_xp_msi_map, | 188 | .alloc = armada_370_xp_msi_alloc, |
189 | .free = armada_370_xp_msi_free, | ||
203 | }; | 190 | }; |
204 | 191 | ||
205 | static int armada_370_xp_msi_init(struct device_node *node, | 192 | static int armada_370_xp_msi_init(struct device_node *node, |
206 | phys_addr_t main_int_phys_base) | 193 | phys_addr_t main_int_phys_base) |
207 | { | 194 | { |
208 | struct msi_controller *msi_chip; | ||
209 | u32 reg; | 195 | u32 reg; |
210 | int ret; | ||
211 | 196 | ||
212 | msi_doorbell_addr = main_int_phys_base + | 197 | msi_doorbell_addr = main_int_phys_base + |
213 | ARMADA_370_XP_SW_TRIG_INT_OFFS; | 198 | ARMADA_370_XP_SW_TRIG_INT_OFFS; |
214 | 199 | ||
215 | msi_chip = kzalloc(sizeof(*msi_chip), GFP_KERNEL); | 200 | armada_370_xp_msi_inner_domain = |
216 | if (!msi_chip) | 201 | irq_domain_add_linear(NULL, PCI_MSI_DOORBELL_NR, |
202 | &armada_370_xp_msi_domain_ops, NULL); | ||
203 | if (!armada_370_xp_msi_inner_domain) | ||
217 | return -ENOMEM; | 204 | return -ENOMEM; |
218 | 205 | ||
219 | msi_chip->setup_irq = armada_370_xp_setup_msi_irq; | ||
220 | msi_chip->teardown_irq = armada_370_xp_teardown_msi_irq; | ||
221 | msi_chip->of_node = node; | ||
222 | |||
223 | armada_370_xp_msi_domain = | 206 | armada_370_xp_msi_domain = |
224 | irq_domain_add_linear(NULL, PCI_MSI_DOORBELL_NR, | 207 | pci_msi_create_irq_domain(of_node_to_fwnode(node), |
225 | &armada_370_xp_msi_irq_ops, | 208 | &armada_370_xp_msi_domain_info, |
226 | NULL); | 209 | armada_370_xp_msi_inner_domain); |
227 | if (!armada_370_xp_msi_domain) { | 210 | if (!armada_370_xp_msi_domain) { |
228 | kfree(msi_chip); | 211 | irq_domain_remove(armada_370_xp_msi_inner_domain); |
229 | return -ENOMEM; | 212 | return -ENOMEM; |
230 | } | 213 | } |
231 | 214 | ||
232 | ret = of_pci_msi_chip_add(msi_chip); | ||
233 | if (ret < 0) { | ||
234 | irq_domain_remove(armada_370_xp_msi_domain); | ||
235 | kfree(msi_chip); | ||
236 | return ret; | ||
237 | } | ||
238 | |||
239 | reg = readl(per_cpu_int_base + ARMADA_370_XP_IN_DRBEL_MSK_OFFS) | 215 | reg = readl(per_cpu_int_base + ARMADA_370_XP_IN_DRBEL_MSK_OFFS) |
240 | | PCI_MSI_DOORBELL_MASK; | 216 | | PCI_MSI_DOORBELL_MASK; |
241 | 217 | ||
@@ -280,7 +256,7 @@ static int armada_xp_set_affinity(struct irq_data *d, | |||
280 | #endif | 256 | #endif |
281 | 257 | ||
282 | static struct irq_chip armada_370_xp_irq_chip = { | 258 | static struct irq_chip armada_370_xp_irq_chip = { |
283 | .name = "armada_370_xp_irq", | 259 | .name = "MPIC", |
284 | .irq_mask = armada_370_xp_irq_mask, | 260 | .irq_mask = armada_370_xp_irq_mask, |
285 | .irq_mask_ack = armada_370_xp_irq_mask, | 261 | .irq_mask_ack = armada_370_xp_irq_mask, |
286 | .irq_unmask = armada_370_xp_irq_unmask, | 262 | .irq_unmask = armada_370_xp_irq_unmask, |
@@ -427,12 +403,12 @@ static void armada_370_xp_handle_msi_irq(struct pt_regs *regs, bool is_chained) | |||
427 | continue; | 403 | continue; |
428 | 404 | ||
429 | if (is_chained) { | 405 | if (is_chained) { |
430 | irq = irq_find_mapping(armada_370_xp_msi_domain, | 406 | irq = irq_find_mapping(armada_370_xp_msi_inner_domain, |
431 | msinr - 16); | 407 | msinr - PCI_MSI_DOORBELL_START); |
432 | generic_handle_irq(irq); | 408 | generic_handle_irq(irq); |
433 | } else { | 409 | } else { |
434 | irq = msinr - 16; | 410 | irq = msinr - PCI_MSI_DOORBELL_START; |
435 | handle_domain_irq(armada_370_xp_msi_domain, | 411 | handle_domain_irq(armada_370_xp_msi_inner_domain, |
436 | irq, regs); | 412 | irq, regs); |
437 | } | 413 | } |
438 | } | 414 | } |
@@ -604,8 +580,8 @@ static int __init armada_370_xp_mpic_of_init(struct device_node *node, | |||
604 | armada_370_xp_mpic_domain = | 580 | armada_370_xp_mpic_domain = |
605 | irq_domain_add_linear(node, nr_irqs, | 581 | irq_domain_add_linear(node, nr_irqs, |
606 | &armada_370_xp_mpic_irq_ops, NULL); | 582 | &armada_370_xp_mpic_irq_ops, NULL); |
607 | |||
608 | BUG_ON(!armada_370_xp_mpic_domain); | 583 | BUG_ON(!armada_370_xp_mpic_domain); |
584 | armada_370_xp_mpic_domain->bus_token = DOMAIN_BUS_WIRED; | ||
609 | 585 | ||
610 | /* Setup for the boot CPU */ | 586 | /* Setup for the boot CPU */ |
611 | armada_xp_mpic_perf_init(); | 587 | armada_xp_mpic_perf_init(); |
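
With the switch to a hierarchical MSI domain above, vectors are carved out of the doorbell range with bitmap_find_next_zero_area() and every message points at the same doorbell address; only msg.data distinguishes vectors. PCI_MSI_DOORBELL_START is 16 here (the removed code hardcoded "hwirq + 16"), and the chained handler recovers the hwirq by subtracting it again. A standalone illustration of the encoding:

    #include <stdio.h>

    #define PCI_MSI_DOORBELL_START 16   /* matches the removed "hwirq + 16" */

    int main(void)
    {
        /* msg.data as composed by armada_370_xp_compose_msi_msg() */
        for (unsigned int hwirq = 0; hwirq < 3; hwirq++)
            printf("hwirq %u -> msg.data 0x%x -> doorbell %u\n",
                   hwirq,
                   0xf00 | (hwirq + PCI_MSI_DOORBELL_START),
                   hwirq + PCI_MSI_DOORBELL_START);
        return 0;   /* 0xf10/0xf11/0xf12, doorbells 16/17/18 */
    }
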
diff --git a/drivers/irqchip/irq-ath79-cpu.c b/drivers/irqchip/irq-ath79-cpu.c new file mode 100644 index 000000000000..befe93c5a51a --- /dev/null +++ b/drivers/irqchip/irq-ath79-cpu.c | |||
@@ -0,0 +1,97 @@ | |||
1 | /* | ||
2 | * Atheros AR71xx/AR724x/AR913x specific interrupt handling | ||
3 | * | ||
4 | * Copyright (C) 2015 Alban Bedel <albeu@free.fr> | ||
5 | * Copyright (C) 2010-2011 Jaiganesh Narayanan <jnarayanan@atheros.com> | ||
6 | * Copyright (C) 2008-2011 Gabor Juhos <juhosg@openwrt.org> | ||
7 | * Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org> | ||
8 | * | ||
9 | * Parts of this file are based on Atheros' 2.6.15/2.6.31 BSP | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify it | ||
12 | * under the terms of the GNU General Public License version 2 as published | ||
13 | * by the Free Software Foundation. | ||
14 | */ | ||
15 | |||
16 | #include <linux/interrupt.h> | ||
17 | #include <linux/irqchip.h> | ||
18 | #include <linux/of.h> | ||
19 | |||
20 | #include <asm/irq_cpu.h> | ||
21 | #include <asm/mach-ath79/ath79.h> | ||
22 | |||
23 | /* | ||
24 | * The IP2/IP3 lines are tied to a PCI/WMAC/USB device. Drivers for | ||
25 | * these devices typically allocate coherent DMA memory; however, the | ||
26 | * DMA controller may still have some unsynchronized data in the FIFO. | ||
27 | * Issue a flush in the handlers to ensure that the driver sees | ||
28 | * the update. | ||
29 | * | ||
30 | * This array maps the interrupt lines to the DDR write buffer channels. | ||
31 | */ | ||
32 | |||
33 | static unsigned irq_wb_chan[8] = { | ||
34 | -1, -1, -1, -1, -1, -1, -1, -1, | ||
35 | }; | ||
36 | |||
37 | asmlinkage void plat_irq_dispatch(void) | ||
38 | { | ||
39 | unsigned long pending; | ||
40 | int irq; | ||
41 | |||
42 | pending = read_c0_status() & read_c0_cause() & ST0_IM; | ||
43 | |||
44 | if (!pending) { | ||
45 | spurious_interrupt(); | ||
46 | return; | ||
47 | } | ||
48 | |||
49 | pending >>= CAUSEB_IP; | ||
50 | while (pending) { | ||
51 | irq = fls(pending) - 1; | ||
52 | if (irq < ARRAY_SIZE(irq_wb_chan) && irq_wb_chan[irq] != -1) | ||
53 | ath79_ddr_wb_flush(irq_wb_chan[irq]); | ||
54 | do_IRQ(MIPS_CPU_IRQ_BASE + irq); | ||
55 | pending &= ~BIT(irq); | ||
56 | } | ||
57 | } | ||
58 | |||
59 | static int __init ar79_cpu_intc_of_init( | ||
60 | struct device_node *node, struct device_node *parent) | ||
61 | { | ||
62 | int err, i, count; | ||
63 | |||
64 | /* Fill the irq_wb_chan table */ | ||
65 | count = of_count_phandle_with_args( | ||
66 | node, "qca,ddr-wb-channels", "#qca,ddr-wb-channel-cells"); | ||
67 | |||
68 | for (i = 0; i < count; i++) { | ||
69 | struct of_phandle_args args; | ||
70 | u32 irq = i; | ||
71 | |||
72 | of_property_read_u32_index( | ||
73 | node, "qca,ddr-wb-channel-interrupts", i, &irq); | ||
74 | if (irq >= ARRAY_SIZE(irq_wb_chan)) | ||
75 | continue; | ||
76 | |||
77 | err = of_parse_phandle_with_args( | ||
78 | node, "qca,ddr-wb-channels", | ||
79 | "#qca,ddr-wb-channel-cells", | ||
80 | i, &args); | ||
81 | if (err) | ||
82 | return err; | ||
83 | |||
84 | irq_wb_chan[irq] = args.args[0]; | ||
85 | } | ||
86 | |||
87 | return mips_cpu_irq_of_init(node, parent); | ||
88 | } | ||
89 | IRQCHIP_DECLARE(ar79_cpu_intc, "qca,ar7100-cpu-intc", | ||
90 | ar79_cpu_intc_of_init); | ||
91 | |||
92 | void __init ath79_cpu_irq_init(unsigned irq_wb_chan2, unsigned irq_wb_chan3) | ||
93 | { | ||
94 | irq_wb_chan[2] = irq_wb_chan2; | ||
95 | irq_wb_chan[3] = irq_wb_chan3; | ||
96 | mips_cpu_irq_init(); | ||
97 | } | ||
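
plat_irq_dispatch() above services the highest pending CP0 line first (fls() picks the top set bit), flushing the matching DDR write buffer channel before calling do_IRQ(). A standalone demo of that ordering, with fls() open-coded since it is a kernel helper:

    #include <stdio.h>

    static int fls_demo(unsigned long x)    /* like the kernel's fls() */
    {
        return x ? 8 * (int)sizeof(x) - __builtin_clzl(x) : 0;
    }

    int main(void)
    {
        unsigned long pending = (1UL << 2) | (1UL << 4);    /* IP2 and IP4 */

        while (pending) {
            int irq = fls_demo(pending) - 1;
            printf("servicing line %d\n", irq);     /* prints 4, then 2 */
            pending &= ~(1UL << irq);
        }
        return 0;
    }
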
diff --git a/drivers/irqchip/irq-ath79-misc.c b/drivers/irqchip/irq-ath79-misc.c new file mode 100644 index 000000000000..aa7290784636 --- /dev/null +++ b/drivers/irqchip/irq-ath79-misc.c | |||
@@ -0,0 +1,189 @@ | |||
1 | /* | ||
2 | * Atheros AR71xx/AR724x/AR913x MISC interrupt controller | ||
3 | * | ||
4 | * Copyright (C) 2015 Alban Bedel <albeu@free.fr> | ||
5 | * Copyright (C) 2010-2011 Jaiganesh Narayanan <jnarayanan@atheros.com> | ||
6 | * Copyright (C) 2008-2011 Gabor Juhos <juhosg@openwrt.org> | ||
7 | * Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org> | ||
8 | * | ||
9 | * Parts of this file are based on Atheros' 2.6.15/2.6.31 BSP | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify it | ||
12 | * under the terms of the GNU General Public License version 2 as published | ||
13 | * by the Free Software Foundation. | ||
14 | */ | ||
15 | |||
16 | #include <linux/irqchip.h> | ||
17 | #include <linux/irqchip/chained_irq.h> | ||
18 | #include <linux/of_address.h> | ||
19 | #include <linux/of_irq.h> | ||
20 | |||
21 | #define AR71XX_RESET_REG_MISC_INT_STATUS 0 | ||
22 | #define AR71XX_RESET_REG_MISC_INT_ENABLE 4 | ||
23 | |||
24 | #define ATH79_MISC_IRQ_COUNT 32 | ||
25 | |||
26 | static void ath79_misc_irq_handler(struct irq_desc *desc) | ||
27 | { | ||
28 | struct irq_domain *domain = irq_desc_get_handler_data(desc); | ||
29 | struct irq_chip *chip = irq_desc_get_chip(desc); | ||
30 | void __iomem *base = domain->host_data; | ||
31 | u32 pending; | ||
32 | |||
33 | chained_irq_enter(chip, desc); | ||
34 | |||
35 | pending = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS) & | ||
36 | __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE); | ||
37 | |||
38 | if (!pending) { | ||
39 | spurious_interrupt(); | ||
40 | chained_irq_exit(chip, desc); | ||
41 | return; | ||
42 | } | ||
43 | |||
44 | while (pending) { | ||
45 | int bit = __ffs(pending); | ||
46 | |||
47 | generic_handle_irq(irq_linear_revmap(domain, bit)); | ||
48 | pending &= ~BIT(bit); | ||
49 | } | ||
50 | |||
51 | chained_irq_exit(chip, desc); | ||
52 | } | ||
53 | |||
54 | static void ar71xx_misc_irq_unmask(struct irq_data *d) | ||
55 | { | ||
56 | void __iomem *base = irq_data_get_irq_chip_data(d); | ||
57 | unsigned int irq = d->hwirq; | ||
58 | u32 t; | ||
59 | |||
60 | t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE); | ||
61 | __raw_writel(t | BIT(irq), base + AR71XX_RESET_REG_MISC_INT_ENABLE); | ||
62 | |||
63 | /* flush write */ | ||
64 | __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE); | ||
65 | } | ||
66 | |||
67 | static void ar71xx_misc_irq_mask(struct irq_data *d) | ||
68 | { | ||
69 | void __iomem *base = irq_data_get_irq_chip_data(d); | ||
70 | unsigned int irq = d->hwirq; | ||
71 | u32 t; | ||
72 | |||
73 | t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE); | ||
74 | __raw_writel(t & ~BIT(irq), base + AR71XX_RESET_REG_MISC_INT_ENABLE); | ||
75 | |||
76 | /* flush write */ | ||
77 | __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE); | ||
78 | } | ||
79 | |||
80 | static void ar724x_misc_irq_ack(struct irq_data *d) | ||
81 | { | ||
82 | void __iomem *base = irq_data_get_irq_chip_data(d); | ||
83 | unsigned int irq = d->hwirq; | ||
84 | u32 t; | ||
85 | |||
86 | t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS); | ||
87 | __raw_writel(t & ~BIT(irq), base + AR71XX_RESET_REG_MISC_INT_STATUS); | ||
88 | |||
89 | /* flush write */ | ||
90 | __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS); | ||
91 | } | ||
92 | |||
93 | static struct irq_chip ath79_misc_irq_chip = { | ||
94 | .name = "MISC", | ||
95 | .irq_unmask = ar71xx_misc_irq_unmask, | ||
96 | .irq_mask = ar71xx_misc_irq_mask, | ||
97 | }; | ||
98 | |||
99 | static int misc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) | ||
100 | { | ||
101 | irq_set_chip_and_handler(irq, &ath79_misc_irq_chip, handle_level_irq); | ||
102 | irq_set_chip_data(irq, d->host_data); | ||
103 | return 0; | ||
104 | } | ||
105 | |||
106 | static const struct irq_domain_ops misc_irq_domain_ops = { | ||
107 | .xlate = irq_domain_xlate_onecell, | ||
108 | .map = misc_map, | ||
109 | }; | ||
110 | |||
111 | static void __init ath79_misc_intc_domain_init( | ||
112 | struct irq_domain *domain, int irq) | ||
113 | { | ||
114 | void __iomem *base = domain->host_data; | ||
115 | |||
116 | /* Disable and clear all interrupts */ | ||
117 | __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE); | ||
118 | __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS); | ||
119 | |||
120 | irq_set_chained_handler_and_data(irq, ath79_misc_irq_handler, domain); | ||
121 | } | ||
122 | |||
123 | static int __init ath79_misc_intc_of_init( | ||
124 | struct device_node *node, struct device_node *parent) | ||
125 | { | ||
126 | struct irq_domain *domain; | ||
127 | void __iomem *base; | ||
128 | int irq; | ||
129 | |||
130 | irq = irq_of_parse_and_map(node, 0); | ||
131 | if (!irq) { | ||
132 | pr_err("Failed to get MISC IRQ\n"); | ||
133 | return -EINVAL; | ||
134 | } | ||
135 | |||
136 | base = of_iomap(node, 0); | ||
137 | if (!base) { | ||
138 | pr_err("Failed to get MISC IRQ registers\n"); | ||
139 | return -ENOMEM; | ||
140 | } | ||
141 | |||
142 | domain = irq_domain_add_linear(node, ATH79_MISC_IRQ_COUNT, | ||
143 | &misc_irq_domain_ops, base); | ||
144 | if (!domain) { | ||
145 | pr_err("Failed to add MISC irqdomain\n"); | ||
146 | return -EINVAL; | ||
147 | } | ||
148 | |||
149 | ath79_misc_intc_domain_init(domain, irq); | ||
150 | return 0; | ||
151 | } | ||
152 | |||
153 | static int __init ar7100_misc_intc_of_init( | ||
154 | struct device_node *node, struct device_node *parent) | ||
155 | { | ||
156 | ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask; | ||
157 | return ath79_misc_intc_of_init(node, parent); | ||
158 | } | ||
159 | |||
160 | IRQCHIP_DECLARE(ar7100_misc_intc, "qca,ar7100-misc-intc", | ||
161 | ar7100_misc_intc_of_init); | ||
162 | |||
163 | static int __init ar7240_misc_intc_of_init( | ||
164 | struct device_node *node, struct device_node *parent) | ||
165 | { | ||
166 | ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack; | ||
167 | return ath79_misc_intc_of_init(node, parent); | ||
168 | } | ||
169 | |||
170 | IRQCHIP_DECLARE(ar7240_misc_intc, "qca,ar7240-misc-intc", | ||
171 | ar7240_misc_intc_of_init); | ||
172 | |||
173 | void __init ath79_misc_irq_init(void __iomem *regs, int irq, | ||
174 | int irq_base, bool is_ar71xx) | ||
175 | { | ||
176 | struct irq_domain *domain; | ||
177 | |||
178 | if (is_ar71xx) | ||
179 | ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask; | ||
180 | else | ||
181 | ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack; | ||
182 | |||
183 | domain = irq_domain_add_legacy(NULL, ATH79_MISC_IRQ_COUNT, | ||
184 | irq_base, 0, &misc_irq_domain_ops, regs); | ||
185 | if (!domain) | ||
186 | panic("Failed to create MISC irqdomain"); | ||
187 | |||
188 | ath79_misc_intc_domain_init(domain, irq); | ||
189 | } | ||
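
Besides the two DT probe paths, the driver keeps ath79_misc_irq_init() for legacy board code. A hedged sketch of such a caller; the base address and IRQ macros are assumptions drawn from the ath79 platform headers, not from this patch:

    /* Hypothetical legacy (non-DT) caller; values are placeholders. */
    void __init example_ath79_board_irq_init(void)
    {
        /* AR71xx reset block, where the MISC registers live (assumed) */
        void __iomem *base = ioremap(0x18060000, 0x100);

        /* true selects the AR71xx mask-ack behaviour, false the AR724x
         * ack-in-status behaviour, matching the two DT variants above. */
        ath79_misc_irq_init(base, ATH79_CPU_IRQ(6), ATH79_MISC_IRQ_BASE, true);
    }
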
diff --git a/drivers/irqchip/irq-atmel-aic-common.c b/drivers/irqchip/irq-atmel-aic-common.c index 37199b9b2cfa..28b26c80f4cf 100644 --- a/drivers/irqchip/irq-atmel-aic-common.c +++ b/drivers/irqchip/irq-atmel-aic-common.c | |||
@@ -80,16 +80,10 @@ int aic_common_set_type(struct irq_data *d, unsigned type, unsigned *val) | |||
80 | return 0; | 80 | return 0; |
81 | } | 81 | } |
82 | 82 | ||
83 | int aic_common_set_priority(int priority, unsigned *val) | 83 | void aic_common_set_priority(int priority, unsigned *val) |
84 | { | 84 | { |
85 | if (priority < AT91_AIC_IRQ_MIN_PRIORITY || | ||
86 | priority > AT91_AIC_IRQ_MAX_PRIORITY) | ||
87 | return -EINVAL; | ||
88 | |||
89 | *val &= ~AT91_AIC_PRIOR; | 85 | *val &= ~AT91_AIC_PRIOR; |
90 | *val |= priority; | 86 | *val |= priority; |
91 | |||
92 | return 0; | ||
93 | } | 87 | } |
94 | 88 | ||
95 | int aic_common_irq_domain_xlate(struct irq_domain *d, | 89 | int aic_common_irq_domain_xlate(struct irq_domain *d, |
@@ -193,7 +187,7 @@ void __init aic_common_rtt_irq_fixup(struct device_node *root) | |||
193 | } | 187 | } |
194 | } | 188 | } |
195 | 189 | ||
196 | void __init aic_common_irq_fixup(const struct of_device_id *matches) | 190 | static void __init aic_common_irq_fixup(const struct of_device_id *matches) |
197 | { | 191 | { |
198 | struct device_node *root = of_find_node_by_path("/"); | 192 | struct device_node *root = of_find_node_by_path("/"); |
199 | const struct of_device_id *match; | 193 | const struct of_device_id *match; |
@@ -214,7 +208,8 @@ void __init aic_common_irq_fixup(const struct of_device_id *matches) | |||
214 | 208 | ||
215 | struct irq_domain *__init aic_common_of_init(struct device_node *node, | 209 | struct irq_domain *__init aic_common_of_init(struct device_node *node, |
216 | const struct irq_domain_ops *ops, | 210 | const struct irq_domain_ops *ops, |
217 | const char *name, int nirqs) | 211 | const char *name, int nirqs, |
212 | const struct of_device_id *matches) | ||
218 | { | 213 | { |
219 | struct irq_chip_generic *gc; | 214 | struct irq_chip_generic *gc; |
220 | struct irq_domain *domain; | 215 | struct irq_domain *domain; |
@@ -264,6 +259,7 @@ struct irq_domain *__init aic_common_of_init(struct device_node *node, | |||
264 | } | 259 | } |
265 | 260 | ||
266 | aic_common_ext_irq_of_init(domain); | 261 | aic_common_ext_irq_of_init(domain); |
262 | aic_common_irq_fixup(matches); | ||
267 | 263 | ||
268 | return domain; | 264 | return domain; |
269 | 265 | ||
diff --git a/drivers/irqchip/irq-atmel-aic-common.h b/drivers/irqchip/irq-atmel-aic-common.h index 603f0a9d5411..af60376d50de 100644 --- a/drivers/irqchip/irq-atmel-aic-common.h +++ b/drivers/irqchip/irq-atmel-aic-common.h | |||
@@ -19,7 +19,7 @@ | |||
19 | 19 | ||
20 | int aic_common_set_type(struct irq_data *d, unsigned type, unsigned *val); | 20 | int aic_common_set_type(struct irq_data *d, unsigned type, unsigned *val); |
21 | 21 | ||
22 | int aic_common_set_priority(int priority, unsigned *val); | 22 | void aic_common_set_priority(int priority, unsigned *val); |
23 | 23 | ||
24 | int aic_common_irq_domain_xlate(struct irq_domain *d, | 24 | int aic_common_irq_domain_xlate(struct irq_domain *d, |
25 | struct device_node *ctrlr, | 25 | struct device_node *ctrlr, |
@@ -30,12 +30,11 @@ int aic_common_irq_domain_xlate(struct irq_domain *d, | |||
30 | 30 | ||
31 | struct irq_domain *__init aic_common_of_init(struct device_node *node, | 31 | struct irq_domain *__init aic_common_of_init(struct device_node *node, |
32 | const struct irq_domain_ops *ops, | 32 | const struct irq_domain_ops *ops, |
33 | const char *name, int nirqs); | 33 | const char *name, int nirqs, |
34 | const struct of_device_id *matches); | ||
34 | 35 | ||
35 | void __init aic_common_rtc_irq_fixup(struct device_node *root); | 36 | void __init aic_common_rtc_irq_fixup(struct device_node *root); |
36 | 37 | ||
37 | void __init aic_common_rtt_irq_fixup(struct device_node *root); | 38 | void __init aic_common_rtt_irq_fixup(struct device_node *root); |
38 | 39 | ||
39 | void __init aic_common_irq_fixup(const struct of_device_id *matches); | ||
40 | |||
41 | #endif /* __IRQ_ATMEL_AIC_COMMON_H */ | 40 | #endif /* __IRQ_ATMEL_AIC_COMMON_H */ |
diff --git a/drivers/irqchip/irq-atmel-aic.c b/drivers/irqchip/irq-atmel-aic.c index 8a0c7f288198..112e17c2768b 100644 --- a/drivers/irqchip/irq-atmel-aic.c +++ b/drivers/irqchip/irq-atmel-aic.c | |||
@@ -196,9 +196,8 @@ static int aic_irq_domain_xlate(struct irq_domain *d, | |||
196 | 196 | ||
197 | irq_gc_lock(gc); | 197 | irq_gc_lock(gc); |
198 | smr = irq_reg_readl(gc, AT91_AIC_SMR(*out_hwirq)); | 198 | smr = irq_reg_readl(gc, AT91_AIC_SMR(*out_hwirq)); |
199 | ret = aic_common_set_priority(intspec[2], &smr); | 199 | aic_common_set_priority(intspec[2], &smr); |
200 | if (!ret) | 200 | irq_reg_writel(gc, smr, AT91_AIC_SMR(*out_hwirq)); |
201 | irq_reg_writel(gc, smr, AT91_AIC_SMR(*out_hwirq)); | ||
202 | irq_gc_unlock(gc); | 201 | irq_gc_unlock(gc); |
203 | 202 | ||
204 | return ret; | 203 | return ret; |
@@ -248,12 +247,10 @@ static int __init aic_of_init(struct device_node *node, | |||
248 | return -EEXIST; | 247 | return -EEXIST; |
249 | 248 | ||
250 | domain = aic_common_of_init(node, &aic_irq_ops, "atmel-aic", | 249 | domain = aic_common_of_init(node, &aic_irq_ops, "atmel-aic", |
251 | NR_AIC_IRQS); | 250 | NR_AIC_IRQS, aic_irq_fixups); |
252 | if (IS_ERR(domain)) | 251 | if (IS_ERR(domain)) |
253 | return PTR_ERR(domain); | 252 | return PTR_ERR(domain); |
254 | 253 | ||
255 | aic_common_irq_fixup(aic_irq_fixups); | ||
256 | |||
257 | aic_domain = domain; | 254 | aic_domain = domain; |
258 | gc = irq_get_domain_generic_chip(domain, 0); | 255 | gc = irq_get_domain_generic_chip(domain, 0); |
259 | 256 | ||
diff --git a/drivers/irqchip/irq-atmel-aic5.c b/drivers/irqchip/irq-atmel-aic5.c index 62bb840c613f..4f0d068e1abe 100644 --- a/drivers/irqchip/irq-atmel-aic5.c +++ b/drivers/irqchip/irq-atmel-aic5.c | |||
@@ -272,9 +272,8 @@ static int aic5_irq_domain_xlate(struct irq_domain *d, | |||
272 | irq_gc_lock(bgc); | 272 | irq_gc_lock(bgc); |
273 | irq_reg_writel(bgc, *out_hwirq, AT91_AIC5_SSR); | 273 | irq_reg_writel(bgc, *out_hwirq, AT91_AIC5_SSR); |
274 | smr = irq_reg_readl(bgc, AT91_AIC5_SMR); | 274 | smr = irq_reg_readl(bgc, AT91_AIC5_SMR); |
275 | ret = aic_common_set_priority(intspec[2], &smr); | 275 | aic_common_set_priority(intspec[2], &smr); |
276 | if (!ret) | 276 | irq_reg_writel(bgc, smr, AT91_AIC5_SMR); |
277 | irq_reg_writel(bgc, intspec[2] | smr, AT91_AIC5_SMR); | ||
278 | irq_gc_unlock(bgc); | 277 | irq_gc_unlock(bgc); |
279 | 278 | ||
280 | return ret; | 279 | return ret; |
@@ -312,12 +311,10 @@ static int __init aic5_of_init(struct device_node *node, | |||
312 | return -EEXIST; | 311 | return -EEXIST; |
313 | 312 | ||
314 | domain = aic_common_of_init(node, &aic5_irq_ops, "atmel-aic5", | 313 | domain = aic_common_of_init(node, &aic5_irq_ops, "atmel-aic5", |
315 | nirqs); | 314 | nirqs, aic5_irq_fixups); |
316 | if (IS_ERR(domain)) | 315 | if (IS_ERR(domain)) |
317 | return PTR_ERR(domain); | 316 | return PTR_ERR(domain); |
318 | 317 | ||
319 | aic_common_irq_fixup(aic5_irq_fixups); | ||
320 | |||
321 | aic5_domain = domain; | 318 | aic5_domain = domain; |
322 | nchips = aic5_domain->revmap_size / 32; | 319 | nchips = aic5_domain->revmap_size / 32; |
323 | for (i = 0; i < nchips; i++) { | 320 | for (i = 0; i < nchips; i++) { |
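
aic_common_set_priority() is now a plain read-modify-write helper: it clears the AT91_AIC_PRIOR field and ORs in the new value, and both xlate paths write the resulting SMR back unconditionally. A standalone demo of why clear-then-set matters for a bitfield update (AT91_AIC_PRIOR is assumed to be the 3-bit mask 0x7, as in the real header):

    #include <stdio.h>

    #define AT91_AIC_PRIOR 0x7u     /* 3-bit priority field (assumed value) */

    int main(void)
    {
        unsigned int smr = 0x65;    /* old priority 5, other SMR bits 0x60 */
        unsigned int prio = 2;

        unsigned int ored  = smr | prio;                      /* 0x67: field now 7 */
        unsigned int fixed = (smr & ~AT91_AIC_PRIOR) | prio;  /* 0x62: field now 2 */

        printf("plain OR: 0x%x, clear-then-set: 0x%x\n", ored, fixed);
        return 0;
    }
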
diff --git a/drivers/irqchip/irq-bcm2836.c b/drivers/irqchip/irq-bcm2836.c index 963065a0d774..b6e950d4782a 100644 --- a/drivers/irqchip/irq-bcm2836.c +++ b/drivers/irqchip/irq-bcm2836.c | |||
@@ -229,7 +229,6 @@ int __init bcm2836_smp_boot_secondary(unsigned int cpu, | |||
229 | unsigned long secondary_startup_phys = | 229 | unsigned long secondary_startup_phys = |
230 | (unsigned long)virt_to_phys((void *)secondary_startup); | 230 | (unsigned long)virt_to_phys((void *)secondary_startup); |
231 | 231 | ||
232 | dsb(); | ||
233 | writel(secondary_startup_phys, | 232 | writel(secondary_startup_phys, |
234 | intc.base + LOCAL_MAILBOX3_SET0 + 16 * cpu); | 233 | intc.base + LOCAL_MAILBOX3_SET0 + 16 * cpu); |
235 | 234 | ||
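
The dsb() dropped above was redundant: on ARM the non-relaxed MMIO accessors already order prior memory accesses before the store, so the mailbox write cannot overtake the secondary_startup setup it publishes. The rough shape of the arch accessor, paraphrased rather than quoted from the arm headers:

    /* writel() = barrier + relaxed store (paraphrased, not verbatim) */
    #define writel(v, c)    ({ __iowmb(); writel_relaxed((v), (c)); })
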
diff --git a/drivers/irqchip/irq-bcm6345-l1.c b/drivers/irqchip/irq-bcm6345-l1.c new file mode 100644 index 000000000000..b844c89a9506 --- /dev/null +++ b/drivers/irqchip/irq-bcm6345-l1.c | |||
@@ -0,0 +1,364 @@ | |||
1 | /* | ||
2 | * Broadcom BCM6345 style Level 1 interrupt controller driver | ||
3 | * | ||
4 | * Copyright (C) 2014 Broadcom Corporation | ||
5 | * Copyright 2015 Simon Arlott | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | * | ||
11 | * This is based on the BCM7038 (which supports SMP) but with a single | ||
12 | * enable register instead of separate mask/set/clear registers. | ||
13 | * | ||
14 | * The BCM3380 has a similar mask/status register layout, but each pair | ||
15 | * of words is at separate locations (and SMP is not supported). | ||
16 | * | ||
17 | * ENABLE/STATUS words are packed next to each other for each CPU: | ||
18 | * | ||
19 | * BCM6368: | ||
20 | * 0x1000_0020: CPU0_W0_ENABLE | ||
21 | * 0x1000_0024: CPU0_W1_ENABLE | ||
22 | * 0x1000_0028: CPU0_W0_STATUS IRQs 32-63 | ||
23 | * 0x1000_002c: CPU0_W1_STATUS IRQs 0-31 | ||
24 | * 0x1000_0030: CPU1_W0_ENABLE | ||
25 | * 0x1000_0034: CPU1_W1_ENABLE | ||
26 | * 0x1000_0038: CPU1_W0_STATUS IRQs 32-63 | ||
27 | * 0x1000_003c: CPU1_W1_STATUS IRQs 0-31 | ||
28 | * | ||
29 | * BCM63168: | ||
30 | * 0x1000_0020: CPU0_W0_ENABLE | ||
31 | * 0x1000_0024: CPU0_W1_ENABLE | ||
32 | * 0x1000_0028: CPU0_W2_ENABLE | ||
33 | * 0x1000_002c: CPU0_W3_ENABLE | ||
34 | * 0x1000_0030: CPU0_W0_STATUS IRQs 96-127 | ||
35 | * 0x1000_0034: CPU0_W1_STATUS IRQs 64-95 | ||
36 | * 0x1000_0038: CPU0_W2_STATUS IRQs 32-63 | ||
37 | * 0x1000_003c: CPU0_W3_STATUS IRQs 0-31 | ||
38 | * 0x1000_0040: CPU1_W0_ENABLE | ||
39 | * 0x1000_0044: CPU1_W1_ENABLE | ||
40 | * 0x1000_0048: CPU1_W2_ENABLE | ||
41 | * 0x1000_004c: CPU1_W3_ENABLE | ||
42 | * 0x1000_0050: CPU1_W0_STATUS IRQs 96-127 | ||
43 | * 0x1000_0054: CPU1_W1_STATUS IRQs 64-95 | ||
44 | * 0x1000_0058: CPU1_W2_STATUS IRQs 32-63 | ||
45 | * 0x1000_005c: CPU1_W3_STATUS IRQs 0-31 | ||
46 | * | ||
47 | * IRQs are numbered in CPU native endian order | ||
48 | * (which is big-endian in these examples) | ||
49 | */ | ||
50 | |||
51 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
52 | |||
53 | #include <linux/bitops.h> | ||
54 | #include <linux/cpumask.h> | ||
55 | #include <linux/kconfig.h> | ||
56 | #include <linux/kernel.h> | ||
57 | #include <linux/init.h> | ||
58 | #include <linux/interrupt.h> | ||
59 | #include <linux/io.h> | ||
60 | #include <linux/ioport.h> | ||
61 | #include <linux/irq.h> | ||
62 | #include <linux/irqdomain.h> | ||
63 | #include <linux/module.h> | ||
64 | #include <linux/of.h> | ||
65 | #include <linux/of_irq.h> | ||
66 | #include <linux/of_address.h> | ||
67 | #include <linux/of_platform.h> | ||
68 | #include <linux/platform_device.h> | ||
69 | #include <linux/slab.h> | ||
70 | #include <linux/smp.h> | ||
71 | #include <linux/types.h> | ||
72 | #include <linux/irqchip.h> | ||
73 | #include <linux/irqchip/chained_irq.h> | ||
74 | |||
75 | #define IRQS_PER_WORD 32 | ||
76 | #define REG_BYTES_PER_IRQ_WORD (sizeof(u32) * 2) | ||
77 | |||
78 | struct bcm6345_l1_cpu; | ||
79 | |||
80 | struct bcm6345_l1_chip { | ||
81 | raw_spinlock_t lock; | ||
82 | unsigned int n_words; | ||
83 | struct irq_domain *domain; | ||
84 | struct cpumask cpumask; | ||
85 | struct bcm6345_l1_cpu *cpus[NR_CPUS]; | ||
86 | }; | ||
87 | |||
88 | struct bcm6345_l1_cpu { | ||
89 | void __iomem *map_base; | ||
90 | unsigned int parent_irq; | ||
91 | u32 enable_cache[]; | ||
92 | }; | ||
93 | |||
94 | static inline unsigned int reg_enable(struct bcm6345_l1_chip *intc, | ||
95 | unsigned int word) | ||
96 | { | ||
97 | #ifdef __BIG_ENDIAN | ||
98 | return (1 * intc->n_words - word - 1) * sizeof(u32); | ||
99 | #else | ||
100 | return (0 * intc->n_words + word) * sizeof(u32); | ||
101 | #endif | ||
102 | } | ||
103 | |||
104 | static inline unsigned int reg_status(struct bcm6345_l1_chip *intc, | ||
105 | unsigned int word) | ||
106 | { | ||
107 | #ifdef __BIG_ENDIAN | ||
108 | return (2 * intc->n_words - word - 1) * sizeof(u32); | ||
109 | #else | ||
110 | return (1 * intc->n_words + word) * sizeof(u32); | ||
111 | #endif | ||
112 | } | ||
113 | |||
114 | static inline unsigned int cpu_for_irq(struct bcm6345_l1_chip *intc, | ||
115 | struct irq_data *d) | ||
116 | { | ||
117 | return cpumask_first_and(&intc->cpumask, irq_data_get_affinity_mask(d)); | ||
118 | } | ||
119 | |||
120 | static void bcm6345_l1_irq_handle(struct irq_desc *desc) | ||
121 | { | ||
122 | struct bcm6345_l1_chip *intc = irq_desc_get_handler_data(desc); | ||
123 | struct bcm6345_l1_cpu *cpu; | ||
124 | struct irq_chip *chip = irq_desc_get_chip(desc); | ||
125 | unsigned int idx; | ||
126 | |||
127 | #ifdef CONFIG_SMP | ||
128 | cpu = intc->cpus[cpu_logical_map(smp_processor_id())]; | ||
129 | #else | ||
130 | cpu = intc->cpus[0]; | ||
131 | #endif | ||
132 | |||
133 | chained_irq_enter(chip, desc); | ||
134 | |||
135 | for (idx = 0; idx < intc->n_words; idx++) { | ||
136 | int base = idx * IRQS_PER_WORD; | ||
137 | unsigned long pending; | ||
138 | irq_hw_number_t hwirq; | ||
139 | unsigned int irq; | ||
140 | |||
141 | pending = __raw_readl(cpu->map_base + reg_status(intc, idx)); | ||
142 | pending &= __raw_readl(cpu->map_base + reg_enable(intc, idx)); | ||
143 | |||
144 | for_each_set_bit(hwirq, &pending, IRQS_PER_WORD) { | ||
145 | irq = irq_linear_revmap(intc->domain, base + hwirq); | ||
146 | if (irq) | ||
147 | do_IRQ(irq); | ||
148 | else | ||
149 | spurious_interrupt(); | ||
150 | } | ||
151 | } | ||
152 | |||
153 | chained_irq_exit(chip, desc); | ||
154 | } | ||
155 | |||
156 | static inline void __bcm6345_l1_unmask(struct irq_data *d) | ||
157 | { | ||
158 | struct bcm6345_l1_chip *intc = irq_data_get_irq_chip_data(d); | ||
159 | u32 word = d->hwirq / IRQS_PER_WORD; | ||
160 | u32 mask = BIT(d->hwirq % IRQS_PER_WORD); | ||
161 | unsigned int cpu_idx = cpu_for_irq(intc, d); | ||
162 | |||
163 | intc->cpus[cpu_idx]->enable_cache[word] |= mask; | ||
164 | __raw_writel(intc->cpus[cpu_idx]->enable_cache[word], | ||
165 | intc->cpus[cpu_idx]->map_base + reg_enable(intc, word)); | ||
166 | } | ||
167 | |||
168 | static inline void __bcm6345_l1_mask(struct irq_data *d) | ||
169 | { | ||
170 | struct bcm6345_l1_chip *intc = irq_data_get_irq_chip_data(d); | ||
171 | u32 word = d->hwirq / IRQS_PER_WORD; | ||
172 | u32 mask = BIT(d->hwirq % IRQS_PER_WORD); | ||
173 | unsigned int cpu_idx = cpu_for_irq(intc, d); | ||
174 | |||
175 | intc->cpus[cpu_idx]->enable_cache[word] &= ~mask; | ||
176 | __raw_writel(intc->cpus[cpu_idx]->enable_cache[word], | ||
177 | intc->cpus[cpu_idx]->map_base + reg_enable(intc, word)); | ||
178 | } | ||
179 | |||
180 | static void bcm6345_l1_unmask(struct irq_data *d) | ||
181 | { | ||
182 | struct bcm6345_l1_chip *intc = irq_data_get_irq_chip_data(d); | ||
183 | unsigned long flags; | ||
184 | |||
185 | raw_spin_lock_irqsave(&intc->lock, flags); | ||
186 | __bcm6345_l1_unmask(d); | ||
187 | raw_spin_unlock_irqrestore(&intc->lock, flags); | ||
188 | } | ||
189 | |||
190 | static void bcm6345_l1_mask(struct irq_data *d) | ||
191 | { | ||
192 | struct bcm6345_l1_chip *intc = irq_data_get_irq_chip_data(d); | ||
193 | unsigned long flags; | ||
194 | |||
195 | raw_spin_lock_irqsave(&intc->lock, flags); | ||
196 | __bcm6345_l1_mask(d); | ||
197 | raw_spin_unlock_irqrestore(&intc->lock, flags); | ||
198 | } | ||
199 | |||
200 | static int bcm6345_l1_set_affinity(struct irq_data *d, | ||
201 | const struct cpumask *dest, | ||
202 | bool force) | ||
203 | { | ||
204 | struct bcm6345_l1_chip *intc = irq_data_get_irq_chip_data(d); | ||
205 | u32 word = d->hwirq / IRQS_PER_WORD; | ||
206 | u32 mask = BIT(d->hwirq % IRQS_PER_WORD); | ||
207 | unsigned int old_cpu = cpu_for_irq(intc, d); | ||
208 | unsigned int new_cpu; | ||
209 | struct cpumask valid; | ||
210 | unsigned long flags; | ||
211 | bool enabled; | ||
212 | |||
213 | if (!cpumask_and(&valid, &intc->cpumask, dest)) | ||
214 | return -EINVAL; | ||
215 | |||
216 | new_cpu = cpumask_any_and(&valid, cpu_online_mask); | ||
217 | if (new_cpu >= nr_cpu_ids) | ||
218 | return -EINVAL; | ||
219 | |||
220 | dest = cpumask_of(new_cpu); | ||
221 | |||
222 | raw_spin_lock_irqsave(&intc->lock, flags); | ||
223 | if (old_cpu != new_cpu) { | ||
224 | enabled = intc->cpus[old_cpu]->enable_cache[word] & mask; | ||
225 | if (enabled) | ||
226 | __bcm6345_l1_mask(d); | ||
227 | cpumask_copy(irq_data_get_affinity_mask(d), dest); | ||
228 | if (enabled) | ||
229 | __bcm6345_l1_unmask(d); | ||
230 | } else { | ||
231 | cpumask_copy(irq_data_get_affinity_mask(d), dest); | ||
232 | } | ||
233 | raw_spin_unlock_irqrestore(&intc->lock, flags); | ||
234 | |||
235 | return IRQ_SET_MASK_OK_NOCOPY; | ||
236 | } | ||
237 | |||
238 | static int __init bcm6345_l1_init_one(struct device_node *dn, | ||
239 | unsigned int idx, | ||
240 | struct bcm6345_l1_chip *intc) | ||
241 | { | ||
242 | struct resource res; | ||
243 | resource_size_t sz; | ||
244 | struct bcm6345_l1_cpu *cpu; | ||
245 | unsigned int i, n_words; | ||
246 | |||
247 | if (of_address_to_resource(dn, idx, &res)) | ||
248 | return -EINVAL; | ||
249 | sz = resource_size(&res); | ||
250 | n_words = sz / REG_BYTES_PER_IRQ_WORD; | ||
251 | |||
252 | if (!intc->n_words) | ||
253 | intc->n_words = n_words; | ||
254 | else if (intc->n_words != n_words) | ||
255 | return -EINVAL; | ||
256 | |||
257 | cpu = intc->cpus[idx] = kzalloc(sizeof(*cpu) + n_words * sizeof(u32), | ||
258 | GFP_KERNEL); | ||
259 | if (!cpu) | ||
260 | return -ENOMEM; | ||
261 | |||
262 | cpu->map_base = ioremap(res.start, sz); | ||
263 | if (!cpu->map_base) | ||
264 | return -ENOMEM; | ||
265 | |||
266 | for (i = 0; i < n_words; i++) { | ||
267 | cpu->enable_cache[i] = 0; | ||
268 | __raw_writel(0, cpu->map_base + reg_enable(intc, i)); | ||
269 | } | ||
270 | |||
271 | cpu->parent_irq = irq_of_parse_and_map(dn, idx); | ||
272 | if (!cpu->parent_irq) { | ||
273 | pr_err("failed to map parent interrupt %d\n", cpu->parent_irq); | ||
274 | return -EINVAL; | ||
275 | } | ||
276 | irq_set_chained_handler_and_data(cpu->parent_irq, | ||
277 | bcm6345_l1_irq_handle, intc); | ||
278 | |||
279 | return 0; | ||
280 | } | ||
281 | |||
282 | static struct irq_chip bcm6345_l1_irq_chip = { | ||
283 | .name = "bcm6345-l1", | ||
284 | .irq_mask = bcm6345_l1_mask, | ||
285 | .irq_unmask = bcm6345_l1_unmask, | ||
286 | .irq_set_affinity = bcm6345_l1_set_affinity, | ||
287 | }; | ||
288 | |||
289 | static int bcm6345_l1_map(struct irq_domain *d, unsigned int virq, | ||
290 | irq_hw_number_t hw_irq) | ||
291 | { | ||
292 | irq_set_chip_and_handler(virq, | ||
293 | &bcm6345_l1_irq_chip, handle_percpu_irq); | ||
294 | irq_set_chip_data(virq, d->host_data); | ||
295 | return 0; | ||
296 | } | ||
297 | |||
298 | static const struct irq_domain_ops bcm6345_l1_domain_ops = { | ||
299 | .xlate = irq_domain_xlate_onecell, | ||
300 | .map = bcm6345_l1_map, | ||
301 | }; | ||
302 | |||
303 | static int __init bcm6345_l1_of_init(struct device_node *dn, | ||
304 | struct device_node *parent) | ||
305 | { | ||
306 | struct bcm6345_l1_chip *intc; | ||
307 | unsigned int idx; | ||
308 | int ret; | ||
309 | |||
310 | intc = kzalloc(sizeof(*intc), GFP_KERNEL); | ||
311 | if (!intc) | ||
312 | return -ENOMEM; | ||
313 | |||
314 | for_each_possible_cpu(idx) { | ||
315 | ret = bcm6345_l1_init_one(dn, idx, intc); | ||
316 | if (ret) | ||
317 | pr_err("failed to init intc L1 for cpu %d: %d\n", | ||
318 | idx, ret); | ||
319 | else | ||
320 | cpumask_set_cpu(idx, &intc->cpumask); | ||
321 | } | ||
322 | |||
323 | if (!cpumask_weight(&intc->cpumask)) { | ||
324 | ret = -ENODEV; | ||
325 | goto out_free; | ||
326 | } | ||
327 | |||
328 | raw_spin_lock_init(&intc->lock); | ||
329 | |||
330 | intc->domain = irq_domain_add_linear(dn, IRQS_PER_WORD * intc->n_words, | ||
331 | &bcm6345_l1_domain_ops, | ||
332 | intc); | ||
333 | if (!intc->domain) { | ||
334 | ret = -ENOMEM; | ||
335 | goto out_unmap; | ||
336 | } | ||
337 | |||
338 | pr_info("registered BCM6345 L1 intc (IRQs: %d)\n", | ||
339 | IRQS_PER_WORD * intc->n_words); | ||
340 | for_each_cpu(idx, &intc->cpumask) { | ||
341 | struct bcm6345_l1_cpu *cpu = intc->cpus[idx]; | ||
342 | |||
343 | pr_info(" CPU%u at MMIO 0x%p (irq = %d)\n", idx, | ||
344 | cpu->map_base, cpu->parent_irq); | ||
345 | } | ||
346 | |||
347 | return 0; | ||
348 | |||
349 | out_unmap: | ||
350 | for_each_possible_cpu(idx) { | ||
351 | struct bcm6345_l1_cpu *cpu = intc->cpus[idx]; | ||
352 | |||
353 | if (cpu) { | ||
354 | if (cpu->map_base) | ||
355 | iounmap(cpu->map_base); | ||
356 | kfree(cpu); | ||
357 | } | ||
358 | } | ||
359 | out_free: | ||
360 | kfree(intc); | ||
361 | return ret; | ||
362 | } | ||
363 | |||
364 | IRQCHIP_DECLARE(bcm6345_l1, "brcm,bcm6345-l1-intc", bcm6345_l1_of_init); | ||
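
reg_enable() and reg_status() encode the layout documented in the header comment: on big-endian, word 0 (IRQs 0-31) maps to the higher-addressed W1 registers. A standalone check against the two-word BCM6368 layout:

    #include <assert.h>
    #include <stdio.h>

    /* Big-endian variants of reg_enable()/reg_status() from the driver */
    static unsigned int reg_enable_be(unsigned int n_words, unsigned int word)
    {
        return (1 * n_words - word - 1) * sizeof(unsigned int);
    }

    static unsigned int reg_status_be(unsigned int n_words, unsigned int word)
    {
        return (2 * n_words - word - 1) * sizeof(unsigned int);
    }

    int main(void)
    {
        assert(reg_enable_be(2, 0) == 0x4);     /* IRQs 0-31  -> W1_ENABLE */
        assert(reg_enable_be(2, 1) == 0x0);     /* IRQs 32-63 -> W0_ENABLE */
        assert(reg_status_be(2, 0) == 0xc);     /* IRQs 0-31  -> W1_STATUS */
        assert(reg_status_be(2, 1) == 0x8);     /* IRQs 32-63 -> W0_STATUS */
        puts("offsets match the BCM6368 table in the header comment");
        return 0;
    }
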
diff --git a/drivers/irqchip/irq-gic-realview.c b/drivers/irqchip/irq-gic-realview.c index aa46eb280a7f..54c296401525 100644 --- a/drivers/irqchip/irq-gic-realview.c +++ b/drivers/irqchip/irq-gic-realview.c | |||
@@ -10,7 +10,8 @@ | |||
10 | #include <linux/irqchip/arm-gic.h> | 10 | #include <linux/irqchip/arm-gic.h> |
11 | 11 | ||
12 | #define REALVIEW_SYS_LOCK_OFFSET 0x20 | 12 | #define REALVIEW_SYS_LOCK_OFFSET 0x20 |
13 | #define REALVIEW_PB11MP_SYS_PLD_CTRL1 0x74 | 13 | #define REALVIEW_SYS_PLD_CTRL1 0x74 |
14 | #define REALVIEW_EB_REVB_SYS_PLD_CTRL1 0xD8 | ||
14 | #define VERSATILE_LOCK_VAL 0xA05F | 15 | #define VERSATILE_LOCK_VAL 0xA05F |
15 | #define PLD_INTMODE_MASK BIT(22)|BIT(23)|BIT(24) | 16 | #define PLD_INTMODE_MASK BIT(22)|BIT(23)|BIT(24) |
16 | #define PLD_INTMODE_LEGACY 0x0 | 17 | #define PLD_INTMODE_LEGACY 0x0 |
@@ -18,26 +19,57 @@ | |||
18 | #define PLD_INTMODE_NEW_NO_DCC BIT(23) | 19 | #define PLD_INTMODE_NEW_NO_DCC BIT(23) |
19 | #define PLD_INTMODE_FIQ_ENABLE BIT(24) | 20 | #define PLD_INTMODE_FIQ_ENABLE BIT(24) |
20 | 21 | ||
22 | /* For some reason RealView EB Rev B moved this register */ | ||
23 | static const struct of_device_id syscon_pldset_of_match[] = { | ||
24 | { | ||
25 | .compatible = "arm,realview-eb11mp-revb-syscon", | ||
26 | .data = (void *)REALVIEW_EB_REVB_SYS_PLD_CTRL1, | ||
27 | }, | ||
28 | { | ||
29 | .compatible = "arm,realview-eb11mp-revc-syscon", | ||
30 | .data = (void *)REALVIEW_SYS_PLD_CTRL1, | ||
31 | }, | ||
32 | { | ||
33 | .compatible = "arm,realview-eb-syscon", | ||
34 | .data = (void *)REALVIEW_SYS_PLD_CTRL1, | ||
35 | }, | ||
36 | { | ||
37 | .compatible = "arm,realview-pb11mp-syscon", | ||
38 | .data = (void *)REALVIEW_SYS_PLD_CTRL1, | ||
39 | }, | ||
40 | {}, | ||
41 | }; | ||
42 | |||
21 | static int __init | 43 | static int __init |
22 | realview_gic_of_init(struct device_node *node, struct device_node *parent) | 44 | realview_gic_of_init(struct device_node *node, struct device_node *parent) |
23 | { | 45 | { |
24 | static struct regmap *map; | 46 | static struct regmap *map; |
47 | struct device_node *np; | ||
48 | const struct of_device_id *gic_id; | ||
49 | u32 pld1_ctrl; | ||
50 | |||
51 | np = of_find_matching_node_and_match(NULL, syscon_pldset_of_match, | ||
52 | &gic_id); | ||
53 | if (!np) | ||
54 | return -ENODEV; | ||
55 | pld1_ctrl = (u32)gic_id->data; | ||
25 | 56 | ||
26 | /* The PB11MPCore GIC needs to be configured in the syscon */ | 57 | /* The PB11MPCore GIC needs to be configured in the syscon */ |
27 | map = syscon_regmap_lookup_by_compatible("arm,realview-pb11mp-syscon"); | 58 | map = syscon_node_to_regmap(np); |
28 | if (!IS_ERR(map)) { | 59 | if (!IS_ERR(map)) { |
29 | /* new irq mode with no DCC */ | 60 | /* new irq mode with no DCC */ |
30 | regmap_write(map, REALVIEW_SYS_LOCK_OFFSET, | 61 | regmap_write(map, REALVIEW_SYS_LOCK_OFFSET, |
31 | VERSATILE_LOCK_VAL); | 62 | VERSATILE_LOCK_VAL); |
32 | regmap_update_bits(map, REALVIEW_PB11MP_SYS_PLD_CTRL1, | 63 | regmap_update_bits(map, pld1_ctrl, |
33 | PLD_INTMODE_NEW_NO_DCC, | 64 | PLD_INTMODE_NEW_NO_DCC, |
34 | PLD_INTMODE_MASK); | 65 | PLD_INTMODE_MASK); |
35 | regmap_write(map, REALVIEW_SYS_LOCK_OFFSET, 0x0000); | 66 | regmap_write(map, REALVIEW_SYS_LOCK_OFFSET, 0x0000); |
36 | pr_info("TC11MP GIC: set up interrupt controller to NEW mode, no DCC\n"); | 67 | pr_info("RealView GIC: set up interrupt controller to NEW mode, no DCC\n"); |
37 | } else { | 68 | } else { |
38 | pr_err("TC11MP GIC setup: could not find syscon\n"); | 69 | pr_err("RealView GIC setup: could not find syscon\n"); |
39 | return -ENXIO; | 70 | return -ENODEV; |
40 | } | 71 | } |
41 | return gic_of_init(node, parent); | 72 | return gic_of_init(node, parent); |
42 | } | 73 | } |
43 | IRQCHIP_DECLARE(armtc11mp_gic, "arm,tc11mp-gic", realview_gic_of_init); | 74 | IRQCHIP_DECLARE(armtc11mp_gic, "arm,tc11mp-gic", realview_gic_of_init); |
75 | IRQCHIP_DECLARE(armeb11mp_gic, "arm,eb11mp-gic", realview_gic_of_init); | ||
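
The per-variant PLD_CTRL1 offset travels in of_device_id.data as an integer cast to void *. That pattern generalizes; a hedged sketch with placeholder names (the (uintptr_t) round trip is the portable spelling of the driver's direct (u32) cast, which is fine on 32-bit RealView):

    static const struct of_device_id example_pld_match[] = {
        { .compatible = "vendor,variant-a", .data = (void *)0x74 },
        { .compatible = "vendor,variant-b", .data = (void *)0xd8 },
        { /* sentinel */ },
    };

    static u32 example_pld_offset(void)
    {
        const struct of_device_id *id;
        struct device_node *np;

        np = of_find_matching_node_and_match(NULL, example_pld_match, &id);
        if (!np)
            return 0;
        return (u32)(uintptr_t)id->data;
    }
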
diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index c779f83e511d..28f047c61baa 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c | |||
@@ -92,18 +92,6 @@ static struct msi_domain_info gicv2m_msi_domain_info = { | |||
92 | .chip = &gicv2m_msi_irq_chip, | 92 | .chip = &gicv2m_msi_irq_chip, |
93 | }; | 93 | }; |
94 | 94 | ||
95 | static int gicv2m_set_affinity(struct irq_data *irq_data, | ||
96 | const struct cpumask *mask, bool force) | ||
97 | { | ||
98 | int ret; | ||
99 | |||
100 | ret = irq_chip_set_affinity_parent(irq_data, mask, force); | ||
101 | if (ret == IRQ_SET_MASK_OK) | ||
102 | ret = IRQ_SET_MASK_OK_DONE; | ||
103 | |||
104 | return ret; | ||
105 | } | ||
106 | |||
107 | static void gicv2m_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) | 95 | static void gicv2m_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) |
108 | { | 96 | { |
109 | struct v2m_data *v2m = irq_data_get_irq_chip_data(data); | 97 | struct v2m_data *v2m = irq_data_get_irq_chip_data(data); |
@@ -122,7 +110,7 @@ static struct irq_chip gicv2m_irq_chip = { | |||
122 | .irq_mask = irq_chip_mask_parent, | 110 | .irq_mask = irq_chip_mask_parent, |
123 | .irq_unmask = irq_chip_unmask_parent, | 111 | .irq_unmask = irq_chip_unmask_parent, |
124 | .irq_eoi = irq_chip_eoi_parent, | 112 | .irq_eoi = irq_chip_eoi_parent, |
125 | .irq_set_affinity = gicv2m_set_affinity, | 113 | .irq_set_affinity = irq_chip_set_affinity_parent, |
126 | .irq_compose_msi_msg = gicv2m_compose_msi_msg, | 114 | .irq_compose_msi_msg = gicv2m_compose_msi_msg, |
127 | }; | 115 | }; |
128 | 116 | ||
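
The deleted wrapper only promoted the parent's IRQ_SET_MASK_OK to IRQ_SET_MASK_OK_DONE; once the GICv3 parent returns IRQ_SET_MASK_OK_DONE itself (see the irq-gic-v3.c hunk below), irq_chip_set_affinity_parent is sufficient. The return-code contract, summarized from <linux/irq.h>:

    /*
     * IRQ_SET_MASK_OK       - core code updates the cached affinity mask
     * IRQ_SET_MASK_OK_DONE  - as OK, but tells stacked chips (the MSI layer
     *                         here) that nothing further is needed, so the
     *                         MSI message is not pointlessly recomposed
     */
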
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 43dfd15c1dd2..39261798c59f 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c | |||
@@ -103,7 +103,6 @@ struct its_device { | |||
103 | 103 | ||
104 | static LIST_HEAD(its_nodes); | 104 | static LIST_HEAD(its_nodes); |
105 | static DEFINE_SPINLOCK(its_lock); | 105 | static DEFINE_SPINLOCK(its_lock); |
106 | static struct device_node *gic_root_node; | ||
107 | static struct rdists *gic_rdists; | 106 | static struct rdists *gic_rdists; |
108 | 107 | ||
109 | #define gic_data_rdist() (raw_cpu_ptr(gic_rdists->rdist)) | 108 | #define gic_data_rdist() (raw_cpu_ptr(gic_rdists->rdist)) |
@@ -671,7 +670,7 @@ static int its_chunk_to_lpi(int chunk) | |||
671 | return (chunk << IRQS_PER_CHUNK_SHIFT) + 8192; | 670 | return (chunk << IRQS_PER_CHUNK_SHIFT) + 8192; |
672 | } | 671 | } |
673 | 672 | ||
674 | static int its_lpi_init(u32 id_bits) | 673 | static int __init its_lpi_init(u32 id_bits) |
675 | { | 674 | { |
676 | lpi_chunks = its_lpi_to_chunk(1UL << id_bits); | 675 | lpi_chunks = its_lpi_to_chunk(1UL << id_bits); |
677 | 676 | ||
@@ -1430,7 +1429,8 @@ static void its_enable_quirks(struct its_node *its) | |||
1430 | gic_enable_quirks(iidr, its_quirks, its); | 1429 | gic_enable_quirks(iidr, its_quirks, its); |
1431 | } | 1430 | } |
1432 | 1431 | ||
1433 | static int its_probe(struct device_node *node, struct irq_domain *parent) | 1432 | static int __init its_probe(struct device_node *node, |
1433 | struct irq_domain *parent) | ||
1434 | { | 1434 | { |
1435 | struct resource res; | 1435 | struct resource res; |
1436 | struct its_node *its; | 1436 | struct its_node *its; |
@@ -1591,7 +1591,7 @@ static struct of_device_id its_device_id[] = { | |||
1591 | {}, | 1591 | {}, |
1592 | }; | 1592 | }; |
1593 | 1593 | ||
1594 | int its_init(struct device_node *node, struct rdists *rdists, | 1594 | int __init its_init(struct device_node *node, struct rdists *rdists, |
1595 | struct irq_domain *parent_domain) | 1595 | struct irq_domain *parent_domain) |
1596 | { | 1596 | { |
1597 | struct device_node *np; | 1597 | struct device_node *np; |
@@ -1607,8 +1607,6 @@ int its_init(struct device_node *node, struct rdists *rdists, | |||
1607 | } | 1607 | } |
1608 | 1608 | ||
1609 | gic_rdists = rdists; | 1609 | gic_rdists = rdists; |
1610 | gic_root_node = node; | ||
1611 | |||
1612 | its_alloc_lpi_tables(); | 1610 | its_alloc_lpi_tables(); |
1613 | its_lpi_init(rdists->id_bits); | 1611 | its_lpi_init(rdists->id_bits); |
1614 | 1612 | ||
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index d7be6ddc34f6..5b7d3c2129d8 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c | |||
@@ -15,10 +15,12 @@ | |||
15 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | 15 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
16 | */ | 16 | */ |
17 | 17 | ||
18 | #include <linux/acpi.h> | ||
18 | #include <linux/cpu.h> | 19 | #include <linux/cpu.h> |
19 | #include <linux/cpu_pm.h> | 20 | #include <linux/cpu_pm.h> |
20 | #include <linux/delay.h> | 21 | #include <linux/delay.h> |
21 | #include <linux/interrupt.h> | 22 | #include <linux/interrupt.h> |
23 | #include <linux/irqdomain.h> | ||
22 | #include <linux/of.h> | 24 | #include <linux/of.h> |
23 | #include <linux/of_address.h> | 25 | #include <linux/of_address.h> |
24 | #include <linux/of_irq.h> | 26 | #include <linux/of_irq.h> |
@@ -38,6 +40,7 @@ | |||
38 | struct redist_region { | 40 | struct redist_region { |
39 | void __iomem *redist_base; | 41 | void __iomem *redist_base; |
40 | phys_addr_t phys_base; | 42 | phys_addr_t phys_base; |
43 | bool single_redist; | ||
41 | }; | 44 | }; |
42 | 45 | ||
43 | struct gic_chip_data { | 46 | struct gic_chip_data { |
@@ -434,6 +437,9 @@ static int gic_populate_rdist(void) | |||
434 | return 0; | 437 | return 0; |
435 | } | 438 | } |
436 | 439 | ||
440 | if (gic_data.redist_regions[i].single_redist) | ||
441 | break; | ||
442 | |||
437 | if (gic_data.redist_stride) { | 443 | if (gic_data.redist_stride) { |
438 | ptr += gic_data.redist_stride; | 444 | ptr += gic_data.redist_stride; |
439 | } else { | 445 | } else { |
@@ -634,7 +640,7 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, | |||
634 | else | 640 | else |
635 | gic_dist_wait_for_rwp(); | 641 | gic_dist_wait_for_rwp(); |
636 | 642 | ||
637 | return IRQ_SET_MASK_OK; | 643 | return IRQ_SET_MASK_OK_DONE; |
638 | } | 644 | } |
639 | #else | 645 | #else |
640 | #define gic_set_affinity NULL | 646 | #define gic_set_affinity NULL |
@@ -764,6 +770,15 @@ static int gic_irq_domain_translate(struct irq_domain *d, | |||
764 | return 0; | 770 | return 0; |
765 | } | 771 | } |
766 | 772 | ||
773 | if (is_fwnode_irqchip(fwspec->fwnode)) { | ||
774 | if (fwspec->param_count != 2) | ||
775 | return -EINVAL; | ||
776 | |||
777 | *hwirq = fwspec->param[0]; | ||
778 | *type = fwspec->param[1]; | ||
779 | return 0; | ||
780 | } | ||
781 | |||
767 | return -EINVAL; | 782 | return -EINVAL; |
768 | } | 783 | } |
769 | 784 | ||
@@ -811,17 +826,88 @@ static void gicv3_enable_quirks(void) | |||
811 | #endif | 826 | #endif |
812 | } | 827 | } |
813 | 828 | ||
829 | static int __init gic_init_bases(void __iomem *dist_base, | ||
830 | struct redist_region *rdist_regs, | ||
831 | u32 nr_redist_regions, | ||
832 | u64 redist_stride, | ||
833 | struct fwnode_handle *handle) | ||
834 | { | ||
835 | struct device_node *node; | ||
836 | u32 typer; | ||
837 | int gic_irqs; | ||
838 | int err; | ||
839 | |||
840 | if (!is_hyp_mode_available()) | ||
841 | static_key_slow_dec(&supports_deactivate); | ||
842 | |||
843 | if (static_key_true(&supports_deactivate)) | ||
844 | pr_info("GIC: Using split EOI/Deactivate mode\n"); | ||
845 | |||
846 | gic_data.dist_base = dist_base; | ||
847 | gic_data.redist_regions = rdist_regs; | ||
848 | gic_data.nr_redist_regions = nr_redist_regions; | ||
849 | gic_data.redist_stride = redist_stride; | ||
850 | |||
851 | gicv3_enable_quirks(); | ||
852 | |||
853 | /* | ||
854 | * Find out how many interrupts are supported. | ||
855 | * The GIC only supports up to 1020 interrupt sources (SGI+PPI+SPI) | ||
856 | */ | ||
857 | typer = readl_relaxed(gic_data.dist_base + GICD_TYPER); | ||
858 | gic_data.rdists.id_bits = GICD_TYPER_ID_BITS(typer); | ||
859 | gic_irqs = GICD_TYPER_IRQS(typer); | ||
860 | if (gic_irqs > 1020) | ||
861 | gic_irqs = 1020; | ||
862 | gic_data.irq_nr = gic_irqs; | ||
863 | |||
864 | gic_data.domain = irq_domain_create_tree(handle, &gic_irq_domain_ops, | ||
865 | &gic_data); | ||
866 | gic_data.rdists.rdist = alloc_percpu(typeof(*gic_data.rdists.rdist)); | ||
867 | |||
868 | if (WARN_ON(!gic_data.domain) || WARN_ON(!gic_data.rdists.rdist)) { | ||
869 | err = -ENOMEM; | ||
870 | goto out_free; | ||
871 | } | ||
872 | |||
873 | set_handle_irq(gic_handle_irq); | ||
874 | |||
875 | node = to_of_node(handle); | ||
876 | if (IS_ENABLED(CONFIG_ARM_GIC_V3_ITS) && gic_dist_supports_lpis() && | ||
877 | node) /* Temp hack to prevent ITS init for ACPI */ | ||
878 | its_init(node, &gic_data.rdists, gic_data.domain); | ||
879 | |||
880 | gic_smp_init(); | ||
881 | gic_dist_init(); | ||
882 | gic_cpu_init(); | ||
883 | gic_cpu_pm_init(); | ||
884 | |||
885 | return 0; | ||
886 | |||
887 | out_free: | ||
888 | if (gic_data.domain) | ||
889 | irq_domain_remove(gic_data.domain); | ||
890 | free_percpu(gic_data.rdists.rdist); | ||
891 | return err; | ||
892 | } | ||
893 | |||
894 | static int __init gic_validate_dist_version(void __iomem *dist_base) | ||
895 | { | ||
896 | u32 reg = readl_relaxed(dist_base + GICD_PIDR2) & GIC_PIDR2_ARCH_MASK; | ||
897 | |||
898 | if (reg != GIC_PIDR2_ARCH_GICv3 && reg != GIC_PIDR2_ARCH_GICv4) | ||
899 | return -ENODEV; | ||
900 | |||
901 | return 0; | ||
902 | } | ||
903 | |||
814 | static int __init gic_of_init(struct device_node *node, struct device_node *parent) | 904 | static int __init gic_of_init(struct device_node *node, struct device_node *parent) |
815 | { | 905 | { |
816 | void __iomem *dist_base; | 906 | void __iomem *dist_base; |
817 | struct redist_region *rdist_regs; | 907 | struct redist_region *rdist_regs; |
818 | u64 redist_stride; | 908 | u64 redist_stride; |
819 | u32 nr_redist_regions; | 909 | u32 nr_redist_regions; |
820 | u32 typer; | 910 | int err, i; |
821 | u32 reg; | ||
822 | int gic_irqs; | ||
823 | int err; | ||
824 | int i; | ||
825 | 911 | ||
826 | dist_base = of_iomap(node, 0); | 912 | dist_base = of_iomap(node, 0); |
827 | if (!dist_base) { | 913 | if (!dist_base) { |
@@ -830,11 +916,10 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare | |||
830 | return -ENXIO; | 916 | return -ENXIO; |
831 | } | 917 | } |
832 | 918 | ||
833 | reg = readl_relaxed(dist_base + GICD_PIDR2) & GIC_PIDR2_ARCH_MASK; | 919 | err = gic_validate_dist_version(dist_base); |
834 | if (reg != GIC_PIDR2_ARCH_GICv3 && reg != GIC_PIDR2_ARCH_GICv4) { | 920 | if (err) { |
835 | pr_err("%s: no distributor detected, giving up\n", | 921 | pr_err("%s: no distributor detected, giving up\n", |
836 | node->full_name); | 922 | node->full_name); |
837 | err = -ENODEV; | ||
838 | goto out_unmap_dist; | 923 | goto out_unmap_dist; |
839 | } | 924 | } |
840 | 925 | ||
@@ -865,63 +950,229 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare | |||
865 | if (of_property_read_u64(node, "redistributor-stride", &redist_stride)) | 950 | if (of_property_read_u64(node, "redistributor-stride", &redist_stride)) |
866 | redist_stride = 0; | 951 | redist_stride = 0; |
867 | 952 | ||
868 | if (!is_hyp_mode_available()) | 953 | err = gic_init_bases(dist_base, rdist_regs, nr_redist_regions, |
869 | static_key_slow_dec(&supports_deactivate); | 954 | redist_stride, &node->fwnode); |
955 | if (!err) | ||
956 | return 0; | ||
870 | 957 | ||
871 | if (static_key_true(&supports_deactivate)) | 958 | out_unmap_rdist: |
872 | pr_info("GIC: Using split EOI/Deactivate mode\n"); | 959 | for (i = 0; i < nr_redist_regions; i++) |
960 | if (rdist_regs[i].redist_base) | ||
961 | iounmap(rdist_regs[i].redist_base); | ||
962 | kfree(rdist_regs); | ||
963 | out_unmap_dist: | ||
964 | iounmap(dist_base); | ||
965 | return err; | ||
966 | } | ||
873 | 967 | ||
874 | gic_data.dist_base = dist_base; | 968 | IRQCHIP_DECLARE(gic_v3, "arm,gic-v3", gic_of_init); |
875 | gic_data.redist_regions = rdist_regs; | ||
876 | gic_data.nr_redist_regions = nr_redist_regions; | ||
877 | gic_data.redist_stride = redist_stride; | ||
878 | 969 | ||
879 | gicv3_enable_quirks(); | 970 | #ifdef CONFIG_ACPI |
971 | static void __iomem *dist_base; | ||
972 | static struct redist_region *redist_regs __initdata; | ||
973 | static u32 nr_redist_regions __initdata; | ||
974 | static bool single_redist; | ||
975 | |||
976 | static void __init | ||
977 | gic_acpi_register_redist(phys_addr_t phys_base, void __iomem *redist_base) | ||
978 | { | ||
979 | static int count = 0; | ||
980 | |||
981 | redist_regs[count].phys_base = phys_base; | ||
982 | redist_regs[count].redist_base = redist_base; | ||
983 | redist_regs[count].single_redist = single_redist; | ||
984 | count++; | ||
985 | } | ||
986 | |||
987 | static int __init | ||
988 | gic_acpi_parse_madt_redist(struct acpi_subtable_header *header, | ||
989 | const unsigned long end) | ||
990 | { | ||
991 | struct acpi_madt_generic_redistributor *redist = | ||
992 | (struct acpi_madt_generic_redistributor *)header; | ||
993 | void __iomem *redist_base; | ||
994 | |||
995 | redist_base = ioremap(redist->base_address, redist->length); | ||
996 | if (!redist_base) { | ||
997 | pr_err("Couldn't map GICR region @%llx\n", redist->base_address); | ||
998 | return -ENOMEM; | ||
999 | } | ||
1000 | |||
1001 | gic_acpi_register_redist(redist->base_address, redist_base); | ||
1002 | return 0; | ||
1003 | } | ||
1004 | |||
1005 | static int __init | ||
1006 | gic_acpi_parse_madt_gicc(struct acpi_subtable_header *header, | ||
1007 | const unsigned long end) | ||
1008 | { | ||
1009 | struct acpi_madt_generic_interrupt *gicc = | ||
1010 | (struct acpi_madt_generic_interrupt *)header; | ||
1011 | u32 reg = readl_relaxed(dist_base + GICD_PIDR2) & GIC_PIDR2_ARCH_MASK; | ||
1012 | u32 size = reg == GIC_PIDR2_ARCH_GICv4 ? SZ_64K * 4 : SZ_64K * 2; | ||
1013 | void __iomem *redist_base; | ||
1014 | |||
1015 | redist_base = ioremap(gicc->gicr_base_address, size); | ||
1016 | if (!redist_base) | ||
1017 | return -ENOMEM; | ||
1018 | |||
1019 | gic_acpi_register_redist(gicc->gicr_base_address, redist_base); | ||
1020 | return 0; | ||
1021 | } | ||
1022 | |||
1023 | static int __init gic_acpi_collect_gicr_base(void) | ||
1024 | { | ||
1025 | acpi_tbl_entry_handler redist_parser; | ||
1026 | enum acpi_madt_type type; | ||
1027 | |||
1028 | if (single_redist) { | ||
1029 | type = ACPI_MADT_TYPE_GENERIC_INTERRUPT; | ||
1030 | redist_parser = gic_acpi_parse_madt_gicc; | ||
1031 | } else { | ||
1032 | type = ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR; | ||
1033 | redist_parser = gic_acpi_parse_madt_redist; | ||
1034 | } | ||
1035 | |||
1036 | /* Collect redistributor base addresses in GICR entries */ | ||
1037 | if (acpi_table_parse_madt(type, redist_parser, 0) > 0) | ||
1038 | return 0; | ||
1039 | |||
1040 | pr_info("No valid GICR entries exist\n"); | ||
1041 | return -ENODEV; | ||
1042 | } | ||
1043 | |||
1044 | static int __init gic_acpi_match_gicr(struct acpi_subtable_header *header, | ||
1045 | const unsigned long end) | ||
1046 | { | ||
1047 | /* Subtable presence means that redist exists; that's it */ | ||
1048 | return 0; | ||
1049 | } | ||
1050 | |||
1051 | static int __init gic_acpi_match_gicc(struct acpi_subtable_header *header, | ||
1052 | const unsigned long end) | ||
1053 | { | ||
1054 | struct acpi_madt_generic_interrupt *gicc = | ||
1055 | (struct acpi_madt_generic_interrupt *)header; | ||
880 | 1056 | ||
881 | /* | 1057 | /* |
882 | * Find out how many interrupts are supported. | 1058 | * If GICC is enabled and has a valid GICR base address, then the
883 | * The GIC only supports up to 1020 interrupt sources (SGI+PPI+SPI) | 1059 | * GICR base is presented via GICC
884 | */ | 1060 | */ |
885 | typer = readl_relaxed(gic_data.dist_base + GICD_TYPER); | 1061 | if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address) |
886 | gic_data.rdists.id_bits = GICD_TYPER_ID_BITS(typer); | 1062 | return 0; |
887 | gic_irqs = GICD_TYPER_IRQS(typer); | ||
888 | if (gic_irqs > 1020) | ||
889 | gic_irqs = 1020; | ||
890 | gic_data.irq_nr = gic_irqs; | ||
891 | 1063 | ||
892 | gic_data.domain = irq_domain_add_tree(node, &gic_irq_domain_ops, | 1064 | return -ENODEV; |
893 | &gic_data); | 1065 | } |
894 | gic_data.rdists.rdist = alloc_percpu(typeof(*gic_data.rdists.rdist)); | ||
895 | 1066 | ||
896 | if (WARN_ON(!gic_data.domain) || WARN_ON(!gic_data.rdists.rdist)) { | 1067 | static int __init gic_acpi_count_gicr_regions(void) |
1068 | { | ||
1069 | int count; | ||
1070 | |||
1071 | /* | ||
1072 | * Count how many redistributor regions we have. Mixing redistributor | ||
1073 | * descriptions is not allowed: GICR and GICC subtables are mutually | ||
1074 | * exclusive. | ||
1075 | */ | ||
1076 | count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR, | ||
1077 | gic_acpi_match_gicr, 0); | ||
1078 | if (count > 0) { | ||
1079 | single_redist = false; | ||
1080 | return count; | ||
1081 | } | ||
1082 | |||
1083 | count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT, | ||
1084 | gic_acpi_match_gicc, 0); | ||
1085 | if (count > 0) | ||
1086 | single_redist = true; | ||
1087 | |||
1088 | return count; | ||
1089 | } | ||
1090 | |||
1091 | static bool __init acpi_validate_gic_table(struct acpi_subtable_header *header, | ||
1092 | struct acpi_probe_entry *ape) | ||
1093 | { | ||
1094 | struct acpi_madt_generic_distributor *dist; | ||
1095 | int count; | ||
1096 | |||
1097 | dist = (struct acpi_madt_generic_distributor *)header; | ||
1098 | if (dist->version != ape->driver_data) | ||
1099 | return false; | ||
1100 | |||
1101 | /* We need to do that exercise anyway; the sooner the better */ | ||
1102 | count = gic_acpi_count_gicr_regions(); | ||
1103 | if (count <= 0) | ||
1104 | return false; | ||
1105 | |||
1106 | nr_redist_regions = count; | ||
1107 | return true; | ||
1108 | } | ||
1109 | |||
1110 | #define ACPI_GICV3_DIST_MEM_SIZE (SZ_64K) | ||
1111 | |||
1112 | static int __init | ||
1113 | gic_acpi_init(struct acpi_subtable_header *header, const unsigned long end) | ||
1114 | { | ||
1115 | struct acpi_madt_generic_distributor *dist; | ||
1116 | struct fwnode_handle *domain_handle; | ||
1117 | int i, err; | ||
1118 | |||
1119 | /* Get distributor base address */ | ||
1120 | dist = (struct acpi_madt_generic_distributor *)header; | ||
1121 | dist_base = ioremap(dist->base_address, ACPI_GICV3_DIST_MEM_SIZE); | ||
1122 | if (!dist_base) { | ||
1123 | pr_err("Unable to map GICD registers\n"); | ||
1124 | return -ENOMEM; | ||
1125 | } | ||
1126 | |||
1127 | err = gic_validate_dist_version(dist_base); | ||
1128 | if (err) { | ||
1129 | pr_err("No distributor detected at @%p, giving up\n", dist_base); | ||
1130 | goto out_dist_unmap; | ||
1131 | } | ||
1132 | |||
1133 | redist_regs = kzalloc(sizeof(*redist_regs) * nr_redist_regions, | ||
1134 | GFP_KERNEL); | ||
1135 | if (!redist_regs) { | ||
897 | err = -ENOMEM; | 1136 | err = -ENOMEM; |
898 | goto out_free; | 1137 | goto out_dist_unmap; |
899 | } | 1138 | } |
900 | 1139 | ||
901 | set_handle_irq(gic_handle_irq); | 1140 | err = gic_acpi_collect_gicr_base(); |
1141 | if (err) | ||
1142 | goto out_redist_unmap; | ||
902 | 1143 | ||
903 | if (IS_ENABLED(CONFIG_ARM_GIC_V3_ITS) && gic_dist_supports_lpis()) | 1144 | domain_handle = irq_domain_alloc_fwnode(dist_base); |
904 | its_init(node, &gic_data.rdists, gic_data.domain); | 1145 | if (!domain_handle) { |
1146 | err = -ENOMEM; | ||
1147 | goto out_redist_unmap; | ||
1148 | } | ||
905 | 1149 | ||
906 | gic_smp_init(); | 1150 | err = gic_init_bases(dist_base, redist_regs, nr_redist_regions, 0, |
907 | gic_dist_init(); | 1151 | domain_handle); |
908 | gic_cpu_init(); | 1152 | if (err) |
909 | gic_cpu_pm_init(); | 1153 | goto out_fwhandle_free; |
910 | 1154 | ||
1155 | acpi_set_irq_model(ACPI_IRQ_MODEL_GIC, domain_handle); | ||
911 | return 0; | 1156 | return 0; |
912 | 1157 | ||
913 | out_free: | 1158 | out_fwhandle_free: |
914 | if (gic_data.domain) | 1159 | irq_domain_free_fwnode(domain_handle); |
915 | irq_domain_remove(gic_data.domain); | 1160 | out_redist_unmap: |
916 | free_percpu(gic_data.rdists.rdist); | ||
917 | out_unmap_rdist: | ||
918 | for (i = 0; i < nr_redist_regions; i++) | 1161 | for (i = 0; i < nr_redist_regions; i++) |
919 | if (rdist_regs[i].redist_base) | 1162 | if (redist_regs[i].redist_base) |
920 | iounmap(rdist_regs[i].redist_base); | 1163 | iounmap(redist_regs[i].redist_base); |
921 | kfree(rdist_regs); | 1164 | kfree(redist_regs); |
922 | out_unmap_dist: | 1165 | out_dist_unmap: |
923 | iounmap(dist_base); | 1166 | iounmap(dist_base); |
924 | return err; | 1167 | return err; |
925 | } | 1168 | } |
926 | 1169 | IRQCHIP_ACPI_DECLARE(gic_v3, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR, | |
927 | IRQCHIP_DECLARE(gic_v3, "arm,gic-v3", gic_of_init); | 1170 | acpi_validate_gic_table, ACPI_MADT_GIC_VERSION_V3, |
1171 | gic_acpi_init); | ||
1172 | IRQCHIP_ACPI_DECLARE(gic_v4, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR, | ||
1173 | acpi_validate_gic_table, ACPI_MADT_GIC_VERSION_V4, | ||
1174 | gic_acpi_init); | ||
1175 | IRQCHIP_ACPI_DECLARE(gic_v3_or_v4, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR, | ||
1176 | acpi_validate_gic_table, ACPI_MADT_GIC_VERSION_NONE, | ||
1177 | gic_acpi_init); | ||
1178 | #endif | ||
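
The three IRQCHIP_ACPI_DECLARE() entries above differ only in the MADT GIC version they accept: V3, V4, or NONE for firmware that predates the version field. A hedged sketch of how the ACPI core applies them; the iteration helper below is illustrative shorthand, not a real kernel macro, while the subtable_valid/probe_subtbl fields of struct acpi_probe_entry are the real ones:

    /* Illustrative sketch: for each MADT distributor subtable, run
     * the validator, then call the init function on a match. */
    for_each_madt_subtable(ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR, header, end) {
            /* acpi_validate_gic_table() compares dist->version with
             * ape->driver_data and counts GICR regions as a side effect */
            if (!ape->subtable_valid(header, ape))
                    continue;
            ret = ape->probe_subtbl(header, end);   /* gic_acpi_init() */
            break;
    }

Whichever entry matches, gic_validate_dist_version() still reads GICD_PIDR2, so the hardware revision is confirmed independently of what the firmware table claims.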
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 8f9ebf714e2b..282344b95ec2 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c | |||
@@ -319,7 +319,7 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, | |||
319 | writel_relaxed(val | bit, reg); | 319 | writel_relaxed(val | bit, reg); |
320 | raw_spin_unlock_irqrestore(&irq_controller_lock, flags); | 320 | raw_spin_unlock_irqrestore(&irq_controller_lock, flags); |
321 | 321 | ||
322 | return IRQ_SET_MASK_OK; | 322 | return IRQ_SET_MASK_OK_DONE; |
323 | } | 323 | } |
324 | #endif | 324 | #endif |
325 | 325 | ||
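
The gic_set_affinity() change is about stacked irqchips: IRQ_SET_MASK_OK_DONE behaves like IRQ_SET_MASK_OK for the core (the affinity mask is still copied) but additionally tells chips layered on top of the GIC, which call irq_chip_set_affinity_parent(), that the affinity update is fully handled. A condensed sketch of how the core consumes the return value, simplified from irq_do_set_affinity() in kernel/irq/manage.c:

    ret = chip->irq_set_affinity(data, mask, force);
    switch (ret) {
    case IRQ_SET_MASK_OK:
    case IRQ_SET_MASK_OK_DONE:
            /* both codes make the core record the new mask */
            cpumask_copy(desc->irq_common_data.affinity, mask);
    case IRQ_SET_MASK_OK_NOCOPY:        /* fall through */
            irq_set_thread_affinity(desc);
            ret = 0;
    }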
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index 9e17ef27a183..94a30da0cfac 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c | |||
@@ -29,16 +29,32 @@ struct gic_pcpu_mask { | |||
29 | DECLARE_BITMAP(pcpu_mask, GIC_MAX_INTRS); | 29 | DECLARE_BITMAP(pcpu_mask, GIC_MAX_INTRS); |
30 | }; | 30 | }; |
31 | 31 | ||
32 | struct gic_irq_spec { | ||
33 | enum { | ||
34 | GIC_DEVICE, | ||
35 | GIC_IPI | ||
36 | } type; | ||
37 | |||
38 | union { | ||
39 | struct cpumask *ipimask; | ||
40 | unsigned int hwirq; | ||
41 | }; | ||
42 | }; | ||
43 | |||
32 | static unsigned long __gic_base_addr; | 44 | static unsigned long __gic_base_addr; |
45 | |||
33 | static void __iomem *gic_base; | 46 | static void __iomem *gic_base; |
34 | static struct gic_pcpu_mask pcpu_masks[NR_CPUS]; | 47 | static struct gic_pcpu_mask pcpu_masks[NR_CPUS]; |
35 | static DEFINE_SPINLOCK(gic_lock); | 48 | static DEFINE_SPINLOCK(gic_lock); |
36 | static struct irq_domain *gic_irq_domain; | 49 | static struct irq_domain *gic_irq_domain; |
50 | static struct irq_domain *gic_dev_domain; | ||
51 | static struct irq_domain *gic_ipi_domain; | ||
37 | static int gic_shared_intrs; | 52 | static int gic_shared_intrs; |
38 | static int gic_vpes; | 53 | static int gic_vpes; |
39 | static unsigned int gic_cpu_pin; | 54 | static unsigned int gic_cpu_pin; |
40 | static unsigned int timer_cpu_pin; | 55 | static unsigned int timer_cpu_pin; |
41 | static struct irq_chip gic_level_irq_controller, gic_edge_irq_controller; | 56 | static struct irq_chip gic_level_irq_controller, gic_edge_irq_controller; |
57 | DECLARE_BITMAP(ipi_resrv, GIC_MAX_INTRS); | ||
42 | 58 | ||
43 | static void __gic_irq_dispatch(void); | 59 | static void __gic_irq_dispatch(void); |
44 | 60 | ||
@@ -264,9 +280,11 @@ static void gic_bind_eic_interrupt(int irq, int set) | |||
264 | GIC_VPE_EIC_SS(irq), set); | 280 | GIC_VPE_EIC_SS(irq), set); |
265 | } | 281 | } |
266 | 282 | ||
267 | void gic_send_ipi(unsigned int intr) | 283 | static void gic_send_ipi(struct irq_data *d, unsigned int cpu) |
268 | { | 284 | { |
269 | gic_write(GIC_REG(SHARED, GIC_SH_WEDGE), GIC_SH_WEDGE_SET(intr)); | 285 | irq_hw_number_t hwirq = GIC_HWIRQ_TO_SHARED(irqd_to_hwirq(d)); |
286 | |||
287 | gic_write(GIC_REG(SHARED, GIC_SH_WEDGE), GIC_SH_WEDGE_SET(hwirq)); | ||
270 | } | 288 | } |
271 | 289 | ||
272 | int gic_get_c0_compare_int(void) | 290 | int gic_get_c0_compare_int(void) |
@@ -449,7 +467,7 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *cpumask, | |||
449 | gic_map_to_vpe(irq, mips_cm_vp_id(cpumask_first(&tmp))); | 467 | gic_map_to_vpe(irq, mips_cm_vp_id(cpumask_first(&tmp))); |
450 | 468 | ||
451 | /* Update the pcpu_masks */ | 469 | /* Update the pcpu_masks */ |
452 | for (i = 0; i < NR_CPUS; i++) | 470 | for (i = 0; i < gic_vpes; i++) |
453 | clear_bit(irq, pcpu_masks[i].pcpu_mask); | 471 | clear_bit(irq, pcpu_masks[i].pcpu_mask); |
454 | set_bit(irq, pcpu_masks[cpumask_first(&tmp)].pcpu_mask); | 472 | set_bit(irq, pcpu_masks[cpumask_first(&tmp)].pcpu_mask); |
455 | 473 | ||
@@ -479,6 +497,7 @@ static struct irq_chip gic_edge_irq_controller = { | |||
479 | #ifdef CONFIG_SMP | 497 | #ifdef CONFIG_SMP |
480 | .irq_set_affinity = gic_set_affinity, | 498 | .irq_set_affinity = gic_set_affinity, |
481 | #endif | 499 | #endif |
500 | .ipi_send_single = gic_send_ipi, | ||
482 | }; | 501 | }; |
483 | 502 | ||
484 | static void gic_handle_local_int(bool chained) | 503 | static void gic_handle_local_int(bool chained) |
@@ -572,83 +591,6 @@ static void gic_irq_dispatch(struct irq_desc *desc) | |||
572 | gic_handle_shared_int(true); | 591 | gic_handle_shared_int(true); |
573 | } | 592 | } |
574 | 593 | ||
575 | #ifdef CONFIG_MIPS_GIC_IPI | ||
576 | static int gic_resched_int_base; | ||
577 | static int gic_call_int_base; | ||
578 | |||
579 | unsigned int plat_ipi_resched_int_xlate(unsigned int cpu) | ||
580 | { | ||
581 | return gic_resched_int_base + cpu; | ||
582 | } | ||
583 | |||
584 | unsigned int plat_ipi_call_int_xlate(unsigned int cpu) | ||
585 | { | ||
586 | return gic_call_int_base + cpu; | ||
587 | } | ||
588 | |||
589 | static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id) | ||
590 | { | ||
591 | scheduler_ipi(); | ||
592 | |||
593 | return IRQ_HANDLED; | ||
594 | } | ||
595 | |||
596 | static irqreturn_t ipi_call_interrupt(int irq, void *dev_id) | ||
597 | { | ||
598 | generic_smp_call_function_interrupt(); | ||
599 | |||
600 | return IRQ_HANDLED; | ||
601 | } | ||
602 | |||
603 | static struct irqaction irq_resched = { | ||
604 | .handler = ipi_resched_interrupt, | ||
605 | .flags = IRQF_PERCPU, | ||
606 | .name = "IPI resched" | ||
607 | }; | ||
608 | |||
609 | static struct irqaction irq_call = { | ||
610 | .handler = ipi_call_interrupt, | ||
611 | .flags = IRQF_PERCPU, | ||
612 | .name = "IPI call" | ||
613 | }; | ||
614 | |||
615 | static __init void gic_ipi_init_one(unsigned int intr, int cpu, | ||
616 | struct irqaction *action) | ||
617 | { | ||
618 | int virq = irq_create_mapping(gic_irq_domain, | ||
619 | GIC_SHARED_TO_HWIRQ(intr)); | ||
620 | int i; | ||
621 | |||
622 | gic_map_to_vpe(intr, mips_cm_vp_id(cpu)); | ||
623 | for (i = 0; i < NR_CPUS; i++) | ||
624 | clear_bit(intr, pcpu_masks[i].pcpu_mask); | ||
625 | set_bit(intr, pcpu_masks[cpu].pcpu_mask); | ||
626 | |||
627 | irq_set_irq_type(virq, IRQ_TYPE_EDGE_RISING); | ||
628 | |||
629 | irq_set_handler(virq, handle_percpu_irq); | ||
630 | setup_irq(virq, action); | ||
631 | } | ||
632 | |||
633 | static __init void gic_ipi_init(void) | ||
634 | { | ||
635 | int i; | ||
636 | |||
637 | /* Use last 2 * NR_CPUS interrupts as IPIs */ | ||
638 | gic_resched_int_base = gic_shared_intrs - nr_cpu_ids; | ||
639 | gic_call_int_base = gic_resched_int_base - nr_cpu_ids; | ||
640 | |||
641 | for (i = 0; i < nr_cpu_ids; i++) { | ||
642 | gic_ipi_init_one(gic_call_int_base + i, i, &irq_call); | ||
643 | gic_ipi_init_one(gic_resched_int_base + i, i, &irq_resched); | ||
644 | } | ||
645 | } | ||
646 | #else | ||
647 | static inline void gic_ipi_init(void) | ||
648 | { | ||
649 | } | ||
650 | #endif | ||
651 | |||
652 | static void __init gic_basic_init(void) | 594 | static void __init gic_basic_init(void) |
653 | { | 595 | { |
654 | unsigned int i; | 596 | unsigned int i; |
@@ -753,19 +695,21 @@ static int gic_local_irq_domain_map(struct irq_domain *d, unsigned int virq, | |||
753 | } | 695 | } |
754 | 696 | ||
755 | static int gic_shared_irq_domain_map(struct irq_domain *d, unsigned int virq, | 697 | static int gic_shared_irq_domain_map(struct irq_domain *d, unsigned int virq, |
756 | irq_hw_number_t hw) | 698 | irq_hw_number_t hw, unsigned int vpe) |
757 | { | 699 | { |
758 | int intr = GIC_HWIRQ_TO_SHARED(hw); | 700 | int intr = GIC_HWIRQ_TO_SHARED(hw); |
759 | unsigned long flags; | 701 | unsigned long flags; |
702 | int i; | ||
760 | 703 | ||
761 | irq_set_chip_and_handler(virq, &gic_level_irq_controller, | 704 | irq_set_chip_and_handler(virq, &gic_level_irq_controller, |
762 | handle_level_irq); | 705 | handle_level_irq); |
763 | 706 | ||
764 | spin_lock_irqsave(&gic_lock, flags); | 707 | spin_lock_irqsave(&gic_lock, flags); |
765 | gic_map_to_pin(intr, gic_cpu_pin); | 708 | gic_map_to_pin(intr, gic_cpu_pin); |
766 | /* Map to VPE 0 by default */ | 709 | gic_map_to_vpe(intr, vpe); |
767 | gic_map_to_vpe(intr, 0); | 710 | for (i = 0; i < gic_vpes; i++) |
768 | set_bit(intr, pcpu_masks[0].pcpu_mask); | 711 | clear_bit(intr, pcpu_masks[i].pcpu_mask); |
712 | set_bit(intr, pcpu_masks[vpe].pcpu_mask); | ||
769 | spin_unlock_irqrestore(&gic_lock, flags); | 713 | spin_unlock_irqrestore(&gic_lock, flags); |
770 | 714 | ||
771 | return 0; | 715 | return 0; |
@@ -776,10 +720,93 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq, | |||
776 | { | 720 | { |
777 | if (GIC_HWIRQ_TO_LOCAL(hw) < GIC_NUM_LOCAL_INTRS) | 721 | if (GIC_HWIRQ_TO_LOCAL(hw) < GIC_NUM_LOCAL_INTRS) |
778 | return gic_local_irq_domain_map(d, virq, hw); | 722 | return gic_local_irq_domain_map(d, virq, hw); |
779 | return gic_shared_irq_domain_map(d, virq, hw); | 723 | return gic_shared_irq_domain_map(d, virq, hw, 0); |
780 | } | 724 | } |
781 | 725 | ||
782 | static int gic_irq_domain_xlate(struct irq_domain *d, struct device_node *ctrlr, | 726 | static int gic_irq_domain_alloc(struct irq_domain *d, unsigned int virq, |
727 | unsigned int nr_irqs, void *arg) | ||
728 | { | ||
729 | struct gic_irq_spec *spec = arg; | ||
730 | irq_hw_number_t hwirq, base_hwirq; | ||
731 | int cpu, ret, i; | ||
732 | |||
733 | if (spec->type == GIC_DEVICE) { | ||
734 | /* verify that it doesn't conflict with an IPI irq */ | ||
735 | if (test_bit(spec->hwirq, ipi_resrv)) | ||
736 | return -EBUSY; | ||
737 | } else { | ||
738 | base_hwirq = find_first_bit(ipi_resrv, gic_shared_intrs); | ||
739 | if (base_hwirq == gic_shared_intrs) { | ||
740 | return -ENOMEM; | ||
741 | } | ||
742 | |||
743 | /* check that we have enough space */ | ||
744 | for (i = base_hwirq; i < base_hwirq + nr_irqs; i++) { | ||
745 | if (!test_bit(i, ipi_resrv)) | ||
746 | return -EBUSY; | ||
747 | } | ||
748 | bitmap_clear(ipi_resrv, base_hwirq, nr_irqs); | ||
749 | |||
750 | /* map the hwirq for each cpu consecutively */ | ||
751 | i = 0; | ||
752 | for_each_cpu(cpu, spec->ipimask) { | ||
753 | hwirq = GIC_SHARED_TO_HWIRQ(base_hwirq + i); | ||
754 | |||
755 | ret = irq_domain_set_hwirq_and_chip(d, virq + i, hwirq, | ||
756 | &gic_edge_irq_controller, | ||
757 | NULL); | ||
758 | if (ret) | ||
759 | goto error; | ||
760 | |||
761 | ret = gic_shared_irq_domain_map(d, virq + i, hwirq, cpu); | ||
762 | if (ret) | ||
763 | goto error; | ||
764 | |||
765 | i++; | ||
766 | } | ||
767 | |||
768 | /* | ||
769 | * tell the parent about the base hwirq we allocated so it can | ||
770 | * set its own domain data | ||
771 | */ | ||
772 | spec->hwirq = base_hwirq; | ||
773 | } | ||
774 | |||
775 | return 0; | ||
776 | error: | ||
777 | bitmap_set(ipi_resrv, base_hwirq, nr_irqs); | ||
778 | return ret; | ||
779 | } | ||
780 | |||
781 | void gic_irq_domain_free(struct irq_domain *d, unsigned int virq, | ||
782 | unsigned int nr_irqs) | ||
783 | { | ||
784 | irq_hw_number_t base_hwirq; | ||
785 | struct irq_data *data; | ||
786 | |||
787 | data = irq_get_irq_data(virq); | ||
788 | if (!data) | ||
789 | return; | ||
790 | |||
791 | base_hwirq = GIC_HWIRQ_TO_SHARED(irqd_to_hwirq(data)); | ||
792 | bitmap_set(ipi_resrv, base_hwirq, nr_irqs); | ||
793 | } | ||
794 | |||
795 | int gic_irq_domain_match(struct irq_domain *d, struct device_node *node, | ||
796 | enum irq_domain_bus_token bus_token) | ||
797 | { | ||
798 | /* this domain shouldn't be accessed directly */ | ||
799 | return 0; | ||
800 | } | ||
801 | |||
802 | static const struct irq_domain_ops gic_irq_domain_ops = { | ||
803 | .map = gic_irq_domain_map, | ||
804 | .alloc = gic_irq_domain_alloc, | ||
805 | .free = gic_irq_domain_free, | ||
806 | .match = gic_irq_domain_match, | ||
807 | }; | ||
808 | |||
809 | static int gic_dev_domain_xlate(struct irq_domain *d, struct device_node *ctrlr, | ||
783 | const u32 *intspec, unsigned int intsize, | 810 | const u32 *intspec, unsigned int intsize, |
784 | irq_hw_number_t *out_hwirq, | 811 | irq_hw_number_t *out_hwirq, |
785 | unsigned int *out_type) | 812 | unsigned int *out_type) |
@@ -798,9 +825,130 @@ static int gic_irq_domain_xlate(struct irq_domain *d, struct device_node *ctrlr, | |||
798 | return 0; | 825 | return 0; |
799 | } | 826 | } |
800 | 827 | ||
801 | static const struct irq_domain_ops gic_irq_domain_ops = { | 828 | static int gic_dev_domain_alloc(struct irq_domain *d, unsigned int virq, |
802 | .map = gic_irq_domain_map, | 829 | unsigned int nr_irqs, void *arg) |
803 | .xlate = gic_irq_domain_xlate, | 830 | { |
831 | struct irq_fwspec *fwspec = arg; | ||
832 | struct gic_irq_spec spec = { | ||
833 | .type = GIC_DEVICE, | ||
834 | .hwirq = fwspec->param[1], | ||
835 | }; | ||
836 | int i, ret; | ||
837 | bool is_shared = fwspec->param[0] == GIC_SHARED; | ||
838 | |||
839 | if (is_shared) { | ||
840 | ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, &spec); | ||
841 | if (ret) | ||
842 | return ret; | ||
843 | } | ||
844 | |||
845 | for (i = 0; i < nr_irqs; i++) { | ||
846 | irq_hw_number_t hwirq; | ||
847 | |||
848 | if (is_shared) | ||
849 | hwirq = GIC_SHARED_TO_HWIRQ(spec.hwirq + i); | ||
850 | else | ||
851 | hwirq = GIC_LOCAL_TO_HWIRQ(spec.hwirq + i); | ||
852 | |||
853 | ret = irq_domain_set_hwirq_and_chip(d, virq + i, | ||
854 | hwirq, | ||
855 | &gic_level_irq_controller, | ||
856 | NULL); | ||
857 | if (ret) | ||
858 | return ret; | ||
859 | } | ||
860 | |||
861 | return 0; | ||
862 | } | ||
863 | |||
864 | void gic_dev_domain_free(struct irq_domain *d, unsigned int virq, | ||
865 | unsigned int nr_irqs) | ||
866 | { | ||
867 | /* no real allocation is done for dev irqs, so no need to free anything */ | ||
868 | return; | ||
869 | } | ||
870 | |||
871 | static struct irq_domain_ops gic_dev_domain_ops = { | ||
872 | .xlate = gic_dev_domain_xlate, | ||
873 | .alloc = gic_dev_domain_alloc, | ||
874 | .free = gic_dev_domain_free, | ||
875 | }; | ||
876 | |||
877 | static int gic_ipi_domain_xlate(struct irq_domain *d, struct device_node *ctrlr, | ||
878 | const u32 *intspec, unsigned int intsize, | ||
879 | irq_hw_number_t *out_hwirq, | ||
880 | unsigned int *out_type) | ||
881 | { | ||
882 | /* | ||
883 | * There's nothing to translate here. hwirq is dynamically allocated and | ||
884 | * the irq type is always edge triggered. | ||
885 | */ | ||
886 | *out_hwirq = 0; | ||
887 | *out_type = IRQ_TYPE_EDGE_RISING; | ||
888 | |||
889 | return 0; | ||
890 | } | ||
891 | |||
892 | static int gic_ipi_domain_alloc(struct irq_domain *d, unsigned int virq, | ||
893 | unsigned int nr_irqs, void *arg) | ||
894 | { | ||
895 | struct cpumask *ipimask = arg; | ||
896 | struct gic_irq_spec spec = { | ||
897 | .type = GIC_IPI, | ||
898 | .ipimask = ipimask | ||
899 | }; | ||
900 | int ret, i; | ||
901 | |||
902 | ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, &spec); | ||
903 | if (ret) | ||
904 | return ret; | ||
905 | |||
906 | /* the parent should have set spec.hwirq to the base_hwirq it allocated */ | ||
907 | for (i = 0; i < nr_irqs; i++) { | ||
908 | ret = irq_domain_set_hwirq_and_chip(d, virq + i, | ||
909 | GIC_SHARED_TO_HWIRQ(spec.hwirq + i), | ||
910 | &gic_edge_irq_controller, | ||
911 | NULL); | ||
912 | if (ret) | ||
913 | goto error; | ||
914 | |||
915 | ret = irq_set_irq_type(virq + i, IRQ_TYPE_EDGE_RISING); | ||
916 | if (ret) | ||
917 | goto error; | ||
918 | } | ||
919 | |||
920 | return 0; | ||
921 | error: | ||
922 | irq_domain_free_irqs_parent(d, virq, nr_irqs); | ||
923 | return ret; | ||
924 | } | ||
925 | |||
926 | void gic_ipi_domain_free(struct irq_domain *d, unsigned int virq, | ||
927 | unsigned int nr_irqs) | ||
928 | { | ||
929 | irq_domain_free_irqs_parent(d, virq, nr_irqs); | ||
930 | } | ||
931 | |||
932 | int gic_ipi_domain_match(struct irq_domain *d, struct device_node *node, | ||
933 | enum irq_domain_bus_token bus_token) | ||
934 | { | ||
935 | bool is_ipi; | ||
936 | |||
937 | switch (bus_token) { | ||
938 | case DOMAIN_BUS_IPI: | ||
939 | is_ipi = d->bus_token == bus_token; | ||
940 | return to_of_node(d->fwnode) == node && is_ipi; | ||
941 | break; | ||
942 | default: | ||
943 | return 0; | ||
944 | } | ||
945 | } | ||
946 | |||
947 | static struct irq_domain_ops gic_ipi_domain_ops = { | ||
948 | .xlate = gic_ipi_domain_xlate, | ||
949 | .alloc = gic_ipi_domain_alloc, | ||
950 | .free = gic_ipi_domain_free, | ||
951 | .match = gic_ipi_domain_match, | ||
804 | }; | 952 | }; |
805 | 953 | ||
806 | static void __init __gic_init(unsigned long gic_base_addr, | 954 | static void __init __gic_init(unsigned long gic_base_addr, |
@@ -809,6 +957,7 @@ static void __init __gic_init(unsigned long gic_base_addr, | |||
809 | struct device_node *node) | 957 | struct device_node *node) |
810 | { | 958 | { |
811 | unsigned int gicconfig; | 959 | unsigned int gicconfig; |
960 | unsigned int v[2]; | ||
812 | 961 | ||
813 | __gic_base_addr = gic_base_addr; | 962 | __gic_base_addr = gic_base_addr; |
814 | 963 | ||
@@ -864,9 +1013,32 @@ static void __init __gic_init(unsigned long gic_base_addr, | |||
864 | if (!gic_irq_domain) | 1013 | if (!gic_irq_domain) |
865 | panic("Failed to add GIC IRQ domain"); | 1014 | panic("Failed to add GIC IRQ domain"); |
866 | 1015 | ||
867 | gic_basic_init(); | 1016 | gic_dev_domain = irq_domain_add_hierarchy(gic_irq_domain, 0, |
1017 | GIC_NUM_LOCAL_INTRS + gic_shared_intrs, | ||
1018 | node, &gic_dev_domain_ops, NULL); | ||
1019 | if (!gic_dev_domain) | ||
1020 | panic("Failed to add GIC DEV domain"); | ||
1021 | |||
1022 | gic_ipi_domain = irq_domain_add_hierarchy(gic_irq_domain, | ||
1023 | IRQ_DOMAIN_FLAG_IPI_PER_CPU, | ||
1024 | GIC_NUM_LOCAL_INTRS + gic_shared_intrs, | ||
1025 | node, &gic_ipi_domain_ops, NULL); | ||
1026 | if (!gic_ipi_domain) | ||
1027 | panic("Failed to add GIC IPI domain"); | ||
868 | 1028 | ||
869 | gic_ipi_init(); | 1029 | gic_ipi_domain->bus_token = DOMAIN_BUS_IPI; |
1030 | |||
1031 | if (node && | ||
1032 | !of_property_read_u32_array(node, "mti,reserved-ipi-vectors", v, 2)) { | ||
1033 | bitmap_set(ipi_resrv, v[0], v[1]); | ||
1034 | } else { | ||
1035 | /* Make the last 2 * gic_vpes available for IPIs */ | ||
1036 | bitmap_set(ipi_resrv, | ||
1037 | gic_shared_intrs - 2 * gic_vpes, | ||
1038 | 2 * gic_vpes); | ||
1039 | } | ||
1040 | |||
1041 | gic_basic_init(); | ||
870 | } | 1042 | } |
871 | 1043 | ||
872 | void __init gic_init(unsigned long gic_base_addr, | 1044 | void __init gic_init(unsigned long gic_base_addr, |
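
With gic_ipi_domain registered under DOMAIN_BUS_IPI and gic_edge_irq_controller gaining .ipi_send_single, MIPS SMP code can move off the removed plat_ipi_*_int_xlate() helpers and onto the generic IPI API. A hedged usage sketch: irq_reserve_ipi(), ipi_send_single() and irq_find_matching_host() are the real 4.6-era interfaces, while the surrounding function and variable names are illustrative:

    #include <linux/irq.h>
    #include <linux/irqdomain.h>

    static unsigned int call_virq;      /* illustrative storage */

    static void smp_ipi_setup(struct device_node *node)
    {
            struct irq_domain *ipidomain;

            /* Answered by gic_ipi_domain_match() above */
            ipidomain = irq_find_matching_host(node, DOMAIN_BUS_IPI);

            /* Allocation lands in gic_irq_domain_alloc(), which carves
             * hwirqs out of the ipi_resrv bitmap, one per CPU. */
            call_virq = irq_reserve_ipi(ipidomain, cpu_possible_mask);
            WARN_ON(!call_virq);
    }

    static void smp_send_call_ipi(unsigned int cpu)
    {
            /* Routed to gic_send_ipi() via .ipi_send_single */
            ipi_send_single(call_virq, cpu);
    }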
diff --git a/drivers/irqchip/irq-mvebu-odmi.c b/drivers/irqchip/irq-mvebu-odmi.c new file mode 100644 index 000000000000..b4d367868dbb --- /dev/null +++ b/drivers/irqchip/irq-mvebu-odmi.c | |||
@@ -0,0 +1,236 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2016 Marvell | ||
3 | * | ||
4 | * Thomas Petazzoni <thomas.petazzoni@free-electrons.com> | ||
5 | * | ||
6 | * This file is licensed under the terms of the GNU General Public | ||
7 | * License version 2. This program is licensed "as is" without any | ||
8 | * warranty of any kind, whether express or implied. | ||
9 | */ | ||
10 | |||
11 | #define pr_fmt(fmt) "GIC-ODMI: " fmt | ||
12 | |||
13 | #include <linux/irq.h> | ||
14 | #include <linux/irqchip.h> | ||
15 | #include <linux/irqdomain.h> | ||
16 | #include <linux/kernel.h> | ||
17 | #include <linux/msi.h> | ||
18 | #include <linux/of_address.h> | ||
19 | #include <linux/slab.h> | ||
20 | #include <dt-bindings/interrupt-controller/arm-gic.h> | ||
21 | |||
22 | #define GICP_ODMIN_SET 0x40 | ||
23 | #define GICP_ODMI_INT_NUM_SHIFT 12 | ||
24 | #define GICP_ODMIN_GM_EP_R0 0x110 | ||
25 | #define GICP_ODMIN_GM_EP_R1 0x114 | ||
26 | #define GICP_ODMIN_GM_EA_R0 0x108 | ||
27 | #define GICP_ODMIN_GM_EA_R1 0x118 | ||
28 | |||
29 | /* | ||
30 | * We don't support the group events, so we simply have 8 interrupts | ||
31 | * per frame. | ||
32 | */ | ||
33 | #define NODMIS_SHIFT 3 | ||
34 | #define NODMIS_PER_FRAME (1 << NODMIS_SHIFT) | ||
35 | #define NODMIS_MASK (NODMIS_PER_FRAME - 1) | ||
36 | |||
37 | struct odmi_data { | ||
38 | struct resource res; | ||
39 | void __iomem *base; | ||
40 | unsigned int spi_base; | ||
41 | }; | ||
42 | |||
43 | static struct odmi_data *odmis; | ||
44 | static unsigned long *odmis_bm; | ||
45 | static unsigned int odmis_count; | ||
46 | |||
47 | /* Protects odmis_bm */ | ||
48 | static DEFINE_SPINLOCK(odmis_bm_lock); | ||
49 | |||
50 | static void odmi_compose_msi_msg(struct irq_data *d, struct msi_msg *msg) | ||
51 | { | ||
52 | struct odmi_data *odmi; | ||
53 | phys_addr_t addr; | ||
54 | unsigned int odmin; | ||
55 | |||
56 | if (WARN_ON(d->hwirq >= odmis_count * NODMIS_PER_FRAME)) | ||
57 | return; | ||
58 | |||
59 | odmi = &odmis[d->hwirq >> NODMIS_SHIFT]; | ||
60 | odmin = d->hwirq & NODMIS_MASK; | ||
61 | |||
62 | addr = odmi->res.start + GICP_ODMIN_SET; | ||
63 | |||
64 | msg->address_hi = upper_32_bits(addr); | ||
65 | msg->address_lo = lower_32_bits(addr); | ||
66 | msg->data = odmin << GICP_ODMI_INT_NUM_SHIFT; | ||
67 | } | ||
68 | |||
69 | static struct irq_chip odmi_irq_chip = { | ||
70 | .name = "ODMI", | ||
71 | .irq_mask = irq_chip_mask_parent, | ||
72 | .irq_unmask = irq_chip_unmask_parent, | ||
73 | .irq_eoi = irq_chip_eoi_parent, | ||
74 | .irq_set_affinity = irq_chip_set_affinity_parent, | ||
75 | .irq_compose_msi_msg = odmi_compose_msi_msg, | ||
76 | }; | ||
77 | |||
78 | static int odmi_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, | ||
79 | unsigned int nr_irqs, void *args) | ||
80 | { | ||
81 | struct odmi_data *odmi = NULL; | ||
82 | struct irq_fwspec fwspec; | ||
83 | struct irq_data *d; | ||
84 | unsigned int hwirq, odmin; | ||
85 | int ret; | ||
86 | |||
87 | spin_lock(&odmis_bm_lock); | ||
88 | hwirq = find_first_zero_bit(odmis_bm, NODMIS_PER_FRAME * odmis_count); | ||
89 | if (hwirq >= NODMIS_PER_FRAME * odmis_count) { | ||
90 | spin_unlock(&odmis_bm_lock); | ||
91 | return -ENOSPC; | ||
92 | } | ||
93 | |||
94 | __set_bit(hwirq, odmis_bm); | ||
95 | spin_unlock(&odmis_bm_lock); | ||
96 | |||
97 | odmi = &odmis[hwirq >> NODMIS_SHIFT]; | ||
98 | odmin = hwirq & NODMIS_MASK; | ||
99 | |||
100 | fwspec.fwnode = domain->parent->fwnode; | ||
101 | fwspec.param_count = 3; | ||
102 | fwspec.param[0] = GIC_SPI; | ||
103 | fwspec.param[1] = odmi->spi_base - 32 + odmin; | ||
104 | fwspec.param[2] = IRQ_TYPE_EDGE_RISING; | ||
105 | |||
106 | ret = irq_domain_alloc_irqs_parent(domain, virq, 1, &fwspec); | ||
107 | if (ret) { | ||
108 | pr_err("Cannot allocate parent IRQ\n"); | ||
109 | spin_lock(&odmis_bm_lock); | ||
110 | __clear_bit(odmin, odmis_bm); | ||
111 | spin_unlock(&odmis_bm_lock); | ||
112 | return ret; | ||
113 | } | ||
114 | |||
115 | /* Configure the interrupt line to be edge */ | ||
116 | d = irq_domain_get_irq_data(domain->parent, virq); | ||
117 | d->chip->irq_set_type(d, IRQ_TYPE_EDGE_RISING); | ||
118 | |||
119 | irq_domain_set_hwirq_and_chip(domain, virq, hwirq, | ||
120 | &odmi_irq_chip, NULL); | ||
121 | |||
122 | return 0; | ||
123 | } | ||
124 | |||
125 | static void odmi_irq_domain_free(struct irq_domain *domain, | ||
126 | unsigned int virq, unsigned int nr_irqs) | ||
127 | { | ||
128 | struct irq_data *d = irq_domain_get_irq_data(domain, virq); | ||
129 | |||
130 | if (d->hwirq >= odmis_count * NODMIS_PER_FRAME) { | ||
131 | pr_err("Failed to tear down MSI. Invalid hwirq %lu\n", d->hwirq); | ||
132 | return; | ||
133 | } | ||
134 | |||
135 | irq_domain_free_irqs_parent(domain, virq, nr_irqs); | ||
136 | |||
137 | /* Actually free the MSI */ | ||
138 | spin_lock(&odmis_bm_lock); | ||
139 | __clear_bit(d->hwirq, odmis_bm); | ||
140 | spin_unlock(&odmis_bm_lock); | ||
141 | } | ||
142 | |||
143 | static const struct irq_domain_ops odmi_domain_ops = { | ||
144 | .alloc = odmi_irq_domain_alloc, | ||
145 | .free = odmi_irq_domain_free, | ||
146 | }; | ||
147 | |||
148 | static struct irq_chip odmi_msi_irq_chip = { | ||
149 | .name = "ODMI", | ||
150 | }; | ||
151 | |||
152 | static struct msi_domain_ops odmi_msi_ops = { | ||
153 | }; | ||
154 | |||
155 | static struct msi_domain_info odmi_msi_domain_info = { | ||
156 | .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS), | ||
157 | .ops = &odmi_msi_ops, | ||
158 | .chip = &odmi_msi_irq_chip, | ||
159 | }; | ||
160 | |||
161 | static int __init mvebu_odmi_init(struct device_node *node, | ||
162 | struct device_node *parent) | ||
163 | { | ||
164 | struct irq_domain *inner_domain, *plat_domain; | ||
165 | int ret, i; | ||
166 | |||
167 | if (of_property_read_u32(node, "marvell,odmi-frames", &odmis_count)) | ||
168 | return -EINVAL; | ||
169 | |||
170 | odmis = kcalloc(odmis_count, sizeof(struct odmi_data), GFP_KERNEL); | ||
171 | if (!odmis) | ||
172 | return -ENOMEM; | ||
173 | |||
174 | odmis_bm = kcalloc(BITS_TO_LONGS(odmis_count * NODMIS_PER_FRAME), | ||
175 | sizeof(long), GFP_KERNEL); | ||
176 | if (!odmis_bm) { | ||
177 | ret = -ENOMEM; | ||
178 | goto err_alloc; | ||
179 | } | ||
180 | |||
181 | for (i = 0; i < odmis_count; i++) { | ||
182 | struct odmi_data *odmi = &odmis[i]; | ||
183 | |||
184 | ret = of_address_to_resource(node, i, &odmi->res); | ||
185 | if (ret) | ||
186 | goto err_unmap; | ||
187 | |||
188 | odmi->base = of_io_request_and_map(node, i, "odmi"); | ||
189 | if (IS_ERR(odmi->base)) { | ||
190 | ret = PTR_ERR(odmi->base); | ||
191 | goto err_unmap; | ||
192 | } | ||
193 | |||
194 | if (of_property_read_u32_index(node, "marvell,spi-base", | ||
195 | i, &odmi->spi_base)) { | ||
196 | ret = -EINVAL; | ||
197 | goto err_unmap; | ||
198 | } | ||
199 | } | ||
200 | |||
201 | inner_domain = irq_domain_create_linear(of_node_to_fwnode(node), | ||
202 | odmis_count * NODMIS_PER_FRAME, | ||
203 | &odmi_domain_ops, NULL); | ||
204 | if (!inner_domain) { | ||
205 | ret = -ENOMEM; | ||
206 | goto err_unmap; | ||
207 | } | ||
208 | |||
209 | inner_domain->parent = irq_find_host(parent); | ||
210 | |||
211 | plat_domain = platform_msi_create_irq_domain(of_node_to_fwnode(node), | ||
212 | &odmi_msi_domain_info, | ||
213 | inner_domain); | ||
214 | if (!plat_domain) { | ||
215 | ret = -ENOMEM; | ||
216 | goto err_remove_inner; | ||
217 | } | ||
218 | |||
219 | return 0; | ||
220 | |||
221 | err_remove_inner: | ||
222 | irq_domain_remove(inner_domain); | ||
223 | err_unmap: | ||
224 | for (i = 0; i < odmis_count; i++) { | ||
225 | struct odmi_data *odmi = &odmis[i]; | ||
226 | |||
227 | if (odmi->base && !IS_ERR(odmi->base)) | ||
228 | iounmap(odmis[i].base); | ||
229 | } | ||
230 | kfree(odmis_bm); | ||
231 | err_alloc: | ||
232 | kfree(odmis); | ||
233 | return ret; | ||
234 | } | ||
235 | |||
236 | IRQCHIP_DECLARE(mvebu_odmi, "marvell,odmi-controller", mvebu_odmi_init); | ||
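
The MSI message composed above is a doorbell: the address is the frame's GICP_ODMIN_SET register and the data carries the per-frame interrupt number in bits [14:12] (odmin << GICP_ODMI_INT_NUM_SHIFT). Illustrative helpers, not part of the driver, that make the hwirq packing explicit:

    /* Illustrative mirror of the NODMIS_SHIFT/NODMIS_MASK arithmetic
     * used by odmi_compose_msi_msg() and odmi_irq_domain_alloc(). */
    static inline unsigned int odmi_frame(unsigned int hwirq)
    {
            return hwirq >> NODMIS_SHIFT;   /* which 8-ODMI frame */
    }

    static inline unsigned int odmi_index(unsigned int hwirq)
    {
            return hwirq & NODMIS_MASK;     /* doorbell number in frame */
    }

Each frame forwards its eight ODMIs to consecutive GIC SPIs starting at its marvell,spi-base entry, which is why the parent fwspec uses odmi->spi_base - 32 + odmin: the GIC binding counts SPIs from 0 while absolute interrupt IDs start at 32.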
diff --git a/drivers/irqchip/irq-mxs.c b/drivers/irqchip/irq-mxs.c index efe50845939d..17304705f2cf 100644 --- a/drivers/irqchip/irq-mxs.c +++ b/drivers/irqchip/irq-mxs.c | |||
@@ -183,7 +183,7 @@ static void __iomem * __init icoll_init_iobase(struct device_node *np) | |||
183 | void __iomem *icoll_base; | 183 | void __iomem *icoll_base; |
184 | 184 | ||
185 | icoll_base = of_io_request_and_map(np, 0, np->name); | 185 | icoll_base = of_io_request_and_map(np, 0, np->name); |
186 | if (!icoll_base) | 186 | if (IS_ERR(icoll_base)) |
187 | panic("%s: unable to map resource", np->full_name); | 187 | panic("%s: unable to map resource", np->full_name); |
188 | return icoll_base; | 188 | return icoll_base; |
189 | } | 189 | } |
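
This fix, like the sunxi-nmi one below, corrects the same misuse: of_io_request_and_map() never returns NULL; it returns an ERR_PTR() on failure, so a !ptr test can never fire. The intended pattern, as a minimal sketch:

    void __iomem *base;

    base = of_io_request_and_map(np, 0, np->name);
    if (IS_ERR(base))
            return PTR_ERR(base);   /* propagate the encoded errno */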
diff --git a/drivers/irqchip/irq-sunxi-nmi.c b/drivers/irqchip/irq-sunxi-nmi.c index 0820f67cc9a7..668730c5cb66 100644 --- a/drivers/irqchip/irq-sunxi-nmi.c +++ b/drivers/irqchip/irq-sunxi-nmi.c | |||
@@ -160,9 +160,9 @@ static int __init sunxi_sc_nmi_irq_init(struct device_node *node, | |||
160 | 160 | ||
161 | gc = irq_get_domain_generic_chip(domain, 0); | 161 | gc = irq_get_domain_generic_chip(domain, 0); |
162 | gc->reg_base = of_io_request_and_map(node, 0, of_node_full_name(node)); | 162 | gc->reg_base = of_io_request_and_map(node, 0, of_node_full_name(node)); |
163 | if (!gc->reg_base) { | 163 | if (IS_ERR(gc->reg_base)) { |
164 | pr_err("unable to map resource\n"); | 164 | pr_err("unable to map resource\n"); |
165 | ret = -ENOMEM; | 165 | ret = PTR_ERR(gc->reg_base); |
166 | goto fail_irqd_remove; | 166 | goto fail_irqd_remove; |
167 | } | 167 | } |
168 | 168 | ||
diff --git a/drivers/irqchip/irq-tango.c b/drivers/irqchip/irq-tango.c new file mode 100644 index 000000000000..bdbb5c0ff7fe --- /dev/null +++ b/drivers/irqchip/irq-tango.c | |||
@@ -0,0 +1,232 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2014 Mans Rullgard <mans@mansr.com> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of the GNU General Public License as published by the | ||
6 | * Free Software Foundation; either version 2 of the License, or (at your | ||
7 | * option) any later version. | ||
8 | */ | ||
9 | |||
10 | #include <linux/init.h> | ||
11 | #include <linux/irq.h> | ||
12 | #include <linux/irqchip.h> | ||
13 | #include <linux/irqchip/chained_irq.h> | ||
14 | #include <linux/ioport.h> | ||
15 | #include <linux/io.h> | ||
16 | #include <linux/of_address.h> | ||
17 | #include <linux/of_irq.h> | ||
18 | #include <linux/slab.h> | ||
19 | |||
20 | #define IRQ0_CTL_BASE 0x0000 | ||
21 | #define IRQ1_CTL_BASE 0x0100 | ||
22 | #define EDGE_CTL_BASE 0x0200 | ||
23 | #define IRQ2_CTL_BASE 0x0300 | ||
24 | |||
25 | #define IRQ_CTL_HI 0x18 | ||
26 | #define EDGE_CTL_HI 0x20 | ||
27 | |||
28 | #define IRQ_STATUS 0x00 | ||
29 | #define IRQ_RAWSTAT 0x04 | ||
30 | #define IRQ_EN_SET 0x08 | ||
31 | #define IRQ_EN_CLR 0x0c | ||
32 | #define IRQ_SOFT_SET 0x10 | ||
33 | #define IRQ_SOFT_CLR 0x14 | ||
34 | |||
35 | #define EDGE_STATUS 0x00 | ||
36 | #define EDGE_RAWSTAT 0x04 | ||
37 | #define EDGE_CFG_RISE 0x08 | ||
38 | #define EDGE_CFG_FALL 0x0c | ||
39 | #define EDGE_CFG_RISE_SET 0x10 | ||
40 | #define EDGE_CFG_RISE_CLR 0x14 | ||
41 | #define EDGE_CFG_FALL_SET 0x18 | ||
42 | #define EDGE_CFG_FALL_CLR 0x1c | ||
43 | |||
44 | struct tangox_irq_chip { | ||
45 | void __iomem *base; | ||
46 | unsigned long ctl; | ||
47 | }; | ||
48 | |||
49 | static inline u32 intc_readl(struct tangox_irq_chip *chip, int reg) | ||
50 | { | ||
51 | return readl_relaxed(chip->base + reg); | ||
52 | } | ||
53 | |||
54 | static inline void intc_writel(struct tangox_irq_chip *chip, int reg, u32 val) | ||
55 | { | ||
56 | writel_relaxed(val, chip->base + reg); | ||
57 | } | ||
58 | |||
59 | static void tangox_dispatch_irqs(struct irq_domain *dom, unsigned int status, | ||
60 | int base) | ||
61 | { | ||
62 | unsigned int hwirq; | ||
63 | unsigned int virq; | ||
64 | |||
65 | while (status) { | ||
66 | hwirq = __ffs(status); | ||
67 | virq = irq_find_mapping(dom, base + hwirq); | ||
68 | if (virq) | ||
69 | generic_handle_irq(virq); | ||
70 | status &= ~BIT(hwirq); | ||
71 | } | ||
72 | } | ||
73 | |||
74 | static void tangox_irq_handler(struct irq_desc *desc) | ||
75 | { | ||
76 | struct irq_domain *dom = irq_desc_get_handler_data(desc); | ||
77 | struct irq_chip *host_chip = irq_desc_get_chip(desc); | ||
78 | struct tangox_irq_chip *chip = dom->host_data; | ||
79 | unsigned int status_lo, status_hi; | ||
80 | |||
81 | chained_irq_enter(host_chip, desc); | ||
82 | |||
83 | status_lo = intc_readl(chip, chip->ctl + IRQ_STATUS); | ||
84 | status_hi = intc_readl(chip, chip->ctl + IRQ_CTL_HI + IRQ_STATUS); | ||
85 | |||
86 | tangox_dispatch_irqs(dom, status_lo, 0); | ||
87 | tangox_dispatch_irqs(dom, status_hi, 32); | ||
88 | |||
89 | chained_irq_exit(host_chip, desc); | ||
90 | } | ||
91 | |||
92 | static int tangox_irq_set_type(struct irq_data *d, unsigned int flow_type) | ||
93 | { | ||
94 | struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); | ||
95 | struct tangox_irq_chip *chip = gc->domain->host_data; | ||
96 | struct irq_chip_regs *regs = &gc->chip_types[0].regs; | ||
97 | |||
98 | switch (flow_type & IRQ_TYPE_SENSE_MASK) { | ||
99 | case IRQ_TYPE_EDGE_RISING: | ||
100 | intc_writel(chip, regs->type + EDGE_CFG_RISE_SET, d->mask); | ||
101 | intc_writel(chip, regs->type + EDGE_CFG_FALL_CLR, d->mask); | ||
102 | break; | ||
103 | |||
104 | case IRQ_TYPE_EDGE_FALLING: | ||
105 | intc_writel(chip, regs->type + EDGE_CFG_RISE_CLR, d->mask); | ||
106 | intc_writel(chip, regs->type + EDGE_CFG_FALL_SET, d->mask); | ||
107 | break; | ||
108 | |||
109 | case IRQ_TYPE_LEVEL_HIGH: | ||
110 | intc_writel(chip, regs->type + EDGE_CFG_RISE_CLR, d->mask); | ||
111 | intc_writel(chip, regs->type + EDGE_CFG_FALL_CLR, d->mask); | ||
112 | break; | ||
113 | |||
114 | case IRQ_TYPE_LEVEL_LOW: | ||
115 | intc_writel(chip, regs->type + EDGE_CFG_RISE_SET, d->mask); | ||
116 | intc_writel(chip, regs->type + EDGE_CFG_FALL_SET, d->mask); | ||
117 | break; | ||
118 | |||
119 | default: | ||
120 | pr_err("Invalid trigger mode %x for IRQ %d\n", | ||
121 | flow_type, d->irq); | ||
122 | return -EINVAL; | ||
123 | } | ||
124 | |||
125 | return irq_setup_alt_chip(d, flow_type); | ||
126 | } | ||
127 | |||
128 | static void __init tangox_irq_init_chip(struct irq_chip_generic *gc, | ||
129 | unsigned long ctl_offs, | ||
130 | unsigned long edge_offs) | ||
131 | { | ||
132 | struct tangox_irq_chip *chip = gc->domain->host_data; | ||
133 | struct irq_chip_type *ct = gc->chip_types; | ||
134 | unsigned long ctl_base = chip->ctl + ctl_offs; | ||
135 | unsigned long edge_base = EDGE_CTL_BASE + edge_offs; | ||
136 | int i; | ||
137 | |||
138 | gc->reg_base = chip->base; | ||
139 | gc->unused = 0; | ||
140 | |||
141 | for (i = 0; i < 2; i++) { | ||
142 | ct[i].chip.irq_ack = irq_gc_ack_set_bit; | ||
143 | ct[i].chip.irq_mask = irq_gc_mask_disable_reg; | ||
144 | ct[i].chip.irq_mask_ack = irq_gc_mask_disable_reg_and_ack; | ||
145 | ct[i].chip.irq_unmask = irq_gc_unmask_enable_reg; | ||
146 | ct[i].chip.irq_set_type = tangox_irq_set_type; | ||
147 | ct[i].chip.name = gc->domain->name; | ||
148 | |||
149 | ct[i].regs.enable = ctl_base + IRQ_EN_SET; | ||
150 | ct[i].regs.disable = ctl_base + IRQ_EN_CLR; | ||
151 | ct[i].regs.ack = edge_base + EDGE_RAWSTAT; | ||
152 | ct[i].regs.type = edge_base; | ||
153 | } | ||
154 | |||
155 | ct[0].type = IRQ_TYPE_LEVEL_MASK; | ||
156 | ct[0].handler = handle_level_irq; | ||
157 | |||
158 | ct[1].type = IRQ_TYPE_EDGE_BOTH; | ||
159 | ct[1].handler = handle_edge_irq; | ||
160 | |||
161 | intc_writel(chip, ct->regs.disable, 0xffffffff); | ||
162 | intc_writel(chip, ct->regs.ack, 0xffffffff); | ||
163 | } | ||
164 | |||
165 | static void __init tangox_irq_domain_init(struct irq_domain *dom) | ||
166 | { | ||
167 | struct irq_chip_generic *gc; | ||
168 | int i; | ||
169 | |||
170 | for (i = 0; i < 2; i++) { | ||
171 | gc = irq_get_domain_generic_chip(dom, i * 32); | ||
172 | tangox_irq_init_chip(gc, i * IRQ_CTL_HI, i * EDGE_CTL_HI); | ||
173 | } | ||
174 | } | ||
175 | |||
176 | static int __init tangox_irq_init(void __iomem *base, struct resource *baseres, | ||
177 | struct device_node *node) | ||
178 | { | ||
179 | struct tangox_irq_chip *chip; | ||
180 | struct irq_domain *dom; | ||
181 | struct resource res; | ||
182 | int irq; | ||
183 | int err; | ||
184 | |||
185 | irq = irq_of_parse_and_map(node, 0); | ||
186 | if (!irq) | ||
187 | panic("%s: failed to get IRQ", node->name); | ||
188 | |||
189 | err = of_address_to_resource(node, 0, &res); | ||
190 | if (err) | ||
191 | panic("%s: failed to get address", node->name); | ||
192 | |||
193 | chip = kzalloc(sizeof(*chip), GFP_KERNEL); | ||
194 | chip->ctl = res.start - baseres->start; | ||
195 | chip->base = base; | ||
196 | |||
197 | dom = irq_domain_add_linear(node, 64, &irq_generic_chip_ops, chip); | ||
198 | if (!dom) | ||
199 | panic("%s: failed to create irqdomain", node->name); | ||
200 | |||
201 | err = irq_alloc_domain_generic_chips(dom, 32, 2, node->name, | ||
202 | handle_level_irq, 0, 0, 0); | ||
203 | if (err) | ||
204 | panic("%s: failed to allocate irqchip", node->name); | ||
205 | |||
206 | tangox_irq_domain_init(dom); | ||
207 | |||
208 | irq_set_chained_handler(irq, tangox_irq_handler); | ||
209 | irq_set_handler_data(irq, dom); | ||
210 | |||
211 | return 0; | ||
212 | } | ||
213 | |||
214 | static int __init tangox_of_irq_init(struct device_node *node, | ||
215 | struct device_node *parent) | ||
216 | { | ||
217 | struct device_node *c; | ||
218 | struct resource res; | ||
219 | void __iomem *base; | ||
220 | |||
221 | base = of_iomap(node, 0); | ||
222 | if (!base) | ||
223 | panic("%s: of_iomap failed", node->name); | ||
224 | |||
225 | of_address_to_resource(node, 0, &res); | ||
226 | |||
227 | for_each_child_of_node(node, c) | ||
228 | tangox_irq_init(base, &res, c); | ||
229 | |||
230 | return 0; | ||
231 | } | ||
232 | IRQCHIP_DECLARE(tangox_intc, "sigma,smp8642-intc", tangox_of_irq_init); | ||
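
tangox_irq_set_type() drives the rise and fall edge-config registers as a pair; with both cleared the line behaves as level-high and with both set as level-low. The switch above reduces to this table:

    EDGE_CFG_RISE   EDGE_CFG_FALL   resulting trigger
        set             clear       rising edge
        clear           set         falling edge
        clear           clear       level high
        set             set         level low

irq_setup_alt_chip() then switches the descriptor between the level chip_type (ct[0]) and the edge chip_type (ct[1]) set up in tangox_irq_init_chip().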
diff --git a/drivers/irqchip/irq-ts4800.c b/drivers/irqchip/irq-ts4800.c index 4192bdcd2734..2325fb3c482b 100644 --- a/drivers/irqchip/irq-ts4800.c +++ b/drivers/irqchip/irq-ts4800.c | |||
@@ -59,7 +59,7 @@ static int ts4800_irqdomain_map(struct irq_domain *d, unsigned int irq, | |||
59 | return 0; | 59 | return 0; |
60 | } | 60 | } |
61 | 61 | ||
62 | struct irq_domain_ops ts4800_ic_ops = { | 62 | static const struct irq_domain_ops ts4800_ic_ops = { |
63 | .map = ts4800_irqdomain_map, | 63 | .map = ts4800_irqdomain_map, |
64 | .xlate = irq_domain_xlate_onecell, | 64 | .xlate = irq_domain_xlate_onecell, |
65 | }; | 65 | }; |
diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index fa593dd3efe1..3772f3ac956e 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig | |||
@@ -83,6 +83,15 @@ config E1000E | |||
83 | To compile this driver as a module, choose M here. The module | 83 | To compile this driver as a module, choose M here. The module |
84 | will be called e1000e. | 84 | will be called e1000e. |
85 | 85 | ||
86 | config E1000E_HWTS | ||
87 | bool "Support HW cross-timestamp on PCH devices" | ||
88 | default y | ||
89 | depends on E1000E && X86 | ||
90 | ---help--- | ||
91 | Say Y to enable hardware supported cross-timestamping on PCH | ||
92 | devices. The cross-timestamp is available through the PTP clock | ||
93 | driver precise cross-timestamp ioctl (PTP_SYS_OFFSET_PRECISE). | ||
94 | |||
86 | config IGB | 95 | config IGB |
87 | tristate "Intel(R) 82575/82576 PCI-Express Gigabit Ethernet support" | 96 | tristate "Intel(R) 82575/82576 PCI-Express Gigabit Ethernet support" |
88 | depends on PCI | 97 | depends on PCI |
diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h index f7c7804d79e5..0641c0098738 100644 --- a/drivers/net/ethernet/intel/e1000e/defines.h +++ b/drivers/net/ethernet/intel/e1000e/defines.h | |||
@@ -528,6 +528,11 @@ | |||
528 | #define E1000_RXCW_C 0x20000000 /* Receive config */ | 528 | #define E1000_RXCW_C 0x20000000 /* Receive config */ |
529 | #define E1000_RXCW_SYNCH 0x40000000 /* Receive config synch */ | 529 | #define E1000_RXCW_SYNCH 0x40000000 /* Receive config synch */ |
530 | 530 | ||
531 | /* HH Time Sync */ | ||
532 | #define E1000_TSYNCTXCTL_MAX_ALLOWED_DLY_MASK 0x0000F000 /* max delay */ | ||
533 | #define E1000_TSYNCTXCTL_SYNC_COMP 0x40000000 /* sync complete */ | ||
534 | #define E1000_TSYNCTXCTL_START_SYNC 0x80000000 /* initiate sync */ | ||
535 | |||
531 | #define E1000_TSYNCTXCTL_VALID 0x00000001 /* Tx timestamp valid */ | 536 | #define E1000_TSYNCTXCTL_VALID 0x00000001 /* Tx timestamp valid */ |
532 | #define E1000_TSYNCTXCTL_ENABLED 0x00000010 /* enable Tx timestamping */ | 537 | #define E1000_TSYNCTXCTL_ENABLED 0x00000010 /* enable Tx timestamping */ |
533 | 538 | ||
diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c index 25a0ad5102d6..e2ff3ef75d5d 100644 --- a/drivers/net/ethernet/intel/e1000e/ptp.c +++ b/drivers/net/ethernet/intel/e1000e/ptp.c | |||
@@ -26,6 +26,12 @@ | |||
26 | 26 | ||
27 | #include "e1000.h" | 27 | #include "e1000.h" |
28 | 28 | ||
29 | #ifdef CONFIG_E1000E_HWTS | ||
30 | #include <linux/clocksource.h> | ||
31 | #include <linux/ktime.h> | ||
32 | #include <asm/tsc.h> | ||
33 | #endif | ||
34 | |||
29 | /** | 35 | /** |
30 | * e1000e_phc_adjfreq - adjust the frequency of the hardware clock | 36 | * e1000e_phc_adjfreq - adjust the frequency of the hardware clock |
31 | * @ptp: ptp clock structure | 37 | * @ptp: ptp clock structure |
@@ -98,6 +104,78 @@ static int e1000e_phc_adjtime(struct ptp_clock_info *ptp, s64 delta) | |||
98 | return 0; | 104 | return 0; |
99 | } | 105 | } |
100 | 106 | ||
107 | #ifdef CONFIG_E1000E_HWTS | ||
108 | #define MAX_HW_WAIT_COUNT (3) | ||
109 | |||
110 | /** | ||
111 | * e1000e_phc_get_syncdevicetime - Callback given to timekeeping code that reads system/device registers | ||
112 | * @device: current device time | ||
113 | * @system: system counter value read synchronously with device time | ||
114 | * @ctx: context provided by timekeeping code | ||
115 | * | ||
116 | * Read device and system (ART) clock simultaneously and return the corrected | ||
117 | * clock values in ns. | ||
118 | **/ | ||
119 | static int e1000e_phc_get_syncdevicetime(ktime_t *device, | ||
120 | struct system_counterval_t *system, | ||
121 | void *ctx) | ||
122 | { | ||
123 | struct e1000_adapter *adapter = (struct e1000_adapter *)ctx; | ||
124 | struct e1000_hw *hw = &adapter->hw; | ||
125 | unsigned long flags; | ||
126 | int i; | ||
127 | u32 tsync_ctrl; | ||
128 | cycle_t dev_cycles; | ||
129 | cycle_t sys_cycles; | ||
130 | |||
131 | tsync_ctrl = er32(TSYNCTXCTL); | ||
132 | tsync_ctrl |= E1000_TSYNCTXCTL_START_SYNC | | ||
133 | E1000_TSYNCTXCTL_MAX_ALLOWED_DLY_MASK; | ||
134 | ew32(TSYNCTXCTL, tsync_ctrl); | ||
135 | for (i = 0; i < MAX_HW_WAIT_COUNT; ++i) { | ||
136 | udelay(1); | ||
137 | tsync_ctrl = er32(TSYNCTXCTL); | ||
138 | if (tsync_ctrl & E1000_TSYNCTXCTL_SYNC_COMP) | ||
139 | break; | ||
140 | } | ||
141 | |||
142 | if (i == MAX_HW_WAIT_COUNT) | ||
143 | return -ETIMEDOUT; | ||
144 | |||
145 | dev_cycles = er32(SYSSTMPH); | ||
146 | dev_cycles <<= 32; | ||
147 | dev_cycles |= er32(SYSSTMPL); | ||
148 | spin_lock_irqsave(&adapter->systim_lock, flags); | ||
149 | *device = ns_to_ktime(timecounter_cyc2time(&adapter->tc, dev_cycles)); | ||
150 | spin_unlock_irqrestore(&adapter->systim_lock, flags); | ||
151 | |||
152 | sys_cycles = er32(PLTSTMPH); | ||
153 | sys_cycles <<= 32; | ||
154 | sys_cycles |= er32(PLTSTMPL); | ||
155 | *system = convert_art_to_tsc(sys_cycles); | ||
156 | |||
157 | return 0; | ||
158 | } | ||
159 | |||
160 | /** | ||
161 | * e1000e_phc_getcrosststamp - Reads the current system/device cross timestamp | ||
162 | * @ptp: ptp clock structure | ||
163 | * @xtstamp: structure containing the device/system cross timestamp | ||
164 | * | ||
165 | * Read device and system (ART) clock simultaneously and return the scaled | ||
166 | * clock values in ns. | ||
167 | **/ | ||
168 | static int e1000e_phc_getcrosststamp(struct ptp_clock_info *ptp, | ||
169 | struct system_device_crosststamp *xtstamp) | ||
170 | { | ||
171 | struct e1000_adapter *adapter = container_of(ptp, struct e1000_adapter, | ||
172 | ptp_clock_info); | ||
173 | |||
174 | return get_device_system_crosststamp(e1000e_phc_get_syncdevicetime, | ||
175 | adapter, NULL, xtstamp); | ||
176 | } | ||
177 | #endif /* CONFIG_E1000E_HWTS */ | ||
178 | |||
101 | /** | 179 | /** |
102 | * e1000e_phc_gettime - Reads the current time from the hardware clock | 180 | * e1000e_phc_gettime - Reads the current time from the hardware clock |
103 | * @ptp: ptp clock structure | 181 | * @ptp: ptp clock structure |
@@ -236,6 +314,13 @@ void e1000e_ptp_init(struct e1000_adapter *adapter) | |||
236 | break; | 314 | break; |
237 | } | 315 | } |
238 | 316 | ||
317 | #ifdef CONFIG_E1000E_HWTS | ||
318 | /* CPU must have ART and GBe must be from Sunrise Point or greater */ | ||
319 | if (hw->mac.type >= e1000_pch_spt && boot_cpu_has(X86_FEATURE_ART)) | ||
320 | adapter->ptp_clock_info.getcrosststamp = | ||
321 | e1000e_phc_getcrosststamp; | ||
322 | #endif/*CONFIG_E1000E_HWTS*/ | ||
323 | |||
239 | INIT_DELAYED_WORK(&adapter->systim_overflow_work, | 324 | INIT_DELAYED_WORK(&adapter->systim_overflow_work, |
240 | e1000e_systim_overflow_work); | 325 | e1000e_systim_overflow_work); |
241 | 326 | ||
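
The callback pairs two latched 64-bit reads: the NIC clock from SYSSTMPH/SYSSTMPL and the ART counter from PLTSTMPH/PLTSTMPL, taken after the START_SYNC/SYNC_COMP handshake. convert_art_to_tsc() is the x86 helper introduced alongside X86_FEATURE_ART that scales ART cycles into a TSC-based system_counterval_t the timekeeping core can correlate. The inline high/low reassembly follows the usual pattern; as an illustrative helper, not driver code:

    /* Illustrative: combine latched high/low register halves into
     * one 64-bit cycle count, as done inline above. */
    static u64 read_latched64(u32 hi, u32 lo)
    {
            return ((u64)hi << 32) | lo;
    }

If SYNC_COMP never asserts within MAX_HW_WAIT_COUNT polls, -ETIMEDOUT propagates through get_device_system_crosststamp() up to the PTP_SYS_OFFSET_PRECISE ioctl.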
diff --git a/drivers/net/ethernet/intel/e1000e/regs.h b/drivers/net/ethernet/intel/e1000e/regs.h index 1d5e0b77062a..0cb4d365e5ad 100644 --- a/drivers/net/ethernet/intel/e1000e/regs.h +++ b/drivers/net/ethernet/intel/e1000e/regs.h | |||
@@ -245,6 +245,10 @@ | |||
245 | #define E1000_SYSTIML 0x0B600 /* System time register Low - RO */ | 245 | #define E1000_SYSTIML 0x0B600 /* System time register Low - RO */ |
246 | #define E1000_SYSTIMH 0x0B604 /* System time register High - RO */ | 246 | #define E1000_SYSTIMH 0x0B604 /* System time register High - RO */ |
247 | #define E1000_TIMINCA 0x0B608 /* Increment attributes register - RW */ | 247 | #define E1000_TIMINCA 0x0B608 /* Increment attributes register - RW */ |
248 | #define E1000_SYSSTMPL 0x0B648 /* HH Timesync system stamp low register */ | ||
249 | #define E1000_SYSSTMPH 0x0B64C /* HH Timesync system stamp hi register */ | ||
250 | #define E1000_PLTSTMPL 0x0B640 /* HH Timesync platform stamp low register */ | ||
251 | #define E1000_PLTSTMPH 0x0B644 /* HH Timesync platform stamp hi register */ | ||
248 | #define E1000_RXMTRL 0x0B634 /* Time sync Rx EtherType and Msg Type - RW */ | 252 | #define E1000_RXMTRL 0x0B634 /* Time sync Rx EtherType and Msg Type - RW */ |
249 | #define E1000_RXUDP 0x0B638 /* Time Sync Rx UDP Port - RW */ | 253 | #define E1000_RXUDP 0x0B638 /* Time Sync Rx UDP Port - RW */ |
250 | 254 | ||
diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index da7bae991552..579fd65299a0 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/poll.h> | 22 | #include <linux/poll.h> |
23 | #include <linux/sched.h> | 23 | #include <linux/sched.h> |
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/timekeeping.h> | ||
25 | 26 | ||
26 | #include "ptp_private.h" | 27 | #include "ptp_private.h" |
27 | 28 | ||
@@ -120,11 +121,13 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) | |||
120 | struct ptp_clock_caps caps; | 121 | struct ptp_clock_caps caps; |
121 | struct ptp_clock_request req; | 122 | struct ptp_clock_request req; |
122 | struct ptp_sys_offset *sysoff = NULL; | 123 | struct ptp_sys_offset *sysoff = NULL; |
124 | struct ptp_sys_offset_precise precise_offset; | ||
123 | struct ptp_pin_desc pd; | 125 | struct ptp_pin_desc pd; |
124 | struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); | 126 | struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); |
125 | struct ptp_clock_info *ops = ptp->info; | 127 | struct ptp_clock_info *ops = ptp->info; |
126 | struct ptp_clock_time *pct; | 128 | struct ptp_clock_time *pct; |
127 | struct timespec64 ts; | 129 | struct timespec64 ts; |
130 | struct system_device_crosststamp xtstamp; | ||
128 | int enable, err = 0; | 131 | int enable, err = 0; |
129 | unsigned int i, pin_index; | 132 | unsigned int i, pin_index; |
130 | 133 | ||
@@ -138,6 +141,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) | |||
138 | caps.n_per_out = ptp->info->n_per_out; | 141 | caps.n_per_out = ptp->info->n_per_out; |
139 | caps.pps = ptp->info->pps; | 142 | caps.pps = ptp->info->pps; |
140 | caps.n_pins = ptp->info->n_pins; | 143 | caps.n_pins = ptp->info->n_pins; |
144 | caps.cross_timestamping = ptp->info->getcrosststamp != NULL; | ||
141 | if (copy_to_user((void __user *)arg, &caps, sizeof(caps))) | 145 | if (copy_to_user((void __user *)arg, &caps, sizeof(caps))) |
142 | err = -EFAULT; | 146 | err = -EFAULT; |
143 | break; | 147 | break; |
@@ -180,6 +184,29 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) | |||
180 | err = ops->enable(ops, &req, enable); | 184 | err = ops->enable(ops, &req, enable); |
181 | break; | 185 | break; |
182 | 186 | ||
187 | case PTP_SYS_OFFSET_PRECISE: | ||
188 | if (!ptp->info->getcrosststamp) { | ||
189 | err = -EOPNOTSUPP; | ||
190 | break; | ||
191 | } | ||
192 | err = ptp->info->getcrosststamp(ptp->info, &xtstamp); | ||
193 | if (err) | ||
194 | break; | ||
195 | |||
196 | ts = ktime_to_timespec64(xtstamp.device); | ||
197 | precise_offset.device.sec = ts.tv_sec; | ||
198 | precise_offset.device.nsec = ts.tv_nsec; | ||
199 | ts = ktime_to_timespec64(xtstamp.sys_realtime); | ||
200 | precise_offset.sys_realtime.sec = ts.tv_sec; | ||
201 | precise_offset.sys_realtime.nsec = ts.tv_nsec; | ||
202 | ts = ktime_to_timespec64(xtstamp.sys_monoraw); | ||
203 | precise_offset.sys_monoraw.sec = ts.tv_sec; | ||
204 | precise_offset.sys_monoraw.nsec = ts.tv_nsec; | ||
205 | if (copy_to_user((void __user *)arg, &precise_offset, | ||
206 | sizeof(precise_offset))) | ||
207 | err = -EFAULT; | ||
208 | break; | ||
209 | |||
183 | case PTP_SYS_OFFSET: | 210 | case PTP_SYS_OFFSET: |
184 | sysoff = kmalloc(sizeof(*sysoff), GFP_KERNEL); | 211 | sysoff = kmalloc(sizeof(*sysoff), GFP_KERNEL); |
185 | if (!sysoff) { | 212 | if (!sysoff) { |
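From user space, the new request pairs with the cross_timestamping capability bit added above. A minimal probe-and-query sketch (the device path /dev/ptp0 is an assumption; error handling trimmed):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/ptp_clock.h>

	int main(void)
	{
		struct ptp_clock_caps caps;
		struct ptp_sys_offset_precise precise;
		int fd = open("/dev/ptp0", O_RDWR);

		if (fd < 0)
			return 1;
		/* Only issue the precise ioctl when the clock advertises it. */
		if (ioctl(fd, PTP_CLOCK_GETCAPS, &caps) || !caps.cross_timestamping)
			return 1;	/* fall back to PTP_SYS_OFFSET */
		if (ioctl(fd, PTP_SYS_OFFSET_PRECISE, &precise))
			return 1;
		printf("device %lld.%09u realtime %lld.%09u\n",
		       (long long)precise.device.sec, precise.device.nsec,
		       (long long)precise.sys_realtime.sec, precise.sys_realtime.nsec);
		return 0;
	}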
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4545e2e2ad45..5699bbc23feb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -931,7 +931,7 @@ static int check_async_write(struct inode *inode, unsigned long bio_flags) | |||
931 | if (bio_flags & EXTENT_BIO_TREE_LOG) | 931 | if (bio_flags & EXTENT_BIO_TREE_LOG) |
932 | return 0; | 932 | return 0; |
933 | #ifdef CONFIG_X86 | 933 | #ifdef CONFIG_X86 |
934 | if (static_cpu_has_safe(X86_FEATURE_XMM4_2)) | 934 | if (static_cpu_has(X86_FEATURE_XMM4_2)) |
935 | return 0; | 935 | return 0; |
936 | #endif | 936 | #endif |
937 | return 1; | 937 | return 1; |
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index bdcf358dfce2..0d442e34c349 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h | |||
@@ -190,9 +190,9 @@ extern void clockevents_config_and_register(struct clock_event_device *dev, | |||
190 | extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq); | 190 | extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq); |
191 | 191 | ||
192 | static inline void | 192 | static inline void |
193 | clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec) | 193 | clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 maxsec) |
194 | { | 194 | { |
195 | return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC, freq, minsec); | 195 | return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC, freq, maxsec); |
196 | } | 196 | } |
197 | 197 | ||
198 | extern void clockevents_suspend(void); | 198 | extern void clockevents_suspend(void); |
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 6013021a3b39..a307bf62974f 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h | |||
@@ -118,6 +118,23 @@ struct clocksource { | |||
118 | /* simplify initialization of mask field */ | 118 | /* simplify initialization of mask field */ |
119 | #define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1) | 119 | #define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1) |
120 | 120 | ||
121 | static inline u32 clocksource_freq2mult(u32 freq, u32 shift_constant, u64 from) | ||
122 | { | ||
123 | /* freq = cyc/from | ||
124 | * mult/2^shift = ns/cyc | ||
125 | * mult = ns/cyc * 2^shift | ||
126 | * mult = from/freq * 2^shift | ||
127 | * mult = from * 2^shift / freq | ||
128 | * mult = (from<<shift) / freq | ||
129 | */ | ||
130 | u64 tmp = ((u64)from) << shift_constant; | ||
131 | |||
132 | tmp += freq/2; /* round for do_div */ | ||
133 | do_div(tmp, freq); | ||
134 | |||
135 | return (u32)tmp; | ||
136 | } | ||
137 | |||
121 | /** | 138 | /** |
122 | * clocksource_khz2mult - calculates mult from khz and shift | 139 | * clocksource_khz2mult - calculates mult from khz and shift |
123 | * @khz: Clocksource frequency in KHz | 140 | * @khz: Clocksource frequency in KHz |
@@ -128,19 +145,7 @@ struct clocksource { | |||
128 | */ | 145 | */ |
129 | static inline u32 clocksource_khz2mult(u32 khz, u32 shift_constant) | 146 | static inline u32 clocksource_khz2mult(u32 khz, u32 shift_constant) |
130 | { | 147 | { |
131 | /* khz = cyc/(Million ns) | 148 | return clocksource_freq2mult(khz, shift_constant, NSEC_PER_MSEC); |
132 | * mult/2^shift = ns/cyc | ||
133 | * mult = ns/cyc * 2^shift | ||
134 | * mult = 1Million/khz * 2^shift | ||
135 | * mult = 1000000 * 2^shift / khz | ||
136 | * mult = (1000000<<shift) / khz | ||
137 | */ | ||
138 | u64 tmp = ((u64)1000000) << shift_constant; | ||
139 | |||
140 | tmp += khz/2; /* round for do_div */ | ||
141 | do_div(tmp, khz); | ||
142 | |||
143 | return (u32)tmp; | ||
144 | } | 149 | } |
145 | 150 | ||
146 | /** | 151 | /** |
@@ -154,19 +159,7 @@ static inline u32 clocksource_khz2mult(u32 khz, u32 shift_constant) | |||
154 | */ | 159 | */ |
155 | static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant) | 160 | static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant) |
156 | { | 161 | { |
157 | /* hz = cyc/(Billion ns) | 162 | return clocksource_freq2mult(hz, shift_constant, NSEC_PER_SEC); |
158 | * mult/2^shift = ns/cyc | ||
159 | * mult = ns/cyc * 2^shift | ||
160 | * mult = 1Billion/hz * 2^shift | ||
161 | * mult = 1000000000 * 2^shift / hz | ||
162 | * mult = (1000000000<<shift) / hz | ||
163 | */ | ||
164 | u64 tmp = ((u64)1000000000) << shift_constant; | ||
165 | |||
166 | tmp += hz/2; /* round for do_div */ | ||
167 | do_div(tmp, hz); | ||
168 | |||
169 | return (u32)tmp; | ||
170 | } | 163 | } |
171 | 164 | ||
172 | /** | 165 | /** |
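A quick arithmetic check of the consolidated helper: with a 1 MHz clocksource and shift = 20, clocksource_hz2mult() now computes through clocksource_freq2mult(hz, 20, NSEC_PER_SEC):

	/* mult = (1000000000 << 20) / 1000000 = 1000 << 20 = 1048576000 */
	u32 mult = clocksource_hz2mult(1000000, 20);

	/* Converting cycles to ns: (cycles * mult) >> shift.
	 * 5 cycles of a 1 MHz clock -> 5 * 1000 = 5000 ns, as expected. */
	u64 ns = ((u64)5 * mult) >> 20;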
diff --git a/include/linux/compiler.h b/include/linux/compiler.h index a27f4f17c382..b5ff9881bef8 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h | |||
@@ -20,12 +20,14 @@ | |||
20 | # define __pmem __attribute__((noderef, address_space(5))) | 20 | # define __pmem __attribute__((noderef, address_space(5))) |
21 | #ifdef CONFIG_SPARSE_RCU_POINTER | 21 | #ifdef CONFIG_SPARSE_RCU_POINTER |
22 | # define __rcu __attribute__((noderef, address_space(4))) | 22 | # define __rcu __attribute__((noderef, address_space(4))) |
23 | #else | 23 | #else /* CONFIG_SPARSE_RCU_POINTER */ |
24 | # define __rcu | 24 | # define __rcu |
25 | #endif | 25 | #endif /* CONFIG_SPARSE_RCU_POINTER */ |
26 | # define __private __attribute__((noderef)) | ||
26 | extern void __chk_user_ptr(const volatile void __user *); | 27 | extern void __chk_user_ptr(const volatile void __user *); |
27 | extern void __chk_io_ptr(const volatile void __iomem *); | 28 | extern void __chk_io_ptr(const volatile void __iomem *); |
28 | #else | 29 | # define ACCESS_PRIVATE(p, member) (*((typeof((p)->member) __force *) &(p)->member)) |
30 | #else /* __CHECKER__ */ | ||
29 | # define __user | 31 | # define __user |
30 | # define __kernel | 32 | # define __kernel |
31 | # define __safe | 33 | # define __safe |
@@ -44,7 +46,9 @@ extern void __chk_io_ptr(const volatile void __iomem *); | |||
44 | # define __percpu | 46 | # define __percpu |
45 | # define __rcu | 47 | # define __rcu |
46 | # define __pmem | 48 | # define __pmem |
47 | #endif | 49 | # define __private |
50 | # define ACCESS_PRIVATE(p, member) ((p)->member) | ||
51 | #endif /* __CHECKER__ */ | ||
48 | 52 | ||
49 | /* Indirect macros required for expanded argument pasting, eg. __LINE__. */ | 53 | /* Indirect macros required for expanded argument pasting, eg. __LINE__. */ |
50 | #define ___PASTE(a,b) a##b | 54 | #define ___PASTE(a,b) a##b |
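The new __private/ACCESS_PRIVATE pair mirrors the existing __force idiom: under sparse, a __private member may only be reached through the accessor. An illustrative use (the struct and names are made up; the real consumer in this merge is irq_common_data.state_use_accessors in linux/irq.h below):

	struct demo {
		unsigned int __private state;	/* sparse flags direct access */
	};

	/* The only sanctioned way to touch ->state: */
	#define demo_state(d)	ACCESS_PRIVATE(d, state)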
diff --git a/include/linux/cpu.h b/include/linux/cpu.h index d2ca8c38f9c4..f9b1fab4388a 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/node.h> | 16 | #include <linux/node.h> |
17 | #include <linux/compiler.h> | 17 | #include <linux/compiler.h> |
18 | #include <linux/cpumask.h> | 18 | #include <linux/cpumask.h> |
19 | #include <linux/cpuhotplug.h> | ||
19 | 20 | ||
20 | struct device; | 21 | struct device; |
21 | struct device_node; | 22 | struct device_node; |
@@ -27,6 +28,9 @@ struct cpu { | |||
27 | struct device dev; | 28 | struct device dev; |
28 | }; | 29 | }; |
29 | 30 | ||
31 | extern void boot_cpu_init(void); | ||
32 | extern void boot_cpu_state_init(void); | ||
33 | |||
30 | extern int register_cpu(struct cpu *cpu, int num); | 34 | extern int register_cpu(struct cpu *cpu, int num); |
31 | extern struct device *get_cpu_device(unsigned cpu); | 35 | extern struct device *get_cpu_device(unsigned cpu); |
32 | extern bool cpu_is_hotpluggable(unsigned cpu); | 36 | extern bool cpu_is_hotpluggable(unsigned cpu); |
@@ -74,7 +78,7 @@ enum { | |||
74 | /* migration should happen before other stuff but after perf */ | 78 | /* migration should happen before other stuff but after perf */ |
75 | CPU_PRI_PERF = 20, | 79 | CPU_PRI_PERF = 20, |
76 | CPU_PRI_MIGRATION = 10, | 80 | CPU_PRI_MIGRATION = 10, |
77 | CPU_PRI_SMPBOOT = 9, | 81 | |
78 | /* bring up workqueues before normal notifiers and down after */ | 82 | /* bring up workqueues before normal notifiers and down after */ |
79 | CPU_PRI_WORKQUEUE_UP = 5, | 83 | CPU_PRI_WORKQUEUE_UP = 5, |
80 | CPU_PRI_WORKQUEUE_DOWN = -5, | 84 | CPU_PRI_WORKQUEUE_DOWN = -5, |
@@ -97,9 +101,7 @@ enum { | |||
97 | * Called on the new cpu, just before | 101 | * Called on the new cpu, just before |
98 | * enabling interrupts. Must not sleep, | 102 | * enabling interrupts. Must not sleep, |
99 | * must not fail */ | 103 | * must not fail */ |
100 | #define CPU_DYING_IDLE 0x000B /* CPU (unsigned)v dying, reached | 104 | #define CPU_BROKEN 0x000B /* CPU (unsigned)v did not die properly, |
101 | * idle loop. */ | ||
102 | #define CPU_BROKEN 0x000C /* CPU (unsigned)v did not die properly, | ||
103 | * perhaps due to preemption. */ | 105 | * perhaps due to preemption. */ |
104 | 106 | ||
105 | /* Used for CPU hotplug events occurring while tasks are frozen due to a suspend | 107 | /* Used for CPU hotplug events occurring while tasks are frozen due to a suspend |
@@ -118,6 +120,7 @@ enum { | |||
118 | 120 | ||
119 | 121 | ||
120 | #ifdef CONFIG_SMP | 122 | #ifdef CONFIG_SMP |
123 | extern bool cpuhp_tasks_frozen; | ||
121 | /* Need to know about CPUs going up/down? */ | 124 | /* Need to know about CPUs going up/down? */ |
122 | #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) | 125 | #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) |
123 | #define cpu_notifier(fn, pri) { \ | 126 | #define cpu_notifier(fn, pri) { \ |
@@ -167,7 +170,6 @@ static inline void __unregister_cpu_notifier(struct notifier_block *nb) | |||
167 | } | 170 | } |
168 | #endif | 171 | #endif |
169 | 172 | ||
170 | void smpboot_thread_init(void); | ||
171 | int cpu_up(unsigned int cpu); | 173 | int cpu_up(unsigned int cpu); |
172 | void notify_cpu_starting(unsigned int cpu); | 174 | void notify_cpu_starting(unsigned int cpu); |
173 | extern void cpu_maps_update_begin(void); | 175 | extern void cpu_maps_update_begin(void); |
@@ -177,6 +179,7 @@ extern void cpu_maps_update_done(void); | |||
177 | #define cpu_notifier_register_done cpu_maps_update_done | 179 | #define cpu_notifier_register_done cpu_maps_update_done |
178 | 180 | ||
179 | #else /* CONFIG_SMP */ | 181 | #else /* CONFIG_SMP */ |
182 | #define cpuhp_tasks_frozen 0 | ||
180 | 183 | ||
181 | #define cpu_notifier(fn, pri) do { (void)(fn); } while (0) | 184 | #define cpu_notifier(fn, pri) do { (void)(fn); } while (0) |
182 | #define __cpu_notifier(fn, pri) do { (void)(fn); } while (0) | 185 | #define __cpu_notifier(fn, pri) do { (void)(fn); } while (0) |
@@ -215,10 +218,6 @@ static inline void cpu_notifier_register_done(void) | |||
215 | { | 218 | { |
216 | } | 219 | } |
217 | 220 | ||
218 | static inline void smpboot_thread_init(void) | ||
219 | { | ||
220 | } | ||
221 | |||
222 | #endif /* CONFIG_SMP */ | 221 | #endif /* CONFIG_SMP */ |
223 | extern struct bus_type cpu_subsys; | 222 | extern struct bus_type cpu_subsys; |
224 | 223 | ||
@@ -265,11 +264,6 @@ static inline int disable_nonboot_cpus(void) { return 0; } | |||
265 | static inline void enable_nonboot_cpus(void) {} | 264 | static inline void enable_nonboot_cpus(void) {} |
266 | #endif /* !CONFIG_PM_SLEEP_SMP */ | 265 | #endif /* !CONFIG_PM_SLEEP_SMP */ |
267 | 266 | ||
268 | enum cpuhp_state { | ||
269 | CPUHP_OFFLINE, | ||
270 | CPUHP_ONLINE, | ||
271 | }; | ||
272 | |||
273 | void cpu_startup_entry(enum cpuhp_state state); | 267 | void cpu_startup_entry(enum cpuhp_state state); |
274 | 268 | ||
275 | void cpu_idle_poll_ctrl(bool enable); | 269 | void cpu_idle_poll_ctrl(bool enable); |
@@ -280,14 +274,15 @@ void arch_cpu_idle_enter(void); | |||
280 | void arch_cpu_idle_exit(void); | 274 | void arch_cpu_idle_exit(void); |
281 | void arch_cpu_idle_dead(void); | 275 | void arch_cpu_idle_dead(void); |
282 | 276 | ||
283 | DECLARE_PER_CPU(bool, cpu_dead_idle); | ||
284 | |||
285 | int cpu_report_state(int cpu); | 277 | int cpu_report_state(int cpu); |
286 | int cpu_check_up_prepare(int cpu); | 278 | int cpu_check_up_prepare(int cpu); |
287 | void cpu_set_state_online(int cpu); | 279 | void cpu_set_state_online(int cpu); |
288 | #ifdef CONFIG_HOTPLUG_CPU | 280 | #ifdef CONFIG_HOTPLUG_CPU |
289 | bool cpu_wait_death(unsigned int cpu, int seconds); | 281 | bool cpu_wait_death(unsigned int cpu, int seconds); |
290 | bool cpu_report_death(void); | 282 | bool cpu_report_death(void); |
283 | void cpuhp_report_idle_dead(void); | ||
284 | #else | ||
285 | static inline void cpuhp_report_idle_dead(void) { } | ||
291 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 286 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
292 | 287 | ||
293 | #endif /* _LINUX_CPU_H_ */ | 288 | #endif /* _LINUX_CPU_H_ */ |
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h new file mode 100644 index 000000000000..5d68e15e46b7 --- /dev/null +++ b/include/linux/cpuhotplug.h | |||
@@ -0,0 +1,93 @@ | |||
1 | #ifndef __CPUHOTPLUG_H | ||
2 | #define __CPUHOTPLUG_H | ||
3 | |||
4 | enum cpuhp_state { | ||
5 | CPUHP_OFFLINE, | ||
6 | CPUHP_CREATE_THREADS, | ||
7 | CPUHP_NOTIFY_PREPARE, | ||
8 | CPUHP_BRINGUP_CPU, | ||
9 | CPUHP_AP_IDLE_DEAD, | ||
10 | CPUHP_AP_OFFLINE, | ||
11 | CPUHP_AP_NOTIFY_STARTING, | ||
12 | CPUHP_AP_ONLINE, | ||
13 | CPUHP_TEARDOWN_CPU, | ||
14 | CPUHP_AP_ONLINE_IDLE, | ||
15 | CPUHP_AP_SMPBOOT_THREADS, | ||
16 | CPUHP_AP_NOTIFY_ONLINE, | ||
17 | CPUHP_AP_ONLINE_DYN, | ||
18 | CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 30, | ||
19 | CPUHP_ONLINE, | ||
20 | }; | ||
21 | |||
22 | int __cpuhp_setup_state(enum cpuhp_state state, const char *name, bool invoke, | ||
23 | int (*startup)(unsigned int cpu), | ||
24 | int (*teardown)(unsigned int cpu)); | ||
25 | |||
26 | /** | ||
27 | * cpuhp_setup_state - Setup hotplug state callbacks with calling the callbacks | ||
28 | * @state: The state for which the calls are installed | ||
29 | * @name: Name of the callback (will be used in debug output) | ||
30 | * @startup: startup callback function | ||
31 | * @teardown: teardown callback function | ||
32 | * | ||
33 | * Installs the callback functions and invokes the startup callback on | ||
34 | * the present cpus which have already reached the @state. | ||
35 | */ | ||
36 | static inline int cpuhp_setup_state(enum cpuhp_state state, | ||
37 | const char *name, | ||
38 | int (*startup)(unsigned int cpu), | ||
39 | int (*teardown)(unsigned int cpu)) | ||
40 | { | ||
41 | return __cpuhp_setup_state(state, name, true, startup, teardown); | ||
42 | } | ||
43 | |||
44 | /** | ||
45 | * cpuhp_setup_state_nocalls - Setup hotplug state callbacks without calling the | ||
46 | * callbacks | ||
47 | * @state: The state for which the calls are installed | ||
48 | * @name: Name of the callback. | ||
49 | * @startup: startup callback function | ||
50 | * @teardown: teardown callback function | ||
51 | * | ||
52 | * Same as @cpuhp_setup_state except that no calls are invoked | ||
53 | * during installation of this callback. NOP if SMP=n or HOTPLUG_CPU=n. | ||
54 | */ | ||
55 | static inline int cpuhp_setup_state_nocalls(enum cpuhp_state state, | ||
56 | const char *name, | ||
57 | int (*startup)(unsigned int cpu), | ||
58 | int (*teardown)(unsigned int cpu)) | ||
59 | { | ||
60 | return __cpuhp_setup_state(state, name, false, startup, teardown); | ||
61 | } | ||
62 | |||
63 | void __cpuhp_remove_state(enum cpuhp_state state, bool invoke); | ||
64 | |||
65 | /** | ||
66 | * cpuhp_remove_state - Remove hotplug state callbacks and invoke the teardown | ||
67 | * @state: The state for which the calls are removed | ||
68 | * | ||
69 | * Removes the callback functions and invokes the teardown callback on | ||
70 | * the present cpus which have already reached the @state. | ||
71 | */ | ||
72 | static inline void cpuhp_remove_state(enum cpuhp_state state) | ||
73 | { | ||
74 | __cpuhp_remove_state(state, true); | ||
75 | } | ||
76 | |||
77 | /** | ||
78 | * cpuhp_remove_state_nocalls - Remove hotplug state callbacks without invoking | ||
79 | * teardown | ||
80 | * @state: The state for which the calls are removed | ||
81 | */ | ||
82 | static inline void cpuhp_remove_state_nocalls(enum cpuhp_state state) | ||
83 | { | ||
84 | __cpuhp_remove_state(state, false); | ||
85 | } | ||
86 | |||
87 | #ifdef CONFIG_SMP | ||
88 | void cpuhp_online_idle(enum cpuhp_state state); | ||
89 | #else | ||
90 | static inline void cpuhp_online_idle(enum cpuhp_state state) { } | ||
91 | #endif | ||
92 | |||
93 | #endif | ||
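A hedged usage sketch of the new interface (the driver name and callbacks are hypothetical): a driver needing per-CPU setup registers a dynamic online state, and the core invokes the startup callback on every present CPU that has already reached it:

	static int mydrv_cpu_online(unsigned int cpu)
	{
		/* bring up per-cpu resources for @cpu */
		return 0;
	}

	static int mydrv_cpu_offline(unsigned int cpu)
	{
		/* quiesce per-cpu resources of @cpu */
		return 0;
	}

	static int __init mydrv_init(void)
	{
		return cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mydrv:online",
					 mydrv_cpu_online, mydrv_cpu_offline);
	}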
diff --git a/include/linux/irq.h b/include/linux/irq.h index 3c1c96786248..c4de62348ff2 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h | |||
@@ -133,17 +133,23 @@ struct irq_domain; | |||
133 | * Use accessor functions to deal with it | 133 | * Use accessor functions to deal with it |
134 | * @node: node index useful for balancing | 134 | * @node: node index useful for balancing |
135 | * @handler_data: per-IRQ data for the irq_chip methods | 135 | * @handler_data: per-IRQ data for the irq_chip methods |
136 | * @affinity: IRQ affinity on SMP | 136 | * @affinity: IRQ affinity on SMP. If this is an IPI |
137 | * related irq, then this is the mask of the | ||
138 | * CPUs to which an IPI can be sent. | ||
137 | * @msi_desc: MSI descriptor | 139 | * @msi_desc: MSI descriptor |
140 | * @ipi_offset: Offset of first IPI target cpu in @affinity. Optional. | ||
138 | */ | 141 | */ |
139 | struct irq_common_data { | 142 | struct irq_common_data { |
140 | unsigned int state_use_accessors; | 143 | unsigned int __private state_use_accessors; |
141 | #ifdef CONFIG_NUMA | 144 | #ifdef CONFIG_NUMA |
142 | unsigned int node; | 145 | unsigned int node; |
143 | #endif | 146 | #endif |
144 | void *handler_data; | 147 | void *handler_data; |
145 | struct msi_desc *msi_desc; | 148 | struct msi_desc *msi_desc; |
146 | cpumask_var_t affinity; | 149 | cpumask_var_t affinity; |
150 | #ifdef CONFIG_GENERIC_IRQ_IPI | ||
151 | unsigned int ipi_offset; | ||
152 | #endif | ||
147 | }; | 153 | }; |
148 | 154 | ||
149 | /** | 155 | /** |
@@ -208,7 +214,7 @@ enum { | |||
208 | IRQD_FORWARDED_TO_VCPU = (1 << 20), | 214 | IRQD_FORWARDED_TO_VCPU = (1 << 20), |
209 | }; | 215 | }; |
210 | 216 | ||
211 | #define __irqd_to_state(d) ((d)->common->state_use_accessors) | 217 | #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) |
212 | 218 | ||
213 | static inline bool irqd_is_setaffinity_pending(struct irq_data *d) | 219 | static inline bool irqd_is_setaffinity_pending(struct irq_data *d) |
214 | { | 220 | { |
@@ -299,6 +305,8 @@ static inline void irqd_clr_forwarded_to_vcpu(struct irq_data *d) | |||
299 | __irqd_to_state(d) &= ~IRQD_FORWARDED_TO_VCPU; | 305 | __irqd_to_state(d) &= ~IRQD_FORWARDED_TO_VCPU; |
300 | } | 306 | } |
301 | 307 | ||
308 | #undef __irqd_to_state | ||
309 | |||
302 | static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) | 310 | static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) |
303 | { | 311 | { |
304 | return d->hwirq; | 312 | return d->hwirq; |
@@ -341,6 +349,8 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) | |||
341 | * @irq_get_irqchip_state: return the internal state of an interrupt | 349 | * @irq_get_irqchip_state: return the internal state of an interrupt |
342 | * @irq_set_irqchip_state: set the internal state of an interrupt | 350 | * @irq_set_irqchip_state: set the internal state of an interrupt |
343 | * @irq_set_vcpu_affinity: optional to target a vCPU in a virtual machine | 351 | * @irq_set_vcpu_affinity: optional to target a vCPU in a virtual machine |
352 | * @ipi_send_single: send a single IPI to destination cpus | ||
353 | * @ipi_send_mask: send an IPI to destination cpus in cpumask | ||
344 | * @flags: chip specific flags | 354 | * @flags: chip specific flags |
345 | */ | 355 | */ |
346 | struct irq_chip { | 356 | struct irq_chip { |
@@ -385,6 +395,9 @@ struct irq_chip { | |||
385 | 395 | ||
386 | int (*irq_set_vcpu_affinity)(struct irq_data *data, void *vcpu_info); | 396 | int (*irq_set_vcpu_affinity)(struct irq_data *data, void *vcpu_info); |
387 | 397 | ||
398 | void (*ipi_send_single)(struct irq_data *data, unsigned int cpu); | ||
399 | void (*ipi_send_mask)(struct irq_data *data, const struct cpumask *dest); | ||
400 | |||
388 | unsigned long flags; | 401 | unsigned long flags; |
389 | }; | 402 | }; |
390 | 403 | ||
@@ -934,4 +947,12 @@ static inline u32 irq_reg_readl(struct irq_chip_generic *gc, | |||
934 | return readl(gc->reg_base + reg_offset); | 947 | return readl(gc->reg_base + reg_offset); |
935 | } | 948 | } |
936 | 949 | ||
950 | /* Contrary to Linux irqs, for hardware irqs the irq number 0 is valid */ | ||
951 | #define INVALID_HWIRQ (~0UL) | ||
952 | irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu); | ||
953 | int __ipi_send_single(struct irq_desc *desc, unsigned int cpu); | ||
954 | int __ipi_send_mask(struct irq_desc *desc, const struct cpumask *dest); | ||
955 | int ipi_send_single(unsigned int virq, unsigned int cpu); | ||
956 | int ipi_send_mask(unsigned int virq, const struct cpumask *dest); | ||
957 | |||
937 | #endif /* _LINUX_IRQ_H */ | 958 | #endif /* _LINUX_IRQ_H */ |
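An irqchip driver opts into the generic IPI path by filling the new hooks; a minimal sketch (the doorbell write is an assumption, only the hook signatures come from this hunk):

	static void demo_ipi_send_single(struct irq_data *data, unsigned int cpu)
	{
		/* write irqd_to_hwirq(data) into the chip's doorbell for @cpu */
	}

	static struct irq_chip demo_ipi_chip = {
		.name			= "DEMO-IPI",
		.ipi_send_single	= demo_ipi_send_single,
	};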
diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index ce824db48d64..80f89e4a29ac 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h | |||
@@ -261,9 +261,6 @@ extern void gic_write_compare(cycle_t cnt); | |||
261 | extern void gic_write_cpu_compare(cycle_t cnt, int cpu); | 261 | extern void gic_write_cpu_compare(cycle_t cnt, int cpu); |
262 | extern void gic_start_count(void); | 262 | extern void gic_start_count(void); |
263 | extern void gic_stop_count(void); | 263 | extern void gic_stop_count(void); |
264 | extern void gic_send_ipi(unsigned int intr); | ||
265 | extern unsigned int plat_ipi_call_int_xlate(unsigned int); | ||
266 | extern unsigned int plat_ipi_resched_int_xlate(unsigned int); | ||
267 | extern int gic_get_c0_compare_int(void); | 264 | extern int gic_get_c0_compare_int(void); |
268 | extern int gic_get_c0_perfcount_int(void); | 265 | extern int gic_get_c0_perfcount_int(void); |
269 | extern int gic_get_c0_fdc_int(void); | 266 | extern int gic_get_c0_fdc_int(void); |
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 04579d9fbce4..ed48594e96d2 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h | |||
@@ -74,6 +74,7 @@ enum irq_domain_bus_token { | |||
74 | DOMAIN_BUS_PCI_MSI, | 74 | DOMAIN_BUS_PCI_MSI, |
75 | DOMAIN_BUS_PLATFORM_MSI, | 75 | DOMAIN_BUS_PLATFORM_MSI, |
76 | DOMAIN_BUS_NEXUS, | 76 | DOMAIN_BUS_NEXUS, |
77 | DOMAIN_BUS_IPI, | ||
77 | }; | 78 | }; |
78 | 79 | ||
79 | /** | 80 | /** |
@@ -172,6 +173,12 @@ enum { | |||
172 | /* Core calls alloc/free recursive through the domain hierarchy. */ | 173 | /* Core calls alloc/free recursive through the domain hierarchy. */ |
173 | IRQ_DOMAIN_FLAG_AUTO_RECURSIVE = (1 << 1), | 174 | IRQ_DOMAIN_FLAG_AUTO_RECURSIVE = (1 << 1), |
174 | 175 | ||
176 | /* Irq domain is an IPI domain with virq per cpu */ | ||
177 | IRQ_DOMAIN_FLAG_IPI_PER_CPU = (1 << 2), | ||
178 | |||
179 | /* Irq domain is an IPI domain with single virq */ | ||
180 | IRQ_DOMAIN_FLAG_IPI_SINGLE = (1 << 3), | ||
181 | |||
175 | /* | 182 | /* |
176 | * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved | 183 | * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved |
177 | * for implementation specific purposes and ignored by the | 184 | * for implementation specific purposes and ignored by the |
@@ -206,6 +213,8 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, | |||
206 | extern struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode, | 213 | extern struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode, |
207 | enum irq_domain_bus_token bus_token); | 214 | enum irq_domain_bus_token bus_token); |
208 | extern void irq_set_default_host(struct irq_domain *host); | 215 | extern void irq_set_default_host(struct irq_domain *host); |
216 | extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, | ||
217 | irq_hw_number_t hwirq, int node); | ||
209 | 218 | ||
210 | static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node) | 219 | static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node) |
211 | { | 220 | { |
@@ -335,6 +344,11 @@ int irq_domain_xlate_onetwocell(struct irq_domain *d, struct device_node *ctrlr, | |||
335 | const u32 *intspec, unsigned int intsize, | 344 | const u32 *intspec, unsigned int intsize, |
336 | irq_hw_number_t *out_hwirq, unsigned int *out_type); | 345 | irq_hw_number_t *out_hwirq, unsigned int *out_type); |
337 | 346 | ||
347 | /* IPI functions */ | ||
348 | unsigned int irq_reserve_ipi(struct irq_domain *domain, | ||
349 | const struct cpumask *dest); | ||
350 | void irq_destroy_ipi(unsigned int irq); | ||
351 | |||
338 | /* V2 interfaces to support hierarchy IRQ domains. */ | 352 | /* V2 interfaces to support hierarchy IRQ domains. */ |
339 | extern struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, | 353 | extern struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, |
340 | unsigned int virq); | 354 | unsigned int virq); |
@@ -400,6 +414,22 @@ static inline bool irq_domain_is_hierarchy(struct irq_domain *domain) | |||
400 | { | 414 | { |
401 | return domain->flags & IRQ_DOMAIN_FLAG_HIERARCHY; | 415 | return domain->flags & IRQ_DOMAIN_FLAG_HIERARCHY; |
402 | } | 416 | } |
417 | |||
418 | static inline bool irq_domain_is_ipi(struct irq_domain *domain) | ||
419 | { | ||
420 | return domain->flags & | ||
421 | (IRQ_DOMAIN_FLAG_IPI_PER_CPU | IRQ_DOMAIN_FLAG_IPI_SINGLE); | ||
422 | } | ||
423 | |||
424 | static inline bool irq_domain_is_ipi_per_cpu(struct irq_domain *domain) | ||
425 | { | ||
426 | return domain->flags & IRQ_DOMAIN_FLAG_IPI_PER_CPU; | ||
427 | } | ||
428 | |||
429 | static inline bool irq_domain_is_ipi_single(struct irq_domain *domain) | ||
430 | { | ||
431 | return domain->flags & IRQ_DOMAIN_FLAG_IPI_SINGLE; | ||
432 | } | ||
403 | #else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ | 433 | #else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ |
404 | static inline void irq_domain_activate_irq(struct irq_data *data) { } | 434 | static inline void irq_domain_activate_irq(struct irq_data *data) { } |
405 | static inline void irq_domain_deactivate_irq(struct irq_data *data) { } | 435 | static inline void irq_domain_deactivate_irq(struct irq_data *data) { } |
@@ -413,6 +443,21 @@ static inline bool irq_domain_is_hierarchy(struct irq_domain *domain) | |||
413 | { | 443 | { |
414 | return false; | 444 | return false; |
415 | } | 445 | } |
446 | |||
447 | static inline bool irq_domain_is_ipi(struct irq_domain *domain) | ||
448 | { | ||
449 | return false; | ||
450 | } | ||
451 | |||
452 | static inline bool irq_domain_is_ipi_per_cpu(struct irq_domain *domain) | ||
453 | { | ||
454 | return false; | ||
455 | } | ||
456 | |||
457 | static inline bool irq_domain_is_ipi_single(struct irq_domain *domain) | ||
458 | { | ||
459 | return false; | ||
460 | } | ||
416 | #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ | 461 | #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ |
417 | 462 | ||
418 | #else /* CONFIG_IRQ_DOMAIN */ | 463 | #else /* CONFIG_IRQ_DOMAIN */ |
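Putting the two halves together, a caller reserves a virq from an IPI-capable domain and then uses the linux/irq.h senders; a sketch (the domain pointer and target CPU are assumptions, and the 0-means-failure convention is inferred from the unsigned return type):

	unsigned int virq = irq_reserve_ipi(ipi_domain, cpu_possible_mask);

	if (virq) {
		ipi_send_single(virq, 1);	/* kick CPU 1 */
		irq_destroy_ipi(virq);		/* release the reservation */
	}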
diff --git a/include/linux/mm.h b/include/linux/mm.h index 2b6e22782699..3579d1e2fe3a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -2139,6 +2139,8 @@ int remap_pfn_range(struct vm_area_struct *, unsigned long addr, | |||
2139 | int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *); | 2139 | int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *); |
2140 | int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, | 2140 | int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, |
2141 | unsigned long pfn); | 2141 | unsigned long pfn); |
2142 | int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, | ||
2143 | unsigned long pfn, pgprot_t pgprot); | ||
2142 | int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, | 2144 | int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, |
2143 | pfn_t pfn); | 2145 | pfn_t pfn); |
2144 | int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len); | 2146 | int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len); |
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 624b78b848b8..944b2b37313b 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h | |||
@@ -566,10 +566,26 @@ static inline void clear_tlb_flush_pending(struct mm_struct *mm) | |||
566 | } | 566 | } |
567 | #endif | 567 | #endif |
568 | 568 | ||
569 | struct vm_special_mapping | 569 | struct vm_fault; |
570 | { | 570 | |
571 | const char *name; | 571 | struct vm_special_mapping { |
572 | const char *name; /* The name, e.g. "[vdso]". */ | ||
573 | |||
574 | /* | ||
575 | * If .fault is not provided, this points to a | ||
576 | * NULL-terminated array of pages that back the special mapping. | ||
577 | * | ||
578 | * This must not be NULL unless .fault is provided. | ||
579 | */ | ||
572 | struct page **pages; | 580 | struct page **pages; |
581 | |||
582 | /* | ||
583 | * If non-NULL, then this is called to resolve page faults | ||
584 | * on the special mapping. If used, .pages is not checked. | ||
585 | */ | ||
586 | int (*fault)(const struct vm_special_mapping *sm, | ||
587 | struct vm_area_struct *vma, | ||
588 | struct vm_fault *vmf); | ||
573 | }; | 589 | }; |
574 | 590 | ||
575 | enum tlb_flush_reason { | 591 | enum tlb_flush_reason { |
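A hedged sketch of the new hook (the handler policy is a placeholder; the comment block above names the fault-time page lookup as the alternative to a static .pages array):

	static int demo_special_fault(const struct vm_special_mapping *sm,
				      struct vm_area_struct *vma,
				      struct vm_fault *vmf)
	{
		/* Resolve the faulting page here, e.g. to remap a
		 * per-boot page instead of a fixed array. */
		return VM_FAULT_SIGBUS;	/* placeholder */
	}

	static const struct vm_special_mapping demo_mapping = {
		.name	= "[demo]",
		.fault	= demo_special_fault,	/* .pages unchecked when set */
	};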
diff --git a/include/linux/notifier.h b/include/linux/notifier.h index d14a4c362465..4149868de4e6 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h | |||
@@ -47,6 +47,8 @@ | |||
47 | * runtime initialization. | 47 | * runtime initialization. |
48 | */ | 48 | */ |
49 | 49 | ||
50 | struct notifier_block; | ||
51 | |||
50 | typedef int (*notifier_fn_t)(struct notifier_block *nb, | 52 | typedef int (*notifier_fn_t)(struct notifier_block *nb, |
51 | unsigned long action, void *data); | 53 | unsigned long action, void *data); |
52 | 54 | ||
diff --git a/include/linux/pps_kernel.h b/include/linux/pps_kernel.h index 54bf1484d41f..35ac903956c7 100644 --- a/include/linux/pps_kernel.h +++ b/include/linux/pps_kernel.h | |||
@@ -111,22 +111,17 @@ static inline void timespec_to_pps_ktime(struct pps_ktime *kt, | |||
111 | kt->nsec = ts.tv_nsec; | 111 | kt->nsec = ts.tv_nsec; |
112 | } | 112 | } |
113 | 113 | ||
114 | #ifdef CONFIG_NTP_PPS | ||
115 | |||
116 | static inline void pps_get_ts(struct pps_event_time *ts) | 114 | static inline void pps_get_ts(struct pps_event_time *ts) |
117 | { | 115 | { |
118 | ktime_get_raw_and_real_ts64(&ts->ts_raw, &ts->ts_real); | 116 | struct system_time_snapshot snap; |
119 | } | ||
120 | 117 | ||
121 | #else /* CONFIG_NTP_PPS */ | 118 | ktime_get_snapshot(&snap); |
122 | 119 | ts->ts_real = ktime_to_timespec64(snap.real); | |
123 | static inline void pps_get_ts(struct pps_event_time *ts) | 120 | #ifdef CONFIG_NTP_PPS |
124 | { | 121 | ts->ts_raw = ktime_to_timespec64(snap.raw); |
125 | ktime_get_real_ts64(&ts->ts_real); | 122 | #endif |
126 | } | 123 | } |
127 | 124 | ||
128 | #endif /* CONFIG_NTP_PPS */ | ||
129 | |||
130 | /* Subtract known time delay from PPS event time(s) */ | 125 | /* Subtract known time delay from PPS event time(s) */ |
131 | static inline void pps_sub_ts(struct pps_event_time *ts, struct timespec64 delta) | 126 | static inline void pps_sub_ts(struct pps_event_time *ts, struct timespec64 delta) |
132 | { | 127 | { |
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index b8b73066d137..6b15e168148a 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h | |||
@@ -38,6 +38,7 @@ struct ptp_clock_request { | |||
38 | }; | 38 | }; |
39 | }; | 39 | }; |
40 | 40 | ||
41 | struct system_device_crosststamp; | ||
41 | /** | 42 | /** |
42 | * struct ptp_clock_info - describes a PTP hardware clock | 43 | * struct ptp_clock_info - describes a PTP hardware clock |
43 | * | 44 | * |
@@ -67,6 +68,11 @@ struct ptp_clock_request { | |||
67 | * @gettime64: Reads the current time from the hardware clock. | 68 | * @gettime64: Reads the current time from the hardware clock. |
68 | * parameter ts: Holds the result. | 69 | * parameter ts: Holds the result. |
69 | * | 70 | * |
71 | * @getcrosststamp: Reads the current time from the hardware clock and | ||
72 | * system clock simultaneously. | ||
73 | * parameter cts: Contains timestamp (device, system) pair, | ||
74 | * where system time is realtime and monotonic. | ||
75 | * | ||
70 | * @settime64: Set the current time on the hardware clock. | 76 | * @settime64: Set the current time on the hardware clock. |
71 | * parameter ts: Time value to set. | 77 | * parameter ts: Time value to set. |
72 | * | 78 | * |
@@ -105,6 +111,8 @@ struct ptp_clock_info { | |||
105 | int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta); | 111 | int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta); |
106 | int (*adjtime)(struct ptp_clock_info *ptp, s64 delta); | 112 | int (*adjtime)(struct ptp_clock_info *ptp, s64 delta); |
107 | int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts); | 113 | int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts); |
114 | int (*getcrosststamp)(struct ptp_clock_info *ptp, | ||
115 | struct system_device_crosststamp *cts); | ||
108 | int (*settime64)(struct ptp_clock_info *p, const struct timespec64 *ts); | 116 | int (*settime64)(struct ptp_clock_info *p, const struct timespec64 *ts); |
109 | int (*enable)(struct ptp_clock_info *ptp, | 117 | int (*enable)(struct ptp_clock_info *ptp, |
110 | struct ptp_clock_request *request, int on); | 118 | struct ptp_clock_request *request, int on); |
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 14e6f47ee16f..2657aff2725b 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h | |||
@@ -332,9 +332,7 @@ void rcu_init(void); | |||
332 | void rcu_sched_qs(void); | 332 | void rcu_sched_qs(void); |
333 | void rcu_bh_qs(void); | 333 | void rcu_bh_qs(void); |
334 | void rcu_check_callbacks(int user); | 334 | void rcu_check_callbacks(int user); |
335 | struct notifier_block; | 335 | void rcu_report_dead(unsigned int cpu); |
336 | int rcu_cpu_notify(struct notifier_block *self, | ||
337 | unsigned long action, void *hcpu); | ||
338 | 336 | ||
339 | #ifndef CONFIG_TINY_RCU | 337 | #ifndef CONFIG_TINY_RCU |
340 | void rcu_end_inkernel_boot(void); | 338 | void rcu_end_inkernel_boot(void); |
@@ -360,8 +358,6 @@ void rcu_user_exit(void); | |||
360 | #else | 358 | #else |
361 | static inline void rcu_user_enter(void) { } | 359 | static inline void rcu_user_enter(void) { } |
362 | static inline void rcu_user_exit(void) { } | 360 | static inline void rcu_user_exit(void) { } |
363 | static inline void rcu_user_hooks_switch(struct task_struct *prev, | ||
364 | struct task_struct *next) { } | ||
365 | #endif /* CONFIG_NO_HZ_FULL */ | 361 | #endif /* CONFIG_NO_HZ_FULL */ |
366 | 362 | ||
367 | #ifdef CONFIG_RCU_NOCB_CPU | 363 | #ifdef CONFIG_RCU_NOCB_CPU |
diff --git a/include/linux/srcu.h b/include/linux/srcu.h index f5f80c5643ac..dc8eb63c6568 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h | |||
@@ -99,8 +99,23 @@ void process_srcu(struct work_struct *work); | |||
99 | } | 99 | } |
100 | 100 | ||
101 | /* | 101 | /* |
102 | * define and init a srcu struct at build time. | 102 | * Define and initialize a srcu struct at build time. |
103 | * dont't call init_srcu_struct() nor cleanup_srcu_struct() on it. | 103 | * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it. |
104 | * | ||
105 | * Note that although DEFINE_STATIC_SRCU() hides the name from other | ||
106 | * files, the per-CPU variable rules nevertheless require that the | ||
107 | * chosen name be globally unique. These rules also prohibit use of | ||
108 | * DEFINE_STATIC_SRCU() within a function. If these rules are too | ||
109 | * restrictive, declare the srcu_struct manually. For example, in | ||
110 | * each file: | ||
111 | * | ||
112 | * static struct srcu_struct my_srcu; | ||
113 | * | ||
114 | * Then, before the first use of each my_srcu, manually initialize it: | ||
115 | * | ||
116 | * init_srcu_struct(&my_srcu); | ||
117 | * | ||
118 | * See include/linux/percpu-defs.h for the rules on per-CPU variables. | ||
104 | */ | 119 | */ |
105 | #define __DEFINE_SRCU(name, is_static) \ | 120 | #define __DEFINE_SRCU(name, is_static) \ |
106 | static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\ | 121 | static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\ |
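For contrast with the manual route described in the comment above, the build-time form plus a typical read side (sketch; the chosen name must still be globally unique):

	DEFINE_STATIC_SRCU(demo_srcu);

	int idx = srcu_read_lock(&demo_srcu);
	/* ... dereference demo_srcu-protected pointers ... */
	srcu_read_unlock(&demo_srcu, idx);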
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 25247220b4b7..e88005459035 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h | |||
@@ -50,6 +50,7 @@ struct tk_read_base { | |||
50 | * @offs_tai: Offset clock monotonic -> clock tai | 50 | * @offs_tai: Offset clock monotonic -> clock tai |
51 | * @tai_offset: The current UTC to TAI offset in seconds | 51 | * @tai_offset: The current UTC to TAI offset in seconds |
52 | * @clock_was_set_seq: The sequence number of clock was set events | 52 | * @clock_was_set_seq: The sequence number of clock was set events |
53 | * @cs_was_changed_seq: The sequence number of clocksource change events | ||
53 | * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second | 54 | * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second |
54 | * @raw_time: Monotonic raw base time in timespec64 format | 55 | * @raw_time: Monotonic raw base time in timespec64 format |
55 | * @cycle_interval: Number of clock cycles in one NTP interval | 56 | * @cycle_interval: Number of clock cycles in one NTP interval |
@@ -91,6 +92,7 @@ struct timekeeper { | |||
91 | ktime_t offs_tai; | 92 | ktime_t offs_tai; |
92 | s32 tai_offset; | 93 | s32 tai_offset; |
93 | unsigned int clock_was_set_seq; | 94 | unsigned int clock_was_set_seq; |
95 | u8 cs_was_changed_seq; | ||
94 | ktime_t next_leap_ktime; | 96 | ktime_t next_leap_ktime; |
95 | struct timespec64 raw_time; | 97 | struct timespec64 raw_time; |
96 | 98 | ||
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index ec89d846324c..96f37bee3bc1 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h | |||
@@ -267,6 +267,64 @@ extern void ktime_get_raw_and_real_ts64(struct timespec64 *ts_raw, | |||
267 | struct timespec64 *ts_real); | 267 | struct timespec64 *ts_real); |
268 | 268 | ||
269 | /* | 269 | /* |
270 | * struct system_time_snapshot - simultaneous raw/real time capture with | ||
271 | * counter value | ||
272 | * @cycles: Clocksource counter value to produce the system times | ||
273 | * @real: Realtime system time | ||
274 | * @raw: Monotonic raw system time | ||
275 | * @clock_was_set_seq: The sequence number of clock was set events | ||
276 | * @cs_was_changed_seq: The sequence number of clocksource change events | ||
277 | */ | ||
278 | struct system_time_snapshot { | ||
279 | cycle_t cycles; | ||
280 | ktime_t real; | ||
281 | ktime_t raw; | ||
282 | unsigned int clock_was_set_seq; | ||
283 | u8 cs_was_changed_seq; | ||
284 | }; | ||
285 | |||
286 | /* | ||
287 | * struct system_device_crosststamp - system/device cross-timestamp | ||
288 | * (synchronized capture) | ||
289 | * @device: Device time | ||
290 | * @sys_realtime: Realtime simultaneous with device time | ||
291 | * @sys_monoraw: Monotonic raw simultaneous with device time | ||
292 | */ | ||
293 | struct system_device_crosststamp { | ||
294 | ktime_t device; | ||
295 | ktime_t sys_realtime; | ||
296 | ktime_t sys_monoraw; | ||
297 | }; | ||
298 | |||
299 | /* | ||
300 | * struct system_counterval_t - system counter value with the pointer to the | ||
301 | * corresponding clocksource | ||
302 | * @cycles: System counter value | ||
303 | * @cs: Clocksource corresponding to system counter value. Used by | ||
304 | * timekeeping code to verify comparability of two cycle values | ||
305 | */ | ||
306 | struct system_counterval_t { | ||
307 | cycle_t cycles; | ||
308 | struct clocksource *cs; | ||
309 | }; | ||
310 | |||
311 | /* | ||
312 | * Get cross timestamp between system clock and device clock | ||
313 | */ | ||
314 | extern int get_device_system_crosststamp( | ||
315 | int (*get_time_fn)(ktime_t *device_time, | ||
316 | struct system_counterval_t *system_counterval, | ||
317 | void *ctx), | ||
318 | void *ctx, | ||
319 | struct system_time_snapshot *history, | ||
320 | struct system_device_crosststamp *xtstamp); | ||
321 | |||
322 | /* | ||
323 | * Simultaneously snapshot realtime and monotonic raw clocks | ||
324 | */ | ||
325 | extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot); | ||
326 | |||
327 | /* | ||
270 | * Persistent clock related interfaces | 328 | * Persistent clock related interfaces |
271 | */ | 329 | */ |
272 | extern int persistent_clock_is_local; | 330 | extern int persistent_clock_is_local; |
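The contract of the get_time_fn callback follows from the structures above: it returns the device time and the simultaneously captured system counter value, tagged with the clocksource that produced it so the core can reject mismatched counters. A hedged sketch (the capture itself and the clocksource lookup are device-specific assumptions):

	static int demo_get_syncdevicetime(ktime_t *device_time,
					   struct system_counterval_t *system_counterval,
					   void *ctx)
	{
		u64 dev_ns = 0, sys_cycles = 0;

		/* Trigger the hardware's synchronized capture and read back
		 * both halves (device-specific, assumed). */

		*device_time = ns_to_ktime(dev_ns);
		system_counterval->cycles = sys_cycles;
		/* Must name the clocksource the cycles were read from, e.g.
		 * the ART-derived counter on x86 (assumed filled in). */
		system_counterval->cs = NULL;
		return 0;
	}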
diff --git a/include/trace/events/cpuhp.h b/include/trace/events/cpuhp.h new file mode 100644 index 000000000000..a72bd93ec7e5 --- /dev/null +++ b/include/trace/events/cpuhp.h | |||
@@ -0,0 +1,66 @@ | |||
1 | #undef TRACE_SYSTEM | ||
2 | #define TRACE_SYSTEM cpuhp | ||
3 | |||
4 | #if !defined(_TRACE_CPUHP_H) || defined(TRACE_HEADER_MULTI_READ) | ||
5 | #define _TRACE_CPUHP_H | ||
6 | |||
7 | #include <linux/tracepoint.h> | ||
8 | |||
9 | TRACE_EVENT(cpuhp_enter, | ||
10 | |||
11 | TP_PROTO(unsigned int cpu, | ||
12 | int target, | ||
13 | int idx, | ||
14 | int (*fun)(unsigned int)), | ||
15 | |||
16 | TP_ARGS(cpu, target, idx, fun), | ||
17 | |||
18 | TP_STRUCT__entry( | ||
19 | __field( unsigned int, cpu ) | ||
20 | __field( int, target ) | ||
21 | __field( int, idx ) | ||
22 | __field( void *, fun ) | ||
23 | ), | ||
24 | |||
25 | TP_fast_assign( | ||
26 | __entry->cpu = cpu; | ||
27 | __entry->target = target; | ||
28 | __entry->idx = idx; | ||
29 | __entry->fun = fun; | ||
30 | ), | ||
31 | |||
32 | TP_printk("cpu: %04u target: %3d step: %3d (%pf)", | ||
33 | __entry->cpu, __entry->target, __entry->idx, __entry->fun) | ||
34 | ); | ||
35 | |||
36 | TRACE_EVENT(cpuhp_exit, | ||
37 | |||
38 | TP_PROTO(unsigned int cpu, | ||
39 | int state, | ||
40 | int idx, | ||
41 | int ret), | ||
42 | |||
43 | TP_ARGS(cpu, state, idx, ret), | ||
44 | |||
45 | TP_STRUCT__entry( | ||
46 | __field( unsigned int, cpu ) | ||
47 | __field( int, state ) | ||
48 | __field( int, idx ) | ||
49 | __field( int, ret ) | ||
50 | ), | ||
51 | |||
52 | TP_fast_assign( | ||
53 | __entry->cpu = cpu; | ||
54 | __entry->state = state; | ||
55 | __entry->idx = idx; | ||
56 | __entry->ret = ret; | ||
57 | ), | ||
58 | |||
59 | TP_printk(" cpu: %04u state: %3d step: %3d ret: %d", | ||
60 | __entry->cpu, __entry->state, __entry->idx, __entry->ret) | ||
61 | ); | ||
62 | |||
63 | #endif | ||
64 | |||
65 | /* This part must be outside protection */ | ||
66 | #include <trace/define_trace.h> | ||
diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h index f0b7bfe5da92..ac6dded80ffa 100644 --- a/include/uapi/linux/ptp_clock.h +++ b/include/uapi/linux/ptp_clock.h | |||
@@ -51,7 +51,9 @@ struct ptp_clock_caps { | |||
51 | int n_per_out; /* Number of programmable periodic signals. */ | 51 | int n_per_out; /* Number of programmable periodic signals. */ |
52 | int pps; /* Whether the clock supports a PPS callback. */ | 52 | int pps; /* Whether the clock supports a PPS callback. */ |
53 | int n_pins; /* Number of input/output pins. */ | 53 | int n_pins; /* Number of input/output pins. */ |
54 | int rsv[14]; /* Reserved for future use. */ | 54 | /* Whether the clock supports precise system-device cross timestamps */ |
55 | int cross_timestamping; | ||
56 | int rsv[13]; /* Reserved for future use. */ | ||
55 | }; | 57 | }; |
56 | 58 | ||
57 | struct ptp_extts_request { | 59 | struct ptp_extts_request { |
@@ -81,6 +83,13 @@ struct ptp_sys_offset { | |||
81 | struct ptp_clock_time ts[2 * PTP_MAX_SAMPLES + 1]; | 83 | struct ptp_clock_time ts[2 * PTP_MAX_SAMPLES + 1]; |
82 | }; | 84 | }; |
83 | 85 | ||
86 | struct ptp_sys_offset_precise { | ||
87 | struct ptp_clock_time device; | ||
88 | struct ptp_clock_time sys_realtime; | ||
89 | struct ptp_clock_time sys_monoraw; | ||
90 | unsigned int rsv[4]; /* Reserved for future use. */ | ||
91 | }; | ||
92 | |||
84 | enum ptp_pin_function { | 93 | enum ptp_pin_function { |
85 | PTP_PF_NONE, | 94 | PTP_PF_NONE, |
86 | PTP_PF_EXTTS, | 95 | PTP_PF_EXTTS, |
@@ -124,6 +133,8 @@ struct ptp_pin_desc { | |||
124 | #define PTP_SYS_OFFSET _IOW(PTP_CLK_MAGIC, 5, struct ptp_sys_offset) | 133 | #define PTP_SYS_OFFSET _IOW(PTP_CLK_MAGIC, 5, struct ptp_sys_offset) |
125 | #define PTP_PIN_GETFUNC _IOWR(PTP_CLK_MAGIC, 6, struct ptp_pin_desc) | 134 | #define PTP_PIN_GETFUNC _IOWR(PTP_CLK_MAGIC, 6, struct ptp_pin_desc) |
126 | #define PTP_PIN_SETFUNC _IOW(PTP_CLK_MAGIC, 7, struct ptp_pin_desc) | 135 | #define PTP_PIN_SETFUNC _IOW(PTP_CLK_MAGIC, 7, struct ptp_pin_desc) |
136 | #define PTP_SYS_OFFSET_PRECISE \ | ||
137 | _IOWR(PTP_CLK_MAGIC, 8, struct ptp_sys_offset_precise) | ||
127 | 138 | ||
128 | struct ptp_extts_event { | 139 | struct ptp_extts_event { |
129 | struct ptp_clock_time t; /* Time event occurred. */ | 140 | struct ptp_clock_time t; /* Time event occurred. */ |
diff --git a/init/main.c b/init/main.c index 7c27de4577ed..8dc93df20f7f 100644 --- a/init/main.c +++ b/init/main.c | |||
@@ -385,7 +385,6 @@ static noinline void __init_refok rest_init(void) | |||
385 | int pid; | 385 | int pid; |
386 | 386 | ||
387 | rcu_scheduler_starting(); | 387 | rcu_scheduler_starting(); |
388 | smpboot_thread_init(); | ||
389 | /* | 388 | /* |
390 | * We need to spawn init first so that it obtains pid 1, however | 389 | * We need to spawn init first so that it obtains pid 1, however |
391 | * the init task will end up wanting to create kthreads, which, if | 390 | * the init task will end up wanting to create kthreads, which, if |
@@ -449,20 +448,6 @@ void __init parse_early_param(void) | |||
449 | done = 1; | 448 | done = 1; |
450 | } | 449 | } |
451 | 450 | ||
452 | /* | ||
453 | * Activate the first processor. | ||
454 | */ | ||
455 | |||
456 | static void __init boot_cpu_init(void) | ||
457 | { | ||
458 | int cpu = smp_processor_id(); | ||
459 | /* Mark the boot cpu "present", "online" etc for SMP and UP case */ | ||
460 | set_cpu_online(cpu, true); | ||
461 | set_cpu_active(cpu, true); | ||
462 | set_cpu_present(cpu, true); | ||
463 | set_cpu_possible(cpu, true); | ||
464 | } | ||
465 | |||
466 | void __init __weak smp_setup_processor_id(void) | 451 | void __init __weak smp_setup_processor_id(void) |
467 | { | 452 | { |
468 | } | 453 | } |
@@ -522,6 +507,7 @@ asmlinkage __visible void __init start_kernel(void) | |||
522 | setup_command_line(command_line); | 507 | setup_command_line(command_line); |
523 | setup_nr_cpu_ids(); | 508 | setup_nr_cpu_ids(); |
524 | setup_per_cpu_areas(); | 509 | setup_per_cpu_areas(); |
510 | boot_cpu_state_init(); | ||
525 | smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ | 511 | smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ |
526 | 512 | ||
527 | build_all_zonelists(NULL, NULL); | 513 | build_all_zonelists(NULL, NULL); |
diff --git a/kernel/cpu.c b/kernel/cpu.c index 5b9d39633ce9..6ea42e8da861 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -22,13 +22,88 @@ | |||
22 | #include <linux/lockdep.h> | 22 | #include <linux/lockdep.h> |
23 | #include <linux/tick.h> | 23 | #include <linux/tick.h> |
24 | #include <linux/irq.h> | 24 | #include <linux/irq.h> |
25 | #include <linux/smpboot.h> | ||
26 | |||
25 | #include <trace/events/power.h> | 27 | #include <trace/events/power.h> |
28 | #define CREATE_TRACE_POINTS | ||
29 | #include <trace/events/cpuhp.h> | ||
26 | 30 | ||
27 | #include "smpboot.h" | 31 | #include "smpboot.h" |
28 | 32 | ||
33 | /** | ||
34 | * cpuhp_cpu_state - Per cpu hotplug state storage | ||
35 | * @state: The current cpu state | ||
36 | * @target: The target state | ||
37 | * @thread: Pointer to the hotplug thread | ||
38 | * @should_run: Thread should execute | ||
39 | * @cb_state: The state for a single callback (install/uninstall) | ||
40 | * @cb: Single callback function (install/uninstall) | ||
41 | * @result: Result of the operation | ||
42 | * @done: Signal completion to the issuer of the task | ||
43 | */ | ||
44 | struct cpuhp_cpu_state { | ||
45 | enum cpuhp_state state; | ||
46 | enum cpuhp_state target; | ||
47 | #ifdef CONFIG_SMP | ||
48 | struct task_struct *thread; | ||
49 | bool should_run; | ||
50 | enum cpuhp_state cb_state; | ||
51 | int (*cb)(unsigned int cpu); | ||
52 | int result; | ||
53 | struct completion done; | ||
54 | #endif | ||
55 | }; | ||
56 | |||
57 | static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state); | ||
58 | |||
59 | /** | ||
60 | * cpuhp_step - Hotplug state machine step | ||
61 | * @name: Name of the step | ||
62 | * @startup: Startup function of the step | ||
63 | * @teardown: Teardown function of the step | ||
64 | * @skip_onerr: Do not invoke the functions on error rollback | ||
65 | * Will go away once the notifiers are gone | ||
66 | * @cant_stop: Bringup/teardown can't be stopped at this step | ||
67 | */ | ||
68 | struct cpuhp_step { | ||
69 | const char *name; | ||
70 | int (*startup)(unsigned int cpu); | ||
71 | int (*teardown)(unsigned int cpu); | ||
72 | bool skip_onerr; | ||
73 | bool cant_stop; | ||
74 | }; | ||
75 | |||
76 | static DEFINE_MUTEX(cpuhp_state_mutex); | ||
77 | static struct cpuhp_step cpuhp_bp_states[]; | ||
78 | static struct cpuhp_step cpuhp_ap_states[]; | ||
79 | |||
80 | /** | ||
81 | * cpuhp_invoke_callback - Invoke the callbacks for a given state | ||
82 | * @cpu: The cpu for which the callback should be invoked | ||
83 | * @step: The step in the state machine | ||
84 | * @cb: The callback function to invoke | ||
85 | * | ||
86 | * Called from cpu hotplug and from the state register machinery | ||
87 | */ | ||
88 | static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state step, | ||
89 | int (*cb)(unsigned int)) | ||
90 | { | ||
91 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); | ||
92 | int ret = 0; | ||
93 | |||
94 | if (cb) { | ||
95 | trace_cpuhp_enter(cpu, st->target, step, cb); | ||
96 | ret = cb(cpu); | ||
97 | trace_cpuhp_exit(cpu, st->state, step, ret); | ||
98 | } | ||
99 | return ret; | ||
100 | } | ||
101 | |||
29 | #ifdef CONFIG_SMP | 102 | #ifdef CONFIG_SMP |
30 | /* Serializes the updates to cpu_online_mask, cpu_present_mask */ | 103 | /* Serializes the updates to cpu_online_mask, cpu_present_mask */ |
31 | static DEFINE_MUTEX(cpu_add_remove_lock); | 104 | static DEFINE_MUTEX(cpu_add_remove_lock); |
105 | bool cpuhp_tasks_frozen; | ||
106 | EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen); | ||
32 | 107 | ||
33 | /* | 108 | /* |
34 | * The following two APIs (cpu_maps_update_begin/done) must be used when | 109 | * The following two APIs (cpu_maps_update_begin/done) must be used when |
@@ -207,31 +282,281 @@ int __register_cpu_notifier(struct notifier_block *nb) | |||
207 | return raw_notifier_chain_register(&cpu_chain, nb); | 282 | return raw_notifier_chain_register(&cpu_chain, nb); |
208 | } | 283 | } |
209 | 284 | ||
210 | static int __cpu_notify(unsigned long val, void *v, int nr_to_call, | 285 | static int __cpu_notify(unsigned long val, unsigned int cpu, int nr_to_call, |
211 | int *nr_calls) | 286 | int *nr_calls) |
212 | { | 287 | { |
288 | unsigned long mod = cpuhp_tasks_frozen ? CPU_TASKS_FROZEN : 0; | ||
289 | void *hcpu = (void *)(long)cpu; | ||
290 | |||
213 | int ret; | 291 | int ret; |
214 | 292 | ||
215 | ret = __raw_notifier_call_chain(&cpu_chain, val, v, nr_to_call, | 293 | ret = __raw_notifier_call_chain(&cpu_chain, val | mod, hcpu, nr_to_call, |
216 | nr_calls); | 294 | nr_calls); |
217 | 295 | ||
218 | return notifier_to_errno(ret); | 296 | return notifier_to_errno(ret); |
219 | } | 297 | } |
220 | 298 | ||
221 | static int cpu_notify(unsigned long val, void *v) | 299 | static int cpu_notify(unsigned long val, unsigned int cpu) |
222 | { | 300 | { |
223 | return __cpu_notify(val, v, -1, NULL); | 301 | return __cpu_notify(val, cpu, -1, NULL); |
224 | } | 302 | } |
225 | 303 | ||
226 | #ifdef CONFIG_HOTPLUG_CPU | 304 | /* Notifier wrappers for transitioning to state machine */ |
305 | static int notify_prepare(unsigned int cpu) | ||
306 | { | ||
307 | int nr_calls = 0; | ||
308 | int ret; | ||
309 | |||
310 | ret = __cpu_notify(CPU_UP_PREPARE, cpu, -1, &nr_calls); | ||
311 | if (ret) { | ||
312 | nr_calls--; | ||
313 | printk(KERN_WARNING "%s: attempt to bring up CPU %u failed\n", | ||
314 | __func__, cpu); | ||
315 | __cpu_notify(CPU_UP_CANCELED, cpu, nr_calls, NULL); | ||
316 | } | ||
317 | return ret; | ||
318 | } | ||
319 | |||
320 | static int notify_online(unsigned int cpu) | ||
321 | { | ||
322 | cpu_notify(CPU_ONLINE, cpu); | ||
323 | return 0; | ||
324 | } | ||
325 | |||
326 | static int notify_starting(unsigned int cpu) | ||
327 | { | ||
328 | cpu_notify(CPU_STARTING, cpu); | ||
329 | return 0; | ||
330 | } | ||
331 | |||
332 | static int bringup_wait_for_ap(unsigned int cpu) | ||
333 | { | ||
334 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); | ||
335 | |||
336 | wait_for_completion(&st->done); | ||
337 | return st->result; | ||
338 | } | ||
339 | |||
340 | static int bringup_cpu(unsigned int cpu) | ||
341 | { | ||
342 | struct task_struct *idle = idle_thread_get(cpu); | ||
343 | int ret; | ||
344 | |||
345 | /* Arch-specific enabling code. */ | ||
346 | ret = __cpu_up(cpu, idle); | ||
347 | if (ret) { | ||
348 | cpu_notify(CPU_UP_CANCELED, cpu); | ||
349 | return ret; | ||
350 | } | ||
351 | ret = bringup_wait_for_ap(cpu); | ||
352 | BUG_ON(!cpu_online(cpu)); | ||
353 | return ret; | ||
354 | } | ||
355 | |||
356 | /* | ||
357 | * Hotplug state machine related functions | ||
358 | */ | ||
359 | static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st, | ||
360 | struct cpuhp_step *steps) | ||
361 | { | ||
362 | for (st->state++; st->state < st->target; st->state++) { | ||
363 | struct cpuhp_step *step = steps + st->state; | ||
364 | |||
365 | if (!step->skip_onerr) | ||
366 | cpuhp_invoke_callback(cpu, st->state, step->startup); | ||
367 | } | ||
368 | } | ||
369 | |||
370 | static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, | ||
371 | struct cpuhp_step *steps, enum cpuhp_state target) | ||
372 | { | ||
373 | enum cpuhp_state prev_state = st->state; | ||
374 | int ret = 0; | ||
375 | |||
376 | for (; st->state > target; st->state--) { | ||
377 | struct cpuhp_step *step = steps + st->state; | ||
378 | |||
379 | ret = cpuhp_invoke_callback(cpu, st->state, step->teardown); | ||
380 | if (ret) { | ||
381 | st->target = prev_state; | ||
382 | undo_cpu_down(cpu, st, steps); | ||
383 | break; | ||
384 | } | ||
385 | } | ||
386 | return ret; | ||
387 | } | ||
388 | |||
389 | static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st, | ||
390 | struct cpuhp_step *steps) | ||
391 | { | ||
392 | for (st->state--; st->state > st->target; st->state--) { | ||
393 | struct cpuhp_step *step = steps + st->state; | ||
394 | |||
395 | if (!step->skip_onerr) | ||
396 | cpuhp_invoke_callback(cpu, st->state, step->teardown); | ||
397 | } | ||
398 | } | ||
399 | |||
400 | static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, | ||
401 | struct cpuhp_step *steps, enum cpuhp_state target) | ||
402 | { | ||
403 | enum cpuhp_state prev_state = st->state; | ||
404 | int ret = 0; | ||
405 | |||
406 | while (st->state < target) { | ||
407 | struct cpuhp_step *step; | ||
408 | |||
409 | st->state++; | ||
410 | step = steps + st->state; | ||
411 | ret = cpuhp_invoke_callback(cpu, st->state, step->startup); | ||
412 | if (ret) { | ||
413 | st->target = prev_state; | ||
414 | undo_cpu_up(cpu, st, steps); | ||
415 | break; | ||
416 | } | ||
417 | } | ||
418 | return ret; | ||
419 | } | ||
420 | |||
421 | /* | ||
422 | * The cpu hotplug threads manage the bringup and teardown of the cpus | ||
423 | */ | ||
424 | static void cpuhp_create(unsigned int cpu) | ||
425 | { | ||
426 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); | ||
427 | |||
428 | init_completion(&st->done); | ||
429 | } | ||
430 | |||
431 | static int cpuhp_should_run(unsigned int cpu) | ||
432 | { | ||
433 | struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); | ||
434 | |||
435 | return st->should_run; | ||
436 | } | ||
437 | |||
438 | /* Execute the teardown callbacks. Used to be CPU_DOWN_PREPARE */ | ||
439 | static int cpuhp_ap_offline(unsigned int cpu, struct cpuhp_cpu_state *st) | ||
440 | { | ||
441 | enum cpuhp_state target = max((int)st->target, CPUHP_TEARDOWN_CPU); | ||
442 | |||
443 | return cpuhp_down_callbacks(cpu, st, cpuhp_ap_states, target); | ||
444 | } | ||
445 | |||
446 | /* Execute the online startup callbacks. Used to be CPU_ONLINE */ | ||
447 | static int cpuhp_ap_online(unsigned int cpu, struct cpuhp_cpu_state *st) | ||
448 | { | ||
449 | return cpuhp_up_callbacks(cpu, st, cpuhp_ap_states, st->target); | ||
450 | } | ||
451 | |||
452 | /* | ||
453 | * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke | ||
454 | * callbacks when a state gets [un]installed at runtime. | ||
455 | */ | ||
456 | static void cpuhp_thread_fun(unsigned int cpu) | ||
457 | { | ||
458 | struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); | ||
459 | int ret = 0; | ||
460 | |||
461 | /* | ||
462 | * Paired with the mb() in cpuhp_kick_ap_work and | ||
463 | * cpuhp_invoke_ap_callback, so the work set is consistently visible. | ||
464 | */ | ||
465 | smp_mb(); | ||
466 | if (!st->should_run) | ||
467 | return; | ||
468 | |||
469 | st->should_run = false; | ||
470 | |||
471 | /* Single callback invocation for [un]install ? */ | ||
472 | if (st->cb) { | ||
473 | if (st->cb_state < CPUHP_AP_ONLINE) { | ||
474 | local_irq_disable(); | ||
475 | ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb); | ||
476 | local_irq_enable(); | ||
477 | } else { | ||
478 | ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb); | ||
479 | } | ||
480 | } else { | ||
481 | /* Cannot happen .... */ | ||
482 | BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE); | ||
483 | |||
484 | /* Regular hotplug work */ | ||
485 | if (st->state < st->target) | ||
486 | ret = cpuhp_ap_online(cpu, st); | ||
487 | else if (st->state > st->target) | ||
488 | ret = cpuhp_ap_offline(cpu, st); | ||
489 | } | ||
490 | st->result = ret; | ||
491 | complete(&st->done); | ||
492 | } | ||
227 | 493 | ||
228 | static void cpu_notify_nofail(unsigned long val, void *v) | 494 | /* Invoke a single callback on a remote cpu */ |
495 | static int cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, | ||
496 | int (*cb)(unsigned int)) | ||
229 | { | 497 | { |
230 | BUG_ON(cpu_notify(val, v)); | 498 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); |
499 | |||
500 | if (!cpu_online(cpu)) | ||
501 | return 0; | ||
502 | |||
503 | st->cb_state = state; | ||
504 | st->cb = cb; | ||
505 | /* | ||
506 | * Make sure the above stores are visible before should_run becomes | ||
507 | * true. Paired with the mb() above in cpuhp_thread_fun() | ||
508 | */ | ||
509 | smp_mb(); | ||
510 | st->should_run = true; | ||
511 | wake_up_process(st->thread); | ||
512 | wait_for_completion(&st->done); | ||
513 | return st->result; | ||
231 | } | 514 | } |
515 | |||
516 | /* Regular hotplug invocation of the AP hotplug thread */ | ||
517 | static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st) | ||
518 | { | ||
519 | st->result = 0; | ||
520 | st->cb = NULL; | ||
521 | /* | ||
522 | * Make sure the above stores are visible before should_run becomes | ||
523 | * true. Paired with the mb() above in cpuhp_thread_fun() | ||
524 | */ | ||
525 | smp_mb(); | ||
526 | st->should_run = true; | ||
527 | wake_up_process(st->thread); | ||
528 | } | ||
529 | |||
530 | static int cpuhp_kick_ap_work(unsigned int cpu) | ||
531 | { | ||
532 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); | ||
533 | enum cpuhp_state state = st->state; | ||
534 | |||
535 | trace_cpuhp_enter(cpu, st->target, state, cpuhp_kick_ap_work); | ||
536 | __cpuhp_kick_ap_work(st); | ||
537 | wait_for_completion(&st->done); | ||
538 | trace_cpuhp_exit(cpu, st->state, state, st->result); | ||
539 | return st->result; | ||
540 | } | ||
541 | |||
542 | static struct smp_hotplug_thread cpuhp_threads = { | ||
543 | .store = &cpuhp_state.thread, | ||
544 | .create = &cpuhp_create, | ||
545 | .thread_should_run = cpuhp_should_run, | ||
546 | .thread_fn = cpuhp_thread_fun, | ||
547 | .thread_comm = "cpuhp/%u", | ||
548 | .selfparking = true, | ||
549 | }; | ||
550 | |||
551 | void __init cpuhp_threads_init(void) | ||
552 | { | ||
553 | BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads)); | ||
554 | kthread_unpark(this_cpu_read(cpuhp_state.thread)); | ||
555 | } | ||
556 | |||
557 | #ifdef CONFIG_HOTPLUG_CPU | ||
232 | EXPORT_SYMBOL(register_cpu_notifier); | 558 | EXPORT_SYMBOL(register_cpu_notifier); |
233 | EXPORT_SYMBOL(__register_cpu_notifier); | 559 | EXPORT_SYMBOL(__register_cpu_notifier); |
234 | |||
235 | void unregister_cpu_notifier(struct notifier_block *nb) | 560 | void unregister_cpu_notifier(struct notifier_block *nb) |
236 | { | 561 | { |
237 | cpu_maps_update_begin(); | 562 | cpu_maps_update_begin(); |
@@ -311,57 +636,60 @@ static inline void check_for_tasks(int dead_cpu) | |||
311 | read_unlock(&tasklist_lock); | 636 | read_unlock(&tasklist_lock); |
312 | } | 637 | } |
313 | 638 | ||
314 | struct take_cpu_down_param { | 639 | static void cpu_notify_nofail(unsigned long val, unsigned int cpu) |
315 | unsigned long mod; | 640 | { |
316 | void *hcpu; | 641 | BUG_ON(cpu_notify(val, cpu)); |
317 | }; | 642 | } |
643 | |||
644 | static int notify_down_prepare(unsigned int cpu) | ||
645 | { | ||
646 | int err, nr_calls = 0; | ||
647 | |||
648 | err = __cpu_notify(CPU_DOWN_PREPARE, cpu, -1, &nr_calls); | ||
649 | if (err) { | ||
650 | nr_calls--; | ||
651 | __cpu_notify(CPU_DOWN_FAILED, cpu, nr_calls, NULL); | ||
652 | pr_warn("%s: attempt to take down CPU %u failed\n", | ||
653 | __func__, cpu); | ||
654 | } | ||
655 | return err; | ||
656 | } | ||
657 | |||
658 | static int notify_dying(unsigned int cpu) | ||
659 | { | ||
660 | cpu_notify(CPU_DYING, cpu); | ||
661 | return 0; | ||
662 | } | ||
318 | 663 | ||
319 | /* Take this CPU down. */ | 664 | /* Take this CPU down. */ |
320 | static int take_cpu_down(void *_param) | 665 | static int take_cpu_down(void *_param) |
321 | { | 666 | { |
322 | struct take_cpu_down_param *param = _param; | 667 | struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); |
323 | int err; | 668 | enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE); |
669 | int err, cpu = smp_processor_id(); | ||
324 | 670 | ||
325 | /* Ensure this CPU doesn't handle any more interrupts. */ | 671 | /* Ensure this CPU doesn't handle any more interrupts. */ |
326 | err = __cpu_disable(); | 672 | err = __cpu_disable(); |
327 | if (err < 0) | 673 | if (err < 0) |
328 | return err; | 674 | return err; |
329 | 675 | ||
330 | cpu_notify(CPU_DYING | param->mod, param->hcpu); | 676 | /* Invoke the former CPU_DYING callbacks */ |
677 | for (; st->state > target; st->state--) { | ||
678 | struct cpuhp_step *step = cpuhp_ap_states + st->state; | ||
679 | |||
680 | cpuhp_invoke_callback(cpu, st->state, step->teardown); | ||
681 | } | ||
331 | /* Give up timekeeping duties */ | 682 | /* Give up timekeeping duties */ |
332 | tick_handover_do_timer(); | 683 | tick_handover_do_timer(); |
333 | /* Park the stopper thread */ | 684 | /* Park the stopper thread */ |
334 | stop_machine_park((long)param->hcpu); | 685 | stop_machine_park(cpu); |
335 | return 0; | 686 | return 0; |
336 | } | 687 | } |
337 | 688 | ||
338 | /* Requires cpu_add_remove_lock to be held */ | 689 | static int takedown_cpu(unsigned int cpu) |
339 | static int _cpu_down(unsigned int cpu, int tasks_frozen) | ||
340 | { | 690 | { |
341 | int err, nr_calls = 0; | 691 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); |
342 | void *hcpu = (void *)(long)cpu; | 692 | int err; |
343 | unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; | ||
344 | struct take_cpu_down_param tcd_param = { | ||
345 | .mod = mod, | ||
346 | .hcpu = hcpu, | ||
347 | }; | ||
348 | |||
349 | if (num_online_cpus() == 1) | ||
350 | return -EBUSY; | ||
351 | |||
352 | if (!cpu_online(cpu)) | ||
353 | return -EINVAL; | ||
354 | |||
355 | cpu_hotplug_begin(); | ||
356 | |||
357 | err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls); | ||
358 | if (err) { | ||
359 | nr_calls--; | ||
360 | __cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL); | ||
361 | pr_warn("%s: attempt to take down CPU %u failed\n", | ||
362 | __func__, cpu); | ||
363 | goto out_release; | ||
364 | } | ||
365 | 693 | ||
366 | /* | 694 | /* |
367 | * By now we've cleared cpu_active_mask, wait for all preempt-disabled | 695 | * By now we've cleared cpu_active_mask, wait for all preempt-disabled |
@@ -378,6 +706,8 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen) | |||
378 | else | 706 | else |
379 | synchronize_rcu(); | 707 | synchronize_rcu(); |
380 | 708 | ||
709 | /* Park the smpboot threads */ | ||
710 | kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread); | ||
381 | smpboot_park_threads(cpu); | 711 | smpboot_park_threads(cpu); |
382 | 712 | ||
383 | /* | 713 | /* |
@@ -389,12 +719,12 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen) | |||
389 | /* | 719 | /* |
390 | * So now all preempt/rcu users must observe !cpu_active(). | 720 | * So now all preempt/rcu users must observe !cpu_active(). |
391 | */ | 721 | */ |
392 | err = stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); | 722 | err = stop_machine(take_cpu_down, NULL, cpumask_of(cpu)); |
393 | if (err) { | 723 | if (err) { |
394 | /* CPU didn't die: tell everyone. Can't complain. */ | 724 | /* CPU didn't die: tell everyone. Can't complain. */ |
395 | cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu); | 725 | cpu_notify_nofail(CPU_DOWN_FAILED, cpu); |
396 | irq_unlock_sparse(); | 726 | irq_unlock_sparse(); |
397 | goto out_release; | 727 | return err; |
398 | } | 728 | } |
399 | BUG_ON(cpu_online(cpu)); | 729 | BUG_ON(cpu_online(cpu)); |
400 | 730 | ||
@@ -405,10 +735,8 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen) | |||
405 | * | 735 | * |
406 | * Wait for the stop thread to go away. | 736 | * Wait for the stop thread to go away. |
407 | */ | 737 | */ |
408 | while (!per_cpu(cpu_dead_idle, cpu)) | 738 | wait_for_completion(&st->done); |
409 | cpu_relax(); | 739 | BUG_ON(st->state != CPUHP_AP_IDLE_DEAD); |
410 | smp_mb(); /* Read from cpu_dead_idle before __cpu_die(). */ | ||
411 | per_cpu(cpu_dead_idle, cpu) = false; | ||
412 | 740 | ||
413 | /* Interrupts are moved away from the dying cpu, reenable alloc/free */ | 741 | /* Interrupts are moved away from the dying cpu, reenable alloc/free */ |
414 | irq_unlock_sparse(); | 742 | irq_unlock_sparse(); |
@@ -417,20 +745,104 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen) | |||
417 | /* This actually kills the CPU. */ | 745 | /* This actually kills the CPU. */ |
418 | __cpu_die(cpu); | 746 | __cpu_die(cpu); |
419 | 747 | ||
420 | /* CPU is completely dead: tell everyone. Too late to complain. */ | ||
421 | tick_cleanup_dead_cpu(cpu); | 748 | tick_cleanup_dead_cpu(cpu); |
422 | cpu_notify_nofail(CPU_DEAD | mod, hcpu); | 749 | return 0; |
750 | } | ||
423 | 751 | ||
752 | static int notify_dead(unsigned int cpu) | ||
753 | { | ||
754 | cpu_notify_nofail(CPU_DEAD, cpu); | ||
424 | check_for_tasks(cpu); | 755 | check_for_tasks(cpu); |
756 | return 0; | ||
757 | } | ||
425 | 758 | ||
426 | out_release: | 759 | static void cpuhp_complete_idle_dead(void *arg) |
760 | { | ||
761 | struct cpuhp_cpu_state *st = arg; | ||
762 | |||
763 | complete(&st->done); | ||
764 | } | ||
765 | |||
766 | void cpuhp_report_idle_dead(void) | ||
767 | { | ||
768 | struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); | ||
769 | |||
770 | BUG_ON(st->state != CPUHP_AP_OFFLINE); | ||
771 | rcu_report_dead(smp_processor_id()); | ||
772 | st->state = CPUHP_AP_IDLE_DEAD; | ||
773 | /* | ||
774 | * We cannot call complete after rcu_report_dead() so we delegate it | ||
775 | * to an online cpu. | ||
776 | */ | ||
777 | smp_call_function_single(cpumask_first(cpu_online_mask), | ||
778 | cpuhp_complete_idle_dead, st, 0); | ||
779 | } | ||
780 | |||
781 | #else | ||
782 | #define notify_down_prepare NULL | ||
783 | #define takedown_cpu NULL | ||
784 | #define notify_dead NULL | ||
785 | #define notify_dying NULL | ||
786 | #endif | ||
787 | |||
788 | #ifdef CONFIG_HOTPLUG_CPU | ||
789 | |||
790 | /* Requires cpu_add_remove_lock to be held */ | ||
791 | static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, | ||
792 | enum cpuhp_state target) | ||
793 | { | ||
794 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); | ||
795 | int prev_state, ret = 0; | ||
796 | bool hasdied = false; | ||
797 | |||
798 | if (num_online_cpus() == 1) | ||
799 | return -EBUSY; | ||
800 | |||
801 | if (!cpu_present(cpu)) | ||
802 | return -EINVAL; | ||
803 | |||
804 | cpu_hotplug_begin(); | ||
805 | |||
806 | cpuhp_tasks_frozen = tasks_frozen; | ||
807 | |||
808 | prev_state = st->state; | ||
809 | st->target = target; | ||
810 | /* | ||
811 | * If the current CPU state is in the range of the AP hotplug thread, | ||
812 | * then we need to kick the thread. | ||
813 | */ | ||
814 | if (st->state > CPUHP_TEARDOWN_CPU) { | ||
815 | ret = cpuhp_kick_ap_work(cpu); | ||
816 | /* | ||
817 | * The AP side has done the error rollback already. Just | ||
818 | * return the error code. | ||
819 | */ | ||
820 | if (ret) | ||
821 | goto out; | ||
822 | |||
823 | /* | ||
824 | * We might have stopped still in the range of the AP hotplug | ||
825 | * thread. Nothing to do anymore. | ||
826 | */ | ||
827 | if (st->state > CPUHP_TEARDOWN_CPU) | ||
828 | goto out; | ||
829 | } | ||
830 | /* | ||
831 | * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need | ||
832 | * to do the further cleanups. | ||
833 | */ | ||
834 | ret = cpuhp_down_callbacks(cpu, st, cpuhp_bp_states, target); | ||
835 | |||
836 | hasdied = prev_state != st->state && st->state == CPUHP_OFFLINE; | ||
837 | out: | ||
427 | cpu_hotplug_done(); | 838 | cpu_hotplug_done(); |
428 | if (!err) | 839 | /* This post dead nonsense must die */ |
429 | cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu); | 840 | if (!ret && hasdied) |
430 | return err; | 841 | cpu_notify_nofail(CPU_POST_DEAD, cpu); |
842 | return ret; | ||
431 | } | 843 | } |
432 | 844 | ||
433 | int cpu_down(unsigned int cpu) | 845 | static int do_cpu_down(unsigned int cpu, enum cpuhp_state target) |
434 | { | 846 | { |
435 | int err; | 847 | int err; |
436 | 848 | ||
@@ -441,100 +853,131 @@ int cpu_down(unsigned int cpu) | |||
441 | goto out; | 853 | goto out; |
442 | } | 854 | } |
443 | 855 | ||
444 | err = _cpu_down(cpu, 0); | 856 | err = _cpu_down(cpu, 0, target); |
445 | 857 | ||
446 | out: | 858 | out: |
447 | cpu_maps_update_done(); | 859 | cpu_maps_update_done(); |
448 | return err; | 860 | return err; |
449 | } | 861 | } |
862 | int cpu_down(unsigned int cpu) | ||
863 | { | ||
864 | return do_cpu_down(cpu, CPUHP_OFFLINE); | ||
865 | } | ||
450 | EXPORT_SYMBOL(cpu_down); | 866 | EXPORT_SYMBOL(cpu_down); |
451 | #endif /*CONFIG_HOTPLUG_CPU*/ | 867 | #endif /*CONFIG_HOTPLUG_CPU*/ |
452 | 868 | ||
453 | /* | 869 | /** |
454 | * Unpark per-CPU smpboot kthreads at CPU-online time. | 870 | * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers |
871 | * @cpu: cpu that just started | ||
872 | * | ||
873 | * This function calls the cpu_chain notifiers with CPU_STARTING. | ||
874 | * It must be called by the arch code on the new cpu, before the new cpu | ||
875 | * enables interrupts and before the "boot" cpu returns from __cpu_up(). | ||
455 | */ | 876 | */ |
456 | static int smpboot_thread_call(struct notifier_block *nfb, | 877 | void notify_cpu_starting(unsigned int cpu) |
457 | unsigned long action, void *hcpu) | ||
458 | { | 878 | { |
459 | int cpu = (long)hcpu; | 879 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); |
460 | 880 | enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE); | |
461 | switch (action & ~CPU_TASKS_FROZEN) { | ||
462 | 881 | ||
463 | case CPU_DOWN_FAILED: | 882 | while (st->state < target) { |
464 | case CPU_ONLINE: | 883 | struct cpuhp_step *step; |
465 | smpboot_unpark_threads(cpu); | ||
466 | break; | ||
467 | 884 | ||
468 | default: | 885 | st->state++; |
469 | break; | 886 | step = cpuhp_ap_states + st->state; |
887 | cpuhp_invoke_callback(cpu, st->state, step->startup); | ||
470 | } | 888 | } |
471 | |||
472 | return NOTIFY_OK; | ||
473 | } | 889 | } |
474 | 890 | ||
475 | static struct notifier_block smpboot_thread_notifier = { | 891 | /* |
476 | .notifier_call = smpboot_thread_call, | 892 | * Called from the idle task. We need to set active here, so we can kick off |
477 | .priority = CPU_PRI_SMPBOOT, | 893 | * the stopper thread and unpark the smpboot threads. If the target state is |
478 | }; | 894 | * beyond CPUHP_AP_ONLINE_IDLE we kick the cpuhp thread and let it bring up the |
479 | 895 | * cpu further. | |
480 | void smpboot_thread_init(void) | 896 | */ |
897 | void cpuhp_online_idle(enum cpuhp_state state) | ||
481 | { | 898 | { |
482 | register_cpu_notifier(&smpboot_thread_notifier); | 899 | struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); |
900 | unsigned int cpu = smp_processor_id(); | ||
901 | |||
902 | /* Happens for the boot cpu */ | ||
903 | if (state != CPUHP_AP_ONLINE_IDLE) | ||
904 | return; | ||
905 | |||
906 | st->state = CPUHP_AP_ONLINE_IDLE; | ||
907 | |||
908 | /* The cpu is marked online, set it active now */ | ||
909 | set_cpu_active(cpu, true); | ||
910 | /* Unpark the stopper thread and the hotplug thread of this cpu */ | ||
911 | stop_machine_unpark(cpu); | ||
912 | kthread_unpark(st->thread); | ||
913 | |||
914 | /* Should we go further up ? */ | ||
915 | if (st->target > CPUHP_AP_ONLINE_IDLE) | ||
916 | __cpuhp_kick_ap_work(st); | ||
917 | else | ||
918 | complete(&st->done); | ||
483 | } | 919 | } |
484 | 920 | ||
485 | /* Requires cpu_add_remove_lock to be held */ | 921 | /* Requires cpu_add_remove_lock to be held */ |
486 | static int _cpu_up(unsigned int cpu, int tasks_frozen) | 922 | static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target) |
487 | { | 923 | { |
488 | int ret, nr_calls = 0; | 924 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); |
489 | void *hcpu = (void *)(long)cpu; | ||
490 | unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; | ||
491 | struct task_struct *idle; | 925 | struct task_struct *idle; |
926 | int ret = 0; | ||
492 | 927 | ||
493 | cpu_hotplug_begin(); | 928 | cpu_hotplug_begin(); |
494 | 929 | ||
495 | if (cpu_online(cpu) || !cpu_present(cpu)) { | 930 | if (!cpu_present(cpu)) { |
496 | ret = -EINVAL; | 931 | ret = -EINVAL; |
497 | goto out; | 932 | goto out; |
498 | } | 933 | } |
499 | 934 | ||
500 | idle = idle_thread_get(cpu); | 935 | /* |
501 | if (IS_ERR(idle)) { | 936 | * The caller of do_cpu_up might have raced with another |
502 | ret = PTR_ERR(idle); | 937 | * caller. Ignore it for now. |
503 | goto out; | 938 | */ |
504 | } | 939 | if (st->state >= target) |
505 | |||
506 | ret = smpboot_create_threads(cpu); | ||
507 | if (ret) | ||
508 | goto out; | 940 | goto out; |
509 | 941 | ||
510 | ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls); | 942 | if (st->state == CPUHP_OFFLINE) { |
511 | if (ret) { | 943 | /* Let it fail before we try to bring the cpu up */ |
512 | nr_calls--; | 944 | idle = idle_thread_get(cpu); |
513 | pr_warn("%s: attempt to bring up CPU %u failed\n", | 945 | if (IS_ERR(idle)) { |
514 | __func__, cpu); | 946 | ret = PTR_ERR(idle); |
515 | goto out_notify; | 947 | goto out; |
948 | } | ||
516 | } | 949 | } |
517 | 950 | ||
518 | /* Arch-specific enabling code. */ | 951 | cpuhp_tasks_frozen = tasks_frozen; |
519 | ret = __cpu_up(cpu, idle); | ||
520 | |||
521 | if (ret != 0) | ||
522 | goto out_notify; | ||
523 | BUG_ON(!cpu_online(cpu)); | ||
524 | 952 | ||
525 | /* Now call notifier in preparation. */ | 953 | st->target = target; |
526 | cpu_notify(CPU_ONLINE | mod, hcpu); | 954 | /* |
955 | * If the current CPU state is in the range of the AP hotplug thread, | ||
956 | * then we need to kick the thread once more. | ||
957 | */ | ||
958 | if (st->state > CPUHP_BRINGUP_CPU) { | ||
959 | ret = cpuhp_kick_ap_work(cpu); | ||
960 | /* | ||
961 | * The AP side has done the error rollback already. Just | ||
962 | * return the error code. | ||
963 | */ | ||
964 | if (ret) | ||
965 | goto out; | ||
966 | } | ||
527 | 967 | ||
528 | out_notify: | 968 | /* |
529 | if (ret != 0) | 969 | * Try to reach the target state. We max out on the BP at |
530 | __cpu_notify(CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL); | 970 | * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is |
971 | * responsible for bringing it up to the target state. | ||
972 | */ | ||
973 | target = min((int)target, CPUHP_BRINGUP_CPU); | ||
974 | ret = cpuhp_up_callbacks(cpu, st, cpuhp_bp_states, target); | ||
531 | out: | 975 | out: |
532 | cpu_hotplug_done(); | 976 | cpu_hotplug_done(); |
533 | |||
534 | return ret; | 977 | return ret; |
535 | } | 978 | } |
536 | 979 | ||
537 | int cpu_up(unsigned int cpu) | 980 | static int do_cpu_up(unsigned int cpu, enum cpuhp_state target) |
538 | { | 981 | { |
539 | int err = 0; | 982 | int err = 0; |
540 | 983 | ||
@@ -558,12 +1001,16 @@ int cpu_up(unsigned int cpu) | |||
558 | goto out; | 1001 | goto out; |
559 | } | 1002 | } |
560 | 1003 | ||
561 | err = _cpu_up(cpu, 0); | 1004 | err = _cpu_up(cpu, 0, target); |
562 | |||
563 | out: | 1005 | out: |
564 | cpu_maps_update_done(); | 1006 | cpu_maps_update_done(); |
565 | return err; | 1007 | return err; |
566 | } | 1008 | } |
1009 | |||
1010 | int cpu_up(unsigned int cpu) | ||
1011 | { | ||
1012 | return do_cpu_up(cpu, CPUHP_ONLINE); | ||
1013 | } | ||
567 | EXPORT_SYMBOL_GPL(cpu_up); | 1014 | EXPORT_SYMBOL_GPL(cpu_up); |
568 | 1015 | ||
569 | #ifdef CONFIG_PM_SLEEP_SMP | 1016 | #ifdef CONFIG_PM_SLEEP_SMP |
@@ -586,7 +1033,7 @@ int disable_nonboot_cpus(void) | |||
586 | if (cpu == first_cpu) | 1033 | if (cpu == first_cpu) |
587 | continue; | 1034 | continue; |
588 | trace_suspend_resume(TPS("CPU_OFF"), cpu, true); | 1035 | trace_suspend_resume(TPS("CPU_OFF"), cpu, true); |
589 | error = _cpu_down(cpu, 1); | 1036 | error = _cpu_down(cpu, 1, CPUHP_OFFLINE); |
590 | trace_suspend_resume(TPS("CPU_OFF"), cpu, false); | 1037 | trace_suspend_resume(TPS("CPU_OFF"), cpu, false); |
591 | if (!error) | 1038 | if (!error) |
592 | cpumask_set_cpu(cpu, frozen_cpus); | 1039 | cpumask_set_cpu(cpu, frozen_cpus); |
@@ -636,7 +1083,7 @@ void enable_nonboot_cpus(void) | |||
636 | 1083 | ||
637 | for_each_cpu(cpu, frozen_cpus) { | 1084 | for_each_cpu(cpu, frozen_cpus) { |
638 | trace_suspend_resume(TPS("CPU_ON"), cpu, true); | 1085 | trace_suspend_resume(TPS("CPU_ON"), cpu, true); |
639 | error = _cpu_up(cpu, 1); | 1086 | error = _cpu_up(cpu, 1, CPUHP_ONLINE); |
640 | trace_suspend_resume(TPS("CPU_ON"), cpu, false); | 1087 | trace_suspend_resume(TPS("CPU_ON"), cpu, false); |
641 | if (!error) { | 1088 | if (!error) { |
642 | pr_info("CPU%d is up\n", cpu); | 1089 | pr_info("CPU%d is up\n", cpu); |
@@ -709,26 +1156,463 @@ core_initcall(cpu_hotplug_pm_sync_init); | |||
709 | 1156 | ||
710 | #endif /* CONFIG_PM_SLEEP_SMP */ | 1157 | #endif /* CONFIG_PM_SLEEP_SMP */ |
711 | 1158 | ||
1159 | #endif /* CONFIG_SMP */ | ||
1160 | |||
1161 | /* Boot processor state steps */ | ||
1162 | static struct cpuhp_step cpuhp_bp_states[] = { | ||
1163 | [CPUHP_OFFLINE] = { | ||
1164 | .name = "offline", | ||
1165 | .startup = NULL, | ||
1166 | .teardown = NULL, | ||
1167 | }, | ||
1168 | #ifdef CONFIG_SMP | ||
1169 | [CPUHP_CREATE_THREADS] = { | ||
1170 | .name = "threads:create", | ||
1171 | .startup = smpboot_create_threads, | ||
1172 | .teardown = NULL, | ||
1173 | .cant_stop = true, | ||
1174 | }, | ||
1175 | /* | ||
1176 | * Preparatory and dead notifiers. Will be replaced once the notifiers | ||
1177 | * are converted to states. | ||
1178 | */ | ||
1179 | [CPUHP_NOTIFY_PREPARE] = { | ||
1180 | .name = "notify:prepare", | ||
1181 | .startup = notify_prepare, | ||
1182 | .teardown = notify_dead, | ||
1183 | .skip_onerr = true, | ||
1184 | .cant_stop = true, | ||
1185 | }, | ||
1186 | /* Kicks the plugged cpu into life */ | ||
1187 | [CPUHP_BRINGUP_CPU] = { | ||
1188 | .name = "cpu:bringup", | ||
1189 | .startup = bringup_cpu, | ||
1190 | .teardown = NULL, | ||
1191 | .cant_stop = true, | ||
1192 | }, | ||
1193 | /* | ||
1194 | * Handled on the control processor until the plugged processor manages | ||
1195 | * this itself. | ||
1196 | */ | ||
1197 | [CPUHP_TEARDOWN_CPU] = { | ||
1198 | .name = "cpu:teardown", | ||
1199 | .startup = NULL, | ||
1200 | .teardown = takedown_cpu, | ||
1201 | .cant_stop = true, | ||
1202 | }, | ||
1203 | #endif | ||
1204 | }; | ||
1205 | |||
1206 | /* Application processor state steps */ | ||
1207 | static struct cpuhp_step cpuhp_ap_states[] = { | ||
1208 | #ifdef CONFIG_SMP | ||
1209 | /* Final state before CPU kills itself */ | ||
1210 | [CPUHP_AP_IDLE_DEAD] = { | ||
1211 | .name = "idle:dead", | ||
1212 | }, | ||
1213 | /* | ||
1214 | * Last state before CPU enters the idle loop to die. Transient state | ||
1215 | * for synchronization. | ||
1216 | */ | ||
1217 | [CPUHP_AP_OFFLINE] = { | ||
1218 | .name = "ap:offline", | ||
1219 | .cant_stop = true, | ||
1220 | }, | ||
1221 | /* | ||
1222 | * Low level startup/teardown notifiers. Run with interrupts | ||
1223 | * disabled. Will be removed once the notifiers are converted to | ||
1224 | * states. | ||
1225 | */ | ||
1226 | [CPUHP_AP_NOTIFY_STARTING] = { | ||
1227 | .name = "notify:starting", | ||
1228 | .startup = notify_starting, | ||
1229 | .teardown = notify_dying, | ||
1230 | .skip_onerr = true, | ||
1231 | .cant_stop = true, | ||
1232 | }, | ||
1233 | /* Entry state on starting. Interrupts enabled from here on. Transient | ||
1234 | * state for synchronization */ | ||
1235 | [CPUHP_AP_ONLINE] = { | ||
1236 | .name = "ap:online", | ||
1237 | }, | ||
1238 | /* Handle smpboot threads park/unpark */ | ||
1239 | [CPUHP_AP_SMPBOOT_THREADS] = { | ||
1240 | .name = "smpboot:threads", | ||
1241 | .startup = smpboot_unpark_threads, | ||
1242 | .teardown = NULL, | ||
1243 | }, | ||
1244 | /* | ||
1245 | * Online/down_prepare notifiers. Will be removed once the notifiers | ||
1246 | * are converted to states. | ||
1247 | */ | ||
1248 | [CPUHP_AP_NOTIFY_ONLINE] = { | ||
1249 | .name = "notify:online", | ||
1250 | .startup = notify_online, | ||
1251 | .teardown = notify_down_prepare, | ||
1252 | }, | ||
1253 | #endif | ||
1254 | /* | ||
1255 | * The dynamically registered state space is here | ||
1256 | */ | ||
1257 | |||
1258 | /* CPU is fully up and running. */ | ||
1259 | [CPUHP_ONLINE] = { | ||
1260 | .name = "online", | ||
1261 | .startup = NULL, | ||
1262 | .teardown = NULL, | ||
1263 | }, | ||
1264 | }; | ||
1265 | |||
1266 | /* Sanity check for callbacks */ | ||
1267 | static int cpuhp_cb_check(enum cpuhp_state state) | ||
1268 | { | ||
1269 | if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE) | ||
1270 | return -EINVAL; | ||
1271 | return 0; | ||
1272 | } | ||
1273 | |||
1274 | static bool cpuhp_is_ap_state(enum cpuhp_state state) | ||
1275 | { | ||
1276 | /* | ||
1277 | * The extra check for CPUHP_TEARDOWN_CPU is only for documentation | ||
1278 | * purposes as that state is handled explicitly in cpu_down. | ||
1279 | */ | ||
1280 | return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU; | ||
1281 | } | ||
1282 | |||
1283 | static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state) | ||
1284 | { | ||
1285 | struct cpuhp_step *sp; | ||
1286 | |||
1287 | sp = cpuhp_is_ap_state(state) ? cpuhp_ap_states : cpuhp_bp_states; | ||
1288 | return sp + state; | ||
1289 | } | ||
1290 | |||
1291 | static void cpuhp_store_callbacks(enum cpuhp_state state, | ||
1292 | const char *name, | ||
1293 | int (*startup)(unsigned int cpu), | ||
1294 | int (*teardown)(unsigned int cpu)) | ||
1295 | { | ||
1296 | /* (Un)Install the callbacks for further cpu hotplug operations */ | ||
1297 | struct cpuhp_step *sp; | ||
1298 | |||
1299 | mutex_lock(&cpuhp_state_mutex); | ||
1300 | sp = cpuhp_get_step(state); | ||
1301 | sp->startup = startup; | ||
1302 | sp->teardown = teardown; | ||
1303 | sp->name = name; | ||
1304 | mutex_unlock(&cpuhp_state_mutex); | ||
1305 | } | ||
1306 | |||
1307 | static void *cpuhp_get_teardown_cb(enum cpuhp_state state) | ||
1308 | { | ||
1309 | return cpuhp_get_step(state)->teardown; | ||
1310 | } | ||
1311 | |||
1312 | /* | ||
1313 | * Call the startup/teardown function for a step either on the AP or | ||
1314 | * on the current CPU. | ||
1315 | */ | ||
1316 | static int cpuhp_issue_call(int cpu, enum cpuhp_state state, | ||
1317 | int (*cb)(unsigned int), bool bringup) | ||
1318 | { | ||
1319 | int ret; | ||
1320 | |||
1321 | if (!cb) | ||
1322 | return 0; | ||
1323 | /* | ||
1324 | * The non-AP bound callbacks can fail on bringup. On teardown, | ||
1325 | * e.g. during module removal, we crash for now. | ||
1326 | */ | ||
1327 | #ifdef CONFIG_SMP | ||
1328 | if (cpuhp_is_ap_state(state)) | ||
1329 | ret = cpuhp_invoke_ap_callback(cpu, state, cb); | ||
1330 | else | ||
1331 | ret = cpuhp_invoke_callback(cpu, state, cb); | ||
1332 | #else | ||
1333 | ret = cpuhp_invoke_callback(cpu, state, cb); | ||
1334 | #endif | ||
1335 | BUG_ON(ret && !bringup); | ||
1336 | return ret; | ||
1337 | } | ||
1338 | |||
1339 | /* | ||
1340 | * Called from __cpuhp_setup_state on a recoverable failure. | ||
1341 | * | ||
1342 | * Note: The teardown callbacks for rollback are not allowed to fail! | ||
1343 | */ | ||
1344 | static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state, | ||
1345 | int (*teardown)(unsigned int cpu)) | ||
1346 | { | ||
1347 | int cpu; | ||
1348 | |||
1349 | if (!teardown) | ||
1350 | return; | ||
1351 | |||
1352 | /* Roll back the already executed steps on the other cpus */ | ||
1353 | for_each_present_cpu(cpu) { | ||
1354 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); | ||
1355 | int cpustate = st->state; | ||
1356 | |||
1357 | if (cpu >= failedcpu) | ||
1358 | break; | ||
1359 | |||
1360 | /* Did we invoke the startup call on that cpu ? */ | ||
1361 | if (cpustate >= state) | ||
1362 | cpuhp_issue_call(cpu, state, teardown, false); | ||
1363 | } | ||
1364 | } | ||
1365 | |||
1366 | /* | ||
1367 | * Returns a free slot for dynamic assignment in the ONLINE section. The | ||
1368 | * slots are protected by the cpuhp_state_mutex and an empty slot is identified | ||
1369 | * by having no name assigned. | ||
1370 | */ | ||
1371 | static int cpuhp_reserve_state(enum cpuhp_state state) | ||
1372 | { | ||
1373 | enum cpuhp_state i; | ||
1374 | |||
1375 | mutex_lock(&cpuhp_state_mutex); | ||
1376 | for (i = CPUHP_AP_ONLINE_DYN; i <= CPUHP_AP_ONLINE_DYN_END; i++) { | ||
1377 | if (cpuhp_ap_states[i].name) | ||
1378 | continue; | ||
1379 | |||
1380 | cpuhp_ap_states[i].name = "Reserved"; | ||
1381 | mutex_unlock(&cpuhp_state_mutex); | ||
1382 | return i; | ||
1383 | } | ||
1384 | mutex_unlock(&cpuhp_state_mutex); | ||
1385 | WARN(1, "No more dynamic states available for CPU hotplug\n"); | ||
1386 | return -ENOSPC; | ||
1387 | } | ||
1388 | |||
712 | /** | 1389 | /** |
713 | * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers | 1390 | * __cpuhp_setup_state - Setup the callbacks for a hotplug machine state |
714 | * @cpu: cpu that just started | 1391 | * @state: The state to setup |
1392 | * @invoke: If true, the startup function is invoked for cpus where | ||
1393 | * cpu state >= @state | ||
1394 | * @startup: startup callback function | ||
1395 | * @teardown: teardown callback function | ||
715 | * | 1396 | * |
716 | * This function calls the cpu_chain notifiers with CPU_STARTING. | 1397 | * Returns 0 if successful, otherwise a proper error code |
717 | * It must be called by the arch code on the new cpu, before the new cpu | ||
718 | * enables interrupts and before the "boot" cpu returns from __cpu_up(). | ||
719 | */ | 1398 | */ |
720 | void notify_cpu_starting(unsigned int cpu) | 1399 | int __cpuhp_setup_state(enum cpuhp_state state, |
1400 | const char *name, bool invoke, | ||
1401 | int (*startup)(unsigned int cpu), | ||
1402 | int (*teardown)(unsigned int cpu)) | ||
721 | { | 1403 | { |
722 | unsigned long val = CPU_STARTING; | 1404 | int cpu, ret = 0; |
1405 | int dyn_state = 0; | ||
723 | 1406 | ||
724 | #ifdef CONFIG_PM_SLEEP_SMP | 1407 | if (cpuhp_cb_check(state) || !name) |
725 | if (frozen_cpus != NULL && cpumask_test_cpu(cpu, frozen_cpus)) | 1408 | return -EINVAL; |
726 | val = CPU_STARTING_FROZEN; | 1409 | |
727 | #endif /* CONFIG_PM_SLEEP_SMP */ | 1410 | get_online_cpus(); |
728 | cpu_notify(val, (void *)(long)cpu); | 1411 | |
1412 | /* Currently only dynamic assignments in the ONLINE section are possible */ | ||
1413 | if (state == CPUHP_AP_ONLINE_DYN) { | ||
1414 | dyn_state = 1; | ||
1415 | ret = cpuhp_reserve_state(state); | ||
1416 | if (ret < 0) | ||
1417 | goto out; | ||
1418 | state = ret; | ||
1419 | } | ||
1420 | |||
1421 | cpuhp_store_callbacks(state, name, startup, teardown); | ||
1422 | |||
1423 | if (!invoke || !startup) | ||
1424 | goto out; | ||
1425 | |||
1426 | /* | ||
1427 | * Try to call the startup callback for each present cpu | ||
1428 | * depending on the hotplug state of the cpu. | ||
1429 | */ | ||
1430 | for_each_present_cpu(cpu) { | ||
1431 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); | ||
1432 | int cpustate = st->state; | ||
1433 | |||
1434 | if (cpustate < state) | ||
1435 | continue; | ||
1436 | |||
1437 | ret = cpuhp_issue_call(cpu, state, startup, true); | ||
1438 | if (ret) { | ||
1439 | cpuhp_rollback_install(cpu, state, teardown); | ||
1440 | cpuhp_store_callbacks(state, NULL, NULL, NULL); | ||
1441 | goto out; | ||
1442 | } | ||
1443 | } | ||
1444 | out: | ||
1445 | put_online_cpus(); | ||
1446 | if (!ret && dyn_state) | ||
1447 | return state; | ||
1448 | return ret; | ||
729 | } | 1449 | } |
1450 | EXPORT_SYMBOL(__cpuhp_setup_state); | ||
730 | 1451 | ||
731 | #endif /* CONFIG_SMP */ | 1452 | /** |
1453 | * __cpuhp_remove_state - Remove the callbacks for a hotplug machine state | ||
1454 | * @state: The state to remove | ||
1455 | * @invoke: If true, the teardown function is invoked for cpus where | ||
1456 | * cpu state >= @state | ||
1457 | * | ||
1458 | * The teardown callback is currently not allowed to fail. Think | ||
1459 | * about module removal! | ||
1460 | */ | ||
1461 | void __cpuhp_remove_state(enum cpuhp_state state, bool invoke) | ||
1462 | { | ||
1463 | int (*teardown)(unsigned int cpu) = cpuhp_get_teardown_cb(state); | ||
1464 | int cpu; | ||
1465 | |||
1466 | BUG_ON(cpuhp_cb_check(state)); | ||
1467 | |||
1468 | get_online_cpus(); | ||
1469 | |||
1470 | if (!invoke || !teardown) | ||
1471 | goto remove; | ||
1472 | |||
1473 | /* | ||
1474 | * Call the teardown callback for each present cpu depending | ||
1475 | * on the hotplug state of the cpu. This function is not | ||
1476 | * allowed to fail currently! | ||
1477 | */ | ||
1478 | for_each_present_cpu(cpu) { | ||
1479 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); | ||
1480 | int cpustate = st->state; | ||
1481 | |||
1482 | if (cpustate >= state) | ||
1483 | cpuhp_issue_call(cpu, state, teardown, false); | ||
1484 | } | ||
1485 | remove: | ||
1486 | cpuhp_store_callbacks(state, NULL, NULL, NULL); | ||
1487 | put_online_cpus(); | ||
1488 | } | ||
1489 | EXPORT_SYMBOL(__cpuhp_remove_state); | ||
1490 | |||
1491 | #if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU) | ||
1492 | static ssize_t show_cpuhp_state(struct device *dev, | ||
1493 | struct device_attribute *attr, char *buf) | ||
1494 | { | ||
1495 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id); | ||
1496 | |||
1497 | return sprintf(buf, "%d\n", st->state); | ||
1498 | } | ||
1499 | static DEVICE_ATTR(state, 0444, show_cpuhp_state, NULL); | ||
1500 | |||
1501 | static ssize_t write_cpuhp_target(struct device *dev, | ||
1502 | struct device_attribute *attr, | ||
1503 | const char *buf, size_t count) | ||
1504 | { | ||
1505 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id); | ||
1506 | struct cpuhp_step *sp; | ||
1507 | int target, ret; | ||
1508 | |||
1509 | ret = kstrtoint(buf, 10, &target); | ||
1510 | if (ret) | ||
1511 | return ret; | ||
1512 | |||
1513 | #ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL | ||
1514 | if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE) | ||
1515 | return -EINVAL; | ||
1516 | #else | ||
1517 | if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE) | ||
1518 | return -EINVAL; | ||
1519 | #endif | ||
1520 | |||
1521 | ret = lock_device_hotplug_sysfs(); | ||
1522 | if (ret) | ||
1523 | return ret; | ||
1524 | |||
1525 | mutex_lock(&cpuhp_state_mutex); | ||
1526 | sp = cpuhp_get_step(target); | ||
1527 | ret = !sp->name || sp->cant_stop ? -EINVAL : 0; | ||
1528 | mutex_unlock(&cpuhp_state_mutex); | ||
1529 | if (ret) | ||
1530 | return ret; | ||
1531 | |||
1532 | if (st->state < target) | ||
1533 | ret = do_cpu_up(dev->id, target); | ||
1534 | else | ||
1535 | ret = do_cpu_down(dev->id, target); | ||
1536 | |||
1537 | unlock_device_hotplug(); | ||
1538 | return ret ? ret : count; | ||
1539 | } | ||
1540 | |||
1541 | static ssize_t show_cpuhp_target(struct device *dev, | ||
1542 | struct device_attribute *attr, char *buf) | ||
1543 | { | ||
1544 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id); | ||
1545 | |||
1546 | return sprintf(buf, "%d\n", st->target); | ||
1547 | } | ||
1548 | static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target); | ||
1549 | |||
1550 | static struct attribute *cpuhp_cpu_attrs[] = { | ||
1551 | &dev_attr_state.attr, | ||
1552 | &dev_attr_target.attr, | ||
1553 | NULL | ||
1554 | }; | ||
1555 | |||
1556 | static struct attribute_group cpuhp_cpu_attr_group = { | ||
1557 | .attrs = cpuhp_cpu_attrs, | ||
1558 | .name = "hotplug", | ||
1559 | NULL | ||
1560 | }; | ||
1561 | |||
1562 | static ssize_t show_cpuhp_states(struct device *dev, | ||
1563 | struct device_attribute *attr, char *buf) | ||
1564 | { | ||
1565 | ssize_t cur, res = 0; | ||
1566 | int i; | ||
1567 | |||
1568 | mutex_lock(&cpuhp_state_mutex); | ||
1569 | for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) { | ||
1570 | struct cpuhp_step *sp = cpuhp_get_step(i); | ||
1571 | |||
1572 | if (sp->name) { | ||
1573 | cur = sprintf(buf, "%3d: %s\n", i, sp->name); | ||
1574 | buf += cur; | ||
1575 | res += cur; | ||
1576 | } | ||
1577 | } | ||
1578 | mutex_unlock(&cpuhp_state_mutex); | ||
1579 | return res; | ||
1580 | } | ||
1581 | static DEVICE_ATTR(states, 0444, show_cpuhp_states, NULL); | ||
1582 | |||
1583 | static struct attribute *cpuhp_cpu_root_attrs[] = { | ||
1584 | &dev_attr_states.attr, | ||
1585 | NULL | ||
1586 | }; | ||
1587 | |||
1588 | static struct attribute_group cpuhp_cpu_root_attr_group = { | ||
1589 | .attrs = cpuhp_cpu_root_attrs, | ||
1590 | .name = "hotplug", | ||
1591 | NULL | ||
1592 | }; | ||
1593 | |||
1594 | static int __init cpuhp_sysfs_init(void) | ||
1595 | { | ||
1596 | int cpu, ret; | ||
1597 | |||
1598 | ret = sysfs_create_group(&cpu_subsys.dev_root->kobj, | ||
1599 | &cpuhp_cpu_root_attr_group); | ||
1600 | if (ret) | ||
1601 | return ret; | ||
1602 | |||
1603 | for_each_possible_cpu(cpu) { | ||
1604 | struct device *dev = get_cpu_device(cpu); | ||
1605 | |||
1606 | if (!dev) | ||
1607 | continue; | ||
1608 | ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group); | ||
1609 | if (ret) | ||
1610 | return ret; | ||
1611 | } | ||
1612 | return 0; | ||
1613 | } | ||
1614 | device_initcall(cpuhp_sysfs_init); | ||
1615 | #endif | ||
732 | 1616 | ||
733 | /* | 1617 | /* |
734 | * cpu_bit_bitmap[] is a special, "compressed" data structure that | 1618 | * cpu_bit_bitmap[] is a special, "compressed" data structure that |
@@ -789,3 +1673,25 @@ void init_cpu_online(const struct cpumask *src) | |||
789 | { | 1673 | { |
790 | cpumask_copy(&__cpu_online_mask, src); | 1674 | cpumask_copy(&__cpu_online_mask, src); |
791 | } | 1675 | } |
1676 | |||
1677 | /* | ||
1678 | * Activate the first processor. | ||
1679 | */ | ||
1680 | void __init boot_cpu_init(void) | ||
1681 | { | ||
1682 | int cpu = smp_processor_id(); | ||
1683 | |||
1684 | /* Mark the boot cpu "present", "online" etc for SMP and UP case */ | ||
1685 | set_cpu_online(cpu, true); | ||
1686 | set_cpu_active(cpu, true); | ||
1687 | set_cpu_present(cpu, true); | ||
1688 | set_cpu_possible(cpu, true); | ||
1689 | } | ||
1690 | |||
1691 | /* | ||
1692 | * Must be called _AFTER_ setting up the per_cpu areas | ||
1693 | */ | ||
1694 | void __init boot_cpu_state_init(void) | ||
1695 | { | ||
1696 | per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE; | ||
1697 | } | ||
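The __cpuhp_setup_state() interface added above is the replacement path for open-coded CPU notifiers: a caller registers a named state with startup/teardown callbacks, and passing CPUHP_AP_ONLINE_DYN makes cpuhp_reserve_state() pick a free dynamic slot whose number is returned for later use with __cpuhp_remove_state(). A minimal caller sketch, assuming the declarations from <linux/cpuhotplug.h>; the foo_* names are hypothetical and not part of this patch:

	#include <linux/cpu.h>
	#include <linux/cpuhotplug.h>

	static int foo_cpu_online(unsigned int cpu)
	{
		/* hypothetical: set up per-cpu resources for @cpu */
		return 0;
	}

	static int foo_cpu_offline(unsigned int cpu)
	{
		/* hypothetical: quiesce @cpu; teardowns used for rollback must not fail */
		return 0;
	}

	static int __init foo_init(void)
	{
		int state;

		/*
		 * invoke=true runs foo_cpu_online() on every present CPU that
		 * has already reached the new state; for CPUHP_AP_ONLINE_DYN
		 * the reserved state number is returned on success.
		 */
		state = __cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "foo:online",
					    true, foo_cpu_online, foo_cpu_offline);
		return state < 0 ? state : 0;
	}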
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 0167679182c0..5f6ce931f1ea 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c | |||
@@ -1178,6 +1178,7 @@ static struct xol_area *__create_xol_area(unsigned long vaddr) | |||
1178 | goto free_area; | 1178 | goto free_area; |
1179 | 1179 | ||
1180 | area->xol_mapping.name = "[uprobes]"; | 1180 | area->xol_mapping.name = "[uprobes]"; |
1181 | area->xol_mapping.fault = NULL; | ||
1181 | area->xol_mapping.pages = area->pages; | 1182 | area->xol_mapping.pages = area->pages; |
1182 | area->pages[0] = alloc_page(GFP_HIGHUSER); | 1183 | area->pages[0] = alloc_page(GFP_HIGHUSER); |
1183 | if (!area->pages[0]) | 1184 | if (!area->pages[0]) |
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 3b48dab80164..3bbfd6a9c475 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig | |||
@@ -64,6 +64,10 @@ config IRQ_DOMAIN_HIERARCHY | |||
64 | bool | 64 | bool |
65 | select IRQ_DOMAIN | 65 | select IRQ_DOMAIN |
66 | 66 | ||
67 | # Generic IRQ IPI support | ||
68 | config GENERIC_IRQ_IPI | ||
69 | bool | ||
70 | |||
67 | # Generic MSI interrupt support | 71 | # Generic MSI interrupt support |
68 | config GENERIC_MSI_IRQ | 72 | config GENERIC_MSI_IRQ |
69 | bool | 73 | bool |
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index 2fc9cbdf35b6..2ee42e95a3ce 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile | |||
@@ -8,3 +8,4 @@ obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o | |||
8 | obj-$(CONFIG_GENERIC_IRQ_MIGRATION) += cpuhotplug.o | 8 | obj-$(CONFIG_GENERIC_IRQ_MIGRATION) += cpuhotplug.o |
9 | obj-$(CONFIG_PM_SLEEP) += pm.o | 9 | obj-$(CONFIG_PM_SLEEP) += pm.o |
10 | obj-$(CONFIG_GENERIC_MSI_IRQ) += msi.o | 10 | obj-$(CONFIG_GENERIC_MSI_IRQ) += msi.o |
11 | obj-$(CONFIG_GENERIC_IRQ_IPI) += ipi.o | ||
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 5797909f4e5b..2f9f2b0e79f2 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c | |||
@@ -961,6 +961,7 @@ void irq_chip_mask_parent(struct irq_data *data) | |||
961 | data = data->parent_data; | 961 | data = data->parent_data; |
962 | data->chip->irq_mask(data); | 962 | data->chip->irq_mask(data); |
963 | } | 963 | } |
964 | EXPORT_SYMBOL_GPL(irq_chip_mask_parent); | ||
964 | 965 | ||
965 | /** | 966 | /** |
966 | * irq_chip_unmask_parent - Unmask the parent interrupt | 967 | * irq_chip_unmask_parent - Unmask the parent interrupt |
@@ -971,6 +972,7 @@ void irq_chip_unmask_parent(struct irq_data *data) | |||
971 | data = data->parent_data; | 972 | data = data->parent_data; |
972 | data->chip->irq_unmask(data); | 973 | data->chip->irq_unmask(data); |
973 | } | 974 | } |
975 | EXPORT_SYMBOL_GPL(irq_chip_unmask_parent); | ||
974 | 976 | ||
975 | /** | 977 | /** |
976 | * irq_chip_eoi_parent - Invoke EOI on the parent interrupt | 978 | * irq_chip_eoi_parent - Invoke EOI on the parent interrupt |
@@ -981,6 +983,7 @@ void irq_chip_eoi_parent(struct irq_data *data) | |||
981 | data = data->parent_data; | 983 | data = data->parent_data; |
982 | data->chip->irq_eoi(data); | 984 | data->chip->irq_eoi(data); |
983 | } | 985 | } |
986 | EXPORT_SYMBOL_GPL(irq_chip_eoi_parent); | ||
984 | 987 | ||
985 | /** | 988 | /** |
986 | * irq_chip_set_affinity_parent - Set affinity on the parent interrupt | 989 | * irq_chip_set_affinity_parent - Set affinity on the parent interrupt |
@@ -1016,6 +1019,7 @@ int irq_chip_set_type_parent(struct irq_data *data, unsigned int type) | |||
1016 | 1019 | ||
1017 | return -ENOSYS; | 1020 | return -ENOSYS; |
1018 | } | 1021 | } |
1022 | EXPORT_SYMBOL_GPL(irq_chip_set_type_parent); | ||
1019 | 1023 | ||
1020 | /** | 1024 | /** |
1021 | * irq_chip_retrigger_hierarchy - Retrigger an interrupt in hardware | 1025 | * irq_chip_retrigger_hierarchy - Retrigger an interrupt in hardware |
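The EXPORT_SYMBOL_GPL() additions above make the parent-delegation helpers usable from modular irqchip drivers sitting in a hierarchical irq domain. A sketch of such a child chip, assuming a hierarchical domain set up elsewhere; the foo-msi chip is hypothetical:

	#include <linux/irq.h>

	/*
	 * Child chip of a hierarchical domain: every operation is simply
	 * forwarded to the parent irq_chip via the exported helpers.
	 */
	static struct irq_chip foo_msi_chip = {
		.name			= "foo-msi",
		.irq_mask		= irq_chip_mask_parent,
		.irq_unmask		= irq_chip_unmask_parent,
		.irq_eoi		= irq_chip_eoi_parent,
		.irq_set_type		= irq_chip_set_type_parent,
		.irq_set_affinity	= irq_chip_set_affinity_parent,
	};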
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 57bff7857e87..a15b5485b446 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c | |||
@@ -136,10 +136,9 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc) | |||
136 | { | 136 | { |
137 | irqreturn_t retval = IRQ_NONE; | 137 | irqreturn_t retval = IRQ_NONE; |
138 | unsigned int flags = 0, irq = desc->irq_data.irq; | 138 | unsigned int flags = 0, irq = desc->irq_data.irq; |
139 | struct irqaction *action = desc->action; | 139 | struct irqaction *action; |
140 | 140 | ||
141 | /* action might have become NULL since we dropped the lock */ | 141 | for_each_action_of_desc(desc, action) { |
142 | while (action) { | ||
143 | irqreturn_t res; | 142 | irqreturn_t res; |
144 | 143 | ||
145 | trace_irq_handler_entry(irq, action); | 144 | trace_irq_handler_entry(irq, action); |
@@ -173,7 +172,6 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc) | |||
173 | } | 172 | } |
174 | 173 | ||
175 | retval |= res; | 174 | retval |= res; |
176 | action = action->next; | ||
177 | } | 175 | } |
178 | 176 | ||
179 | add_interrupt_randomness(irq, flags); | 177 | add_interrupt_randomness(irq, flags); |
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index fcab63c66905..09be2c903c6d 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h | |||
@@ -131,6 +131,9 @@ static inline void chip_bus_sync_unlock(struct irq_desc *desc) | |||
131 | #define IRQ_GET_DESC_CHECK_GLOBAL (_IRQ_DESC_CHECK) | 131 | #define IRQ_GET_DESC_CHECK_GLOBAL (_IRQ_DESC_CHECK) |
132 | #define IRQ_GET_DESC_CHECK_PERCPU (_IRQ_DESC_CHECK | _IRQ_DESC_PERCPU) | 132 | #define IRQ_GET_DESC_CHECK_PERCPU (_IRQ_DESC_CHECK | _IRQ_DESC_PERCPU) |
133 | 133 | ||
134 | #define for_each_action_of_desc(desc, act) \ | ||
135 | for (act = desc->action; act; act = act->next) | ||
136 | |||
134 | struct irq_desc * | 137 | struct irq_desc * |
135 | __irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus, | 138 | __irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus, |
136 | unsigned int check); | 139 | unsigned int check); |
@@ -160,6 +163,8 @@ irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags) | |||
160 | __irq_put_desc_unlock(desc, flags, false); | 163 | __irq_put_desc_unlock(desc, flags, false); |
161 | } | 164 | } |
162 | 165 | ||
166 | #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) | ||
167 | |||
163 | /* | 168 | /* |
164 | * Manipulation functions for irq_data.state | 169 | * Manipulation functions for irq_data.state |
165 | */ | 170 | */ |
@@ -188,6 +193,8 @@ static inline bool irqd_has_set(struct irq_data *d, unsigned int mask) | |||
188 | return __irqd_to_state(d) & mask; | 193 | return __irqd_to_state(d) & mask; |
189 | } | 194 | } |
190 | 195 | ||
196 | #undef __irqd_to_state | ||
197 | |||
191 | static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc) | 198 | static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc) |
192 | { | 199 | { |
193 | __this_cpu_inc(*desc->kstat_irqs); | 200 | __this_cpu_inc(*desc->kstat_irqs); |
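The new for_each_action_of_desc() iterator centralizes the walk over a descriptor's shared action list that handle_irq_event_percpu() open-coded above. A usage sketch; foo_walk_actions() is a hypothetical caller, not part of this patch:

	#include <linux/irq.h>
	#include "internals.h"	/* for_each_action_of_desc() */

	static void foo_walk_actions(struct irq_desc *desc)
	{
		struct irqaction *action;

		/*
		 * Expands to:
		 *	for (action = desc->action; action; action = action->next)
		 */
		for_each_action_of_desc(desc, action)
			pr_info("action: %s\n", action->name);
	}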
diff --git a/kernel/irq/ipi.c b/kernel/irq/ipi.c new file mode 100644 index 000000000000..c37f34b00a11 --- /dev/null +++ b/kernel/irq/ipi.c | |||
@@ -0,0 +1,326 @@ | |||
1 | /* | ||
2 | * linux/kernel/irq/ipi.c | ||
3 | * | ||
4 | * Copyright (C) 2015 Imagination Technologies Ltd | ||
5 | * Author: Qais Yousef <qais.yousef@imgtec.com> | ||
6 | * | ||
7 | * This file contains driver APIs to the IPI subsystem. | ||
8 | */ | ||
9 | |||
10 | #define pr_fmt(fmt) "genirq/ipi: " fmt | ||
11 | |||
12 | #include <linux/irqdomain.h> | ||
13 | #include <linux/irq.h> | ||
14 | |||
15 | /** | ||
16 | * irq_reserve_ipi() - Setup an IPI to destination cpumask | ||
17 | * @domain: IPI domain | ||
18 | * @dest: cpumask of cpus which can receive the IPI | ||
19 | * | ||
20 | * Allocate a virq that can be used to send IPI to any CPU in dest mask. | ||
21 | * | ||
22 | * Returns the linux irq number on success and 0 on failure. | ||
23 | */ | ||
24 | unsigned int irq_reserve_ipi(struct irq_domain *domain, | ||
25 | const struct cpumask *dest) | ||
26 | { | ||
27 | unsigned int nr_irqs, offset; | ||
28 | struct irq_data *data; | ||
29 | int virq, i; | ||
30 | |||
31 | if (!domain || !irq_domain_is_ipi(domain)) { | ||
32 | pr_warn("Reservation on a non IPI domain\n"); | ||
33 | return 0; | ||
34 | } | ||
35 | |||
36 | if (!cpumask_subset(dest, cpu_possible_mask)) { | ||
37 | pr_warn("Reservation is not in possible_cpu_mask\n"); | ||
38 | return 0; | ||
39 | } | ||
40 | |||
41 | nr_irqs = cpumask_weight(dest); | ||
42 | if (!nr_irqs) { | ||
43 | pr_warn("Reservation for empty destination mask\n"); | ||
44 | return 0; | ||
45 | } | ||
46 | |||
47 | if (irq_domain_is_ipi_single(domain)) { | ||
48 | /* | ||
49 | * If the underlying implementation uses a single HW irq on | ||
50 | * all cpus then we only need a single Linux irq number for | ||
51 | * it. We have no restrictions vs. the destination mask. The | ||
52 | * underlying implementation can deal with holes nicely. | ||
53 | */ | ||
54 | nr_irqs = 1; | ||
55 | offset = 0; | ||
56 | } else { | ||
57 | unsigned int next; | ||
58 | |||
59 | /* | ||
60 | * The IPI requires a separate HW irq on each CPU. We require | ||
61 | * that the destination mask is consecutive. If an | ||
62 | * implementation needs to support holes, it can reserve | ||
63 | * several IPI ranges. | ||
64 | */ | ||
65 | offset = cpumask_first(dest); | ||
66 | /* | ||
67 | * Find a hole and if found look for another set bit after the | ||
68 | * hole. For now we don't support this scenario. | ||
69 | */ | ||
70 | next = cpumask_next_zero(offset, dest); | ||
71 | if (next < nr_cpu_ids) | ||
72 | next = cpumask_next(next, dest); | ||
73 | if (next < nr_cpu_ids) { | ||
74 | pr_warn("Destination mask has holes\n"); | ||
75 | return 0; | ||
76 | } | ||
77 | } | ||
78 | |||
79 | virq = irq_domain_alloc_descs(-1, nr_irqs, 0, NUMA_NO_NODE); | ||
80 | if (virq <= 0) { | ||
81 | pr_warn("Can't reserve IPI, failed to alloc descs\n"); | ||
82 | return 0; | ||
83 | } | ||
84 | |||
85 | virq = __irq_domain_alloc_irqs(domain, virq, nr_irqs, NUMA_NO_NODE, | ||
86 | (void *) dest, true); | ||
87 | |||
88 | if (virq <= 0) { | ||
89 | pr_warn("Can't reserve IPI, failed to alloc hw irqs\n"); | ||
90 | goto free_descs; | ||
91 | } | ||
92 | |||
93 | for (i = 0; i < nr_irqs; i++) { | ||
94 | data = irq_get_irq_data(virq + i); | ||
95 | cpumask_copy(data->common->affinity, dest); | ||
96 | data->common->ipi_offset = offset; | ||
97 | } | ||
98 | return virq; | ||
99 | |||
100 | free_descs: | ||
101 | irq_free_descs(virq, nr_irqs); | ||
102 | return 0; | ||
103 | } | ||
104 | |||
105 | /** | ||
106 | * irq_destroy_ipi() - unreserve an IPI that was previously allocated | ||
107 | * @irq: linux irq number to be destroyed | ||
108 | * | ||
109 | * Return the IPIs allocated with irq_reserve_ipi() to the system, destroying | ||
110 | * all virqs associated with them. | ||
111 | */ | ||
112 | void irq_destroy_ipi(unsigned int irq) | ||
113 | { | ||
114 | struct irq_data *data = irq_get_irq_data(irq); | ||
115 | struct cpumask *ipimask = data ? irq_data_get_affinity_mask(data) : NULL; | ||
116 | struct irq_domain *domain; | ||
117 | unsigned int nr_irqs; | ||
118 | |||
119 | if (!irq || !data || !ipimask) | ||
120 | return; | ||
121 | |||
122 | domain = data->domain; | ||
123 | if (WARN_ON(domain == NULL)) | ||
124 | return; | ||
125 | |||
126 | if (!irq_domain_is_ipi(domain)) { | ||
127 | pr_warn("Trying to destroy a non IPI domain!\n"); | ||
128 | return; | ||
129 | } | ||
130 | |||
131 | if (irq_domain_is_ipi_per_cpu(domain)) | ||
132 | nr_irqs = cpumask_weight(ipimask); | ||
133 | else | ||
134 | nr_irqs = 1; | ||
135 | |||
136 | irq_domain_free_irqs(irq, nr_irqs); | ||
137 | } | ||
138 | |||
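Taken together, irq_reserve_ipi() and irq_destroy_ipi() above bracket the lifetime of an IPI reservation from a driver's point of view. A hedged sketch of a caller, assuming an IPI irq domain created elsewhere by the driver; all foo_* names are hypothetical:

	#include <linux/cpumask.h>
	#include <linux/irq.h>
	#include <linux/irqdomain.h>

	static unsigned int foo_reserve_ipi(struct irq_domain *ipi_domain)
	{
		unsigned int virq;

		/* One reservation covering every possible CPU */
		virq = irq_reserve_ipi(ipi_domain, cpu_possible_mask);
		if (!virq)
			return 0;	/* 0 signals failure, see above */

		/* request_irq()/per-cpu wiring would follow here */
		return virq;
	}

	static void foo_release_ipi(unsigned int virq)
	{
		/* Frees all virqs of the reservation, per-cpu or single */
		irq_destroy_ipi(virq);
	}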
139 | /** | ||
140 | * ipi_get_hwirq - Get the hwirq associated with an IPI to a cpu | ||
141 | * @irq: linux irq number | ||
142 | * @cpu: the target cpu | ||
143 | * | ||
144 | * When dealing with coprocessor IPIs, we need to inform the coprocessor of | ||
145 | * the hwirq it needs to use to receive and send IPIs. | ||
146 | * | ||
147 | * Returns hwirq value on success and INVALID_HWIRQ on failure. | ||
148 | */ | ||
149 | irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu) | ||
150 | { | ||
151 | struct irq_data *data = irq_get_irq_data(irq); | ||
152 | struct cpumask *ipimask = data ? irq_data_get_affinity_mask(data) : NULL; | ||
153 | |||
154 | if (!data || !ipimask || cpu > nr_cpu_ids) | ||
155 | return INVALID_HWIRQ; | ||
156 | |||
157 | if (!cpumask_test_cpu(cpu, ipimask)) | ||
158 | return INVALID_HWIRQ; | ||
159 | |||
160 | /* | ||
161 | * Get the real hardware irq number if the underlying implementation | ||
162 | * uses a separate irq per cpu. If the underlying implementation uses | ||
163 | * a single hardware irq for all cpus then the IPI send mechanism | ||
164 | * needs to take care of the cpu destinations. | ||
165 | */ | ||
166 | if (irq_domain_is_ipi_per_cpu(data->domain)) | ||
167 | data = irq_get_irq_data(irq + cpu - data->common->ipi_offset); | ||
168 | |||
169 | return data ? irqd_to_hwirq(data) : INVALID_HWIRQ; | ||
170 | } | ||
171 | EXPORT_SYMBOL_GPL(ipi_get_hwirq); | ||
172 | |||
173 | static int ipi_send_verify(struct irq_chip *chip, struct irq_data *data, | ||
174 | const struct cpumask *dest, unsigned int cpu) | ||
175 | { | ||
176 | struct cpumask *ipimask = irq_data_get_affinity_mask(data); | ||
177 | |||
178 | if (!chip || !ipimask) | ||
179 | return -EINVAL; | ||
180 | |||
181 | if (!chip->ipi_send_single && !chip->ipi_send_mask) | ||
182 | return -EINVAL; | ||
183 | |||
184 | if (cpu > nr_cpu_ids) | ||
185 | return -EINVAL; | ||
186 | |||
187 | if (dest) { | ||
188 | if (!cpumask_subset(dest, ipimask)) | ||
189 | return -EINVAL; | ||
190 | } else { | ||
191 | if (!cpumask_test_cpu(cpu, ipimask)) | ||
192 | return -EINVAL; | ||
193 | } | ||
194 | return 0; | ||
195 | } | ||
196 | |||
197 | /** | ||
198 | * __ipi_send_single - send an IPI to a target Linux SMP CPU | ||
199 | * @desc: pointer to irq_desc of the IRQ | ||
200 | * @cpu: destination CPU, must be in the destination mask passed to | ||
201 | * irq_reserve_ipi() | ||
202 | * | ||
203 | * This function is for architecture or core code to speed up IPI sending. Not | ||
204 | * usable from driver code. | ||
205 | * | ||
206 | * Returns zero on success and negative error number on failure. | ||
207 | */ | ||
208 | int __ipi_send_single(struct irq_desc *desc, unsigned int cpu) | ||
209 | { | ||
210 | struct irq_data *data = irq_desc_get_irq_data(desc); | ||
211 | struct irq_chip *chip = irq_data_get_irq_chip(data); | ||
212 | |||
213 | #ifdef DEBUG | ||
214 | /* | ||
215 | * Minimise the overhead by omitting the checks for Linux SMP IPIs. | ||
216 | * Since the callers should be arch or core code which is generally | ||
217 | * trusted, only check for errors when debugging. | ||
218 | */ | ||
219 | if (WARN_ON_ONCE(ipi_send_verify(chip, data, NULL, cpu))) | ||
220 | return -EINVAL; | ||
221 | #endif | ||
222 | if (!chip->ipi_send_single) { | ||
223 | chip->ipi_send_mask(data, cpumask_of(cpu)); | ||
224 | return 0; | ||
225 | } | ||
226 | |||
227 | /* FIXME: Store this information in irqdata flags */ | ||
228 | if (irq_domain_is_ipi_per_cpu(data->domain) && | ||
229 | cpu != data->common->ipi_offset) { | ||
230 | /* use the correct data for that cpu */ | ||
231 | unsigned int irq = data->irq + cpu - data->common->ipi_offset; | ||
232 | |||
233 | data = irq_get_irq_data(irq); | ||
234 | } | ||
235 | chip->ipi_send_single(data, cpu); | ||
236 | return 0; | ||
237 | } | ||
238 | |||
239 | /** | ||
240 | * __ipi_send_mask - send an IPI to target Linux SMP CPU(s) | ||
241 | * @desc: pointer to irq_desc of the IRQ | ||
242 | * @dest: dest CPU(s), must be a subset of the mask passed to | ||
243 | * irq_reserve_ipi() | ||
244 | * | ||
245 | * This function is for architecture or core code to speed up IPI sending. Not | ||
246 | * usable from driver code. | ||
247 | * | ||
248 | * Returns zero on success and negative error number on failure. | ||
249 | */ | ||
250 | int __ipi_send_mask(struct irq_desc *desc, const struct cpumask *dest) | ||
251 | { | ||
252 | struct irq_data *data = irq_desc_get_irq_data(desc); | ||
253 | struct irq_chip *chip = irq_data_get_irq_chip(data); | ||
254 | unsigned int cpu; | ||
255 | |||
256 | #ifdef DEBUG | ||
257 | /* | ||
258 | * Minimise the overhead by omitting the checks for Linux SMP IPIs. | ||
259 | * Since the callers should be arch or core code which is generally | ||
260 | * trusted, only check for errors when debugging. | ||
261 | */ | ||
262 | if (WARN_ON_ONCE(ipi_send_verify(chip, data, dest, 0))) | ||
263 | return -EINVAL; | ||
264 | #endif | ||
265 | if (chip->ipi_send_mask) { | ||
266 | chip->ipi_send_mask(data, dest); | ||
267 | return 0; | ||
268 | } | ||
269 | |||
270 | if (irq_domain_is_ipi_per_cpu(data->domain)) { | ||
271 | unsigned int base = data->irq; | ||
272 | |||
273 | for_each_cpu(cpu, dest) { | ||
274 | unsigned int irq = base + cpu - data->common->ipi_offset; | ||
275 | |||
276 | data = irq_get_irq_data(irq); | ||
277 | chip->ipi_send_single(data, cpu); | ||
278 | } | ||
279 | } else { | ||
280 | for_each_cpu(cpu, dest) | ||
281 | chip->ipi_send_single(data, cpu); | ||
282 | } | ||
283 | return 0; | ||
284 | } | ||
285 | |||
286 | /** | ||
287 | * ipi_send_single - Send an IPI to a single CPU | ||
288 | * @virq: linux irq number from irq_reserve_ipi() | ||
289 | * @cpu: destination CPU, must be in the destination mask passed to | ||
290 | * irq_reserve_ipi() | ||
291 | * | ||
292 | * Returns zero on success and negative error number on failure. | ||
293 | */ | ||
294 | int ipi_send_single(unsigned int virq, unsigned int cpu) | ||
295 | { | ||
296 | struct irq_desc *desc = irq_to_desc(virq); | ||
297 | struct irq_data *data = desc ? irq_desc_get_irq_data(desc) : NULL; | ||
298 | struct irq_chip *chip = data ? irq_data_get_irq_chip(data) : NULL; | ||
299 | |||
300 | if (WARN_ON_ONCE(ipi_send_verify(chip, data, NULL, cpu))) | ||
301 | return -EINVAL; | ||
302 | |||
303 | return __ipi_send_single(desc, cpu); | ||
304 | } | ||
305 | EXPORT_SYMBOL_GPL(ipi_send_single); | ||
306 | |||
307 | /** | ||
308 | * ipi_send_mask - Send an IPI to target CPU(s) | ||
309 | * @virq: linux irq number from irq_reserve_ipi() | ||
310 | * @dest: dest CPU(s), must be a subset of the mask passed to | ||
311 | * irq_reserve_ipi() | ||
312 | * | ||
313 | * Returns zero on success and negative error number on failure. | ||
314 | */ | ||
315 | int ipi_send_mask(unsigned int virq, const struct cpumask *dest) | ||
316 | { | ||
317 | struct irq_desc *desc = irq_to_desc(virq); | ||
318 | struct irq_data *data = desc ? irq_desc_get_irq_data(desc) : NULL; | ||
319 | struct irq_chip *chip = data ? irq_data_get_irq_chip(data) : NULL; | ||
320 | |||
321 | if (WARN_ON_ONCE(ipi_send_verify(chip, data, dest, 0))) | ||
322 | return -EINVAL; | ||
323 | |||
324 | return __ipi_send_mask(desc, dest); | ||
325 | } | ||
326 | EXPORT_SYMBOL_GPL(ipi_send_mask); | ||
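The block above completes the new IPI reservation API in kernel/irq/ipi.c. As a rough usage sketch (not part of the patch): a caller obtains an IPI-capable irq_domain from its platform's irqchip driver, reserves virq(s) for a mask of CPUs with irq_reserve_ipi(), and sends through the ipi_send_*() helpers. The "ipi_domain" parameter and the function name below are hypothetical placeholders; a non-positive return from irq_reserve_ipi() signals failure.

	#include <linux/irq.h>
	#include <linux/irqdomain.h>
	#include <linux/cpumask.h>

	/* Usage sketch only, assuming "ipi_domain" is an IPI-capable
	 * irq_domain provided by the platform's irqchip driver. */
	static int example_ipi_roundtrip(struct irq_domain *ipi_domain)
	{
		int virq;

		/* Reserve IPI virq(s) covering every possible CPU. */
		virq = irq_reserve_ipi(ipi_domain, cpu_possible_mask);
		if (virq <= 0)
			return -ENODEV;

		ipi_send_single(virq, 0);		/* kick one CPU */
		ipi_send_mask(virq, cpu_possible_mask);	/* kick them all */

		irq_destroy_ipi(virq);			/* give the virqs back */
		return 0;
	}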
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 0409da0bcc33..0ccd028817d7 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c | |||
@@ -24,10 +24,27 @@ | |||
24 | static struct lock_class_key irq_desc_lock_class; | 24 | static struct lock_class_key irq_desc_lock_class; |
25 | 25 | ||
26 | #if defined(CONFIG_SMP) | 26 | #if defined(CONFIG_SMP) |
27 | static int __init irq_affinity_setup(char *str) | ||
28 | { | ||
29 | zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT); | ||
30 | cpulist_parse(str, irq_default_affinity); | ||
31 | /* | ||
32 | * Set at least the boot cpu. We don't want to end up with | ||
33 | * bug reports caused by random command line masks. | ||
34 | */ | ||
35 | cpumask_set_cpu(smp_processor_id(), irq_default_affinity); | ||
36 | return 1; | ||
37 | } | ||
38 | __setup("irqaffinity=", irq_affinity_setup); | ||
39 | |||
27 | static void __init init_irq_default_affinity(void) | 40 | static void __init init_irq_default_affinity(void) |
28 | { | 41 | { |
29 | alloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT); | 42 | #ifdef CONFIG_CPUMASK_OFFSTACK |
30 | cpumask_setall(irq_default_affinity); | 43 | if (!irq_default_affinity) |
44 | zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT); | ||
45 | #endif | ||
46 | if (cpumask_empty(irq_default_affinity)) | ||
47 | cpumask_setall(irq_default_affinity); | ||
31 | } | 48 | } |
32 | #else | 49 | #else |
33 | static void __init init_irq_default_affinity(void) | 50 | static void __init init_irq_default_affinity(void) |
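The irqdesc.c hunk above adds an early "irqaffinity=" parameter so the default IRQ affinity can be restricted from the kernel command line; cpulist_parse() accepts the usual range/list syntax, and the boot CPU is always OR-ed back in so a bad mask cannot strand all interrupts. For example, a (hypothetical) command line containing

	irqaffinity=0-3,8

would steer newly requested interrupts to CPUs 0-3 and 8 by default.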
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 3e56d2f03e24..3a519a01118b 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c | |||
@@ -23,8 +23,6 @@ static DEFINE_MUTEX(irq_domain_mutex); | |||
23 | static DEFINE_MUTEX(revmap_trees_mutex); | 23 | static DEFINE_MUTEX(revmap_trees_mutex); |
24 | static struct irq_domain *irq_default_domain; | 24 | static struct irq_domain *irq_default_domain; |
25 | 25 | ||
26 | static int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, | ||
27 | irq_hw_number_t hwirq, int node); | ||
28 | static void irq_domain_check_hierarchy(struct irq_domain *domain); | 26 | static void irq_domain_check_hierarchy(struct irq_domain *domain); |
29 | 27 | ||
30 | struct irqchip_fwid { | 28 | struct irqchip_fwid { |
@@ -840,8 +838,8 @@ const struct irq_domain_ops irq_domain_simple_ops = { | |||
840 | }; | 838 | }; |
841 | EXPORT_SYMBOL_GPL(irq_domain_simple_ops); | 839 | EXPORT_SYMBOL_GPL(irq_domain_simple_ops); |
842 | 840 | ||
843 | static int irq_domain_alloc_descs(int virq, unsigned int cnt, | 841 | int irq_domain_alloc_descs(int virq, unsigned int cnt, irq_hw_number_t hwirq, |
844 | irq_hw_number_t hwirq, int node) | 842 | int node) |
845 | { | 843 | { |
846 | unsigned int hint; | 844 | unsigned int hint; |
847 | 845 | ||
@@ -895,6 +893,7 @@ struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent, | |||
895 | 893 | ||
896 | return domain; | 894 | return domain; |
897 | } | 895 | } |
896 | EXPORT_SYMBOL_GPL(irq_domain_create_hierarchy); | ||
898 | 897 | ||
899 | static void irq_domain_insert_irq(int virq) | 898 | static void irq_domain_insert_irq(int virq) |
900 | { | 899 | { |
@@ -1045,6 +1044,7 @@ int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq, | |||
1045 | 1044 | ||
1046 | return 0; | 1045 | return 0; |
1047 | } | 1046 | } |
1047 | EXPORT_SYMBOL_GPL(irq_domain_set_hwirq_and_chip); | ||
1048 | 1048 | ||
1049 | /** | 1049 | /** |
1050 | * irq_domain_set_info - Set the complete data for a @virq in @domain | 1050 | * irq_domain_set_info - Set the complete data for a @virq in @domain |
@@ -1078,6 +1078,7 @@ void irq_domain_reset_irq_data(struct irq_data *irq_data) | |||
1078 | irq_data->chip = &no_irq_chip; | 1078 | irq_data->chip = &no_irq_chip; |
1079 | irq_data->chip_data = NULL; | 1079 | irq_data->chip_data = NULL; |
1080 | } | 1080 | } |
1081 | EXPORT_SYMBOL_GPL(irq_domain_reset_irq_data); | ||
1081 | 1082 | ||
1082 | /** | 1083 | /** |
1083 | * irq_domain_free_irqs_common - Clear irq_data and free the parent | 1084 | * irq_domain_free_irqs_common - Clear irq_data and free the parent |
@@ -1275,6 +1276,7 @@ int irq_domain_alloc_irqs_parent(struct irq_domain *domain, | |||
1275 | nr_irqs, arg); | 1276 | nr_irqs, arg); |
1276 | return -ENOSYS; | 1277 | return -ENOSYS; |
1277 | } | 1278 | } |
1279 | EXPORT_SYMBOL_GPL(irq_domain_alloc_irqs_parent); | ||
1278 | 1280 | ||
1279 | /** | 1281 | /** |
1280 | * irq_domain_free_irqs_parent - Free interrupts from parent domain | 1282 | * irq_domain_free_irqs_parent - Free interrupts from parent domain |
@@ -1292,6 +1294,7 @@ void irq_domain_free_irqs_parent(struct irq_domain *domain, | |||
1292 | irq_domain_free_irqs_recursive(domain->parent, irq_base, | 1294 | irq_domain_free_irqs_recursive(domain->parent, irq_base, |
1293 | nr_irqs); | 1295 | nr_irqs); |
1294 | } | 1296 | } |
1297 | EXPORT_SYMBOL_GPL(irq_domain_free_irqs_parent); | ||
1295 | 1298 | ||
1296 | /** | 1299 | /** |
1297 | * irq_domain_activate_irq - Call domain_ops->activate recursively to activate | 1300 | * irq_domain_activate_irq - Call domain_ops->activate recursively to activate |
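The irqdomain.c hunks export the hierarchy helpers, which lets modular irqchip drivers stack on a parent domain such as the GIC. A minimal sketch of the .alloc/.free pair such a module would provide (my_chip and the function names are hypothetical; only the irq_domain_* calls come from the exports above):

	#include <linux/irq.h>
	#include <linux/irqdomain.h>

	static struct irq_chip my_chip = {
		.name = "my-chip",
		/* .irq_mask/.irq_unmask etc. would go here */
	};

	static int my_domain_alloc(struct irq_domain *d, unsigned int virq,
				   unsigned int nr_irqs, void *arg)
	{
		int i, ret;

		/* Let the parent domain (e.g. the GIC) allocate first. */
		ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, arg);
		if (ret)
			return ret;

		for (i = 0; i < nr_irqs; i++)
			irq_domain_set_hwirq_and_chip(d, virq + i, i,
						      &my_chip, NULL);
		return 0;
	}

	static void my_domain_free(struct irq_domain *d, unsigned int virq,
				   unsigned int nr_irqs)
	{
		int i;

		for (i = 0; i < nr_irqs; i++)
			irq_domain_reset_irq_data(
				irq_domain_get_irq_data(d, virq + i));
		irq_domain_free_irqs_parent(d, virq, nr_irqs);
	}

With the EXPORT_SYMBOL_GPL()s added above, code of this shape can now live in a loadable module; previously these helpers were usable only by built-in irqchips.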
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 841187239adc..3ddd2297ee95 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
@@ -144,13 +144,11 @@ int irq_can_set_affinity(unsigned int irq) | |||
144 | */ | 144 | */ |
145 | void irq_set_thread_affinity(struct irq_desc *desc) | 145 | void irq_set_thread_affinity(struct irq_desc *desc) |
146 | { | 146 | { |
147 | struct irqaction *action = desc->action; | 147 | struct irqaction *action; |
148 | 148 | ||
149 | while (action) { | 149 | for_each_action_of_desc(desc, action) |
150 | if (action->thread) | 150 | if (action->thread) |
151 | set_bit(IRQTF_AFFINITY, &action->thread_flags); | 151 | set_bit(IRQTF_AFFINITY, &action->thread_flags); |
152 | action = action->next; | ||
153 | } | ||
154 | } | 152 | } |
155 | 153 | ||
156 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 154 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
@@ -994,7 +992,7 @@ void irq_wake_thread(unsigned int irq, void *dev_id) | |||
994 | return; | 992 | return; |
995 | 993 | ||
996 | raw_spin_lock_irqsave(&desc->lock, flags); | 994 | raw_spin_lock_irqsave(&desc->lock, flags); |
997 | for (action = desc->action; action; action = action->next) { | 995 | for_each_action_of_desc(desc, action) { |
998 | if (action->dev_id == dev_id) { | 996 | if (action->dev_id == dev_id) { |
999 | if (action->thread) | 997 | if (action->thread) |
1000 | __irq_wake_thread(desc, action); | 998 | __irq_wake_thread(desc, action); |
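The manage.c conversion (and the proc.c and spurious.c hunks below) replaces the open-coded action-list walks with a new for_each_action_of_desc() iterator. The macro itself is added elsewhere in this merge (in kernel/irq/internals.h); assuming it keeps the semantics of the loops it replaces, its presumed shape is:

	/* Presumed shape of the iterator: walks the irqaction chain of a
	 * descriptor exactly like the removed while/for loops did. */
	#define for_each_action_of_desc(desc, act)	\
		for (act = (desc)->action; act; act = act->next)

Callers must still hold desc->lock around the walk, as the surrounding code already does.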
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index a2c02fd5d6d0..4e1b94726818 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c | |||
@@ -291,7 +291,7 @@ static int name_unique(unsigned int irq, struct irqaction *new_action) | |||
291 | int ret = 1; | 291 | int ret = 1; |
292 | 292 | ||
293 | raw_spin_lock_irqsave(&desc->lock, flags); | 293 | raw_spin_lock_irqsave(&desc->lock, flags); |
294 | for (action = desc->action ; action; action = action->next) { | 294 | for_each_action_of_desc(desc, action) { |
295 | if ((action != new_action) && action->name && | 295 | if ((action != new_action) && action->name && |
296 | !strcmp(new_action->name, action->name)) { | 296 | !strcmp(new_action->name, action->name)) { |
297 | ret = 0; | 297 | ret = 0; |
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index 32144175458d..5707f97a3e6a 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c | |||
@@ -211,14 +211,12 @@ static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret) | |||
211 | * desc->lock here. See synchronize_irq(). | 211 | * desc->lock here. See synchronize_irq(). |
212 | */ | 212 | */ |
213 | raw_spin_lock_irqsave(&desc->lock, flags); | 213 | raw_spin_lock_irqsave(&desc->lock, flags); |
214 | action = desc->action; | 214 | for_each_action_of_desc(desc, action) { |
215 | while (action) { | ||
216 | printk(KERN_ERR "[<%p>] %pf", action->handler, action->handler); | 215 | printk(KERN_ERR "[<%p>] %pf", action->handler, action->handler); |
217 | if (action->thread_fn) | 216 | if (action->thread_fn) |
218 | printk(KERN_CONT " threaded [<%p>] %pf", | 217 | printk(KERN_CONT " threaded [<%p>] %pf", |
219 | action->thread_fn, action->thread_fn); | 218 | action->thread_fn, action->thread_fn); |
220 | printk(KERN_CONT "\n"); | 219 | printk(KERN_CONT "\n"); |
221 | action = action->next; | ||
222 | } | 220 | } |
223 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 221 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
224 | } | 222 | } |
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index d2988d047d66..65ae0e5c35da 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c | |||
@@ -932,12 +932,14 @@ rcu_torture_writer(void *arg) | |||
932 | int nsynctypes = 0; | 932 | int nsynctypes = 0; |
933 | 933 | ||
934 | VERBOSE_TOROUT_STRING("rcu_torture_writer task started"); | 934 | VERBOSE_TOROUT_STRING("rcu_torture_writer task started"); |
935 | pr_alert("%s" TORTURE_FLAG | 935 | if (!can_expedite) { |
936 | " Grace periods expedited from boot/sysfs for %s,\n", | 936 | pr_alert("%s" TORTURE_FLAG |
937 | torture_type, cur_ops->name); | 937 | " Grace periods expedited from boot/sysfs for %s,\n", |
938 | pr_alert("%s" TORTURE_FLAG | 938 | torture_type, cur_ops->name); |
939 | " Testing of dynamic grace-period expediting diabled.\n", | 939 | pr_alert("%s" TORTURE_FLAG |
940 | torture_type); | 940 | " Disabled dynamic grace-period expediting.\n", |
941 | torture_type); | ||
942 | } | ||
941 | 943 | ||
942 | /* Initialize synctype[] array. If none set, take default. */ | 944 | /* Initialize synctype[] array. If none set, take default. */ |
943 | if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_sync1) | 945 | if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_sync1) |
diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h index e492a5253e0f..196f0302e2f4 100644 --- a/kernel/rcu/tiny_plugin.h +++ b/kernel/rcu/tiny_plugin.h | |||
@@ -23,7 +23,7 @@ | |||
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/kthread.h> | 25 | #include <linux/kthread.h> |
26 | #include <linux/module.h> | 26 | #include <linux/init.h> |
27 | #include <linux/debugfs.h> | 27 | #include <linux/debugfs.h> |
28 | #include <linux/seq_file.h> | 28 | #include <linux/seq_file.h> |
29 | 29 | ||
@@ -122,18 +122,7 @@ free_out: | |||
122 | debugfs_remove_recursive(rcudir); | 122 | debugfs_remove_recursive(rcudir); |
123 | return 1; | 123 | return 1; |
124 | } | 124 | } |
125 | 125 | device_initcall(rcutiny_trace_init); | |
126 | static void __exit rcutiny_trace_cleanup(void) | ||
127 | { | ||
128 | debugfs_remove_recursive(rcudir); | ||
129 | } | ||
130 | |||
131 | module_init(rcutiny_trace_init); | ||
132 | module_exit(rcutiny_trace_cleanup); | ||
133 | |||
134 | MODULE_AUTHOR("Paul E. McKenney"); | ||
135 | MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation"); | ||
136 | MODULE_LICENSE("GPL"); | ||
137 | 126 | ||
138 | static void check_cpu_stall(struct rcu_ctrlblk *rcp) | 127 | static void check_cpu_stall(struct rcu_ctrlblk *rcp) |
139 | { | 128 | { |
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 9fd5b628a88d..9a535a86e732 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c | |||
@@ -108,7 +108,6 @@ RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); | |||
108 | RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh); | 108 | RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh); |
109 | 109 | ||
110 | static struct rcu_state *const rcu_state_p; | 110 | static struct rcu_state *const rcu_state_p; |
111 | static struct rcu_data __percpu *const rcu_data_p; | ||
112 | LIST_HEAD(rcu_struct_flavors); | 111 | LIST_HEAD(rcu_struct_flavors); |
113 | 112 | ||
114 | /* Dump rcu_node combining tree at boot to verify correct setup. */ | 113 | /* Dump rcu_node combining tree at boot to verify correct setup. */ |
@@ -1083,13 +1082,12 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp, | |||
1083 | rcu_sysidle_check_cpu(rdp, isidle, maxj); | 1082 | rcu_sysidle_check_cpu(rdp, isidle, maxj); |
1084 | if ((rdp->dynticks_snap & 0x1) == 0) { | 1083 | if ((rdp->dynticks_snap & 0x1) == 0) { |
1085 | trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti")); | 1084 | trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti")); |
1086 | return 1; | ||
1087 | } else { | ||
1088 | if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4, | 1085 | if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4, |
1089 | rdp->mynode->gpnum)) | 1086 | rdp->mynode->gpnum)) |
1090 | WRITE_ONCE(rdp->gpwrap, true); | 1087 | WRITE_ONCE(rdp->gpwrap, true); |
1091 | return 0; | 1088 | return 1; |
1092 | } | 1089 | } |
1090 | return 0; | ||
1093 | } | 1091 | } |
1094 | 1092 | ||
1095 | /* | 1093 | /* |
@@ -1173,15 +1171,16 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, | |||
1173 | smp_mb(); /* ->cond_resched_completed before *rcrmp. */ | 1171 | smp_mb(); /* ->cond_resched_completed before *rcrmp. */ |
1174 | WRITE_ONCE(*rcrmp, | 1172 | WRITE_ONCE(*rcrmp, |
1175 | READ_ONCE(*rcrmp) + rdp->rsp->flavor_mask); | 1173 | READ_ONCE(*rcrmp) + rdp->rsp->flavor_mask); |
1176 | resched_cpu(rdp->cpu); /* Force CPU into scheduler. */ | ||
1177 | rdp->rsp->jiffies_resched += 5; /* Enable beating. */ | ||
1178 | } else if (ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) { | ||
1179 | /* Time to beat on that CPU again! */ | ||
1180 | resched_cpu(rdp->cpu); /* Force CPU into scheduler. */ | ||
1181 | rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */ | ||
1182 | } | 1174 | } |
1175 | rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */ | ||
1183 | } | 1176 | } |
1184 | 1177 | ||
1178 | /* And if it has been a really long time, kick the CPU as well. */ | ||
1179 | if (ULONG_CMP_GE(jiffies, | ||
1180 | rdp->rsp->gp_start + 2 * jiffies_till_sched_qs) || | ||
1181 | ULONG_CMP_GE(jiffies, rdp->rsp->gp_start + jiffies_till_sched_qs)) | ||
1182 | resched_cpu(rdp->cpu); /* Force CPU into scheduler. */ | ||
1183 | |||
1185 | return 0; | 1184 | return 0; |
1186 | } | 1185 | } |
1187 | 1186 | ||
@@ -1246,7 +1245,7 @@ static void rcu_dump_cpu_stacks(struct rcu_state *rsp) | |||
1246 | if (rnp->qsmask & (1UL << cpu)) | 1245 | if (rnp->qsmask & (1UL << cpu)) |
1247 | dump_cpu_task(rnp->grplo + cpu); | 1246 | dump_cpu_task(rnp->grplo + cpu); |
1248 | } | 1247 | } |
1249 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1248 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
1250 | } | 1249 | } |
1251 | } | 1250 | } |
1252 | 1251 | ||
@@ -1266,12 +1265,12 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum) | |||
1266 | raw_spin_lock_irqsave_rcu_node(rnp, flags); | 1265 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
1267 | delta = jiffies - READ_ONCE(rsp->jiffies_stall); | 1266 | delta = jiffies - READ_ONCE(rsp->jiffies_stall); |
1268 | if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) { | 1267 | if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) { |
1269 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1268 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
1270 | return; | 1269 | return; |
1271 | } | 1270 | } |
1272 | WRITE_ONCE(rsp->jiffies_stall, | 1271 | WRITE_ONCE(rsp->jiffies_stall, |
1273 | jiffies + 3 * rcu_jiffies_till_stall_check() + 3); | 1272 | jiffies + 3 * rcu_jiffies_till_stall_check() + 3); |
1274 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1273 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
1275 | 1274 | ||
1276 | /* | 1275 | /* |
1277 | * OK, time to rat on our buddy... | 1276 | * OK, time to rat on our buddy... |
@@ -1292,7 +1291,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum) | |||
1292 | ndetected++; | 1291 | ndetected++; |
1293 | } | 1292 | } |
1294 | } | 1293 | } |
1295 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1294 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
1296 | } | 1295 | } |
1297 | 1296 | ||
1298 | print_cpu_stall_info_end(); | 1297 | print_cpu_stall_info_end(); |
@@ -1357,7 +1356,7 @@ static void print_cpu_stall(struct rcu_state *rsp) | |||
1357 | if (ULONG_CMP_GE(jiffies, READ_ONCE(rsp->jiffies_stall))) | 1356 | if (ULONG_CMP_GE(jiffies, READ_ONCE(rsp->jiffies_stall))) |
1358 | WRITE_ONCE(rsp->jiffies_stall, | 1357 | WRITE_ONCE(rsp->jiffies_stall, |
1359 | jiffies + 3 * rcu_jiffies_till_stall_check() + 3); | 1358 | jiffies + 3 * rcu_jiffies_till_stall_check() + 3); |
1360 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1359 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
1361 | 1360 | ||
1362 | /* | 1361 | /* |
1363 | * Attempt to revive the RCU machinery by forcing a context switch. | 1362 | * Attempt to revive the RCU machinery by forcing a context switch. |
@@ -1595,7 +1594,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp, | |||
1595 | } | 1594 | } |
1596 | unlock_out: | 1595 | unlock_out: |
1597 | if (rnp != rnp_root) | 1596 | if (rnp != rnp_root) |
1598 | raw_spin_unlock(&rnp_root->lock); | 1597 | raw_spin_unlock_rcu_node(rnp_root); |
1599 | out: | 1598 | out: |
1600 | if (c_out != NULL) | 1599 | if (c_out != NULL) |
1601 | *c_out = c; | 1600 | *c_out = c; |
@@ -1814,7 +1813,7 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1814 | return; | 1813 | return; |
1815 | } | 1814 | } |
1816 | needwake = __note_gp_changes(rsp, rnp, rdp); | 1815 | needwake = __note_gp_changes(rsp, rnp, rdp); |
1817 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1816 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
1818 | if (needwake) | 1817 | if (needwake) |
1819 | rcu_gp_kthread_wake(rsp); | 1818 | rcu_gp_kthread_wake(rsp); |
1820 | } | 1819 | } |
@@ -1839,7 +1838,7 @@ static bool rcu_gp_init(struct rcu_state *rsp) | |||
1839 | raw_spin_lock_irq_rcu_node(rnp); | 1838 | raw_spin_lock_irq_rcu_node(rnp); |
1840 | if (!READ_ONCE(rsp->gp_flags)) { | 1839 | if (!READ_ONCE(rsp->gp_flags)) { |
1841 | /* Spurious wakeup, tell caller to go back to sleep. */ | 1840 | /* Spurious wakeup, tell caller to go back to sleep. */ |
1842 | raw_spin_unlock_irq(&rnp->lock); | 1841 | raw_spin_unlock_irq_rcu_node(rnp); |
1843 | return false; | 1842 | return false; |
1844 | } | 1843 | } |
1845 | WRITE_ONCE(rsp->gp_flags, 0); /* Clear all flags: New grace period. */ | 1844 | WRITE_ONCE(rsp->gp_flags, 0); /* Clear all flags: New grace period. */ |
@@ -1849,7 +1848,7 @@ static bool rcu_gp_init(struct rcu_state *rsp) | |||
1849 | * Grace period already in progress, don't start another. | 1848 | * Grace period already in progress, don't start another. |
1850 | * Not supposed to be able to happen. | 1849 | * Not supposed to be able to happen. |
1851 | */ | 1850 | */ |
1852 | raw_spin_unlock_irq(&rnp->lock); | 1851 | raw_spin_unlock_irq_rcu_node(rnp); |
1853 | return false; | 1852 | return false; |
1854 | } | 1853 | } |
1855 | 1854 | ||
@@ -1858,7 +1857,7 @@ static bool rcu_gp_init(struct rcu_state *rsp) | |||
1858 | /* Record GP times before starting GP, hence smp_store_release(). */ | 1857 | /* Record GP times before starting GP, hence smp_store_release(). */ |
1859 | smp_store_release(&rsp->gpnum, rsp->gpnum + 1); | 1858 | smp_store_release(&rsp->gpnum, rsp->gpnum + 1); |
1860 | trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start")); | 1859 | trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start")); |
1861 | raw_spin_unlock_irq(&rnp->lock); | 1860 | raw_spin_unlock_irq_rcu_node(rnp); |
1862 | 1861 | ||
1863 | /* | 1862 | /* |
1864 | * Apply per-leaf buffered online and offline operations to the | 1863 | * Apply per-leaf buffered online and offline operations to the |
@@ -1872,7 +1871,7 @@ static bool rcu_gp_init(struct rcu_state *rsp) | |||
1872 | if (rnp->qsmaskinit == rnp->qsmaskinitnext && | 1871 | if (rnp->qsmaskinit == rnp->qsmaskinitnext && |
1873 | !rnp->wait_blkd_tasks) { | 1872 | !rnp->wait_blkd_tasks) { |
1874 | /* Nothing to do on this leaf rcu_node structure. */ | 1873 | /* Nothing to do on this leaf rcu_node structure. */ |
1875 | raw_spin_unlock_irq(&rnp->lock); | 1874 | raw_spin_unlock_irq_rcu_node(rnp); |
1876 | continue; | 1875 | continue; |
1877 | } | 1876 | } |
1878 | 1877 | ||
@@ -1906,7 +1905,7 @@ static bool rcu_gp_init(struct rcu_state *rsp) | |||
1906 | rcu_cleanup_dead_rnp(rnp); | 1905 | rcu_cleanup_dead_rnp(rnp); |
1907 | } | 1906 | } |
1908 | 1907 | ||
1909 | raw_spin_unlock_irq(&rnp->lock); | 1908 | raw_spin_unlock_irq_rcu_node(rnp); |
1910 | } | 1909 | } |
1911 | 1910 | ||
1912 | /* | 1911 | /* |
@@ -1937,7 +1936,7 @@ static bool rcu_gp_init(struct rcu_state *rsp) | |||
1937 | trace_rcu_grace_period_init(rsp->name, rnp->gpnum, | 1936 | trace_rcu_grace_period_init(rsp->name, rnp->gpnum, |
1938 | rnp->level, rnp->grplo, | 1937 | rnp->level, rnp->grplo, |
1939 | rnp->grphi, rnp->qsmask); | 1938 | rnp->grphi, rnp->qsmask); |
1940 | raw_spin_unlock_irq(&rnp->lock); | 1939 | raw_spin_unlock_irq_rcu_node(rnp); |
1941 | cond_resched_rcu_qs(); | 1940 | cond_resched_rcu_qs(); |
1942 | WRITE_ONCE(rsp->gp_activity, jiffies); | 1941 | WRITE_ONCE(rsp->gp_activity, jiffies); |
1943 | } | 1942 | } |
@@ -1995,7 +1994,7 @@ static void rcu_gp_fqs(struct rcu_state *rsp, bool first_time) | |||
1995 | raw_spin_lock_irq_rcu_node(rnp); | 1994 | raw_spin_lock_irq_rcu_node(rnp); |
1996 | WRITE_ONCE(rsp->gp_flags, | 1995 | WRITE_ONCE(rsp->gp_flags, |
1997 | READ_ONCE(rsp->gp_flags) & ~RCU_GP_FLAG_FQS); | 1996 | READ_ONCE(rsp->gp_flags) & ~RCU_GP_FLAG_FQS); |
1998 | raw_spin_unlock_irq(&rnp->lock); | 1997 | raw_spin_unlock_irq_rcu_node(rnp); |
1999 | } | 1998 | } |
2000 | } | 1999 | } |
2001 | 2000 | ||
@@ -2025,7 +2024,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) | |||
2025 | * safe for us to drop the lock in order to mark the grace | 2024 | * safe for us to drop the lock in order to mark the grace |
2026 | * period as completed in all of the rcu_node structures. | 2025 | * period as completed in all of the rcu_node structures. |
2027 | */ | 2026 | */ |
2028 | raw_spin_unlock_irq(&rnp->lock); | 2027 | raw_spin_unlock_irq_rcu_node(rnp); |
2029 | 2028 | ||
2030 | /* | 2029 | /* |
2031 | * Propagate new ->completed value to rcu_node structures so | 2030 | * Propagate new ->completed value to rcu_node structures so |
@@ -2047,7 +2046,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) | |||
2047 | /* smp_mb() provided by prior unlock-lock pair. */ | 2046 | /* smp_mb() provided by prior unlock-lock pair. */ |
2048 | nocb += rcu_future_gp_cleanup(rsp, rnp); | 2047 | nocb += rcu_future_gp_cleanup(rsp, rnp); |
2049 | sq = rcu_nocb_gp_get(rnp); | 2048 | sq = rcu_nocb_gp_get(rnp); |
2050 | raw_spin_unlock_irq(&rnp->lock); | 2049 | raw_spin_unlock_irq_rcu_node(rnp); |
2051 | rcu_nocb_gp_cleanup(sq); | 2050 | rcu_nocb_gp_cleanup(sq); |
2052 | cond_resched_rcu_qs(); | 2051 | cond_resched_rcu_qs(); |
2053 | WRITE_ONCE(rsp->gp_activity, jiffies); | 2052 | WRITE_ONCE(rsp->gp_activity, jiffies); |
@@ -2070,7 +2069,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) | |||
2070 | READ_ONCE(rsp->gpnum), | 2069 | READ_ONCE(rsp->gpnum), |
2071 | TPS("newreq")); | 2070 | TPS("newreq")); |
2072 | } | 2071 | } |
2073 | raw_spin_unlock_irq(&rnp->lock); | 2072 | raw_spin_unlock_irq_rcu_node(rnp); |
2074 | } | 2073 | } |
2075 | 2074 | ||
2076 | /* | 2075 | /* |
@@ -2236,18 +2235,20 @@ static bool rcu_start_gp(struct rcu_state *rsp) | |||
2236 | } | 2235 | } |
2237 | 2236 | ||
2238 | /* | 2237 | /* |
2239 | * Report a full set of quiescent states to the specified rcu_state | 2238 | * Report a full set of quiescent states to the specified rcu_state data |
2240 | * data structure. This involves cleaning up after the prior grace | 2239 | * structure. Invoke rcu_gp_kthread_wake() to awaken the grace-period |
2241 | * period and letting rcu_start_gp() start up the next grace period | 2240 | * kthread if another grace period is required. Whether we wake |
2242 | * if one is needed. Note that the caller must hold rnp->lock, which | 2241 | * the grace-period kthread or it awakens itself for the next round |
2243 | * is released before return. | 2242 | * of quiescent-state forcing, that kthread will clean up after the |
2243 | * just-completed grace period. Note that the caller must hold rnp->lock, | ||
2244 | * which is released before return. | ||
2244 | */ | 2245 | */ |
2245 | static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) | 2246 | static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) |
2246 | __releases(rcu_get_root(rsp)->lock) | 2247 | __releases(rcu_get_root(rsp)->lock) |
2247 | { | 2248 | { |
2248 | WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); | 2249 | WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); |
2249 | WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS); | 2250 | WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS); |
2250 | raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags); | 2251 | raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags); |
2251 | swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */ | 2252 | swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */ |
2252 | } | 2253 | } |
2253 | 2254 | ||
@@ -2277,7 +2278,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, | |||
2277 | * Our bit has already been cleared, or the | 2278 | * Our bit has already been cleared, or the |
2278 | * relevant grace period is already over, so done. | 2279 | * relevant grace period is already over, so done. |
2279 | */ | 2280 | */ |
2280 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 2281 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
2281 | return; | 2282 | return; |
2282 | } | 2283 | } |
2283 | WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */ | 2284 | WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */ |
@@ -2289,7 +2290,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, | |||
2289 | if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { | 2290 | if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { |
2290 | 2291 | ||
2291 | /* Other bits still set at this level, so done. */ | 2292 | /* Other bits still set at this level, so done. */ |
2292 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 2293 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
2293 | return; | 2294 | return; |
2294 | } | 2295 | } |
2295 | mask = rnp->grpmask; | 2296 | mask = rnp->grpmask; |
@@ -2299,7 +2300,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, | |||
2299 | 2300 | ||
2300 | break; | 2301 | break; |
2301 | } | 2302 | } |
2302 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 2303 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
2303 | rnp_c = rnp; | 2304 | rnp_c = rnp; |
2304 | rnp = rnp->parent; | 2305 | rnp = rnp->parent; |
2305 | raw_spin_lock_irqsave_rcu_node(rnp, flags); | 2306 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
@@ -2331,7 +2332,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp, | |||
2331 | 2332 | ||
2332 | if (rcu_state_p == &rcu_sched_state || rsp != rcu_state_p || | 2333 | if (rcu_state_p == &rcu_sched_state || rsp != rcu_state_p || |
2333 | rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { | 2334 | rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { |
2334 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 2335 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
2335 | return; /* Still need more quiescent states! */ | 2336 | return; /* Still need more quiescent states! */ |
2336 | } | 2337 | } |
2337 | 2338 | ||
@@ -2348,19 +2349,14 @@ static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp, | |||
2348 | /* Report up the rest of the hierarchy, tracking current ->gpnum. */ | 2349 | /* Report up the rest of the hierarchy, tracking current ->gpnum. */ |
2349 | gps = rnp->gpnum; | 2350 | gps = rnp->gpnum; |
2350 | mask = rnp->grpmask; | 2351 | mask = rnp->grpmask; |
2351 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 2352 | raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ |
2352 | raw_spin_lock_rcu_node(rnp_p); /* irqs already disabled. */ | 2353 | raw_spin_lock_rcu_node(rnp_p); /* irqs already disabled. */ |
2353 | rcu_report_qs_rnp(mask, rsp, rnp_p, gps, flags); | 2354 | rcu_report_qs_rnp(mask, rsp, rnp_p, gps, flags); |
2354 | } | 2355 | } |
2355 | 2356 | ||
2356 | /* | 2357 | /* |
2357 | * Record a quiescent state for the specified CPU to that CPU's rcu_data | 2358 | * Record a quiescent state for the specified CPU to that CPU's rcu_data |
2358 | * structure. This must be either called from the specified CPU, or | 2359 | * structure. This must be called from the specified CPU. |
2359 | * called when the specified CPU is known to be offline (and when it is | ||
2360 | * also known that no other CPU is concurrently trying to help the offline | ||
2361 | * CPU). The lastcomp argument is used to make sure we are still in the | ||
2362 | * grace period of interest. We don't want to end the current grace period | ||
2363 | * based on quiescent states detected in an earlier grace period! | ||
2364 | */ | 2360 | */ |
2365 | static void | 2361 | static void |
2366 | rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) | 2362 | rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) |
@@ -2385,14 +2381,14 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) | |||
2385 | */ | 2381 | */ |
2386 | rdp->cpu_no_qs.b.norm = true; /* need qs for new gp. */ | 2382 | rdp->cpu_no_qs.b.norm = true; /* need qs for new gp. */ |
2387 | rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); | 2383 | rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); |
2388 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 2384 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
2389 | return; | 2385 | return; |
2390 | } | 2386 | } |
2391 | mask = rdp->grpmask; | 2387 | mask = rdp->grpmask; |
2392 | if ((rnp->qsmask & mask) == 0) { | 2388 | if ((rnp->qsmask & mask) == 0) { |
2393 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 2389 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
2394 | } else { | 2390 | } else { |
2395 | rdp->core_needs_qs = 0; | 2391 | rdp->core_needs_qs = false; |
2396 | 2392 | ||
2397 | /* | 2393 | /* |
2398 | * This GP can't end until cpu checks in, so all of our | 2394 | * This GP can't end until cpu checks in, so all of our |
@@ -2601,36 +2597,15 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf) | |||
2601 | rnp->qsmaskinit &= ~mask; | 2597 | rnp->qsmaskinit &= ~mask; |
2602 | rnp->qsmask &= ~mask; | 2598 | rnp->qsmask &= ~mask; |
2603 | if (rnp->qsmaskinit) { | 2599 | if (rnp->qsmaskinit) { |
2604 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 2600 | raw_spin_unlock_rcu_node(rnp); |
2601 | /* irqs remain disabled. */ | ||
2605 | return; | 2602 | return; |
2606 | } | 2603 | } |
2607 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 2604 | raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ |
2608 | } | 2605 | } |
2609 | } | 2606 | } |
2610 | 2607 | ||
2611 | /* | 2608 | /* |
2612 | * The CPU is exiting the idle loop into the arch_cpu_idle_dead() | ||
2613 | * function. We now remove it from the rcu_node tree's ->qsmaskinit | ||
2614 | * bit masks. | ||
2615 | */ | ||
2616 | static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp) | ||
2617 | { | ||
2618 | unsigned long flags; | ||
2619 | unsigned long mask; | ||
2620 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | ||
2621 | struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ | ||
2622 | |||
2623 | if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) | ||
2624 | return; | ||
2625 | |||
2626 | /* Remove outgoing CPU from mask in the leaf rcu_node structure. */ | ||
2627 | mask = rdp->grpmask; | ||
2628 | raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */ | ||
2629 | rnp->qsmaskinitnext &= ~mask; | ||
2630 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
2631 | } | ||
2632 | |||
2633 | /* | ||
2634 | * The CPU has been completely removed, and some other CPU is reporting | 2609 | * The CPU has been completely removed, and some other CPU is reporting |
2635 | * this fact from process context. Do the remainder of the cleanup, | 2610 | * this fact from process context. Do the remainder of the cleanup, |
2636 | * including orphaning the outgoing CPU's RCU callbacks, and also | 2611 | * including orphaning the outgoing CPU's RCU callbacks, and also |
@@ -2861,7 +2836,7 @@ static void force_qs_rnp(struct rcu_state *rsp, | |||
2861 | rcu_report_qs_rnp(mask, rsp, rnp, rnp->gpnum, flags); | 2836 | rcu_report_qs_rnp(mask, rsp, rnp, rnp->gpnum, flags); |
2862 | } else { | 2837 | } else { |
2863 | /* Nothing to do here, so just drop the lock. */ | 2838 | /* Nothing to do here, so just drop the lock. */ |
2864 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 2839 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
2865 | } | 2840 | } |
2866 | } | 2841 | } |
2867 | } | 2842 | } |
@@ -2897,11 +2872,11 @@ static void force_quiescent_state(struct rcu_state *rsp) | |||
2897 | raw_spin_unlock(&rnp_old->fqslock); | 2872 | raw_spin_unlock(&rnp_old->fqslock); |
2898 | if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { | 2873 | if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { |
2899 | rsp->n_force_qs_lh++; | 2874 | rsp->n_force_qs_lh++; |
2900 | raw_spin_unlock_irqrestore(&rnp_old->lock, flags); | 2875 | raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags); |
2901 | return; /* Someone beat us to it. */ | 2876 | return; /* Someone beat us to it. */ |
2902 | } | 2877 | } |
2903 | WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS); | 2878 | WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS); |
2904 | raw_spin_unlock_irqrestore(&rnp_old->lock, flags); | 2879 | raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags); |
2905 | swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */ | 2880 | swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */ |
2906 | } | 2881 | } |
2907 | 2882 | ||
@@ -2927,7 +2902,7 @@ __rcu_process_callbacks(struct rcu_state *rsp) | |||
2927 | if (cpu_needs_another_gp(rsp, rdp)) { | 2902 | if (cpu_needs_another_gp(rsp, rdp)) { |
2928 | raw_spin_lock_rcu_node(rcu_get_root(rsp)); /* irqs disabled. */ | 2903 | raw_spin_lock_rcu_node(rcu_get_root(rsp)); /* irqs disabled. */ |
2929 | needwake = rcu_start_gp(rsp); | 2904 | needwake = rcu_start_gp(rsp); |
2930 | raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags); | 2905 | raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags); |
2931 | if (needwake) | 2906 | if (needwake) |
2932 | rcu_gp_kthread_wake(rsp); | 2907 | rcu_gp_kthread_wake(rsp); |
2933 | } else { | 2908 | } else { |
@@ -3018,7 +2993,7 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, | |||
3018 | 2993 | ||
3019 | raw_spin_lock_rcu_node(rnp_root); | 2994 | raw_spin_lock_rcu_node(rnp_root); |
3020 | needwake = rcu_start_gp(rsp); | 2995 | needwake = rcu_start_gp(rsp); |
3021 | raw_spin_unlock(&rnp_root->lock); | 2996 | raw_spin_unlock_rcu_node(rnp_root); |
3022 | if (needwake) | 2997 | if (needwake) |
3023 | rcu_gp_kthread_wake(rsp); | 2998 | rcu_gp_kthread_wake(rsp); |
3024 | } else { | 2999 | } else { |
@@ -3438,14 +3413,14 @@ static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp) | |||
3438 | rcu_for_each_leaf_node(rsp, rnp) { | 3413 | rcu_for_each_leaf_node(rsp, rnp) { |
3439 | raw_spin_lock_irqsave_rcu_node(rnp, flags); | 3414 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
3440 | if (rnp->expmaskinit == rnp->expmaskinitnext) { | 3415 | if (rnp->expmaskinit == rnp->expmaskinitnext) { |
3441 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 3416 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
3442 | continue; /* No new CPUs, nothing to do. */ | 3417 | continue; /* No new CPUs, nothing to do. */ |
3443 | } | 3418 | } |
3444 | 3419 | ||
3445 | /* Update this node's mask, track old value for propagation. */ | 3420 | /* Update this node's mask, track old value for propagation. */ |
3446 | oldmask = rnp->expmaskinit; | 3421 | oldmask = rnp->expmaskinit; |
3447 | rnp->expmaskinit = rnp->expmaskinitnext; | 3422 | rnp->expmaskinit = rnp->expmaskinitnext; |
3448 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 3423 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
3449 | 3424 | ||
3450 | /* If was already nonzero, nothing to propagate. */ | 3425 | /* If was already nonzero, nothing to propagate. */ |
3451 | if (oldmask) | 3426 | if (oldmask) |
@@ -3460,7 +3435,7 @@ static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp) | |||
3460 | if (rnp_up->expmaskinit) | 3435 | if (rnp_up->expmaskinit) |
3461 | done = true; | 3436 | done = true; |
3462 | rnp_up->expmaskinit |= mask; | 3437 | rnp_up->expmaskinit |= mask; |
3463 | raw_spin_unlock_irqrestore(&rnp_up->lock, flags); | 3438 | raw_spin_unlock_irqrestore_rcu_node(rnp_up, flags); |
3464 | if (done) | 3439 | if (done) |
3465 | break; | 3440 | break; |
3466 | mask = rnp_up->grpmask; | 3441 | mask = rnp_up->grpmask; |
@@ -3483,7 +3458,7 @@ static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp) | |||
3483 | raw_spin_lock_irqsave_rcu_node(rnp, flags); | 3458 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
3484 | WARN_ON_ONCE(rnp->expmask); | 3459 | WARN_ON_ONCE(rnp->expmask); |
3485 | rnp->expmask = rnp->expmaskinit; | 3460 | rnp->expmask = rnp->expmaskinit; |
3486 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 3461 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
3487 | } | 3462 | } |
3488 | } | 3463 | } |
3489 | 3464 | ||
@@ -3524,11 +3499,11 @@ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, | |||
3524 | if (!rnp->expmask) | 3499 | if (!rnp->expmask) |
3525 | rcu_initiate_boost(rnp, flags); | 3500 | rcu_initiate_boost(rnp, flags); |
3526 | else | 3501 | else |
3527 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 3502 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
3528 | break; | 3503 | break; |
3529 | } | 3504 | } |
3530 | if (rnp->parent == NULL) { | 3505 | if (rnp->parent == NULL) { |
3531 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 3506 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
3532 | if (wake) { | 3507 | if (wake) { |
3533 | smp_mb(); /* EGP done before wake_up(). */ | 3508 | smp_mb(); /* EGP done before wake_up(). */ |
3534 | swake_up(&rsp->expedited_wq); | 3509 | swake_up(&rsp->expedited_wq); |
@@ -3536,7 +3511,7 @@ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, | |||
3536 | break; | 3511 | break; |
3537 | } | 3512 | } |
3538 | mask = rnp->grpmask; | 3513 | mask = rnp->grpmask; |
3539 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ | 3514 | raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled */ |
3540 | rnp = rnp->parent; | 3515 | rnp = rnp->parent; |
3541 | raw_spin_lock_rcu_node(rnp); /* irqs already disabled */ | 3516 | raw_spin_lock_rcu_node(rnp); /* irqs already disabled */ |
3542 | WARN_ON_ONCE(!(rnp->expmask & mask)); | 3517 | WARN_ON_ONCE(!(rnp->expmask & mask)); |
@@ -3571,7 +3546,7 @@ static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp, | |||
3571 | 3546 | ||
3572 | raw_spin_lock_irqsave_rcu_node(rnp, flags); | 3547 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
3573 | if (!(rnp->expmask & mask)) { | 3548 | if (!(rnp->expmask & mask)) { |
3574 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 3549 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
3575 | return; | 3550 | return; |
3576 | } | 3551 | } |
3577 | rnp->expmask &= ~mask; | 3552 | rnp->expmask &= ~mask; |
@@ -3732,7 +3707,7 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp, | |||
3732 | */ | 3707 | */ |
3733 | if (rcu_preempt_has_tasks(rnp)) | 3708 | if (rcu_preempt_has_tasks(rnp)) |
3734 | rnp->exp_tasks = rnp->blkd_tasks.next; | 3709 | rnp->exp_tasks = rnp->blkd_tasks.next; |
3735 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 3710 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
3736 | 3711 | ||
3737 | /* IPI the remaining CPUs for expedited quiescent state. */ | 3712 | /* IPI the remaining CPUs for expedited quiescent state. */ |
3738 | mask = 1; | 3713 | mask = 1; |
@@ -3749,7 +3724,7 @@ retry_ipi: | |||
3749 | raw_spin_lock_irqsave_rcu_node(rnp, flags); | 3724 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
3750 | if (cpu_online(cpu) && | 3725 | if (cpu_online(cpu) && |
3751 | (rnp->expmask & mask)) { | 3726 | (rnp->expmask & mask)) { |
3752 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 3727 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
3753 | schedule_timeout_uninterruptible(1); | 3728 | schedule_timeout_uninterruptible(1); |
3754 | if (cpu_online(cpu) && | 3729 | if (cpu_online(cpu) && |
3755 | (rnp->expmask & mask)) | 3730 | (rnp->expmask & mask)) |
@@ -3758,7 +3733,7 @@ retry_ipi: | |||
3758 | } | 3733 | } |
3759 | if (!(rnp->expmask & mask)) | 3734 | if (!(rnp->expmask & mask)) |
3760 | mask_ofl_ipi &= ~mask; | 3735 | mask_ofl_ipi &= ~mask; |
3761 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 3736 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
3762 | } | 3737 | } |
3763 | /* Report quiescent states for those that went offline. */ | 3738 | /* Report quiescent states for those that went offline. */ |
3764 | mask_ofl_test |= mask_ofl_ipi; | 3739 | mask_ofl_test |= mask_ofl_ipi; |
@@ -4165,7 +4140,7 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf) | |||
4165 | return; | 4140 | return; |
4166 | raw_spin_lock_rcu_node(rnp); /* Interrupts already disabled. */ | 4141 | raw_spin_lock_rcu_node(rnp); /* Interrupts already disabled. */ |
4167 | rnp->qsmaskinit |= mask; | 4142 | rnp->qsmaskinit |= mask; |
4168 | raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */ | 4143 | raw_spin_unlock_rcu_node(rnp); /* Interrupts remain disabled. */ |
4169 | } | 4144 | } |
4170 | } | 4145 | } |
4171 | 4146 | ||
@@ -4189,7 +4164,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) | |||
4189 | rdp->rsp = rsp; | 4164 | rdp->rsp = rsp; |
4190 | mutex_init(&rdp->exp_funnel_mutex); | 4165 | mutex_init(&rdp->exp_funnel_mutex); |
4191 | rcu_boot_init_nocb_percpu_data(rdp); | 4166 | rcu_boot_init_nocb_percpu_data(rdp); |
4192 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 4167 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
4193 | } | 4168 | } |
4194 | 4169 | ||
4195 | /* | 4170 | /* |
@@ -4217,7 +4192,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) | |||
4217 | rcu_sysidle_init_percpu_data(rdp->dynticks); | 4192 | rcu_sysidle_init_percpu_data(rdp->dynticks); |
4218 | atomic_set(&rdp->dynticks->dynticks, | 4193 | atomic_set(&rdp->dynticks->dynticks, |
4219 | (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); | 4194 | (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); |
4220 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 4195 | raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ |
4221 | 4196 | ||
4222 | /* | 4197 | /* |
4223 | * Add CPU to leaf rcu_node pending-online bitmask. Any needed | 4198 | * Add CPU to leaf rcu_node pending-online bitmask. Any needed |
@@ -4238,7 +4213,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) | |||
4238 | rdp->rcu_qs_ctr_snap = per_cpu(rcu_qs_ctr, cpu); | 4213 | rdp->rcu_qs_ctr_snap = per_cpu(rcu_qs_ctr, cpu); |
4239 | rdp->core_needs_qs = false; | 4214 | rdp->core_needs_qs = false; |
4240 | trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); | 4215 | trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); |
4241 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 4216 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
4242 | } | 4217 | } |
4243 | 4218 | ||
4244 | static void rcu_prepare_cpu(int cpu) | 4219 | static void rcu_prepare_cpu(int cpu) |
@@ -4249,6 +4224,46 @@ static void rcu_prepare_cpu(int cpu) | |||
4249 | rcu_init_percpu_data(cpu, rsp); | 4224 | rcu_init_percpu_data(cpu, rsp); |
4250 | } | 4225 | } |
4251 | 4226 | ||
4227 | #ifdef CONFIG_HOTPLUG_CPU | ||
4228 | /* | ||
4229 | * The CPU is exiting the idle loop into the arch_cpu_idle_dead() | ||
4230 | * function. We now remove it from the rcu_node tree's ->qsmaskinit | ||
4231 | * bit masks. | ||
4235 | */ | ||
4236 | static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp) | ||
4237 | { | ||
4238 | unsigned long flags; | ||
4239 | unsigned long mask; | ||
4240 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | ||
4241 | struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ | ||
4242 | |||
4243 | if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) | ||
4244 | return; | ||
4245 | |||
4246 | /* Remove outgoing CPU from mask in the leaf rcu_node structure. */ | ||
4247 | mask = rdp->grpmask; | ||
4248 | raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */ | ||
4249 | rnp->qsmaskinitnext &= ~mask; | ||
4250 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | ||
4251 | } | ||
4252 | |||
4253 | void rcu_report_dead(unsigned int cpu) | ||
4254 | { | ||
4255 | struct rcu_state *rsp; | ||
4256 | |||
4257 | /* QS for any half-done expedited RCU-sched GP. */ | ||
4258 | preempt_disable(); | ||
4259 | rcu_report_exp_rdp(&rcu_sched_state, | ||
4260 | this_cpu_ptr(rcu_sched_state.rda), true); | ||
4261 | preempt_enable(); | ||
4262 | for_each_rcu_flavor(rsp) | ||
4263 | rcu_cleanup_dying_idle_cpu(cpu, rsp); | ||
4264 | } | ||
4265 | #endif | ||
4266 | |||
4252 | /* | 4267 | /* |
4253 | * Handle CPU online/offline notification events. | 4268 | * Handle CPU online/offline notification events. |
4254 | */ | 4269 | */ |
@@ -4280,17 +4295,6 @@ int rcu_cpu_notify(struct notifier_block *self, | |||
4280 | for_each_rcu_flavor(rsp) | 4295 | for_each_rcu_flavor(rsp) |
4281 | rcu_cleanup_dying_cpu(rsp); | 4296 | rcu_cleanup_dying_cpu(rsp); |
4282 | break; | 4297 | break; |
4283 | case CPU_DYING_IDLE: | ||
4284 | /* QS for any half-done expedited RCU-sched GP. */ | ||
4285 | preempt_disable(); | ||
4286 | rcu_report_exp_rdp(&rcu_sched_state, | ||
4287 | this_cpu_ptr(rcu_sched_state.rda), true); | ||
4288 | preempt_enable(); | ||
4289 | |||
4290 | for_each_rcu_flavor(rsp) { | ||
4291 | rcu_cleanup_dying_idle_cpu(cpu, rsp); | ||
4292 | } | ||
4293 | break; | ||
4294 | case CPU_DEAD: | 4298 | case CPU_DEAD: |
4295 | case CPU_DEAD_FROZEN: | 4299 | case CPU_DEAD_FROZEN: |
4296 | case CPU_UP_CANCELED: | 4300 | case CPU_UP_CANCELED: |
@@ -4360,7 +4364,7 @@ static int __init rcu_spawn_gp_kthread(void) | |||
4360 | sp.sched_priority = kthread_prio; | 4364 | sp.sched_priority = kthread_prio; |
4361 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | 4365 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); |
4362 | } | 4366 | } |
4363 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 4367 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
4364 | wake_up_process(t); | 4368 | wake_up_process(t); |
4365 | } | 4369 | } |
4366 | rcu_spawn_nocb_kthreads(); | 4370 | rcu_spawn_nocb_kthreads(); |
@@ -4451,8 +4455,8 @@ static void __init rcu_init_one(struct rcu_state *rsp) | |||
4451 | cpustride *= levelspread[i]; | 4455 | cpustride *= levelspread[i]; |
4452 | rnp = rsp->level[i]; | 4456 | rnp = rsp->level[i]; |
4453 | for (j = 0; j < levelcnt[i]; j++, rnp++) { | 4457 | for (j = 0; j < levelcnt[i]; j++, rnp++) { |
4454 | raw_spin_lock_init(&rnp->lock); | 4458 | raw_spin_lock_init(&ACCESS_PRIVATE(rnp, lock)); |
4455 | lockdep_set_class_and_name(&rnp->lock, | 4459 | lockdep_set_class_and_name(&ACCESS_PRIVATE(rnp, lock), |
4456 | &rcu_node_class[i], buf[i]); | 4460 | &rcu_node_class[i], buf[i]); |
4457 | raw_spin_lock_init(&rnp->fqslock); | 4461 | raw_spin_lock_init(&rnp->fqslock); |
4458 | lockdep_set_class_and_name(&rnp->fqslock, | 4462 | lockdep_set_class_and_name(&rnp->fqslock, |
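The tree.c changes retire the CPU_DYING_IDLE notifier step: the dying CPU now calls rcu_report_dead() directly on its way down. A sketch of the intended call site, with the surrounding hotplug code elided and the function name chosen purely for illustration:

	/* Sketch: runs on the outgoing CPU just before it powers off,
	 * replacing the old CPU_DYING_IDLE notifier work. */
	static void example_report_idle_dead(void)
	{
		rcu_report_dead(smp_processor_id());
		/* ... architecture-specific CPU tear-down follows ... */
	}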
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index bbd235d0e71f..df668c0f9e64 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h | |||
@@ -150,8 +150,9 @@ struct rcu_dynticks { | |||
150 | * Definition for node within the RCU grace-period-detection hierarchy. | 150 | * Definition for node within the RCU grace-period-detection hierarchy. |
151 | */ | 151 | */ |
152 | struct rcu_node { | 152 | struct rcu_node { |
153 | raw_spinlock_t lock; /* Root rcu_node's lock protects some */ | 153 | raw_spinlock_t __private lock; /* Root rcu_node's lock protects */ |
154 | /* rcu_state fields as well as following. */ | 154 | /* some rcu_state fields as well as */ |
155 | /* following. */ | ||
155 | unsigned long gpnum; /* Current grace period for this node. */ | 156 | unsigned long gpnum; /* Current grace period for this node. */ |
156 | /* This will either be equal to or one */ | 157 | /* This will either be equal to or one */ |
157 | /* behind the root rcu_node's gpnum. */ | 158 | /* behind the root rcu_node's gpnum. */ |
@@ -682,7 +683,7 @@ static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll) | |||
682 | #endif /* #else #ifdef CONFIG_PPC */ | 683 | #endif /* #else #ifdef CONFIG_PPC */ |
683 | 684 | ||
684 | /* | 685 | /* |
685 | * Wrappers for the rcu_node::lock acquire. | 686 | * Wrappers for the rcu_node::lock acquire and release. |
686 | * | 687 | * |
687 | * Because the rcu_nodes form a tree, the tree traversal locking will observe | 688 | * Because the rcu_nodes form a tree, the tree traversal locking will observe |
688 | * different lock values, this in turn means that an UNLOCK of one level | 689 | * different lock values, this in turn means that an UNLOCK of one level |
@@ -691,29 +692,48 @@ static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll) | |||
691 | * | 692 | * |
692 | * In order to restore full ordering between tree levels, augment the regular | 693 | * In order to restore full ordering between tree levels, augment the regular |
693 | * lock acquire functions with smp_mb__after_unlock_lock(). | 694 | * lock acquire functions with smp_mb__after_unlock_lock(). |
695 | * | ||
696 | * As ->lock of struct rcu_node is a __private field, therefore one should use | ||
697 | * these wrappers rather than directly call raw_spin_{lock,unlock}* on ->lock. | ||
694 | */ | 698 | */ |
695 | static inline void raw_spin_lock_rcu_node(struct rcu_node *rnp) | 699 | static inline void raw_spin_lock_rcu_node(struct rcu_node *rnp) |
696 | { | 700 | { |
697 | raw_spin_lock(&rnp->lock); | 701 | raw_spin_lock(&ACCESS_PRIVATE(rnp, lock)); |
698 | smp_mb__after_unlock_lock(); | 702 | smp_mb__after_unlock_lock(); |
699 | } | 703 | } |
700 | 704 | ||
705 | static inline void raw_spin_unlock_rcu_node(struct rcu_node *rnp) | ||
706 | { | ||
707 | raw_spin_unlock(&ACCESS_PRIVATE(rnp, lock)); | ||
708 | } | ||
709 | |||
701 | static inline void raw_spin_lock_irq_rcu_node(struct rcu_node *rnp) | 710 | static inline void raw_spin_lock_irq_rcu_node(struct rcu_node *rnp) |
702 | { | 711 | { |
703 | raw_spin_lock_irq(&rnp->lock); | 712 | raw_spin_lock_irq(&ACCESS_PRIVATE(rnp, lock)); |
704 | smp_mb__after_unlock_lock(); | 713 | smp_mb__after_unlock_lock(); |
705 | } | 714 | } |
706 | 715 | ||
707 | #define raw_spin_lock_irqsave_rcu_node(rnp, flags) \ | 716 | static inline void raw_spin_unlock_irq_rcu_node(struct rcu_node *rnp) |
708 | do { \ | 717 | { |
709 | typecheck(unsigned long, flags); \ | 718 | raw_spin_unlock_irq(&ACCESS_PRIVATE(rnp, lock)); |
710 | raw_spin_lock_irqsave(&(rnp)->lock, flags); \ | 719 | } |
711 | smp_mb__after_unlock_lock(); \ | 720 | |
721 | #define raw_spin_lock_irqsave_rcu_node(rnp, flags) \ | ||
722 | do { \ | ||
723 | typecheck(unsigned long, flags); \ | ||
724 | raw_spin_lock_irqsave(&ACCESS_PRIVATE(rnp, lock), flags); \ | ||
725 | smp_mb__after_unlock_lock(); \ | ||
726 | } while (0) | ||
727 | |||
728 | #define raw_spin_unlock_irqrestore_rcu_node(rnp, flags) \ | ||
729 | do { \ | ||
730 | typecheck(unsigned long, flags); \ | ||
731 | raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(rnp, lock), flags); \ | ||
712 | } while (0) | 732 | } while (0) |
713 | 733 | ||
714 | static inline bool raw_spin_trylock_rcu_node(struct rcu_node *rnp) | 734 | static inline bool raw_spin_trylock_rcu_node(struct rcu_node *rnp) |
715 | { | 735 | { |
716 | bool locked = raw_spin_trylock(&rnp->lock); | 736 | bool locked = raw_spin_trylock(&ACCESS_PRIVATE(rnp, lock)); |
717 | 737 | ||
718 | if (locked) | 738 | if (locked) |
719 | smp_mb__after_unlock_lock(); | 739 | smp_mb__after_unlock_lock(); |
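As a quick illustration of the calling convention these wrappers establish, here is a minimal, hypothetical caller; the function name and body are invented, and only the *_rcu_node() calls mirror the API added by this patch:

    /* Hypothetical caller -- only the *_rcu_node() helpers are real. */
    static void example_touch_rcu_node(struct rcu_node *rnp)
    {
            unsigned long flags;

            /* Acquires ->lock with smp_mb__after_unlock_lock() folded in. */
            raw_spin_lock_irqsave_rcu_node(rnp, flags);
            /* ... update fields guarded by rnp->lock here ... */
            raw_spin_unlock_irqrestore_rcu_node(rnp, flags);

            /*
             * A direct raw_spin_lock(&rnp->lock) would now be flagged by
             * sparse, because the field carries the __private annotation.
             */
    }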
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 080bd202d360..efdf7b61ce12 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h | |||
@@ -235,7 +235,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp) | |||
235 | rnp->gp_tasks = &t->rcu_node_entry; | 235 | rnp->gp_tasks = &t->rcu_node_entry; |
236 | if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD)) | 236 | if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD)) |
237 | rnp->exp_tasks = &t->rcu_node_entry; | 237 | rnp->exp_tasks = &t->rcu_node_entry; |
238 | raw_spin_unlock(&rnp->lock); /* rrupts remain disabled. */ | 238 | raw_spin_unlock_rcu_node(rnp); /* interrupts remain disabled. */ |
239 | 239 | ||
240 | /* | 240 | /* |
241 | * Report the quiescent state for the expedited GP. This expedited | 241 | * Report the quiescent state for the expedited GP. This expedited |
@@ -489,7 +489,7 @@ void rcu_read_unlock_special(struct task_struct *t) | |||
489 | !!rnp->gp_tasks); | 489 | !!rnp->gp_tasks); |
490 | rcu_report_unblock_qs_rnp(rcu_state_p, rnp, flags); | 490 | rcu_report_unblock_qs_rnp(rcu_state_p, rnp, flags); |
491 | } else { | 491 | } else { |
492 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 492 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
493 | } | 493 | } |
494 | 494 | ||
495 | /* Unboost if we were boosted. */ | 495 | /* Unboost if we were boosted. */ |
@@ -518,14 +518,14 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) | |||
518 | 518 | ||
519 | raw_spin_lock_irqsave_rcu_node(rnp, flags); | 519 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
520 | if (!rcu_preempt_blocked_readers_cgp(rnp)) { | 520 | if (!rcu_preempt_blocked_readers_cgp(rnp)) { |
521 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 521 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
522 | return; | 522 | return; |
523 | } | 523 | } |
524 | t = list_entry(rnp->gp_tasks->prev, | 524 | t = list_entry(rnp->gp_tasks->prev, |
525 | struct task_struct, rcu_node_entry); | 525 | struct task_struct, rcu_node_entry); |
526 | list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) | 526 | list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) |
527 | sched_show_task(t); | 527 | sched_show_task(t); |
528 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 528 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
529 | } | 529 | } |
530 | 530 | ||
531 | /* | 531 | /* |
@@ -807,7 +807,6 @@ void exit_rcu(void) | |||
807 | #else /* #ifdef CONFIG_PREEMPT_RCU */ | 807 | #else /* #ifdef CONFIG_PREEMPT_RCU */ |
808 | 808 | ||
809 | static struct rcu_state *const rcu_state_p = &rcu_sched_state; | 809 | static struct rcu_state *const rcu_state_p = &rcu_sched_state; |
810 | static struct rcu_data __percpu *const rcu_data_p = &rcu_sched_data; | ||
811 | 810 | ||
812 | /* | 811 | /* |
813 | * Tell them what RCU they are running. | 812 | * Tell them what RCU they are running. |
@@ -991,7 +990,7 @@ static int rcu_boost(struct rcu_node *rnp) | |||
991 | * might exit their RCU read-side critical sections on their own. | 990 | * might exit their RCU read-side critical sections on their own. |
992 | */ | 991 | */ |
993 | if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) { | 992 | if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) { |
994 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 993 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
995 | return 0; | 994 | return 0; |
996 | } | 995 | } |
997 | 996 | ||
@@ -1028,7 +1027,7 @@ static int rcu_boost(struct rcu_node *rnp) | |||
1028 | */ | 1027 | */ |
1029 | t = container_of(tb, struct task_struct, rcu_node_entry); | 1028 | t = container_of(tb, struct task_struct, rcu_node_entry); |
1030 | rt_mutex_init_proxy_locked(&rnp->boost_mtx, t); | 1029 | rt_mutex_init_proxy_locked(&rnp->boost_mtx, t); |
1031 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1030 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
1032 | /* Lock only for side effect: boosts task t's priority. */ | 1031 | /* Lock only for side effect: boosts task t's priority. */ |
1033 | rt_mutex_lock(&rnp->boost_mtx); | 1032 | rt_mutex_lock(&rnp->boost_mtx); |
1034 | rt_mutex_unlock(&rnp->boost_mtx); /* Then keep lockdep happy. */ | 1033 | rt_mutex_unlock(&rnp->boost_mtx); /* Then keep lockdep happy. */ |
@@ -1088,7 +1087,7 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) | |||
1088 | 1087 | ||
1089 | if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) { | 1088 | if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) { |
1090 | rnp->n_balk_exp_gp_tasks++; | 1089 | rnp->n_balk_exp_gp_tasks++; |
1091 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1090 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
1092 | return; | 1091 | return; |
1093 | } | 1092 | } |
1094 | if (rnp->exp_tasks != NULL || | 1093 | if (rnp->exp_tasks != NULL || |
@@ -1098,13 +1097,13 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) | |||
1098 | ULONG_CMP_GE(jiffies, rnp->boost_time))) { | 1097 | ULONG_CMP_GE(jiffies, rnp->boost_time))) { |
1099 | if (rnp->exp_tasks == NULL) | 1098 | if (rnp->exp_tasks == NULL) |
1100 | rnp->boost_tasks = rnp->gp_tasks; | 1099 | rnp->boost_tasks = rnp->gp_tasks; |
1101 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1100 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
1102 | t = rnp->boost_kthread_task; | 1101 | t = rnp->boost_kthread_task; |
1103 | if (t) | 1102 | if (t) |
1104 | rcu_wake_cond(t, rnp->boost_kthread_status); | 1103 | rcu_wake_cond(t, rnp->boost_kthread_status); |
1105 | } else { | 1104 | } else { |
1106 | rcu_initiate_boost_trace(rnp); | 1105 | rcu_initiate_boost_trace(rnp); |
1107 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1106 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
1108 | } | 1107 | } |
1109 | } | 1108 | } |
1110 | 1109 | ||
@@ -1172,7 +1171,7 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |||
1172 | return PTR_ERR(t); | 1171 | return PTR_ERR(t); |
1173 | raw_spin_lock_irqsave_rcu_node(rnp, flags); | 1172 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
1174 | rnp->boost_kthread_task = t; | 1173 | rnp->boost_kthread_task = t; |
1175 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1174 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
1176 | sp.sched_priority = kthread_prio; | 1175 | sp.sched_priority = kthread_prio; |
1177 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | 1176 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); |
1178 | wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ | 1177 | wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ |
@@ -1308,7 +1307,7 @@ static void rcu_prepare_kthreads(int cpu) | |||
1308 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) | 1307 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) |
1309 | __releases(rnp->lock) | 1308 | __releases(rnp->lock) |
1310 | { | 1309 | { |
1311 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1310 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
1312 | } | 1311 | } |
1313 | 1312 | ||
1314 | static void invoke_rcu_callbacks_kthread(void) | 1313 | static void invoke_rcu_callbacks_kthread(void) |
@@ -1559,7 +1558,7 @@ static void rcu_prepare_for_idle(void) | |||
1559 | rnp = rdp->mynode; | 1558 | rnp = rdp->mynode; |
1560 | raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ | 1559 | raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ |
1561 | needwake = rcu_accelerate_cbs(rsp, rnp, rdp); | 1560 | needwake = rcu_accelerate_cbs(rsp, rnp, rdp); |
1562 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 1561 | raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ |
1563 | if (needwake) | 1562 | if (needwake) |
1564 | rcu_gp_kthread_wake(rsp); | 1563 | rcu_gp_kthread_wake(rsp); |
1565 | } | 1564 | } |
@@ -2064,7 +2063,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) | |||
2064 | 2063 | ||
2065 | raw_spin_lock_irqsave_rcu_node(rnp, flags); | 2064 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
2066 | needwake = rcu_start_future_gp(rnp, rdp, &c); | 2065 | needwake = rcu_start_future_gp(rnp, rdp, &c); |
2067 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 2066 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
2068 | if (needwake) | 2067 | if (needwake) |
2069 | rcu_gp_kthread_wake(rdp->rsp); | 2068 | rcu_gp_kthread_wake(rdp->rsp); |
2070 | 2069 | ||
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 76b94e19430b..ca828b41c938 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c | |||
@@ -128,6 +128,7 @@ bool rcu_gp_is_normal(void) | |||
128 | { | 128 | { |
129 | return READ_ONCE(rcu_normal); | 129 | return READ_ONCE(rcu_normal); |
130 | } | 130 | } |
131 | EXPORT_SYMBOL_GPL(rcu_gp_is_normal); | ||
131 | 132 | ||
132 | static atomic_t rcu_expedited_nesting = | 133 | static atomic_t rcu_expedited_nesting = |
133 | ATOMIC_INIT(IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT) ? 1 : 0); | 134 | ATOMIC_INIT(IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT) ? 1 : 0); |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e5725b931bee..ea8f49ae0062 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -5434,16 +5434,6 @@ static int sched_cpu_active(struct notifier_block *nfb, | |||
5434 | set_cpu_rq_start_time(); | 5434 | set_cpu_rq_start_time(); |
5435 | return NOTIFY_OK; | 5435 | return NOTIFY_OK; |
5436 | 5436 | ||
5437 | case CPU_ONLINE: | ||
5438 | /* | ||
5439 | * At this point a starting CPU has marked itself as online via | ||
5440 | * set_cpu_online(). But it might not yet have marked itself | ||
5441 | * as active, which is essential from here on. | ||
5442 | */ | ||
5443 | set_cpu_active(cpu, true); | ||
5444 | stop_machine_unpark(cpu); | ||
5445 | return NOTIFY_OK; | ||
5446 | |||
5447 | case CPU_DOWN_FAILED: | 5437 | case CPU_DOWN_FAILED: |
5448 | set_cpu_active(cpu, true); | 5438 | set_cpu_active(cpu, true); |
5449 | return NOTIFY_OK; | 5439 | return NOTIFY_OK; |
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 544a7133cbd1..bd12c6c714ec 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <linux/sched.h> | 4 | #include <linux/sched.h> |
5 | #include <linux/cpu.h> | 5 | #include <linux/cpu.h> |
6 | #include <linux/cpuidle.h> | 6 | #include <linux/cpuidle.h> |
7 | #include <linux/cpuhotplug.h> | ||
7 | #include <linux/tick.h> | 8 | #include <linux/tick.h> |
8 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
9 | #include <linux/stackprotector.h> | 10 | #include <linux/stackprotector.h> |
@@ -193,8 +194,6 @@ exit_idle: | |||
193 | rcu_idle_exit(); | 194 | rcu_idle_exit(); |
194 | } | 195 | } |
195 | 196 | ||
196 | DEFINE_PER_CPU(bool, cpu_dead_idle); | ||
197 | |||
198 | /* | 197 | /* |
199 | * Generic idle loop implementation | 198 | * Generic idle loop implementation |
200 | * | 199 | * |
@@ -221,10 +220,7 @@ static void cpu_idle_loop(void) | |||
221 | rmb(); | 220 | rmb(); |
222 | 221 | ||
223 | if (cpu_is_offline(smp_processor_id())) { | 222 | if (cpu_is_offline(smp_processor_id())) { |
224 | rcu_cpu_notify(NULL, CPU_DYING_IDLE, | 223 | cpuhp_report_idle_dead(); |
225 | (void *)(long)smp_processor_id()); | ||
226 | smp_mb(); /* all activity before dead. */ | ||
227 | this_cpu_write(cpu_dead_idle, true); | ||
228 | arch_cpu_idle_dead(); | 224 | arch_cpu_idle_dead(); |
229 | } | 225 | } |
230 | 226 | ||
@@ -291,5 +287,6 @@ void cpu_startup_entry(enum cpuhp_state state) | |||
291 | boot_init_stack_canary(); | 287 | boot_init_stack_canary(); |
292 | #endif | 288 | #endif |
293 | arch_cpu_idle_prepare(); | 289 | arch_cpu_idle_prepare(); |
290 | cpuhp_online_idle(state); | ||
294 | cpu_idle_loop(); | 291 | cpu_idle_loop(); |
295 | } | 292 | } |
diff --git a/kernel/smp.c b/kernel/smp.c index 300d29391e07..74165443c240 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
@@ -568,6 +568,7 @@ void __init smp_init(void) | |||
568 | unsigned int cpu; | 568 | unsigned int cpu; |
569 | 569 | ||
570 | idle_threads_init(); | 570 | idle_threads_init(); |
571 | cpuhp_threads_init(); | ||
571 | 572 | ||
572 | /* FIXME: This should be done in userspace --RR */ | 573 | /* FIXME: This should be done in userspace --RR */ |
573 | for_each_present_cpu(cpu) { | 574 | for_each_present_cpu(cpu) { |
diff --git a/kernel/smpboot.c b/kernel/smpboot.c index d264f59bff56..13bc43d1fb22 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c | |||
@@ -226,7 +226,7 @@ static void smpboot_unpark_thread(struct smp_hotplug_thread *ht, unsigned int cp | |||
226 | kthread_unpark(tsk); | 226 | kthread_unpark(tsk); |
227 | } | 227 | } |
228 | 228 | ||
229 | void smpboot_unpark_threads(unsigned int cpu) | 229 | int smpboot_unpark_threads(unsigned int cpu) |
230 | { | 230 | { |
231 | struct smp_hotplug_thread *cur; | 231 | struct smp_hotplug_thread *cur; |
232 | 232 | ||
@@ -235,6 +235,7 @@ void smpboot_unpark_threads(unsigned int cpu) | |||
235 | if (cpumask_test_cpu(cpu, cur->cpumask)) | 235 | if (cpumask_test_cpu(cpu, cur->cpumask)) |
236 | smpboot_unpark_thread(cur, cpu); | 236 | smpboot_unpark_thread(cur, cpu); |
237 | mutex_unlock(&smpboot_threads_lock); | 237 | mutex_unlock(&smpboot_threads_lock); |
238 | return 0; | ||
238 | } | 239 | } |
239 | 240 | ||
240 | static void smpboot_park_thread(struct smp_hotplug_thread *ht, unsigned int cpu) | 241 | static void smpboot_park_thread(struct smp_hotplug_thread *ht, unsigned int cpu) |
@@ -245,7 +246,7 @@ static void smpboot_park_thread(struct smp_hotplug_thread *ht, unsigned int cpu) | |||
245 | kthread_park(tsk); | 246 | kthread_park(tsk); |
246 | } | 247 | } |
247 | 248 | ||
248 | void smpboot_park_threads(unsigned int cpu) | 249 | int smpboot_park_threads(unsigned int cpu) |
249 | { | 250 | { |
250 | struct smp_hotplug_thread *cur; | 251 | struct smp_hotplug_thread *cur; |
251 | 252 | ||
@@ -253,6 +254,7 @@ void smpboot_park_threads(unsigned int cpu) | |||
253 | list_for_each_entry_reverse(cur, &hotplug_threads, list) | 254 | list_for_each_entry_reverse(cur, &hotplug_threads, list) |
254 | smpboot_park_thread(cur, cpu); | 255 | smpboot_park_thread(cur, cpu); |
255 | mutex_unlock(&smpboot_threads_lock); | 256 | mutex_unlock(&smpboot_threads_lock); |
257 | return 0; | ||
256 | } | 258 | } |
257 | 259 | ||
258 | static void smpboot_destroy_threads(struct smp_hotplug_thread *ht) | 260 | static void smpboot_destroy_threads(struct smp_hotplug_thread *ht) |
diff --git a/kernel/smpboot.h b/kernel/smpboot.h index 72415a0eb955..485b81cfab34 100644 --- a/kernel/smpboot.h +++ b/kernel/smpboot.h | |||
@@ -14,7 +14,9 @@ static inline void idle_threads_init(void) { } | |||
14 | #endif | 14 | #endif |
15 | 15 | ||
16 | int smpboot_create_threads(unsigned int cpu); | 16 | int smpboot_create_threads(unsigned int cpu); |
17 | void smpboot_park_threads(unsigned int cpu); | 17 | int smpboot_park_threads(unsigned int cpu); |
18 | void smpboot_unpark_threads(unsigned int cpu); | 18 | int smpboot_unpark_threads(unsigned int cpu); |
19 | |||
20 | void __init cpuhp_threads_init(void); | ||
19 | 21 | ||
20 | #endif | 22 | #endif |
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 664de539299b..56ece145a814 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
@@ -323,13 +323,42 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs) | |||
323 | /* cs is a watchdog. */ | 323 | /* cs is a watchdog. */ |
324 | if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) | 324 | if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) |
325 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; | 325 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; |
326 | } | ||
327 | spin_unlock_irqrestore(&watchdog_lock, flags); | ||
328 | } | ||
329 | |||
330 | static void clocksource_select_watchdog(bool fallback) | ||
331 | { | ||
332 | struct clocksource *cs, *old_wd; | ||
333 | unsigned long flags; | ||
334 | |||
335 | spin_lock_irqsave(&watchdog_lock, flags); | ||
336 | /* save current watchdog */ | ||
337 | old_wd = watchdog; | ||
338 | if (fallback) | ||
339 | watchdog = NULL; | ||
340 | |||
341 | list_for_each_entry(cs, &clocksource_list, list) { | ||
342 | /* cs is a clocksource to be watched. */ | ||
343 | if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) | ||
344 | continue; | ||
345 | |||
346 | /* Skip current if we were requested for a fallback. */ | ||
347 | if (fallback && cs == old_wd) | ||
348 | continue; | ||
349 | |||
326 | /* Pick the best watchdog. */ | 350 | /* Pick the best watchdog. */ |
327 | if (!watchdog || cs->rating > watchdog->rating) { | 351 | if (!watchdog || cs->rating > watchdog->rating) |
328 | watchdog = cs; | 352 | watchdog = cs; |
329 | /* Reset watchdog cycles */ | ||
330 | clocksource_reset_watchdog(); | ||
331 | } | ||
332 | } | 353 | } |
354 | /* If we failed to find a fallback restore the old one. */ | ||
355 | if (!watchdog) | ||
356 | watchdog = old_wd; | ||
357 | |||
358 | /* If we changed the watchdog we need to reset cycles. */ | ||
359 | if (watchdog != old_wd) | ||
360 | clocksource_reset_watchdog(); | ||
361 | |||
333 | /* Check if the watchdog timer needs to be started. */ | 362 | /* Check if the watchdog timer needs to be started. */ |
334 | clocksource_start_watchdog(); | 363 | clocksource_start_watchdog(); |
335 | spin_unlock_irqrestore(&watchdog_lock, flags); | 364 | spin_unlock_irqrestore(&watchdog_lock, flags); |
@@ -404,6 +433,7 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs) | |||
404 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; | 433 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; |
405 | } | 434 | } |
406 | 435 | ||
436 | static void clocksource_select_watchdog(bool fallback) { } | ||
407 | static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { } | 437 | static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { } |
408 | static inline void clocksource_resume_watchdog(void) { } | 438 | static inline void clocksource_resume_watchdog(void) { } |
409 | static inline int __clocksource_watchdog_kthread(void) { return 0; } | 439 | static inline int __clocksource_watchdog_kthread(void) { return 0; } |
@@ -736,6 +766,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) | |||
736 | clocksource_enqueue(cs); | 766 | clocksource_enqueue(cs); |
737 | clocksource_enqueue_watchdog(cs); | 767 | clocksource_enqueue_watchdog(cs); |
738 | clocksource_select(); | 768 | clocksource_select(); |
769 | clocksource_select_watchdog(false); | ||
739 | mutex_unlock(&clocksource_mutex); | 770 | mutex_unlock(&clocksource_mutex); |
740 | return 0; | 771 | return 0; |
741 | } | 772 | } |
@@ -758,6 +789,7 @@ void clocksource_change_rating(struct clocksource *cs, int rating) | |||
758 | mutex_lock(&clocksource_mutex); | 789 | mutex_lock(&clocksource_mutex); |
759 | __clocksource_change_rating(cs, rating); | 790 | __clocksource_change_rating(cs, rating); |
760 | clocksource_select(); | 791 | clocksource_select(); |
792 | clocksource_select_watchdog(false); | ||
761 | mutex_unlock(&clocksource_mutex); | 793 | mutex_unlock(&clocksource_mutex); |
762 | } | 794 | } |
763 | EXPORT_SYMBOL(clocksource_change_rating); | 795 | EXPORT_SYMBOL(clocksource_change_rating); |
@@ -767,12 +799,12 @@ EXPORT_SYMBOL(clocksource_change_rating); | |||
767 | */ | 799 | */ |
768 | static int clocksource_unbind(struct clocksource *cs) | 800 | static int clocksource_unbind(struct clocksource *cs) |
769 | { | 801 | { |
770 | /* | 802 | if (clocksource_is_watchdog(cs)) { |
771 | * I really can't convince myself to support this on hardware | 803 | /* Select and try to install a replacement watchdog. */ |
772 | * designed by lobotomized monkeys. | 804 | clocksource_select_watchdog(true); |
773 | */ | 805 | if (clocksource_is_watchdog(cs)) |
774 | if (clocksource_is_watchdog(cs)) | 806 | return -EBUSY; |
775 | return -EBUSY; | 807 | } |
776 | 808 | ||
777 | if (cs == curr_clocksource) { | 809 | if (cs == curr_clocksource) { |
778 | /* Select and try to install a replacement clock source */ | 810 | /* Select and try to install a replacement clock source */ |
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 347fecf86a3f..555e21f7b966 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c | |||
@@ -68,7 +68,7 @@ static struct clocksource clocksource_jiffies = { | |||
68 | .name = "jiffies", | 68 | .name = "jiffies", |
69 | .rating = 1, /* lowest valid rating*/ | 69 | .rating = 1, /* lowest valid rating*/ |
70 | .read = jiffies_read, | 70 | .read = jiffies_read, |
71 | .mask = 0xffffffff, /*32bits*/ | 71 | .mask = CLOCKSOURCE_MASK(32), |
72 | .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ | 72 | .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ |
73 | .shift = JIFFIES_SHIFT, | 73 | .shift = JIFFIES_SHIFT, |
74 | .max_cycles = 10, | 74 | .max_cycles = 10, |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 34b4cedfa80d..9c629bbed572 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -233,6 +233,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) | |||
233 | u64 tmp, ntpinterval; | 233 | u64 tmp, ntpinterval; |
234 | struct clocksource *old_clock; | 234 | struct clocksource *old_clock; |
235 | 235 | ||
236 | ++tk->cs_was_changed_seq; | ||
236 | old_clock = tk->tkr_mono.clock; | 237 | old_clock = tk->tkr_mono.clock; |
237 | tk->tkr_mono.clock = clock; | 238 | tk->tkr_mono.clock = clock; |
238 | tk->tkr_mono.read = clock->read; | 239 | tk->tkr_mono.read = clock->read; |
@@ -298,17 +299,34 @@ u32 (*arch_gettimeoffset)(void) = default_arch_gettimeoffset; | |||
298 | static inline u32 arch_gettimeoffset(void) { return 0; } | 299 | static inline u32 arch_gettimeoffset(void) { return 0; } |
299 | #endif | 300 | #endif |
300 | 301 | ||
302 | static inline s64 timekeeping_delta_to_ns(struct tk_read_base *tkr, | ||
303 | cycle_t delta) | ||
304 | { | ||
305 | s64 nsec; | ||
306 | |||
307 | nsec = delta * tkr->mult + tkr->xtime_nsec; | ||
308 | nsec >>= tkr->shift; | ||
309 | |||
310 | /* If arch requires, add in get_arch_timeoffset() */ | ||
311 | return nsec + arch_gettimeoffset(); | ||
312 | } | ||
313 | |||
301 | static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) | 314 | static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) |
302 | { | 315 | { |
303 | cycle_t delta; | 316 | cycle_t delta; |
304 | s64 nsec; | ||
305 | 317 | ||
306 | delta = timekeeping_get_delta(tkr); | 318 | delta = timekeeping_get_delta(tkr); |
319 | return timekeeping_delta_to_ns(tkr, delta); | ||
320 | } | ||
307 | 321 | ||
308 | nsec = (delta * tkr->mult + tkr->xtime_nsec) >> tkr->shift; | 322 | static inline s64 timekeeping_cycles_to_ns(struct tk_read_base *tkr, |
323 | cycle_t cycles) | ||
324 | { | ||
325 | cycle_t delta; | ||
309 | 326 | ||
310 | /* If arch requires, add in get_arch_timeoffset() */ | 327 | /* calculate the delta since the last update_wall_time */ |
311 | return nsec + arch_gettimeoffset(); | 328 | delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask); |
329 | return timekeeping_delta_to_ns(tkr, delta); | ||
312 | } | 330 | } |
313 | 331 | ||
314 | /** | 332 | /** |
@@ -857,44 +875,262 @@ time64_t __ktime_get_real_seconds(void) | |||
857 | return tk->xtime_sec; | 875 | return tk->xtime_sec; |
858 | } | 876 | } |
859 | 877 | ||
878 | /** | ||
879 | * ktime_get_snapshot - snapshots the realtime/monotonic raw clocks with counter | ||
880 | * @systime_snapshot: pointer to struct receiving the system time snapshot | ||
881 | */ | ||
882 | void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot) | ||
883 | { | ||
884 | struct timekeeper *tk = &tk_core.timekeeper; | ||
885 | unsigned long seq; | ||
886 | ktime_t base_raw; | ||
887 | ktime_t base_real; | ||
888 | s64 nsec_raw; | ||
889 | s64 nsec_real; | ||
890 | cycle_t now; | ||
860 | 891 | ||
861 | #ifdef CONFIG_NTP_PPS | 892 | WARN_ON_ONCE(timekeeping_suspended); |
893 | |||
894 | do { | ||
895 | seq = read_seqcount_begin(&tk_core.seq); | ||
896 | |||
897 | now = tk->tkr_mono.read(tk->tkr_mono.clock); | ||
898 | systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq; | ||
899 | systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq; | ||
900 | base_real = ktime_add(tk->tkr_mono.base, | ||
901 | tk_core.timekeeper.offs_real); | ||
902 | base_raw = tk->tkr_raw.base; | ||
903 | nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, now); | ||
904 | nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, now); | ||
905 | } while (read_seqcount_retry(&tk_core.seq, seq)); | ||
906 | |||
907 | systime_snapshot->cycles = now; | ||
908 | systime_snapshot->real = ktime_add_ns(base_real, nsec_real); | ||
909 | systime_snapshot->raw = ktime_add_ns(base_raw, nsec_raw); | ||
910 | } | ||
911 | EXPORT_SYMBOL_GPL(ktime_get_snapshot); | ||
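A minimal sketch of an in-kernel consumer of the new snapshot API; the surrounding function is hypothetical, and only ktime_get_snapshot() and the struct fields come from this patch:

    static void example_log_snapshot(void)
    {
            struct system_time_snapshot snap;

            ktime_get_snapshot(&snap);
            pr_info("real=%lld raw=%lld cycles=%llu\n",
                    ktime_to_ns(snap.real), ktime_to_ns(snap.raw),
                    (unsigned long long)snap.cycles);
    }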
912 | |||
913 | /* Scale base by mult/div checking for overflow */ | ||
914 | static int scale64_check_overflow(u64 mult, u64 div, u64 *base) | ||
915 | { | ||
916 | u64 tmp, rem; | ||
917 | |||
918 | tmp = div64_u64_rem(*base, div, &rem); | ||
919 | |||
920 | if (((int)sizeof(u64)*8 - fls64(mult) < fls64(tmp)) || | ||
921 | ((int)sizeof(u64)*8 - fls64(mult) < fls64(rem))) | ||
922 | return -EOVERFLOW; | ||
923 | tmp *= mult; | ||
924 | rem *= mult; | ||
925 | |||
926 | do_div(rem, div); | ||
927 | *base = tmp + rem; | ||
928 | return 0; | ||
929 | } | ||
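Worked example: scaling *base = 10^15 by mult/div = 3/7, div64_u64_rem() gives tmp = 142857142857142 and rem = 6; after multiplying, tmp = 428571428571426 and rem = 18, and do_div() contributes 18/7 = 2, so *base becomes 428571428571428 -- exactly floor(3 * 10^15 / 7), computed without ever forming the full base * mult product that could overflow 64 bits.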
862 | 930 | ||
863 | /** | 931 | /** |
864 | * ktime_get_raw_and_real_ts64 - get day and raw monotonic time in timespec format | 932 | * adjust_historical_crosststamp - adjust crosstimestamp previous to current interval |
865 | * @ts_raw: pointer to the timespec to be set to raw monotonic time | 933 | * @history: Snapshot representing start of history |
866 | * @ts_real: pointer to the timespec to be set to the time of day | 934 | * @partial_history_cycles: Cycle offset into history (fractional part) |
935 | * @total_history_cycles: Total history length in cycles | ||
936 | * @discontinuity: True indicates clock was set on history period | ||
937 | * @ts: Cross timestamp that should be adjusted using | ||
938 | * partial/total ratio | ||
867 | * | 939 | * |
868 | * This function reads both the time of day and raw monotonic time at the | 940 | * Helper function used by get_device_system_crosststamp() to correct the |
869 | * same time atomically and stores the resulting timestamps in timespec | 941 | * crosstimestamp corresponding to the start of the current interval to the |
870 | * format. | 942 | * system counter value (timestamp point) provided by the driver. The |
943 | * total_history_* quantities are the total history starting at the provided | ||
944 | * reference point and ending at the start of the current interval. The cycle | ||
945 | * count between the driver timestamp point and the start of the current | ||
946 | * interval is partial_history_cycles. | ||
871 | */ | 947 | */ |
872 | void ktime_get_raw_and_real_ts64(struct timespec64 *ts_raw, struct timespec64 *ts_real) | 948 | static int adjust_historical_crosststamp(struct system_time_snapshot *history, |
949 | cycle_t partial_history_cycles, | ||
950 | cycle_t total_history_cycles, | ||
951 | bool discontinuity, | ||
952 | struct system_device_crosststamp *ts) | ||
873 | { | 953 | { |
874 | struct timekeeper *tk = &tk_core.timekeeper; | 954 | struct timekeeper *tk = &tk_core.timekeeper; |
875 | unsigned long seq; | 955 | u64 corr_raw, corr_real; |
876 | s64 nsecs_raw, nsecs_real; | 956 | bool interp_forward; |
957 | int ret; | ||
877 | 958 | ||
878 | WARN_ON_ONCE(timekeeping_suspended); | 959 | if (total_history_cycles == 0 || partial_history_cycles == 0) |
960 | return 0; | ||
961 | |||
962 | /* Interpolate shortest distance from beginning or end of history */ | ||
963 | interp_forward = partial_history_cycles > total_history_cycles/2 ? | ||
964 | true : false; | ||
965 | partial_history_cycles = interp_forward ? | ||
966 | total_history_cycles - partial_history_cycles : | ||
967 | partial_history_cycles; | ||
968 | |||
969 | /* | ||
970 | * Scale the monotonic raw time delta by: | ||
971 | * partial_history_cycles / total_history_cycles | ||
972 | */ | ||
973 | corr_raw = (u64)ktime_to_ns( | ||
974 | ktime_sub(ts->sys_monoraw, history->raw)); | ||
975 | ret = scale64_check_overflow(partial_history_cycles, | ||
976 | total_history_cycles, &corr_raw); | ||
977 | if (ret) | ||
978 | return ret; | ||
979 | |||
980 | /* | ||
981 | * If there is a discontinuity in the history, scale monotonic raw | ||
982 | * correction by: | ||
983 | * mult(real)/mult(raw) yielding the realtime correction | ||
984 | * Otherwise, calculate the realtime correction similar to monotonic | ||
985 | * raw calculation | ||
986 | */ | ||
987 | if (discontinuity) { | ||
988 | corr_real = mul_u64_u32_div | ||
989 | (corr_raw, tk->tkr_mono.mult, tk->tkr_raw.mult); | ||
990 | } else { | ||
991 | corr_real = (u64)ktime_to_ns( | ||
992 | ktime_sub(ts->sys_realtime, history->real)); | ||
993 | ret = scale64_check_overflow(partial_history_cycles, | ||
994 | total_history_cycles, &corr_real); | ||
995 | if (ret) | ||
996 | return ret; | ||
997 | } | ||
998 | |||
999 | /* Fixup monotonic raw and real time time values */ | ||
1000 | if (interp_forward) { | ||
1001 | ts->sys_monoraw = ktime_add_ns(history->raw, corr_raw); | ||
1002 | ts->sys_realtime = ktime_add_ns(history->real, corr_real); | ||
1003 | } else { | ||
1004 | ts->sys_monoraw = ktime_sub_ns(ts->sys_monoraw, corr_raw); | ||
1005 | ts->sys_realtime = ktime_sub_ns(ts->sys_realtime, corr_real); | ||
1006 | } | ||
1007 | |||
1008 | return 0; | ||
1009 | } | ||
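Concretely: if the history window spans total_history_cycles = 10000 and the driver's timestamp lies partial_history_cycles = 1000 cycles before the start of the current interval, then 1000 <= 10000/2, so the code interpolates backward from the interval start, subtracting 1000/10000 = 10% of the raw (and, absent a discontinuity, real) time accumulated over the window. Had the timestamp been 9000 cycles back, the code would instead add the complementary 10% forward from the history start.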
1010 | |||
1011 | /* | ||
1012 | * cycle_between - true if test occurs chronologically between before and after | ||
1013 | */ | ||
1014 | static bool cycle_between(cycle_t before, cycle_t test, cycle_t after) | ||
1015 | { | ||
1016 | if (test > before && test < after) | ||
1017 | return true; | ||
1018 | if (test < before && before > after) | ||
1019 | return true; | ||
1020 | return false; | ||
1021 | } | ||
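The second test is what makes this helper safe across clocksource wraparound: with before = 0xfffffff0, test = 0x10 and after = 0x100, for example, test < before and before > after both hold, so test is still recognized as falling inside the window even though the raw counter wrapped between before and after.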
1022 | |||
1023 | /** | ||
1024 | * get_device_system_crosststamp - Synchronously capture system/device timestamp | ||
1025 | * @get_time_fn: Callback to get simultaneous device time and | ||
1026 | * system counter from the device driver | ||
1027 | * @ctx: Context passed to get_time_fn() | ||
1028 | * @history_begin: Historical reference point used to interpolate system | ||
1029 | * time when counter provided by the driver is before the current interval | ||
1030 | * @xtstamp: Receives simultaneously captured system and device time | ||
1031 | * | ||
1032 | * Reads a timestamp from a device and correlates it to system time | ||
1033 | */ | ||
1034 | int get_device_system_crosststamp(int (*get_time_fn) | ||
1035 | (ktime_t *device_time, | ||
1036 | struct system_counterval_t *sys_counterval, | ||
1037 | void *ctx), | ||
1038 | void *ctx, | ||
1039 | struct system_time_snapshot *history_begin, | ||
1040 | struct system_device_crosststamp *xtstamp) | ||
1041 | { | ||
1042 | struct system_counterval_t system_counterval; | ||
1043 | struct timekeeper *tk = &tk_core.timekeeper; | ||
1044 | cycle_t cycles, now, interval_start; | ||
1045 | unsigned int clock_was_set_seq = 0; | ||
1046 | ktime_t base_real, base_raw; | ||
1047 | s64 nsec_real, nsec_raw; | ||
1048 | u8 cs_was_changed_seq; | ||
1049 | unsigned long seq; | ||
1050 | bool do_interp; | ||
1051 | int ret; | ||
879 | 1052 | ||
880 | do { | 1053 | do { |
881 | seq = read_seqcount_begin(&tk_core.seq); | 1054 | seq = read_seqcount_begin(&tk_core.seq); |
1055 | /* | ||
1056 | * Try to synchronously capture device time and a system | ||
1057 | * counter value by calling back into the device driver | ||
1058 | */ | ||
1059 | ret = get_time_fn(&xtstamp->device, &system_counterval, ctx); | ||
1060 | if (ret) | ||
1061 | return ret; | ||
1062 | |||
1063 | /* | ||
1064 | * Verify that the clocksource associated with the captured | ||
1065 | * system counter value is the same as the currently installed | ||
1066 | * timekeeper clocksource | ||
1067 | */ | ||
1068 | if (tk->tkr_mono.clock != system_counterval.cs) | ||
1069 | return -ENODEV; | ||
1070 | cycles = system_counterval.cycles; | ||
882 | 1071 | ||
883 | *ts_raw = tk->raw_time; | 1072 | /* |
884 | ts_real->tv_sec = tk->xtime_sec; | 1073 | * Check whether the system counter value provided by the |
885 | ts_real->tv_nsec = 0; | 1074 | * device driver is on the current timekeeping interval. |
1075 | */ | ||
1076 | now = tk->tkr_mono.read(tk->tkr_mono.clock); | ||
1077 | interval_start = tk->tkr_mono.cycle_last; | ||
1078 | if (!cycle_between(interval_start, cycles, now)) { | ||
1079 | clock_was_set_seq = tk->clock_was_set_seq; | ||
1080 | cs_was_changed_seq = tk->cs_was_changed_seq; | ||
1081 | cycles = interval_start; | ||
1082 | do_interp = true; | ||
1083 | } else { | ||
1084 | do_interp = false; | ||
1085 | } | ||
886 | 1086 | ||
887 | nsecs_raw = timekeeping_get_ns(&tk->tkr_raw); | 1087 | base_real = ktime_add(tk->tkr_mono.base, |
888 | nsecs_real = timekeeping_get_ns(&tk->tkr_mono); | 1088 | tk_core.timekeeper.offs_real); |
1089 | base_raw = tk->tkr_raw.base; | ||
889 | 1090 | ||
1091 | nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, | ||
1092 | system_counterval.cycles); | ||
1093 | nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, | ||
1094 | system_counterval.cycles); | ||
890 | } while (read_seqcount_retry(&tk_core.seq, seq)); | 1095 | } while (read_seqcount_retry(&tk_core.seq, seq)); |
891 | 1096 | ||
892 | timespec64_add_ns(ts_raw, nsecs_raw); | 1097 | xtstamp->sys_realtime = ktime_add_ns(base_real, nsec_real); |
893 | timespec64_add_ns(ts_real, nsecs_real); | 1098 | xtstamp->sys_monoraw = ktime_add_ns(base_raw, nsec_raw); |
894 | } | ||
895 | EXPORT_SYMBOL(ktime_get_raw_and_real_ts64); | ||
896 | 1099 | ||
897 | #endif /* CONFIG_NTP_PPS */ | 1100 | /* |
1101 | * Interpolate if necessary, adjusting back from the start of the | ||
1102 | * current interval | ||
1103 | */ | ||
1104 | if (do_interp) { | ||
1105 | cycle_t partial_history_cycles, total_history_cycles; | ||
1106 | bool discontinuity; | ||
1107 | |||
1108 | /* | ||
1109 | * Check that the counter value occurs after the provided | ||
1110 | * history reference and that the history doesn't cross a | ||
1111 | * clocksource change | ||
1112 | */ | ||
1113 | if (!history_begin || | ||
1114 | !cycle_between(history_begin->cycles, | ||
1115 | system_counterval.cycles, cycles) || | ||
1116 | history_begin->cs_was_changed_seq != cs_was_changed_seq) | ||
1117 | return -EINVAL; | ||
1118 | partial_history_cycles = cycles - system_counterval.cycles; | ||
1119 | total_history_cycles = cycles - history_begin->cycles; | ||
1120 | discontinuity = | ||
1121 | history_begin->clock_was_set_seq != clock_was_set_seq; | ||
1122 | |||
1123 | ret = adjust_historical_crosststamp(history_begin, | ||
1124 | partial_history_cycles, | ||
1125 | total_history_cycles, | ||
1126 | discontinuity, xtstamp); | ||
1127 | if (ret) | ||
1128 | return ret; | ||
1129 | } | ||
1130 | |||
1131 | return 0; | ||
1132 | } | ||
1133 | EXPORT_SYMBOL_GPL(get_device_system_crosststamp); | ||
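A hypothetical driver-side sketch of the callback contract; every example_* name is invented, and only get_device_system_crosststamp(), struct system_counterval_t and struct system_device_crosststamp come from this patch:

    /* Read device time plus a correlated system counter value. */
    static int example_get_time_fn(ktime_t *device_time,
                                   struct system_counterval_t *sys_counterval,
                                   void *ctx)
    {
            struct example_dev *dev = ctx;  /* hypothetical device */

            *device_time = ns_to_ktime(example_read_device_ns(dev));
            sys_counterval->cycles = example_read_latched_counter(dev);
            sys_counterval->cs = example_correlated_clocksource(dev);
            return 0;
    }

    static int example_crosststamp(struct example_dev *dev,
                                   struct system_device_crosststamp *xt)
    {
            /*
             * NULL history: the counter value must then fall within the
             * current timekeeping interval, so no interpolation is done.
             */
            return get_device_system_crosststamp(example_get_time_fn,
                                                 dev, NULL, xt);
    }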
898 | 1134 | ||
899 | /** | 1135 | /** |
900 | * do_gettimeofday - Returns the time of day in a timeval | 1136 | * do_gettimeofday - Returns the time of day in a timeval |
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8bfd1aca7a3d..f28f7fad452f 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
@@ -1442,6 +1442,19 @@ config DEBUG_BLOCK_EXT_DEVT | |||
1442 | 1442 | ||
1443 | Say N if you are unsure. | 1443 | Say N if you are unsure. |
1444 | 1444 | ||
1445 | config CPU_HOTPLUG_STATE_CONTROL | ||
1446 | bool "Enable CPU hotplug state control" | ||
1447 | depends on DEBUG_KERNEL | ||
1448 | depends on HOTPLUG_CPU | ||
1449 | default n | ||
1450 | help | ||
1451 | Allows writing steps between "offline" and "online" to the CPUs | ||
1452 | sysfs target file so states can be stepped through granularly. This is a debug | ||
1453 | option for now as the hotplug machinery cannot be stopped and | ||
1454 | restarted at arbitrary points yet. | ||
1455 | |||
1456 | Say N if you are unsure. | ||
1457 | |||
1445 | config NOTIFIER_ERROR_INJECTION | 1458 | config NOTIFIER_ERROR_INJECTION |
1446 | tristate "Notifier error injection" | 1459 | tristate "Notifier error injection" |
1447 | depends on DEBUG_KERNEL | 1460 | depends on DEBUG_KERNEL |
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c index d62de8bf022d..123481814320 100644 --- a/lib/atomic64_test.c +++ b/lib/atomic64_test.c | |||
@@ -17,7 +17,7 @@ | |||
17 | #include <linux/atomic.h> | 17 | #include <linux/atomic.h> |
18 | 18 | ||
19 | #ifdef CONFIG_X86 | 19 | #ifdef CONFIG_X86 |
20 | #include <asm/processor.h> /* for boot_cpu_has below */ | 20 | #include <asm/cpufeature.h> /* for boot_cpu_has below */ |
21 | #endif | 21 | #endif |
22 | 22 | ||
23 | #define TEST(bit, op, c_op, val) \ | 23 | #define TEST(bit, op, c_op, val) \ |
diff --git a/mm/memory.c b/mm/memory.c index 8132787ae4d5..906d8e3b42c0 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -1551,8 +1551,29 @@ out: | |||
1551 | int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, | 1551 | int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, |
1552 | unsigned long pfn) | 1552 | unsigned long pfn) |
1553 | { | 1553 | { |
1554 | return vm_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot); | ||
1555 | } | ||
1556 | EXPORT_SYMBOL(vm_insert_pfn); | ||
1557 | |||
1558 | /** | ||
1559 | * vm_insert_pfn_prot - insert single pfn into user vma with specified pgprot | ||
1560 | * @vma: user vma to map to | ||
1561 | * @addr: target user address of this page | ||
1562 | * @pfn: source kernel pfn | ||
1563 | * @pgprot: pgprot flags for the inserted page | ||
1564 | * | ||
1565 | * This is exactly like vm_insert_pfn, except that it allows drivers to | ||
1566 | * override pgprot on a per-page basis. | ||
1567 | * | ||
1568 | * This only makes sense for IO mappings, and it makes no sense for | ||
1569 | * cow mappings. In general, using multiple vmas is preferable; | ||
1570 | * vm_insert_pfn_prot should only be used if using multiple VMAs is | ||
1571 | * impractical. | ||
1572 | */ | ||
1573 | int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, | ||
1574 | unsigned long pfn, pgprot_t pgprot) | ||
1575 | { | ||
1554 | int ret; | 1576 | int ret; |
1555 | pgprot_t pgprot = vma->vm_page_prot; | ||
1556 | /* | 1577 | /* |
1557 | * Technically, architectures with pte_special can avoid all these | 1578 | * Technically, architectures with pte_special can avoid all these |
1558 | * restrictions (same for remap_pfn_range). However we would like | 1579 | * restrictions (same for remap_pfn_range). However we would like |
@@ -1574,7 +1595,7 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, | |||
1574 | 1595 | ||
1575 | return ret; | 1596 | return ret; |
1576 | } | 1597 | } |
1577 | EXPORT_SYMBOL(vm_insert_pfn); | 1598 | EXPORT_SYMBOL(vm_insert_pfn_prot); |
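A hypothetical fault-handler use of the new export; example_dev_pfn() is invented, and only vm_insert_pfn_prot() itself is the new API (in this kernel the faulting address is vmf->virtual_address):

    static int example_vm_fault(struct vm_area_struct *vma,
                                struct vm_fault *vmf)
    {
            unsigned long pfn = example_dev_pfn(vma, vmf->pgoff);

            /*
             * Map this one page write-combined instead of the VMA-wide
             * vma->vm_page_prot that plain vm_insert_pfn() would apply.
             */
            if (vm_insert_pfn_prot(vma, (unsigned long)vmf->virtual_address,
                                   pfn, pgprot_writecombine(vma->vm_page_prot)))
                    return VM_FAULT_SIGBUS;
            return VM_FAULT_NOPAGE;
    }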
1578 | 1599 | ||
1579 | int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, | 1600 | int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, |
1580 | pfn_t pfn) | 1601 | pfn_t pfn) |
@@ -3066,11 +3066,16 @@ static int special_mapping_fault(struct vm_area_struct *vma, | |||
3066 | pgoff_t pgoff; | 3066 | pgoff_t pgoff; |
3067 | struct page **pages; | 3067 | struct page **pages; |
3068 | 3068 | ||
3069 | if (vma->vm_ops == &legacy_special_mapping_vmops) | 3069 | if (vma->vm_ops == &legacy_special_mapping_vmops) { |
3070 | pages = vma->vm_private_data; | 3070 | pages = vma->vm_private_data; |
3071 | else | 3071 | } else { |
3072 | pages = ((struct vm_special_mapping *)vma->vm_private_data)-> | 3072 | struct vm_special_mapping *sm = vma->vm_private_data; |
3073 | pages; | 3073 | |
3074 | if (sm->fault) | ||
3075 | return sm->fault(sm, vma, vmf); | ||
3076 | |||
3077 | pages = sm->pages; | ||
3078 | } | ||
3074 | 3079 | ||
3075 | for (pgoff = vmf->pgoff; pgoff && *pages; ++pages) | 3080 | for (pgoff = vmf->pgoff; pgoff && *pages; ++pages) |
3076 | pgoff--; | 3081 | pgoff--; |
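A sketch of how a user of struct vm_special_mapping might exploit the new hook; example_page and all names are invented, and the exact fault prototype is inferred from the call site above:

    static int example_sm_fault(const struct vm_special_mapping *sm,
                                struct vm_area_struct *vma,
                                struct vm_fault *vmf)
    {
            if (vmf->pgoff != 0)
                    return VM_FAULT_SIGBUS;

            vmf->page = example_page;       /* hypothetical backing page */
            get_page(vmf->page);
            return 0;
    }

    static const struct vm_special_mapping example_mapping = {
            .name  = "[example]",
            .fault = example_sm_fault,      /* consulted before .pages */
    };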
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 0147c91fa549..874132b26d23 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl | |||
@@ -269,7 +269,8 @@ our $Sparse = qr{ | |||
269 | __init_refok| | 269 | __init_refok| |
270 | __kprobes| | 270 | __kprobes| |
271 | __ref| | 271 | __ref| |
272 | __rcu | 272 | __rcu| |
273 | __private | ||
273 | }x; | 274 | }x; |
274 | our $InitAttributePrefix = qr{__(?:mem|cpu|dev|net_|)}; | 275 | our $InitAttributePrefix = qr{__(?:mem|cpu|dev|net_|)}; |
275 | our $InitAttributeData = qr{$InitAttributePrefix(?:initdata\b)}; | 276 | our $InitAttributeData = qr{$InitAttributePrefix(?:initdata\b)}; |
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh index 844787a0d7be..5eb49b7f864c 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-console.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh | |||
@@ -33,7 +33,7 @@ if grep -Pq '\x00' < $file | |||
33 | then | 33 | then |
34 | print_warning Console output contains nul bytes, old qemu still running? | 34 | print_warning Console output contains nul bytes, old qemu still running? |
35 | fi | 35 | fi |
36 | egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|Stall ended before state dump start' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $1.diags | 36 | egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $1.diags |
37 | if test -s $1.diags | 37 | if test -s $1.diags |
38 | then | 38 | then |
39 | print_warning Assertion failure in $file $title | 39 | print_warning Assertion failure in $file $title |
@@ -64,10 +64,12 @@ then | |||
64 | then | 64 | then |
65 | summary="$summary lockdep: $n_badness" | 65 | summary="$summary lockdep: $n_badness" |
66 | fi | 66 | fi |
67 | n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|Stall ended before state dump start' $1` | 67 | n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $1` |
68 | if test "$n_stalls" -ne 0 | 68 | if test "$n_stalls" -ne 0 |
69 | then | 69 | then |
70 | summary="$summary Stalls: $n_stalls" | 70 | summary="$summary Stalls: $n_stalls" |
71 | fi | 71 | fi |
72 | print_warning Summary: $summary | 72 | print_warning Summary: $summary |
73 | else | ||
74 | rm $1.diags | ||
73 | fi | 75 | fi |
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index d0c473f65850..d5ce7d7aae3e 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile | |||
@@ -4,15 +4,16 @@ include ../lib.mk | |||
4 | 4 | ||
5 | .PHONY: all all_32 all_64 warn_32bit_failure clean | 5 | .PHONY: all all_32 all_64 warn_32bit_failure clean |
6 | 6 | ||
7 | TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall | 7 | TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall \ |
8 | TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn test_syscall_vdso unwind_vdso \ | 8 | check_initial_reg_state sigreturn ldt_gdt |
9 | TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ | ||
9 | test_FCMOV test_FCOMI test_FISTTP \ | 10 | test_FCMOV test_FCOMI test_FISTTP \ |
10 | ldt_gdt \ | ||
11 | vdso_restorer | 11 | vdso_restorer |
12 | 12 | ||
13 | TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) | 13 | TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) |
14 | TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY) | ||
14 | BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32) | 15 | BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32) |
15 | BINARIES_64 := $(TARGETS_C_BOTHBITS:%=%_64) | 16 | BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64) |
16 | 17 | ||
17 | CFLAGS := -O2 -g -std=gnu99 -pthread -Wall | 18 | CFLAGS := -O2 -g -std=gnu99 -pthread -Wall |
18 | 19 | ||
@@ -40,7 +41,7 @@ clean: | |||
40 | $(TARGETS_C_32BIT_ALL:%=%_32): %_32: %.c | 41 | $(TARGETS_C_32BIT_ALL:%=%_32): %_32: %.c |
41 | $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl -lm | 42 | $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl -lm |
42 | 43 | ||
43 | $(TARGETS_C_BOTHBITS:%=%_64): %_64: %.c | 44 | $(TARGETS_C_64BIT_ALL:%=%_64): %_64: %.c |
44 | $(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl | 45 | $(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl |
45 | 46 | ||
46 | # x86_64 users should be encouraged to install 32-bit libraries | 47 | # x86_64 users should be encouraged to install 32-bit libraries |
@@ -65,3 +66,9 @@ endif | |||
65 | sysret_ss_attrs_64: thunks.S | 66 | sysret_ss_attrs_64: thunks.S |
66 | ptrace_syscall_32: raw_syscall_helper_32.S | 67 | ptrace_syscall_32: raw_syscall_helper_32.S |
67 | test_syscall_vdso_32: thunks_32.S | 68 | test_syscall_vdso_32: thunks_32.S |
69 | |||
70 | # check_initial_reg_state is special: it needs a custom entry, and it | ||
71 | # needs to be static so that its interpreter doesn't destroy its initial | ||
72 | # state. | ||
73 | check_initial_reg_state_32: CFLAGS += -Wl,-ereal_start -static | ||
74 | check_initial_reg_state_64: CFLAGS += -Wl,-ereal_start -static | ||
diff --git a/tools/testing/selftests/x86/check_initial_reg_state.c b/tools/testing/selftests/x86/check_initial_reg_state.c new file mode 100644 index 000000000000..6aaed9b85baf --- /dev/null +++ b/tools/testing/selftests/x86/check_initial_reg_state.c | |||
@@ -0,0 +1,109 @@ | |||
1 | /* | ||
2 | * check_initial_reg_state.c - check that execve sets the correct state | ||
3 | * Copyright (c) 2014-2016 Andrew Lutomirski | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but | ||
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
12 | * General Public License for more details. | ||
13 | */ | ||
14 | |||
15 | #define _GNU_SOURCE | ||
16 | |||
17 | #include <stdio.h> | ||
18 | |||
19 | unsigned long ax, bx, cx, dx, si, di, bp, sp, flags; | ||
20 | unsigned long r8, r9, r10, r11, r12, r13, r14, r15; | ||
21 | |||
22 | asm ( | ||
23 | ".pushsection .text\n\t" | ||
24 | ".type real_start, @function\n\t" | ||
25 | ".global real_start\n\t" | ||
26 | "real_start:\n\t" | ||
27 | #ifdef __x86_64__ | ||
28 | "mov %rax, ax\n\t" | ||
29 | "mov %rbx, bx\n\t" | ||
30 | "mov %rcx, cx\n\t" | ||
31 | "mov %rdx, dx\n\t" | ||
32 | "mov %rsi, si\n\t" | ||
33 | "mov %rdi, di\n\t" | ||
34 | "mov %rbp, bp\n\t" | ||
35 | "mov %rsp, sp\n\t" | ||
36 | "mov %r8, r8\n\t" | ||
37 | "mov %r9, r9\n\t" | ||
38 | "mov %r10, r10\n\t" | ||
39 | "mov %r11, r11\n\t" | ||
40 | "mov %r12, r12\n\t" | ||
41 | "mov %r13, r13\n\t" | ||
42 | "mov %r14, r14\n\t" | ||
43 | "mov %r15, r15\n\t" | ||
44 | "pushfq\n\t" | ||
45 | "popq flags\n\t" | ||
46 | #else | ||
47 | "mov %eax, ax\n\t" | ||
48 | "mov %ebx, bx\n\t" | ||
49 | "mov %ecx, cx\n\t" | ||
50 | "mov %edx, dx\n\t" | ||
51 | "mov %esi, si\n\t" | ||
52 | "mov %edi, di\n\t" | ||
53 | "mov %ebp, bp\n\t" | ||
54 | "mov %esp, sp\n\t" | ||
55 | "pushfl\n\t" | ||
56 | "popl flags\n\t" | ||
57 | #endif | ||
58 | "jmp _start\n\t" | ||
59 | ".size real_start, . - real_start\n\t" | ||
60 | ".popsection"); | ||
61 | |||
62 | int main() | ||
63 | { | ||
64 | int nerrs = 0; | ||
65 | |||
66 | if (sp == 0) { | ||
67 | printf("[FAIL]\tTest was built incorrectly\n"); | ||
68 | return 1; | ||
69 | } | ||
70 | |||
71 | if (ax || bx || cx || dx || si || di || bp | ||
72 | #ifdef __x86_64__ | ||
73 | || r8 || r9 || r10 || r11 || r12 || r13 || r14 || r15 | ||
74 | #endif | ||
75 | ) { | ||
76 | printf("[FAIL]\tAll GPRs except SP should be 0\n"); | ||
77 | #define SHOW(x) printf("\t" #x " = 0x%lx\n", x); | ||
78 | SHOW(ax); | ||
79 | SHOW(bx); | ||
80 | SHOW(cx); | ||
81 | SHOW(dx); | ||
82 | SHOW(si); | ||
83 | SHOW(di); | ||
84 | SHOW(bp); | ||
85 | SHOW(sp); | ||
86 | #ifdef __x86_64__ | ||
87 | SHOW(r8); | ||
88 | SHOW(r9); | ||
89 | SHOW(r10); | ||
90 | SHOW(r11); | ||
91 | SHOW(r12); | ||
92 | SHOW(r13); | ||
93 | SHOW(r14); | ||
94 | SHOW(r15); | ||
95 | #endif | ||
96 | nerrs++; | ||
97 | } else { | ||
98 | printf("[OK]\tAll GPRs except SP are 0\n"); | ||
99 | } | ||
100 | |||
101 | if (flags != 0x202) { | ||
102 | printf("[FAIL]\tFLAGS is 0x%lx, but it should be 0x202\n", flags); | ||
103 | nerrs++; | ||
104 | } else { | ||
105 | printf("[OK]\tFLAGS is 0x202\n"); | ||
106 | } | ||
107 | |||
108 | return nerrs ? 1 : 0; | ||
109 | } | ||
diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c index 5105b49cd8aa..421456784bc6 100644 --- a/tools/testing/selftests/x86/ptrace_syscall.c +++ b/tools/testing/selftests/x86/ptrace_syscall.c | |||
@@ -103,6 +103,17 @@ static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), | |||
103 | err(1, "sigaction"); | 103 | err(1, "sigaction"); |
104 | } | 104 | } |
105 | 105 | ||
106 | static void setsigign(int sig, int flags) | ||
107 | { | ||
108 | struct sigaction sa; | ||
109 | memset(&sa, 0, sizeof(sa)); | ||
110 | sa.sa_sigaction = (void *)SIG_IGN; | ||
111 | sa.sa_flags = flags; | ||
112 | sigemptyset(&sa.sa_mask); | ||
113 | if (sigaction(sig, &sa, 0)) | ||
114 | err(1, "sigaction"); | ||
115 | } | ||
116 | |||
106 | static void clearhandler(int sig) | 117 | static void clearhandler(int sig) |
107 | { | 118 | { |
108 | struct sigaction sa; | 119 | struct sigaction sa; |
@@ -187,7 +198,7 @@ static void test_ptrace_syscall_restart(void) | |||
187 | 198 | ||
188 | printf("[RUN]\tSYSEMU\n"); | 199 | printf("[RUN]\tSYSEMU\n"); |
189 | if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0) | 200 | if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0) |
190 | err(1, "PTRACE_SYSCALL"); | 201 | err(1, "PTRACE_SYSEMU"); |
191 | wait_trap(chld); | 202 | wait_trap(chld); |
192 | 203 | ||
193 | if (ptrace(PTRACE_GETREGS, chld, 0, ®s) != 0) | 204 | if (ptrace(PTRACE_GETREGS, chld, 0, ®s) != 0) |
@@ -218,7 +229,7 @@ static void test_ptrace_syscall_restart(void) | |||
218 | err(1, "PTRACE_SETREGS"); | 229 | err(1, "PTRACE_SETREGS"); |
219 | 230 | ||
220 | if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0) | 231 | if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0) |
221 | err(1, "PTRACE_SYSCALL"); | 232 | err(1, "PTRACE_SYSEMU"); |
222 | wait_trap(chld); | 233 | wait_trap(chld); |
223 | 234 | ||
224 | if (ptrace(PTRACE_GETREGS, chld, 0, ®s) != 0) | 235 | if (ptrace(PTRACE_GETREGS, chld, 0, ®s) != 0) |
@@ -250,7 +261,7 @@ static void test_ptrace_syscall_restart(void) | |||
250 | err(1, "PTRACE_SETREGS"); | 261 | err(1, "PTRACE_SETREGS"); |
251 | 262 | ||
252 | if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0) | 263 | if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0) |
253 | err(1, "PTRACE_SYSCALL"); | 264 | err(1, "PTRACE_SYSEMU"); |
254 | wait_trap(chld); | 265 | wait_trap(chld); |
255 | 266 | ||
256 | if (ptrace(PTRACE_GETREGS, chld, 0, ®s) != 0) | 267 | if (ptrace(PTRACE_GETREGS, chld, 0, ®s) != 0) |
@@ -277,6 +288,119 @@ static void test_ptrace_syscall_restart(void) | |||
277 | } | 288 | } |
278 | } | 289 | } |
279 | 290 | ||
291 | static void test_restart_under_ptrace(void) | ||
292 | { | ||
293 | printf("[RUN]\tkernel syscall restart under ptrace\n"); | ||
294 | pid_t chld = fork(); | ||
295 | if (chld < 0) | ||
296 | err(1, "fork"); | ||
297 | |||
298 | if (chld == 0) { | ||
299 | if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0) | ||
300 | err(1, "PTRACE_TRACEME"); | ||
301 | |||
302 | printf("\tChild will take a nap until signaled\n"); | ||
303 | setsigign(SIGUSR1, SA_RESTART); | ||
304 | raise(SIGSTOP); | ||
305 | |||
306 | syscall(SYS_pause, 0, 0, 0, 0, 0, 0); | ||
307 | _exit(0); | ||
308 | } | ||
309 | |||
310 | int status; | ||
311 | |||
312 | /* Wait for SIGSTOP. */ | ||
313 | if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status)) | ||
314 | err(1, "waitpid"); | ||
315 | |||
316 | struct user_regs_struct regs; | ||
317 | |||
318 | printf("[RUN]\tSYSCALL\n"); | ||
319 | if (ptrace(PTRACE_SYSCALL, chld, 0, 0) != 0) | ||
320 | err(1, "PTRACE_SYSCALL"); | ||
321 | wait_trap(chld); | ||
322 | |||
323 | /* We should be stopped at pause(2) entry. */ | ||
324 | |||
325 | if (ptrace(PTRACE_GETREGS, chld, 0, ®s) != 0) | ||
326 | err(1, "PTRACE_GETREGS"); | ||
327 | |||
328 | if (regs.user_syscall_nr != SYS_pause || | ||
329 | regs.user_arg0 != 0 || regs.user_arg1 != 0 || | ||
330 | regs.user_arg2 != 0 || regs.user_arg3 != 0 || | ||
331 | regs.user_arg4 != 0 || regs.user_arg5 != 0) { | ||
332 | printf("[FAIL]\tInitial args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5); | ||
333 | nerrs++; | ||
334 | } else { | ||
335 | printf("[OK]\tInitial nr and args are correct\n"); | ||
336 | } | ||
337 | |||
338 | /* Interrupt it. */ | ||
339 | kill(chld, SIGUSR1); | ||
340 | |||
341 | /* Advance. We should be stopped at exit. */ | ||
342 | printf("[RUN]\tSYSCALL\n"); | ||
343 | if (ptrace(PTRACE_SYSCALL, chld, 0, 0) != 0) | ||
344 | err(1, "PTRACE_SYSCALL"); | ||
345 | wait_trap(chld); | ||
346 | |||
347 | if (ptrace(PTRACE_GETREGS, chld, 0, ®s) != 0) | ||
348 | err(1, "PTRACE_GETREGS"); | ||
349 | |||
350 | if (regs.user_syscall_nr != SYS_pause || | ||
351 | regs.user_arg0 != 0 || regs.user_arg1 != 0 || | ||
352 | regs.user_arg2 != 0 || regs.user_arg3 != 0 || | ||
353 | regs.user_arg4 != 0 || regs.user_arg5 != 0) { | ||
354 | printf("[FAIL]\tArgs after SIGUSR1 are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5); | ||
355 | nerrs++; | ||
356 | } else { | ||
357 | printf("[OK]\tArgs after SIGUSR1 are correct (ax = %ld)\n", | ||
358 | (long)regs.user_ax); | ||
359 | } | ||
360 | |||
361 | /* Poke the regs back in. This must not break anything. */ | ||
362 | if (ptrace(PTRACE_SETREGS, chld, 0, ®s) != 0) | ||
363 | err(1, "PTRACE_SETREGS"); | ||
364 | |||
365 | /* Catch the (ignored) SIGUSR1. */ | ||
366 | if (ptrace(PTRACE_CONT, chld, 0, 0) != 0) | ||
367 | err(1, "PTRACE_CONT"); | ||
368 | if (waitpid(chld, &status, 0) != chld) | ||
369 | err(1, "waitpid"); | ||
370 | if (!WIFSTOPPED(status)) { | ||
371 | printf("[FAIL]\tChild was stopped for SIGUSR1 (status = 0x%x)\n", status); | ||
372 | nerrs++; | ||
373 | } else { | ||
374 | printf("[OK]\tChild got SIGUSR1\n"); | ||
375 | } | ||
376 | |||
377 | /* The next event should be pause(2) again. */ | ||
378 | printf("[RUN]\tStep again\n"); | ||
379 | if (ptrace(PTRACE_SYSCALL, chld, 0, 0) != 0) | ||
380 | err(1, "PTRACE_SYSCALL"); | ||
381 | wait_trap(chld); | ||
382 | |||
383 | /* We should be stopped at pause(2) entry. */ | ||
384 | |||
385 | if (ptrace(PTRACE_GETREGS, chld, 0, ®s) != 0) | ||
386 | err(1, "PTRACE_GETREGS"); | ||
387 | |||
388 | if (regs.user_syscall_nr != SYS_pause || | ||
389 | regs.user_arg0 != 0 || regs.user_arg1 != 0 || | ||
390 | regs.user_arg2 != 0 || regs.user_arg3 != 0 || | ||
391 | regs.user_arg4 != 0 || regs.user_arg5 != 0) { | ||
392 | printf("[FAIL]\tpause did not restart (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5); | ||
393 | nerrs++; | ||
394 | } else { | ||
395 | printf("[OK]\tpause(2) restarted correctly\n"); | ||
396 | } | ||
397 | |||
398 | /* Kill it. */ | ||
399 | kill(chld, SIGKILL); | ||
400 | if (waitpid(chld, &status, 0) != chld) | ||
401 | err(1, "waitpid"); | ||
402 | } | ||
403 | |||
280 | int main() | 404 | int main() |
281 | { | 405 | { |
282 | printf("[RUN]\tCheck int80 return regs\n"); | 406 | printf("[RUN]\tCheck int80 return regs\n"); |
@@ -290,5 +414,7 @@ int main() | |||
290 | 414 | ||
291 | test_ptrace_syscall_restart(); | 415 | test_ptrace_syscall_restart(); |
292 | 416 | ||
417 | test_restart_under_ptrace(); | ||
418 | |||
293 | return 0; | 419 | return 0; |
294 | } | 420 | } |
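
The test_restart_under_ptrace() flow above is easier to follow with the tracer/tracee pattern isolated. Here is a minimal, self-contained sketch of the same PTRACE_SYSCALL stop-and-inspect loop on x86_64. It is illustrative only: the child body, the syscall choice, and the messages are hypothetical and not part of this patch, and libc may issue syscalls of its own before the one shown.

	/* Sketch: stop a traced child at its next syscall entry and read regs. */
	#include <err.h>
	#include <signal.h>
	#include <stdio.h>
	#include <sys/ptrace.h>
	#include <sys/syscall.h>
	#include <sys/types.h>
	#include <sys/user.h>
	#include <sys/wait.h>
	#include <unistd.h>

	int main(void)
	{
		int status;
		struct user_regs_struct regs;
		pid_t chld = fork();

		if (chld < 0)
			err(1, "fork");
		if (chld == 0) {
			/* Child: opt in to tracing, stop, then make one syscall. */
			if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0)
				err(1, "PTRACE_TRACEME");
			raise(SIGSTOP);
			syscall(SYS_getpid);
			_exit(0);
		}

		if (waitpid(chld, &status, 0) != chld)	/* the SIGSTOP stop */
			err(1, "waitpid");

		/* Resume until the next syscall entry; sig = 0 suppresses SIGSTOP. */
		if (ptrace(PTRACE_SYSCALL, chld, 0, 0) != 0)
			err(1, "PTRACE_SYSCALL");
		if (waitpid(chld, &status, 0) != chld)
			err(1, "waitpid");

		/* At a syscall stop, orig_rax holds the syscall number on x86_64. */
		if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
			err(1, "PTRACE_GETREGS");
		printf("stopped at syscall nr %llu\n",
		       (unsigned long long)regs.orig_rax);

		kill(chld, SIGKILL);
		waitpid(chld, &status, 0);
		return 0;
	}

The selftest's wait_trap() helper (defined earlier in the file, not shown in this hunk) stands in for the bare waitpid() calls here, and user_syscall_nr/user_arg0..5 are its arch-specific aliases for the raw register fields.
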
diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c index b5aa1bab7416..8a577e7070c6 100644 --- a/tools/testing/selftests/x86/sigreturn.c +++ b/tools/testing/selftests/x86/sigreturn.c | |||
@@ -54,6 +54,37 @@ | |||
54 | #include <sys/ptrace.h> | 54 | #include <sys/ptrace.h> |
55 | #include <sys/user.h> | 55 | #include <sys/user.h> |
56 | 56 | ||
57 | /* Pull in AR_xyz defines. */ | ||
58 | typedef unsigned int u32; | ||
59 | typedef unsigned short u16; | ||
60 | #include "../../../../arch/x86/include/asm/desc_defs.h" | ||
61 | |||
62 | /* | ||
63 | * Copied from asm/ucontext.h, as asm/ucontext.h conflicts badly with the glibc | ||
64 | * headers. | ||
65 | */ | ||
66 | #ifdef __x86_64__ | ||
67 | /* | ||
68 | * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on | ||
69 | * kernels that save SS in the sigcontext. All kernels that set | ||
70 | * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp | ||
71 | * regardless of SS (i.e. they implement espfix). | ||
72 | * | ||
73 | * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS | ||
74 | * when delivering a signal that came from 64-bit code. | ||
75 | * | ||
76 | * Sigreturn restores SS as follows: | ||
77 | * | ||
78 | * if (saved SS is valid || UC_STRICT_RESTORE_SS is set || | ||
79 | * saved CS is not 64-bit) | ||
80 | * new SS = saved SS (will fail IRET and signal if invalid) | ||
81 | * else | ||
82 | * new SS = a flat 32-bit data segment | ||
83 | */ | ||
84 | #define UC_SIGCONTEXT_SS 0x2 | ||
85 | #define UC_STRICT_RESTORE_SS 0x4 | ||
86 | #endif | ||
87 | |||
57 | /* | 88 | /* |
58 | * In principle, this test can run on Linux emulation layers (e.g. | 89 | * In principle, this test can run on Linux emulation layers (e.g. |
59 | * Illumos "LX branded zones"). Solaris-based kernels reserve LDT | 90 | * Illumos "LX branded zones"). Solaris-based kernels reserve LDT |
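
A handler can probe the two flags documented above directly from its ucontext argument. A minimal sketch, assuming an x86_64 kernel new enough to save SS in the sigcontext (the flag values are the ones defined in this hunk; the handler and its messages are hypothetical):

	#include <signal.h>
	#include <stdio.h>
	#include <string.h>
	#include <ucontext.h>

	#define UC_SIGCONTEXT_SS	0x2
	#define UC_STRICT_RESTORE_SS	0x4

	static void handler(int sig, siginfo_t *info, void *ctx_void)
	{
		ucontext_t *ctx = (ucontext_t *)ctx_void;

		/* printf is not async-signal-safe; acceptable in a raise() demo. */
		if (ctx->uc_flags & UC_SIGCONTEXT_SS)
			printf("kernel saved SS in the sigcontext\n");
		if (ctx->uc_flags & UC_STRICT_RESTORE_SS)
			printf("signal interrupted 64-bit code; SS restores strictly\n");
	}

	int main(void)
	{
		struct sigaction sa;

		memset(&sa, 0, sizeof(sa));
		sa.sa_sigaction = handler;
		sa.sa_flags = SA_SIGINFO;
		sigemptyset(&sa.sa_mask);
		sigaction(SIGUSR1, &sa, 0);
		raise(SIGUSR1);
		return 0;
	}

On kernels that predate this change neither flag is set, which is exactly the Linux 4.1 case that validate_signal_ss() below reports as a failure.
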
@@ -267,6 +298,9 @@ static gregset_t initial_regs, requested_regs, resulting_regs; | |||
267 | /* Instructions for the SIGUSR1 handler. */ | 298 | /* Instructions for the SIGUSR1 handler. */ |
268 | static volatile unsigned short sig_cs, sig_ss; | 299 | static volatile unsigned short sig_cs, sig_ss; |
269 | static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno; | 300 | static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno; |
301 | #ifdef __x86_64__ | ||
302 | static volatile sig_atomic_t sig_corrupt_final_ss; | ||
303 | #endif | ||
270 | 304 | ||
271 | /* Abstractions for some 32-bit vs 64-bit differences. */ | 305 | /* Abstractions for some 32-bit vs 64-bit differences. */ |
272 | #ifdef __x86_64__ | 306 | #ifdef __x86_64__ |
@@ -305,9 +339,105 @@ static greg_t *csptr(ucontext_t *ctx) | |||
305 | } | 339 | } |
306 | #endif | 340 | #endif |
307 | 341 | ||
342 | /* | ||
343 | * Checks a given selector for its code bitness or returns -1 if it's not | ||
344 | * a usable code segment selector. | ||
345 | */ | ||
346 | int cs_bitness(unsigned short cs) | ||
347 | { | ||
348 | uint32_t valid = 0, ar; | ||
349 | asm ("lar %[cs], %[ar]\n\t" | ||
350 | "jnz 1f\n\t" | ||
351 | "mov $1, %[valid]\n\t" | ||
352 | "1:" | ||
353 | : [ar] "=r" (ar), [valid] "+rm" (valid) | ||
354 | : [cs] "r" (cs)); | ||
355 | |||
356 | if (!valid) | ||
357 | return -1; | ||
358 | |||
359 | bool db = (ar & (1 << 22)); | ||
360 | bool l = (ar & (1 << 21)); | ||
361 | |||
362 | if (!(ar & (1<<11))) | ||
363 | return -1; /* Not code. */ | ||
364 | |||
365 | if (l && !db) | ||
366 | return 64; | ||
367 | else if (!l && db) | ||
368 | return 32; | ||
369 | else if (!l && !db) | ||
370 | return 16; | ||
371 | else | ||
372 | return -1; /* Unknown bitness. */ | ||
373 | } | ||
374 | |||
375 | /* | ||
376 | * Checks whether a given selector names a present, read/write data segment | ||
377 | * usable as a stack segment; returns false otherwise. | ||
378 | */ | ||
379 | bool is_valid_ss(unsigned short cs) | ||
380 | { | ||
381 | uint32_t valid = 0, ar; | ||
382 | asm ("lar %[cs], %[ar]\n\t" | ||
383 | "jnz 1f\n\t" | ||
384 | "mov $1, %[valid]\n\t" | ||
385 | "1:" | ||
386 | : [ar] "=r" (ar), [valid] "+rm" (valid) | ||
387 | : [cs] "r" (cs)); | ||
388 | |||
389 | if (!valid) | ||
390 | return false; | ||
391 | |||
392 | if ((ar & AR_TYPE_MASK) != AR_TYPE_RWDATA && | ||
393 | (ar & AR_TYPE_MASK) != AR_TYPE_RWDATA_EXPDOWN) | ||
394 | return false; | ||
395 | |||
396 | return (ar & AR_P); | ||
397 | } | ||
398 | |||
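
Note how the probe works: lar sets ZF only when the selector's access rights can be loaded, so the jnz skips the valid = 1 store exactly when the selector is unusable. A hypothetical caller (not part of the patch) can exercise cs_bitness() with the live selector:

	/* Usage sketch, assuming the cs_bitness() above is linked in. */
	#include <stdio.h>

	int cs_bitness(unsigned short cs);

	int main(void)
	{
		unsigned short cs;

		/* Read the current code-segment selector. */
		asm ("mov %%cs, %0" : "=rm" (cs));
		printf("current CS is %d-bit\n", cs_bitness(cs));
		return 0;
	}
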
308 | /* Number of errors in the current test case. */ | 399 | /* Number of errors in the current test case. */ |
309 | static volatile sig_atomic_t nerrs; | 400 | static volatile sig_atomic_t nerrs; |
310 | 401 | ||
402 | static void validate_signal_ss(int sig, ucontext_t *ctx) | ||
403 | { | ||
404 | #ifdef __x86_64__ | ||
405 | bool was_64bit = (cs_bitness(*csptr(ctx)) == 64); | ||
406 | |||
407 | if (!(ctx->uc_flags & UC_SIGCONTEXT_SS)) { | ||
408 | printf("[FAIL]\tUC_SIGCONTEXT_SS was not set\n"); | ||
409 | nerrs++; | ||
410 | |||
411 | /* | ||
412 | * This happens on Linux 4.1. The rest will fail, too, so | ||
413 | * return now to reduce the noise. | ||
414 | */ | ||
415 | return; | ||
416 | } | ||
417 | |||
418 | /* UC_STRICT_RESTORE_SS is set iff we came from 64-bit mode. */ | ||
419 | if (!!(ctx->uc_flags & UC_STRICT_RESTORE_SS) != was_64bit) { | ||
420 | printf("[FAIL]\tUC_STRICT_RESTORE_SS was wrong in signal %d\n", | ||
421 | sig); | ||
422 | nerrs++; | ||
423 | } | ||
424 | |||
425 | if (is_valid_ss(*ssptr(ctx))) { | ||
426 | /* | ||
427 | * DOSEMU was written before 64-bit sigcontext had SS, and | ||
428 | * it tries to figure out the signal source SS by looking at | ||
429 | * the physical register. Make sure that keeps working. | ||
430 | */ | ||
431 | unsigned short hw_ss; | ||
432 | asm ("mov %%ss, %0" : "=rm" (hw_ss)); | ||
433 | if (hw_ss != *ssptr(ctx)) { | ||
434 | printf("[FAIL]\tHW SS didn't match saved SS\n"); | ||
435 | nerrs++; | ||
436 | } | ||
437 | } | ||
438 | #endif | ||
439 | } | ||
440 | |||
311 | /* | 441 | /* |
312 | * SIGUSR1 handler. Sets CS and SS as requested and points IP to the | 442 | * SIGUSR1 handler. Sets CS and SS as requested and points IP to the |
313 | * int3 trampoline. Sets SP to a large known value so that we can see | 443 | * int3 trampoline. Sets SP to a large known value so that we can see |
@@ -317,6 +447,8 @@ static void sigusr1(int sig, siginfo_t *info, void *ctx_void) | |||
317 | { | 447 | { |
318 | ucontext_t *ctx = (ucontext_t*)ctx_void; | 448 | ucontext_t *ctx = (ucontext_t*)ctx_void; |
319 | 449 | ||
450 | validate_signal_ss(sig, ctx); | ||
451 | |||
320 | memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); | 452 | memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); |
321 | 453 | ||
322 | *csptr(ctx) = sig_cs; | 454 | *csptr(ctx) = sig_cs; |
@@ -334,13 +466,16 @@ static void sigusr1(int sig, siginfo_t *info, void *ctx_void) | |||
334 | } | 466 | } |
335 | 467 | ||
336 | /* | 468 | /* |
337 | * Called after a successful sigreturn. Restores our state so that | 469 | * Called after a successful sigreturn (via int3) or from a failed |
338 | * the original raise(SIGUSR1) returns. | 470 | * sigreturn (directly by kernel). Restores our state so that the |
471 | * original raise(SIGUSR1) returns. | ||
339 | */ | 472 | */ |
340 | static void sigtrap(int sig, siginfo_t *info, void *ctx_void) | 473 | static void sigtrap(int sig, siginfo_t *info, void *ctx_void) |
341 | { | 474 | { |
342 | ucontext_t *ctx = (ucontext_t*)ctx_void; | 475 | ucontext_t *ctx = (ucontext_t*)ctx_void; |
343 | 476 | ||
477 | validate_signal_ss(sig, ctx); | ||
478 | |||
344 | sig_err = ctx->uc_mcontext.gregs[REG_ERR]; | 479 | sig_err = ctx->uc_mcontext.gregs[REG_ERR]; |
345 | sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO]; | 480 | sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO]; |
346 | 481 | ||
@@ -358,41 +493,62 @@ static void sigtrap(int sig, siginfo_t *info, void *ctx_void) | |||
358 | memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); | 493 | memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); |
359 | memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t)); | 494 | memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t)); |
360 | 495 | ||
496 | #ifdef __x86_64__ | ||
497 | if (sig_corrupt_final_ss) { | ||
498 | if (ctx->uc_flags & UC_STRICT_RESTORE_SS) { | ||
499 | printf("[FAIL]\tUC_STRICT_RESTORE_SS was set inappropriately\n"); | ||
500 | nerrs++; | ||
501 | } else { | ||
502 | /* | ||
503 | * DOSEMU transitions from 32-bit to 64-bit mode by | ||
504 | * adjusting sigcontext, and it requires that this work | ||
505 | * even if the saved SS is bogus. | ||
506 | */ | ||
507 | printf("\tCorrupting SS on return to 64-bit mode\n"); | ||
508 | *ssptr(ctx) = 0; | ||
509 | } | ||
510 | } | ||
511 | #endif | ||
512 | |||
361 | sig_trapped = sig; | 513 | sig_trapped = sig; |
362 | } | 514 | } |
363 | 515 | ||
364 | /* | 516 | #ifdef __x86_64__ |
365 | * Checks a given selector for its code bitness or returns -1 if it's not | 517 | /* Tests recovery if !UC_STRICT_RESTORE_SS */ |
366 | * a usable code segment selector. | 518 | static void sigusr2(int sig, siginfo_t *info, void *ctx_void) |
367 | */ | ||
368 | int cs_bitness(unsigned short cs) | ||
369 | { | 519 | { |
370 | uint32_t valid = 0, ar; | 520 | ucontext_t *ctx = (ucontext_t*)ctx_void; |
371 | asm ("lar %[cs], %[ar]\n\t" | ||
372 | "jnz 1f\n\t" | ||
373 | "mov $1, %[valid]\n\t" | ||
374 | "1:" | ||
375 | : [ar] "=r" (ar), [valid] "+rm" (valid) | ||
376 | : [cs] "r" (cs)); | ||
377 | 521 | ||
378 | if (!valid) | 522 | if (!(ctx->uc_flags & UC_STRICT_RESTORE_SS)) { |
379 | return -1; | 523 | printf("[FAIL]\traise(2) didn't set UC_STRICT_RESTORE_SS\n"); |
524 | nerrs++; | ||
525 | return; /* We can't do the rest. */ | ||
526 | } | ||
380 | 527 | ||
381 | bool db = (ar & (1 << 22)); | 528 | ctx->uc_flags &= ~UC_STRICT_RESTORE_SS; |
382 | bool l = (ar & (1 << 21)); | 529 | *ssptr(ctx) = 0; |
383 | 530 | ||
384 | if (!(ar & (1<<11))) | 531 | /* Return. The kernel should recover without sending another signal. */ |
385 | return -1; /* Not code. */ | 532 | } |
386 | 533 | ||
387 | if (l && !db) | 534 | static int test_nonstrict_ss(void) |
388 | return 64; | 535 | { |
389 | else if (!l && db) | 536 | clearhandler(SIGUSR1); |
390 | return 32; | 537 | clearhandler(SIGTRAP); |
391 | else if (!l && !db) | 538 | clearhandler(SIGSEGV); |
392 | return 16; | 539 | clearhandler(SIGILL); |
393 | else | 540 | sethandler(SIGUSR2, sigusr2, 0); |
394 | return -1; /* Unknown bitness. */ | 541 | |
542 | nerrs = 0; | ||
543 | |||
544 | printf("[RUN]\tClear UC_STRICT_RESTORE_SS and corrupt SS\n"); | ||
545 | raise(SIGUSR2); | ||
546 | if (!nerrs) | ||
547 | printf("[OK]\tIt worked\n"); | ||
548 | |||
549 | return nerrs; | ||
395 | } | 550 | } |
551 | #endif | ||
396 | 552 | ||
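
test_nonstrict_ss() relies on the file's sethandler()/clearhandler() helpers, which this hunk does not show. For reference, sethandler() follows the same pattern as the one added to syscall_nt.c further down; clearhandler() is assumed here to be its usual SIG_DFL counterpart:

	/* Sketch of the sigaction helpers; sethandler() mirrors the one added
	 * to syscall_nt.c below, clearhandler() is the assumed counterpart. */
	#include <err.h>
	#include <signal.h>
	#include <string.h>

	static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
			       int flags)
	{
		struct sigaction sa;

		memset(&sa, 0, sizeof(sa));
		sa.sa_sigaction = handler;
		sa.sa_flags = SA_SIGINFO | flags;
		sigemptyset(&sa.sa_mask);
		if (sigaction(sig, &sa, 0))
			err(1, "sigaction");
	}

	static void clearhandler(int sig)
	{
		struct sigaction sa;

		memset(&sa, 0, sizeof(sa));
		sa.sa_handler = SIG_DFL;
		sigemptyset(&sa.sa_mask);
		if (sigaction(sig, &sa, 0))
			err(1, "sigaction");
	}

Clearing the handlers first matters: if the kernel failed to recover from the zeroed SS, the resulting fault would take its default fatal disposition instead of being swallowed by a leftover test handler.
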
397 | /* Finds a usable code segment of the requested bitness. */ | 553 | /* Finds a usable code segment of the requested bitness. */ |
398 | int find_cs(int bitness) | 554 | int find_cs(int bitness) |
@@ -576,6 +732,12 @@ static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs) | |||
576 | errdesc, strsignal(sig_trapped)); | 732 | errdesc, strsignal(sig_trapped)); |
577 | return 0; | 733 | return 0; |
578 | } else { | 734 | } else { |
735 | /* | ||
736 | * This also implicitly tests UC_STRICT_RESTORE_SS: | ||
737 | * We check that these signals set UC_STRICT_RESTORE_SS and, | ||
738 | * if UC_STRICT_RESTORE_SS doesn't cause strict behavior, | ||
739 | * then we won't get SIGSEGV. | ||
740 | */ | ||
579 | printf("[FAIL]\tDid not get SIGSEGV\n"); | 741 | printf("[FAIL]\tDid not get SIGSEGV\n"); |
580 | return 1; | 742 | return 1; |
581 | } | 743 | } |
@@ -632,6 +794,14 @@ int main() | |||
632 | GDT3(gdt_data16_idx)); | 794 | GDT3(gdt_data16_idx)); |
633 | } | 795 | } |
634 | 796 | ||
797 | #ifdef __x86_64__ | ||
798 | /* Nasty ABI case: check SS corruption handling. */ | ||
799 | sig_corrupt_final_ss = 1; | ||
800 | total_nerrs += test_valid_sigreturn(32, false, -1); | ||
801 | total_nerrs += test_valid_sigreturn(32, true, -1); | ||
802 | sig_corrupt_final_ss = 0; | ||
803 | #endif | ||
804 | |||
635 | /* | 805 | /* |
636 | * We're done testing valid sigreturn cases. Now we test states | 806 | * We're done testing valid sigreturn cases. Now we test states |
637 | * for which sigreturn itself will succeed but the subsequent | 807 | * for which sigreturn itself will succeed but the subsequent |
@@ -680,5 +850,9 @@ int main() | |||
680 | if (gdt_npdata32_idx) | 850 | if (gdt_npdata32_idx) |
681 | test_bad_iret(32, GDT3(gdt_npdata32_idx), -1); | 851 | test_bad_iret(32, GDT3(gdt_npdata32_idx), -1); |
682 | 852 | ||
853 | #ifdef __x86_64__ | ||
854 | total_nerrs += test_nonstrict_ss(); | ||
855 | #endif | ||
856 | |||
683 | return total_nerrs ? 1 : 0; | 857 | return total_nerrs ? 1 : 0; |
684 | } | 858 | } |
diff --git a/tools/testing/selftests/x86/syscall_nt.c b/tools/testing/selftests/x86/syscall_nt.c index 60c06af4646a..43fcab367fb0 100644 --- a/tools/testing/selftests/x86/syscall_nt.c +++ b/tools/testing/selftests/x86/syscall_nt.c | |||
@@ -17,6 +17,9 @@ | |||
17 | 17 | ||
18 | #include <stdio.h> | 18 | #include <stdio.h> |
19 | #include <unistd.h> | 19 | #include <unistd.h> |
20 | #include <string.h> | ||
21 | #include <signal.h> | ||
22 | #include <err.h> | ||
20 | #include <sys/syscall.h> | 23 | #include <sys/syscall.h> |
21 | #include <asm/processor-flags.h> | 24 | #include <asm/processor-flags.h> |
22 | 25 | ||
@@ -26,6 +29,8 @@ | |||
26 | # define WIDTH "l" | 29 | # define WIDTH "l" |
27 | #endif | 30 | #endif |
28 | 31 | ||
32 | static unsigned int nerrs; | ||
33 | |||
29 | static unsigned long get_eflags(void) | 34 | static unsigned long get_eflags(void) |
30 | { | 35 | { |
31 | unsigned long eflags; | 36 | unsigned long eflags; |
@@ -39,16 +44,52 @@ static void set_eflags(unsigned long eflags) | |||
39 | : : "rm" (eflags) : "flags"); | 44 | : : "rm" (eflags) : "flags"); |
40 | } | 45 | } |
41 | 46 | ||
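
The bodies of get_eflags() and set_eflags() are only partially visible in these context lines. For orientation, the conventional pushf/popf implementation looks roughly like this (a sketch; the WIDTH suffix is assumed to be "q" on x86_64, matching the "l" fallback visible above):

	/* Sketch of the EFLAGS accessors the test builds on; the WIDTH
	 * selection is assumed from the visible 32-bit fallback. */
	#ifdef __x86_64__
	# define WIDTH "q"
	#else
	# define WIDTH "l"
	#endif

	static unsigned long get_eflags(void)
	{
		unsigned long eflags;

		/* pushf saves EFLAGS on the stack; pop it into a variable. */
		asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
		return eflags;
	}

	static void set_eflags(unsigned long eflags)
	{
		/* Push the desired value; popf loads it back into EFLAGS. */
		asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
			      : : "rm" (eflags) : "flags");
	}

With these in hand, do_it() below is simply: set the extra flags, make a syscall, and check that the flags survived the kernel entry/exit path.
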
42 | int main() | 47 | static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), |
48 | int flags) | ||
43 | { | 49 | { |
44 | printf("[RUN]\tSet NT and issue a syscall\n"); | 50 | struct sigaction sa; |
45 | set_eflags(get_eflags() | X86_EFLAGS_NT); | 51 | memset(&sa, 0, sizeof(sa)); |
52 | sa.sa_sigaction = handler; | ||
53 | sa.sa_flags = SA_SIGINFO | flags; | ||
54 | sigemptyset(&sa.sa_mask); | ||
55 | if (sigaction(sig, &sa, 0)) | ||
56 | err(1, "sigaction"); | ||
57 | } | ||
58 | |||
59 | static void sigtrap(int sig, siginfo_t *si, void *ctx_void) | ||
60 | { /* Empty handler: we only need SIGTRAP to be non-fatal while TF is set. */ | ||
61 | } | ||
62 | |||
63 | static void do_it(unsigned long extraflags) | ||
64 | { | ||
65 | unsigned long flags; | ||
66 | |||
67 | set_eflags(get_eflags() | extraflags); | ||
46 | syscall(SYS_getpid); | 68 | syscall(SYS_getpid); |
47 | if (get_eflags() & X86_EFLAGS_NT) { | 69 | flags = get_eflags(); |
48 | printf("[OK]\tThe syscall worked and NT is still set\n"); | 70 | if ((flags & extraflags) == extraflags) { |
49 | return 0; | 71 | printf("[OK]\tThe syscall worked and flags are still set\n"); |
50 | } else { | 72 | } else { |
51 | printf("[FAIL]\tThe syscall worked but NT was cleared\n"); | 73 | printf("[FAIL]\tThe syscall worked but flags were cleared (flags = 0x%lx but expected 0x%lx set)\n", |
52 | return 1; | 74 | flags, extraflags); |
75 | nerrs++; | ||
53 | } | 76 | } |
54 | } | 77 | } |
78 | |||
79 | int main(void) | ||
80 | { | ||
81 | printf("[RUN]\tSet NT and issue a syscall\n"); | ||
82 | do_it(X86_EFLAGS_NT); | ||
83 | |||
84 | /* | ||
85 | * Now try it again with TF set -- TF forces returns via IRET in all | ||
86 | * cases except non-ptregs-using 64-bit full fast path syscalls. | ||
87 | */ | ||
88 | |||
89 | sethandler(SIGTRAP, sigtrap, 0); | ||
90 | |||
91 | printf("[RUN]\tSet NT|TF and issue a syscall\n"); | ||
92 | do_it(X86_EFLAGS_NT | X86_EFLAGS_TF); | ||
93 | |||
94 | return nerrs == 0 ? 0 : 1; | ||
95 | } | ||