about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Ingo Molnar <mingo@kernel.org> 2017-12-17 07:04:28 -0500
committer: Ingo Molnar <mingo@kernel.org> 2017-12-17 07:04:28 -0500
commit: 650400b2ccb8542ff4e2677d66ff083d01c7bd6a (patch)
tree: 055ddec9479ec30cca661989b9902c13f52d533a
parent: 0fd2e9c53d82704a3ba87ea1980ec515188c5316 (diff)
parent: fec8f5ae1715a01c72ad52cb2ecd8aacaf142302 (diff)
Merge branch 'upstream-x86-selftests' into WIP.x86/pti.base
Conflicts:
	arch/x86/kernel/cpu/Makefile

Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- Documentation/x86/x86_64/mm.txt | 2
-rw-r--r-- arch/x86/Kconfig | 1
-rw-r--r-- arch/x86/entry/syscalls/Makefile | 4
-rw-r--r-- arch/x86/include/asm/cpufeatures.h | 545
-rw-r--r-- arch/x86/include/asm/pgtable_types.h | 3
-rw-r--r-- arch/x86/kernel/Makefile | 3
-rw-r--r-- arch/x86/kernel/cpu/cpuid-deps.c | 26
-rw-r--r-- arch/x86/kernel/head_64.S | 11
-rw-r--r-- arch/x86/mm/init_64.c | 10
-rw-r--r-- arch/x86/mm/kasan_init_64.c | 101
-rw-r--r-- arch/x86/xen/mmu_pv.c | 159
-rw-r--r-- include/linux/bitops.h | 26
-rw-r--r-- include/linux/mm.h | 2
-rw-r--r-- include/linux/mmzone.h | 6
-rw-r--r-- mm/gup.c | 97
-rw-r--r-- mm/page_alloc.c | 10
-rw-r--r-- mm/sparse.c | 17
-rw-r--r-- tools/testing/selftests/x86/ldt_gdt.c | 88
-rw-r--r-- tools/testing/selftests/x86/protection_keys.c | 24
19 files changed, 613 insertions, 522 deletions
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index b0798e281aa6..3448e675b462 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -34,7 +34,7 @@ ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space
34ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole 34ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
35ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB) 35ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
36... unused hole ... 36... unused hole ...
37ffd8000000000000 - fff7ffffffffffff (=53 bits) kasan shadow memory (8PB) 37ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
38... unused hole ... 38... unused hole ...
39ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks 39ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
40... unused hole ... 40... unused hole ...
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 926fdfbadcdb..4ae940a0ed3b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -303,7 +303,6 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC
303config KASAN_SHADOW_OFFSET 303config KASAN_SHADOW_OFFSET
304 hex 304 hex
305 depends on KASAN 305 depends on KASAN
306 default 0xdff8000000000000 if X86_5LEVEL
307 default 0xdffffc0000000000 306 default 0xdffffc0000000000
308 307
309config HAVE_INTEL_TXT 308config HAVE_INTEL_TXT
diff --git a/arch/x86/entry/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile
index 331f1dca5085..6fb9b57ed5ba 100644
--- a/arch/x86/entry/syscalls/Makefile
+++ b/arch/x86/entry/syscalls/Makefile
@@ -1,6 +1,6 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2out := $(obj)/../../include/generated/asm 2out := arch/$(SRCARCH)/include/generated/asm
3uapi := $(obj)/../../include/generated/uapi/asm 3uapi := arch/$(SRCARCH)/include/generated/uapi/asm
4 4
5# Create output directory if not already present 5# Create output directory if not already present
6_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \ 6_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 74370734663c..cdf5be866863 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -13,178 +13,176 @@
13/* 13/*
14 * Defines x86 CPU feature bits 14 * Defines x86 CPU feature bits
15 */ 15 */
16#define NCAPINTS 18 /* N 32-bit words worth of info */ 16#define NCAPINTS 18 /* N 32-bit words worth of info */
17#define NBUGINTS 1 /* N 32-bit bug flags */ 17#define NBUGINTS 1 /* N 32-bit bug flags */
18 18
19/* 19/*
20 * Note: If the comment begins with a quoted string, that string is used 20 * Note: If the comment begins with a quoted string, that string is used
21 * in /proc/cpuinfo instead of the macro name. If the string is "", 21 * in /proc/cpuinfo instead of the macro name. If the string is "",
22 * this feature bit is not displayed in /proc/cpuinfo at all. 22 * this feature bit is not displayed in /proc/cpuinfo at all.
23 */ 23 *
24
25/*
26 * When adding new features here that depend on other features, 24 * When adding new features here that depend on other features,
27 * please update the table in kernel/cpu/cpuid-deps.c 25 * please update the table in kernel/cpu/cpuid-deps.c as well.
28 */ 26 */
29 27
30/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ 28/* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */
31#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ 29#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
32#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ 30#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
33#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ 31#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
34#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ 32#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
35#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ 33#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
36#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ 34#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
37#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ 35#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
38#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ 36#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
39#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ 37#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
40#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ 38#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
41#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ 39#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
42#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ 40#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
43#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ 41#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
44#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ 42#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
45#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */ 43#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */
46 /* (plus FCMOVcc, FCOMI with FPU) */ 44#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
47#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ 45#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
48#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ 46#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
49#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ 47#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
50#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ 48#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
51#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ 49#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
52#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ 50#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
53#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ 51#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
54#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ 52#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
55#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ 53#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
56#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ 54#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
57#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ 55#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
58#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ 56#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
59#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ 57#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
60#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ 58#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
61#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
62 59
63/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ 60/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
64/* Don't duplicate feature flags which are redundant with Intel! */ 61/* Don't duplicate feature flags which are redundant with Intel! */
65#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ 62#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
66#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */ 63#define X86_FEATURE_MP ( 1*32+19) /* MP Capable */
67#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ 64#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
68#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ 65#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
69#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ 66#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
70#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ 67#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
71#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ 68#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
72#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */ 69#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */
73#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */ 70#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow extensions */
74#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */ 71#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow */
75 72
76/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ 73/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
77#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ 74#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
78#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ 75#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
79#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ 76#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
80 77
81/* Other features, Linux-defined mapping, word 3 */ 78/* Other features, Linux-defined mapping, word 3 */
82/* This range is used for feature bits which conflict or are synthesized */ 79/* This range is used for feature bits which conflict or are synthesized */
83#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ 80#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
84#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ 81#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
85#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ 82#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
86#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ 83#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
87/* cpu types for specific tunings: */ 84
88#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ 85/* CPU types for specific tunings: */
89#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ 86#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
90#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ 87#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
91#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ 88#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
92#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ 89#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
93#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */ 90#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
94#define X86_FEATURE_ART ( 3*32+10) /* Platform has always running timer (ART) */ 91#define X86_FEATURE_UP ( 3*32+ 9) /* SMP kernel running on UP */
95#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ 92#define X86_FEATURE_ART ( 3*32+10) /* Always running timer (ART) */
96#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ 93#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
97#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ 94#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
98#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */ 95#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
99#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */ 96#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */
100#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */ 97#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
101#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */ 98#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
102#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */ 99#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" MFENCE synchronizes RDTSC */
103#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ 100#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
104#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ 101#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
105#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ 102#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
106#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ 103#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
107#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ 104#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* CPU topology enum extensions */
108#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ 105#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
109#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ 106#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
110#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ 107#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */
111#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ 108#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */
112#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ 109#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */
113#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ 110#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
114#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ 111#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
112#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
115 113
116/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ 114/* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */
117#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ 115#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
118#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ 116#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
119#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ 117#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
120#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */ 118#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */
121#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */ 119#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */
122#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ 120#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
123#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */ 121#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer Mode eXtensions */
124#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ 122#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
125#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ 123#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
126#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ 124#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
127#define X86_FEATURE_CID ( 4*32+10) /* Context ID */ 125#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
128#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ 126#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
129#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ 127#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
130#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */ 128#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B instruction */
131#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ 129#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
132#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */ 130#define X86_FEATURE_PDCM ( 4*32+15) /* Perf/Debug Capabilities MSR */
133#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ 131#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
134#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ 132#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
135#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ 133#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
136#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ 134#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
137#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */ 135#define X86_FEATURE_X2APIC ( 4*32+21) /* X2APIC */
138#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ 136#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
139#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ 137#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
140#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */ 138#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* TSC deadline timer */
141#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ 139#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
142#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ 140#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */
143#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */ 141#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE instruction enabled in the OS */
144#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ 142#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
145#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */ 143#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit FP conversions */
146#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */ 144#define X86_FEATURE_RDRAND ( 4*32+30) /* RDRAND instruction */
147#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ 145#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */
148 146
149/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ 147/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
150#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ 148#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
151#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ 149#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
152#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ 150#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
153#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ 151#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
154#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ 152#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
155#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ 153#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
156#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ 154#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
157#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ 155#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
158#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ 156#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
159#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ 157#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
160 158
161/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ 159/* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */
162#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ 160#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
163#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ 161#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
164#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */ 162#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure Virtual Machine */
165#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ 163#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
166#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ 164#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
167#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ 165#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
168#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ 166#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
169#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ 167#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
170#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ 168#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
171#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ 169#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
172#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ 170#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
173#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ 171#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
174#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ 172#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
175#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ 173#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
176#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ 174#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
177#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ 175#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
178#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */ 176#define X86_FEATURE_TCE ( 6*32+17) /* Translation Cache Extension */
179#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ 177#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
180#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */ 178#define X86_FEATURE_TBM ( 6*32+21) /* Trailing Bit Manipulations */
181#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */ 179#define X86_FEATURE_TOPOEXT ( 6*32+22) /* Topology extensions CPUID leafs */
182#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */ 180#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* Core performance counter extensions */
183#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ 181#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
184#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ 182#define X86_FEATURE_BPEXT ( 6*32+26) /* Data breakpoint extension */
185#define X86_FEATURE_PTSC ( 6*32+27) /* performance time-stamp counter */ 183#define X86_FEATURE_PTSC ( 6*32+27) /* Performance time-stamp counter */
186#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */ 184#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */
187#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */ 185#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */
188 186
189/* 187/*
190 * Auxiliary flags: Linux defined - For features scattered in various 188 * Auxiliary flags: Linux defined - For features scattered in various
@@ -192,152 +190,153 @@
192 * 190 *
193 * Reuse free bits when adding new feature flags! 191 * Reuse free bits when adding new feature flags!
194 */ 192 */
195#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */ 193#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */
196#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */ 194#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
197#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ 195#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
198#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ 196#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
199#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ 197#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
200#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ 198#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
201#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ 199#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
202 200
203#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ 201#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
204#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ 202#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
205#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ 203#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
206 204
207#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ 205#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
208#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ 206#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
209#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ 207#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */
210#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ 208#define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
211 209
212#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ 210#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
213 211
214/* Virtualization flags: Linux defined, word 8 */ 212/* Virtualization flags: Linux defined, word 8 */
215#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ 213#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
216#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ 214#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
217#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */ 215#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
218#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ 216#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
219#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ 217#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
220 218
221#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ 219#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */
222#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ 220#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
223 221
224 222
225/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ 223/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
226#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ 224#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
227#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */ 225#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */
228#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ 226#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
229#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ 227#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
230#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ 228#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
231#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ 229#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
232#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ 230#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
233#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */ 231#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */
234#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ 232#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
235#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ 233#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
236#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ 234#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
237#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ 235#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
238#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ 236#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */
239#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ 237#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
240#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */ 238#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
241#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ 239#define X86_FEATURE_RDSEED ( 9*32+18) /* RDSEED instruction */
242#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ 240#define X86_FEATURE_ADX ( 9*32+19) /* ADCX and ADOX instructions */
243#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ 241#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
244#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ 242#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
245#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ 243#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
246#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ 244#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
247#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ 245#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
248#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ 246#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
249#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ 247#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
250#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ 248#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
251#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */ 249#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
252#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */ 250#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
253 251
254/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */ 252/* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */
255#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */ 253#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT instruction */
256#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */ 254#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */
257#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */ 255#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */
258#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */ 256#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */
259 257
260/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */ 258/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */
261#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ 259#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
262 260
263/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */ 261/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */
264#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */ 262#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring */
265#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */ 263#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
266#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */ 264#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
267 265
268/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ 266/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
269#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ 267#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
270#define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ 268#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
271 269
272/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ 270/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
273#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ 271#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
274#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ 272#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
275#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ 273#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
276#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ 274#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
277#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ 275#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
278#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ 276#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
279#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ 277#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
280#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ 278#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
281#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ 279#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
282#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ 280#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
283 281
284/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */ 282/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
285#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ 283#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
286#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ 284#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
287#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ 285#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
288#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ 286#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
289#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ 287#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
290#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ 288#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
291#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ 289#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
292#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ 290#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
293#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ 291#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
294#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ 292#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
295#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ 293#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
296#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ 294#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
297#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ 295#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
298 296
299/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ 297/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
300#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ 298#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
301#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ 299#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
302#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ 300#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
303#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ 301#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
304#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ 302#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */
305#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */ 303#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */
306#define X86_FEATURE_VPCLMULQDQ (16*32+ 10) /* Carry-Less Multiplication Double Quadword */ 304#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */
307#define X86_FEATURE_AVX512_VNNI (16*32+ 11) /* Vector Neural Network Instructions */ 305#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */
308#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB */ 306#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
309#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ 307#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
310#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ 308#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
311#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ 309#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
312 310
313/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ 311/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
314#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ 312#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */
315#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */ 313#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */
316#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */ 314#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */
317 315
318/* 316/*
319 * BUG word(s) 317 * BUG word(s)
320 */ 318 */
321#define X86_BUG(x) (NCAPINTS*32 + (x)) 319#define X86_BUG(x) (NCAPINTS*32 + (x))
322 320
323#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ 321#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
324#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ 322#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
325#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ 323#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
326#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ 324#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
327#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ 325#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
328#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ 326#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
329#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ 327#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
330#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ 328#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
331#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ 329#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
332#ifdef CONFIG_X86_32 330#ifdef CONFIG_X86_32
333/* 331/*
334 * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional 332 * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional
335 * to avoid confusion. 333 * to avoid confusion.
336 */ 334 */
337#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ 335#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
338#endif 336#endif
339#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ 337#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */
340#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ 338#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
341#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ 339#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
342#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ 340#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
341
343#endif /* _ASM_X86_CPUFEATURES_H */ 342#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 59df7b47a434..9e9b05fc4860 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -200,10 +200,9 @@ enum page_cache_mode {
200 200
201#define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) 201#define _PAGE_ENC (_AT(pteval_t, sme_me_mask))
202 202
203#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
204 _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_ENC)
205#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ 203#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \
206 _PAGE_DIRTY | _PAGE_ENC) 204 _PAGE_DIRTY | _PAGE_ENC)
205#define _PAGE_TABLE (_KERNPG_TABLE | _PAGE_USER)
207 206
208#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _PAGE_ENC) 207#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _PAGE_ENC)
209#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _PAGE_ENC) 208#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _PAGE_ENC)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index d12da41f72da..295abaa58add 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -25,7 +25,8 @@ endif
25KASAN_SANITIZE_head$(BITS).o := n 25KASAN_SANITIZE_head$(BITS).o := n
26KASAN_SANITIZE_dumpstack.o := n 26KASAN_SANITIZE_dumpstack.o := n
27KASAN_SANITIZE_dumpstack_$(BITS).o := n 27KASAN_SANITIZE_dumpstack_$(BITS).o := n
28KASAN_SANITIZE_stacktrace.o := n 28KASAN_SANITIZE_stacktrace.o := n
29KASAN_SANITIZE_paravirt.o := n
29 30
30OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y 31OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
31OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y 32OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index c21f22d836ad..904b0a3c4e53 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -62,23 +62,19 @@ const static struct cpuid_dep cpuid_deps[] = {
62 {} 62 {}
63}; 63};
64 64
65static inline void __clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit)
66{
67 clear_bit32(bit, c->x86_capability);
68}
69
70static inline void __setup_clear_cpu_cap(unsigned int bit)
71{
72 clear_cpu_cap(&boot_cpu_data, bit);
73 set_bit32(bit, cpu_caps_cleared);
74}
75
76static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature) 65static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature)
77{ 66{
78 if (!c) 67 /*
79 __setup_clear_cpu_cap(feature); 68 * Note: This could use the non atomic __*_bit() variants, but the
80 else 69 * rest of the cpufeature code uses atomics as well, so keep it for
81 __clear_cpu_cap(c, feature); 70 * consistency. Cleanup all of it separately.
71 */
72 if (!c) {
73 clear_cpu_cap(&boot_cpu_data, feature);
74 set_bit(feature, (unsigned long *)cpu_caps_cleared);
75 } else {
76 clear_bit(feature, (unsigned long *)c->x86_capability);
77 }
82} 78}
83 79
84/* Take the capabilities and the BUG bits into account */ 80/* Take the capabilities and the BUG bits into account */
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index fd58835d8f9b..7dca675fe78d 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -38,11 +38,12 @@
38 * 38 *
39 */ 39 */
40 40
41#define p4d_index(x) (((x) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
42#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) 41#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
43 42
43#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
44PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE) 44PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
45PGD_START_KERNEL = pgd_index(__START_KERNEL_map) 45PGD_START_KERNEL = pgd_index(__START_KERNEL_map)
46#endif
46L3_START_KERNEL = pud_index(__START_KERNEL_map) 47L3_START_KERNEL = pud_index(__START_KERNEL_map)
47 48
48 .text 49 .text
@@ -362,10 +363,7 @@ NEXT_PAGE(early_dynamic_pgts)
362 363
363 .data 364 .data
364 365
365#ifndef CONFIG_XEN 366#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
366NEXT_PAGE(init_top_pgt)
367 .fill 512,8,0
368#else
369NEXT_PAGE(init_top_pgt) 367NEXT_PAGE(init_top_pgt)
370 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC 368 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
371 .org init_top_pgt + PGD_PAGE_OFFSET*8, 0 369 .org init_top_pgt + PGD_PAGE_OFFSET*8, 0
@@ -382,6 +380,9 @@ NEXT_PAGE(level2_ident_pgt)
382 * Don't set NX because code runs from these pages. 380 * Don't set NX because code runs from these pages.
383 */ 381 */
384 PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) 382 PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
383#else
384NEXT_PAGE(init_top_pgt)
385 .fill 512,8,0
385#endif 386#endif
386 387
387#ifdef CONFIG_X86_5LEVEL 388#ifdef CONFIG_X86_5LEVEL
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 048fbe8fc274..adcea90a2046 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1426,16 +1426,16 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
1426 1426
1427#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE) 1427#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE)
1428void register_page_bootmem_memmap(unsigned long section_nr, 1428void register_page_bootmem_memmap(unsigned long section_nr,
1429 struct page *start_page, unsigned long size) 1429 struct page *start_page, unsigned long nr_pages)
1430{ 1430{
1431 unsigned long addr = (unsigned long)start_page; 1431 unsigned long addr = (unsigned long)start_page;
1432 unsigned long end = (unsigned long)(start_page + size); 1432 unsigned long end = (unsigned long)(start_page + nr_pages);
1433 unsigned long next; 1433 unsigned long next;
1434 pgd_t *pgd; 1434 pgd_t *pgd;
1435 p4d_t *p4d; 1435 p4d_t *p4d;
1436 pud_t *pud; 1436 pud_t *pud;
1437 pmd_t *pmd; 1437 pmd_t *pmd;
1438 unsigned int nr_pages; 1438 unsigned int nr_pmd_pages;
1439 struct page *page; 1439 struct page *page;
1440 1440
1441 for (; addr < end; addr = next) { 1441 for (; addr < end; addr = next) {
@@ -1482,9 +1482,9 @@ void register_page_bootmem_memmap(unsigned long section_nr,
1482 if (pmd_none(*pmd)) 1482 if (pmd_none(*pmd))
1483 continue; 1483 continue;
1484 1484
1485 nr_pages = 1 << (get_order(PMD_SIZE)); 1485 nr_pmd_pages = 1 << get_order(PMD_SIZE);
1486 page = pmd_page(*pmd); 1486 page = pmd_page(*pmd);
1487 while (nr_pages--) 1487 while (nr_pmd_pages--)
1488 get_page_bootmem(section_nr, page++, 1488 get_page_bootmem(section_nr, page++,
1489 SECTION_INFO); 1489 SECTION_INFO);
1490 } 1490 }
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 8f5be3eb40dd..2b60dc6e64b1 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -16,6 +16,8 @@
16 16
17extern struct range pfn_mapped[E820_MAX_ENTRIES]; 17extern struct range pfn_mapped[E820_MAX_ENTRIES];
18 18
19static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
20
19static int __init map_range(struct range *range) 21static int __init map_range(struct range *range)
20{ 22{
21 unsigned long start; 23 unsigned long start;
@@ -31,8 +33,10 @@ static void __init clear_pgds(unsigned long start,
31 unsigned long end) 33 unsigned long end)
32{ 34{
33 pgd_t *pgd; 35 pgd_t *pgd;
36 /* See comment in kasan_init() */
37 unsigned long pgd_end = end & PGDIR_MASK;
34 38
35 for (; start < end; start += PGDIR_SIZE) { 39 for (; start < pgd_end; start += PGDIR_SIZE) {
36 pgd = pgd_offset_k(start); 40 pgd = pgd_offset_k(start);
37 /* 41 /*
38 * With folded p4d, pgd_clear() is nop, use p4d_clear() 42 * With folded p4d, pgd_clear() is nop, use p4d_clear()
@@ -43,29 +47,61 @@ static void __init clear_pgds(unsigned long start,
43 else 47 else
44 pgd_clear(pgd); 48 pgd_clear(pgd);
45 } 49 }
50
51 pgd = pgd_offset_k(start);
52 for (; start < end; start += P4D_SIZE)
53 p4d_clear(p4d_offset(pgd, start));
54}
55
56static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr)
57{
58 unsigned long p4d;
59
60 if (!IS_ENABLED(CONFIG_X86_5LEVEL))
61 return (p4d_t *)pgd;
62
63 p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK;
64 p4d += __START_KERNEL_map - phys_base;
65 return (p4d_t *)p4d + p4d_index(addr);
66}
67
68static void __init kasan_early_p4d_populate(pgd_t *pgd,
69 unsigned long addr,
70 unsigned long end)
71{
72 pgd_t pgd_entry;
73 p4d_t *p4d, p4d_entry;
74 unsigned long next;
75
76 if (pgd_none(*pgd)) {
77 pgd_entry = __pgd(_KERNPG_TABLE | __pa_nodebug(kasan_zero_p4d));
78 set_pgd(pgd, pgd_entry);
79 }
80
81 p4d = early_p4d_offset(pgd, addr);
82 do {
83 next = p4d_addr_end(addr, end);
84
85 if (!p4d_none(*p4d))
86 continue;
87
88 p4d_entry = __p4d(_KERNPG_TABLE | __pa_nodebug(kasan_zero_pud));
89 set_p4d(p4d, p4d_entry);
90 } while (p4d++, addr = next, addr != end && p4d_none(*p4d));
46} 91}
47 92
48static void __init kasan_map_early_shadow(pgd_t *pgd) 93static void __init kasan_map_early_shadow(pgd_t *pgd)
49{ 94{
50 int i; 95 /* See comment in kasan_init() */
51 unsigned long start = KASAN_SHADOW_START; 96 unsigned long addr = KASAN_SHADOW_START & PGDIR_MASK;
52 unsigned long end = KASAN_SHADOW_END; 97 unsigned long end = KASAN_SHADOW_END;
98 unsigned long next;
53 99
54 for (i = pgd_index(start); start < end; i++) { 100 pgd += pgd_index(addr);
55 switch (CONFIG_PGTABLE_LEVELS) { 101 do {
56 case 4: 102 next = pgd_addr_end(addr, end);
57 pgd[i] = __pgd(__pa_nodebug(kasan_zero_pud) | 103 kasan_early_p4d_populate(pgd, addr, next);
58 _KERNPG_TABLE); 104 } while (pgd++, addr = next, addr != end);
59 break;
60 case 5:
61 pgd[i] = __pgd(__pa_nodebug(kasan_zero_p4d) |
62 _KERNPG_TABLE);
63 break;
64 default:
65 BUILD_BUG();
66 }
67 start += PGDIR_SIZE;
68 }
69} 105}
70 106
71#ifdef CONFIG_KASAN_INLINE 107#ifdef CONFIG_KASAN_INLINE
@@ -102,7 +138,7 @@ void __init kasan_early_init(void)
102 for (i = 0; i < PTRS_PER_PUD; i++) 138 for (i = 0; i < PTRS_PER_PUD; i++)
103 kasan_zero_pud[i] = __pud(pud_val); 139 kasan_zero_pud[i] = __pud(pud_val);
104 140
105 for (i = 0; CONFIG_PGTABLE_LEVELS >= 5 && i < PTRS_PER_P4D; i++) 141 for (i = 0; IS_ENABLED(CONFIG_X86_5LEVEL) && i < PTRS_PER_P4D; i++)
106 kasan_zero_p4d[i] = __p4d(p4d_val); 142 kasan_zero_p4d[i] = __p4d(p4d_val);
107 143
108 kasan_map_early_shadow(early_top_pgt); 144 kasan_map_early_shadow(early_top_pgt);
@@ -118,12 +154,35 @@ void __init kasan_init(void)
118#endif 154#endif
119 155
120 memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt)); 156 memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));
157
158 /*
159 * We use the same shadow offset for 4- and 5-level paging to
160 * facilitate boot-time switching between paging modes.
161 * As result in 5-level paging mode KASAN_SHADOW_START and
162 * KASAN_SHADOW_END are not aligned to PGD boundary.
163 *
164 * KASAN_SHADOW_START doesn't share PGD with anything else.
165 * We claim whole PGD entry to make things easier.
166 *
167 * KASAN_SHADOW_END lands in the last PGD entry and it collides with
168 * bunch of things like kernel code, modules, EFI mapping, etc.
169 * We need to take extra steps to not overwrite them.
170 */
171 if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
172 void *ptr;
173
174 ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
175 memcpy(tmp_p4d_table, (void *)ptr, sizeof(tmp_p4d_table));
176 set_pgd(&early_top_pgt[pgd_index(KASAN_SHADOW_END)],
177 __pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
178 }
179
121 load_cr3(early_top_pgt); 180 load_cr3(early_top_pgt);
122 __flush_tlb_all(); 181 __flush_tlb_all();
123 182
124 clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); 183 clear_pgds(KASAN_SHADOW_START & PGDIR_MASK, KASAN_SHADOW_END);
125 184
126 kasan_populate_zero_shadow((void *)KASAN_SHADOW_START, 185 kasan_populate_zero_shadow((void *)(KASAN_SHADOW_START & PGDIR_MASK),
127 kasan_mem_to_shadow((void *)PAGE_OFFSET)); 186 kasan_mem_to_shadow((void *)PAGE_OFFSET));
128 187
129 for (i = 0; i < E820_MAX_ENTRIES; i++) { 188 for (i = 0; i < E820_MAX_ENTRIES; i++) {
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 71495f1a86d7..2ccdaba31a07 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -449,7 +449,7 @@ __visible pmd_t xen_make_pmd(pmdval_t pmd)
449} 449}
450PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); 450PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);
451 451
452#if CONFIG_PGTABLE_LEVELS == 4 452#ifdef CONFIG_X86_64
453__visible pudval_t xen_pud_val(pud_t pud) 453__visible pudval_t xen_pud_val(pud_t pud)
454{ 454{
455 return pte_mfn_to_pfn(pud.pud); 455 return pte_mfn_to_pfn(pud.pud);
@@ -538,7 +538,7 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val)
538 538
539 xen_mc_issue(PARAVIRT_LAZY_MMU); 539 xen_mc_issue(PARAVIRT_LAZY_MMU);
540} 540}
541#endif /* CONFIG_PGTABLE_LEVELS == 4 */ 541#endif /* CONFIG_X86_64 */
542 542
543static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd, 543static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
544 int (*func)(struct mm_struct *mm, struct page *, enum pt_level), 544 int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
@@ -580,21 +580,17 @@ static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
580 int (*func)(struct mm_struct *mm, struct page *, enum pt_level), 580 int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
581 bool last, unsigned long limit) 581 bool last, unsigned long limit)
582{ 582{
583 int i, nr, flush = 0; 583 int flush = 0;
584 pud_t *pud;
584 585
585 nr = last ? p4d_index(limit) + 1 : PTRS_PER_P4D;
586 for (i = 0; i < nr; i++) {
587 pud_t *pud;
588 586
589 if (p4d_none(p4d[i])) 587 if (p4d_none(*p4d))
590 continue; 588 return flush;
591 589
592 pud = pud_offset(&p4d[i], 0); 590 pud = pud_offset(p4d, 0);
593 if (PTRS_PER_PUD > 1) 591 if (PTRS_PER_PUD > 1)
594 flush |= (*func)(mm, virt_to_page(pud), PT_PUD); 592 flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
595 flush |= xen_pud_walk(mm, pud, func, 593 flush |= xen_pud_walk(mm, pud, func, last, limit);
596 last && i == nr - 1, limit);
597 }
598 return flush; 594 return flush;
599} 595}
600 596
@@ -644,8 +640,6 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
644 continue; 640 continue;
645 641
646 p4d = p4d_offset(&pgd[i], 0); 642 p4d = p4d_offset(&pgd[i], 0);
647 if (PTRS_PER_P4D > 1)
648 flush |= (*func)(mm, virt_to_page(p4d), PT_P4D);
649 flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit); 643 flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
650 } 644 }
651 645
@@ -1176,22 +1170,14 @@ static void __init xen_cleanmfnmap(unsigned long vaddr)
1176{ 1170{
1177 pgd_t *pgd; 1171 pgd_t *pgd;
1178 p4d_t *p4d; 1172 p4d_t *p4d;
1179 unsigned int i;
1180 bool unpin; 1173 bool unpin;
1181 1174
1182 unpin = (vaddr == 2 * PGDIR_SIZE); 1175 unpin = (vaddr == 2 * PGDIR_SIZE);
1183 vaddr &= PMD_MASK; 1176 vaddr &= PMD_MASK;
1184 pgd = pgd_offset_k(vaddr); 1177 pgd = pgd_offset_k(vaddr);
1185 p4d = p4d_offset(pgd, 0); 1178 p4d = p4d_offset(pgd, 0);
1186 for (i = 0; i < PTRS_PER_P4D; i++) { 1179 if (!p4d_none(*p4d))
1187 if (p4d_none(p4d[i])) 1180 xen_cleanmfnmap_p4d(p4d, unpin);
1188 continue;
1189 xen_cleanmfnmap_p4d(p4d + i, unpin);
1190 }
1191 if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
1192 set_pgd(pgd, __pgd(0));
1193 xen_cleanmfnmap_free_pgtbl(p4d, unpin);
1194 }
1195} 1181}
1196 1182
1197static void __init xen_pagetable_p2m_free(void) 1183static void __init xen_pagetable_p2m_free(void)
@@ -1692,7 +1678,7 @@ static void xen_release_pmd(unsigned long pfn)
1692 xen_release_ptpage(pfn, PT_PMD); 1678 xen_release_ptpage(pfn, PT_PMD);
1693} 1679}
1694 1680
1695#if CONFIG_PGTABLE_LEVELS >= 4 1681#ifdef CONFIG_X86_64
1696static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) 1682static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
1697{ 1683{
1698 xen_alloc_ptpage(mm, pfn, PT_PUD); 1684 xen_alloc_ptpage(mm, pfn, PT_PUD);
@@ -2029,13 +2015,12 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
2029 */ 2015 */
2030void __init xen_relocate_p2m(void) 2016void __init xen_relocate_p2m(void)
2031{ 2017{
2032 phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys, p4d_phys; 2018 phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys;
2033 unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end; 2019 unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end;
2034 int n_pte, n_pt, n_pmd, n_pud, n_p4d, idx_pte, idx_pt, idx_pmd, idx_pud, idx_p4d; 2020 int n_pte, n_pt, n_pmd, n_pud, idx_pte, idx_pt, idx_pmd, idx_pud;
2035 pte_t *pt; 2021 pte_t *pt;
2036 pmd_t *pmd; 2022 pmd_t *pmd;
2037 pud_t *pud; 2023 pud_t *pud;
2038 p4d_t *p4d = NULL;
2039 pgd_t *pgd; 2024 pgd_t *pgd;
2040 unsigned long *new_p2m; 2025 unsigned long *new_p2m;
2041 int save_pud; 2026 int save_pud;
@@ -2045,11 +2030,7 @@ void __init xen_relocate_p2m(void)
2045 n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT; 2030 n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT;
2046 n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT; 2031 n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT;
2047 n_pud = roundup(size, P4D_SIZE) >> P4D_SHIFT; 2032 n_pud = roundup(size, P4D_SIZE) >> P4D_SHIFT;
2048 if (PTRS_PER_P4D > 1) 2033 n_frames = n_pte + n_pt + n_pmd + n_pud;
2049 n_p4d = roundup(size, PGDIR_SIZE) >> PGDIR_SHIFT;
2050 else
2051 n_p4d = 0;
2052 n_frames = n_pte + n_pt + n_pmd + n_pud + n_p4d;
2053 2034
2054 new_area = xen_find_free_area(PFN_PHYS(n_frames)); 2035 new_area = xen_find_free_area(PFN_PHYS(n_frames));
2055 if (!new_area) { 2036 if (!new_area) {
@@ -2065,76 +2046,56 @@ void __init xen_relocate_p2m(void)
2065 * To avoid any possible virtual address collision, just use 2046 * To avoid any possible virtual address collision, just use
2066 * 2 * PUD_SIZE for the new area. 2047 * 2 * PUD_SIZE for the new area.
2067 */ 2048 */
2068 p4d_phys = new_area; 2049 pud_phys = new_area;
2069 pud_phys = p4d_phys + PFN_PHYS(n_p4d);
2070 pmd_phys = pud_phys + PFN_PHYS(n_pud); 2050 pmd_phys = pud_phys + PFN_PHYS(n_pud);
2071 pt_phys = pmd_phys + PFN_PHYS(n_pmd); 2051 pt_phys = pmd_phys + PFN_PHYS(n_pmd);
2072 p2m_pfn = PFN_DOWN(pt_phys) + n_pt; 2052 p2m_pfn = PFN_DOWN(pt_phys) + n_pt;
2073 2053
2074 pgd = __va(read_cr3_pa()); 2054 pgd = __va(read_cr3_pa());
2075 new_p2m = (unsigned long *)(2 * PGDIR_SIZE); 2055 new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
2076 idx_p4d = 0;
2077 save_pud = n_pud; 2056 save_pud = n_pud;
2078 do { 2057 for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
2079 if (n_p4d > 0) { 2058 pud = early_memremap(pud_phys, PAGE_SIZE);
2080 p4d = early_memremap(p4d_phys, PAGE_SIZE); 2059 clear_page(pud);
2081 clear_page(p4d); 2060 for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
2082 n_pud = min(save_pud, PTRS_PER_P4D); 2061 idx_pmd++) {
2083 } 2062 pmd = early_memremap(pmd_phys, PAGE_SIZE);
2084 for (idx_pud = 0; idx_pud < n_pud; idx_pud++) { 2063 clear_page(pmd);
2085 pud = early_memremap(pud_phys, PAGE_SIZE); 2064 for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
2086 clear_page(pud); 2065 idx_pt++) {
2087 for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD); 2066 pt = early_memremap(pt_phys, PAGE_SIZE);
2088 idx_pmd++) { 2067 clear_page(pt);
2089 pmd = early_memremap(pmd_phys, PAGE_SIZE); 2068 for (idx_pte = 0;
2090 clear_page(pmd); 2069 idx_pte < min(n_pte, PTRS_PER_PTE);
2091 for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD); 2070 idx_pte++) {
2092 idx_pt++) { 2071 set_pte(pt + idx_pte,
2093 pt = early_memremap(pt_phys, PAGE_SIZE); 2072 pfn_pte(p2m_pfn, PAGE_KERNEL));
2094 clear_page(pt); 2073 p2m_pfn++;
2095 for (idx_pte = 0;
2096 idx_pte < min(n_pte, PTRS_PER_PTE);
2097 idx_pte++) {
2098 set_pte(pt + idx_pte,
2099 pfn_pte(p2m_pfn, PAGE_KERNEL));
2100 p2m_pfn++;
2101 }
2102 n_pte -= PTRS_PER_PTE;
2103 early_memunmap(pt, PAGE_SIZE);
2104 make_lowmem_page_readonly(__va(pt_phys));
2105 pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
2106 PFN_DOWN(pt_phys));
2107 set_pmd(pmd + idx_pt,
2108 __pmd(_PAGE_TABLE | pt_phys));
2109 pt_phys += PAGE_SIZE;
2110 } 2074 }
2111 n_pt -= PTRS_PER_PMD; 2075 n_pte -= PTRS_PER_PTE;
2112 early_memunmap(pmd, PAGE_SIZE); 2076 early_memunmap(pt, PAGE_SIZE);
2113 make_lowmem_page_readonly(__va(pmd_phys)); 2077 make_lowmem_page_readonly(__va(pt_phys));
2114 pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE, 2078 pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
2115 PFN_DOWN(pmd_phys)); 2079 PFN_DOWN(pt_phys));
2116 set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys)); 2080 set_pmd(pmd + idx_pt,
2117 pmd_phys += PAGE_SIZE; 2081 __pmd(_PAGE_TABLE | pt_phys));
2082 pt_phys += PAGE_SIZE;
2118 } 2083 }
2119 n_pmd -= PTRS_PER_PUD; 2084 n_pt -= PTRS_PER_PMD;
2120 early_memunmap(pud, PAGE_SIZE); 2085 early_memunmap(pmd, PAGE_SIZE);
2121 make_lowmem_page_readonly(__va(pud_phys)); 2086 make_lowmem_page_readonly(__va(pmd_phys));
2122 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys)); 2087 pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
2123 if (n_p4d > 0) 2088 PFN_DOWN(pmd_phys));
2124 set_p4d(p4d + idx_pud, __p4d(_PAGE_TABLE | pud_phys)); 2089 set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
2125 else 2090 pmd_phys += PAGE_SIZE;
2126 set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
2127 pud_phys += PAGE_SIZE;
2128 }
2129 if (n_p4d > 0) {
2130 save_pud -= PTRS_PER_P4D;
2131 early_memunmap(p4d, PAGE_SIZE);
2132 make_lowmem_page_readonly(__va(p4d_phys));
2133 pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(p4d_phys));
2134 set_pgd(pgd + 2 + idx_p4d, __pgd(_PAGE_TABLE | p4d_phys));
2135 p4d_phys += PAGE_SIZE;
2136 } 2091 }
2137 } while (++idx_p4d < n_p4d); 2092 n_pmd -= PTRS_PER_PUD;
2093 early_memunmap(pud, PAGE_SIZE);
2094 make_lowmem_page_readonly(__va(pud_phys));
2095 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
2096 set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
2097 pud_phys += PAGE_SIZE;
2098 }
2138 2099
2139 /* Now copy the old p2m info to the new area. */ 2100 /* Now copy the old p2m info to the new area. */
2140 memcpy(new_p2m, xen_p2m_addr, size); 2101 memcpy(new_p2m, xen_p2m_addr, size);
@@ -2361,7 +2322,7 @@ static void __init xen_post_allocator_init(void)
2361 pv_mmu_ops.set_pte = xen_set_pte; 2322 pv_mmu_ops.set_pte = xen_set_pte;
2362 pv_mmu_ops.set_pmd = xen_set_pmd; 2323 pv_mmu_ops.set_pmd = xen_set_pmd;
2363 pv_mmu_ops.set_pud = xen_set_pud; 2324 pv_mmu_ops.set_pud = xen_set_pud;
2364#if CONFIG_PGTABLE_LEVELS >= 4 2325#ifdef CONFIG_X86_64
2365 pv_mmu_ops.set_p4d = xen_set_p4d; 2326 pv_mmu_ops.set_p4d = xen_set_p4d;
2366#endif 2327#endif
2367 2328
@@ -2371,7 +2332,7 @@ static void __init xen_post_allocator_init(void)
2371 pv_mmu_ops.alloc_pmd = xen_alloc_pmd; 2332 pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
2372 pv_mmu_ops.release_pte = xen_release_pte; 2333 pv_mmu_ops.release_pte = xen_release_pte;
2373 pv_mmu_ops.release_pmd = xen_release_pmd; 2334 pv_mmu_ops.release_pmd = xen_release_pmd;
2374#if CONFIG_PGTABLE_LEVELS >= 4 2335#ifdef CONFIG_X86_64
2375 pv_mmu_ops.alloc_pud = xen_alloc_pud; 2336 pv_mmu_ops.alloc_pud = xen_alloc_pud;
2376 pv_mmu_ops.release_pud = xen_release_pud; 2337 pv_mmu_ops.release_pud = xen_release_pud;
2377#endif 2338#endif
@@ -2435,14 +2396,14 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
2435 .make_pmd = PV_CALLEE_SAVE(xen_make_pmd), 2396 .make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
2436 .pmd_val = PV_CALLEE_SAVE(xen_pmd_val), 2397 .pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
2437 2398
2438#if CONFIG_PGTABLE_LEVELS >= 4 2399#ifdef CONFIG_X86_64
2439 .pud_val = PV_CALLEE_SAVE(xen_pud_val), 2400 .pud_val = PV_CALLEE_SAVE(xen_pud_val),
2440 .make_pud = PV_CALLEE_SAVE(xen_make_pud), 2401 .make_pud = PV_CALLEE_SAVE(xen_make_pud),
2441 .set_p4d = xen_set_p4d_hyper, 2402 .set_p4d = xen_set_p4d_hyper,
2442 2403
2443 .alloc_pud = xen_alloc_pmd_init, 2404 .alloc_pud = xen_alloc_pmd_init,
2444 .release_pud = xen_release_pmd_init, 2405 .release_pud = xen_release_pmd_init,
2445#endif /* CONFIG_PGTABLE_LEVELS == 4 */ 2406#endif /* CONFIG_X86_64 */
2446 2407
2447 .activate_mm = xen_activate_mm, 2408 .activate_mm = xen_activate_mm,
2448 .dup_mmap = xen_dup_mmap, 2409 .dup_mmap = xen_dup_mmap,
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 8a7e9924df57..d03c5dd6185d 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -228,32 +228,6 @@ static inline unsigned long __ffs64(u64 word)
228 return __ffs((unsigned long)word); 228 return __ffs((unsigned long)word);
229} 229}
230 230
231/*
232 * clear_bit32 - Clear a bit in memory for u32 array
233 * @nr: Bit to clear
234 * @addr: u32 * address of bitmap
235 *
236 * Same as clear_bit, but avoids needing casts for u32 arrays.
237 */
238
239static __always_inline void clear_bit32(long nr, volatile u32 *addr)
240{
241 clear_bit(nr, (volatile unsigned long *)addr);
242}
243
244/*
245 * set_bit32 - Set a bit in memory for u32 array
246 * @nr: Bit to clear
247 * @addr: u32 * address of bitmap
248 *
249 * Same as set_bit, but avoids needing casts for u32 arrays.
250 */
251
252static __always_inline void set_bit32(long nr, volatile u32 *addr)
253{
254 set_bit(nr, (volatile unsigned long *)addr);
255}
256
257#ifdef __KERNEL__ 231#ifdef __KERNEL__
258 232
259#ifndef set_mask_bits 233#ifndef set_mask_bits
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 43edf659453b..91b46f99b4d2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2496,7 +2496,7 @@ void vmemmap_populate_print_last(void);
2496void vmemmap_free(unsigned long start, unsigned long end); 2496void vmemmap_free(unsigned long start, unsigned long end);
2497#endif 2497#endif
2498void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, 2498void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
2499 unsigned long size); 2499 unsigned long nr_pages);
2500 2500
2501enum mf_flags { 2501enum mf_flags {
2502 MF_COUNT_INCREASED = 1 << 0, 2502 MF_COUNT_INCREASED = 1 << 0,
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index c9c4a81b9767..a507f43ad221 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1151,13 +1151,17 @@ struct mem_section {
1151#define SECTION_ROOT_MASK (SECTIONS_PER_ROOT - 1) 1151#define SECTION_ROOT_MASK (SECTIONS_PER_ROOT - 1)
1152 1152
1153#ifdef CONFIG_SPARSEMEM_EXTREME 1153#ifdef CONFIG_SPARSEMEM_EXTREME
1154extern struct mem_section *mem_section[NR_SECTION_ROOTS]; 1154extern struct mem_section **mem_section;
1155#else 1155#else
1156extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]; 1156extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
1157#endif 1157#endif
1158 1158
1159static inline struct mem_section *__nr_to_section(unsigned long nr) 1159static inline struct mem_section *__nr_to_section(unsigned long nr)
1160{ 1160{
1161#ifdef CONFIG_SPARSEMEM_EXTREME
1162 if (!mem_section)
1163 return NULL;
1164#endif
1161 if (!mem_section[SECTION_NR_TO_ROOT(nr)]) 1165 if (!mem_section[SECTION_NR_TO_ROOT(nr)])
1162 return NULL; 1166 return NULL;
1163 return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK]; 1167 return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
diff --git a/mm/gup.c b/mm/gup.c
index b2b4d4263768..dfcde13f289a 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1643,6 +1643,47 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
1643 return 1; 1643 return 1;
1644} 1644}
1645 1645
1646static void gup_pgd_range(unsigned long addr, unsigned long end,
1647 int write, struct page **pages, int *nr)
1648{
1649 unsigned long next;
1650 pgd_t *pgdp;
1651
1652 pgdp = pgd_offset(current->mm, addr);
1653 do {
1654 pgd_t pgd = READ_ONCE(*pgdp);
1655
1656 next = pgd_addr_end(addr, end);
1657 if (pgd_none(pgd))
1658 return;
1659 if (unlikely(pgd_huge(pgd))) {
1660 if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
1661 pages, nr))
1662 return;
1663 } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
1664 if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
1665 PGDIR_SHIFT, next, write, pages, nr))
1666 return;
1667 } else if (!gup_p4d_range(pgd, addr, next, write, pages, nr))
1668 return;
1669 } while (pgdp++, addr = next, addr != end);
1670}
1671
1672#ifndef gup_fast_permitted
1673/*
1674 * Check if it's allowed to use __get_user_pages_fast() for the range, or
1675 * we need to fall back to the slow version:
1676 */
1677bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
1678{
1679 unsigned long len, end;
1680
1681 len = (unsigned long) nr_pages << PAGE_SHIFT;
1682 end = start + len;
1683 return end >= start;
1684}
1685#endif
1686
1646/* 1687/*
1647 * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to 1688 * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
1648 * the regular GUP. It will only return non-negative values. 1689 * the regular GUP. It will only return non-negative values.
@@ -1650,10 +1691,8 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
1650int __get_user_pages_fast(unsigned long start, int nr_pages, int write, 1691int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
1651 struct page **pages) 1692 struct page **pages)
1652{ 1693{
1653 struct mm_struct *mm = current->mm;
1654 unsigned long addr, len, end; 1694 unsigned long addr, len, end;
1655 unsigned long next, flags; 1695 unsigned long flags;
1656 pgd_t *pgdp;
1657 int nr = 0; 1696 int nr = 0;
1658 1697
1659 start &= PAGE_MASK; 1698 start &= PAGE_MASK;
@@ -1677,45 +1716,15 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
1677 * block IPIs that come from THPs splitting. 1716 * block IPIs that come from THPs splitting.
1678 */ 1717 */
1679 1718
1680 local_irq_save(flags); 1719 if (gup_fast_permitted(start, nr_pages, write)) {
1681 pgdp = pgd_offset(mm, addr); 1720 local_irq_save(flags);
1682 do { 1721 gup_pgd_range(addr, end, write, pages, &nr);
1683 pgd_t pgd = READ_ONCE(*pgdp); 1722 local_irq_restore(flags);
1684 1723 }
1685 next = pgd_addr_end(addr, end);
1686 if (pgd_none(pgd))
1687 break;
1688 if (unlikely(pgd_huge(pgd))) {
1689 if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
1690 pages, &nr))
1691 break;
1692 } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
1693 if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
1694 PGDIR_SHIFT, next, write, pages, &nr))
1695 break;
1696 } else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
1697 break;
1698 } while (pgdp++, addr = next, addr != end);
1699 local_irq_restore(flags);
1700 1724
1701 return nr; 1725 return nr;
1702} 1726}
1703 1727
1704#ifndef gup_fast_permitted
1705/*
1706 * Check if it's allowed to use __get_user_pages_fast() for the range, or
1707 * we need to fall back to the slow version:
1708 */
1709bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
1710{
1711 unsigned long len, end;
1712
1713 len = (unsigned long) nr_pages << PAGE_SHIFT;
1714 end = start + len;
1715 return end >= start;
1716}
1717#endif
1718
1719/** 1728/**
1720 * get_user_pages_fast() - pin user pages in memory 1729 * get_user_pages_fast() - pin user pages in memory
1721 * @start: starting user address 1730 * @start: starting user address
@@ -1735,12 +1744,22 @@ bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
1735int get_user_pages_fast(unsigned long start, int nr_pages, int write, 1744int get_user_pages_fast(unsigned long start, int nr_pages, int write,
1736 struct page **pages) 1745 struct page **pages)
1737{ 1746{
1747 unsigned long addr, len, end;
1738 int nr = 0, ret = 0; 1748 int nr = 0, ret = 0;
1739 1749
1740 start &= PAGE_MASK; 1750 start &= PAGE_MASK;
1751 addr = start;
1752 len = (unsigned long) nr_pages << PAGE_SHIFT;
1753 end = start + len;
1754
1755 if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
1756 (void __user *)start, len)))
1757 return 0;
1741 1758
1742 if (gup_fast_permitted(start, nr_pages, write)) { 1759 if (gup_fast_permitted(start, nr_pages, write)) {
1743 nr = __get_user_pages_fast(start, nr_pages, write, pages); 1760 local_irq_disable();
1761 gup_pgd_range(addr, end, write, pages, &nr);
1762 local_irq_enable();
1744 ret = nr; 1763 ret = nr;
1745 } 1764 }
1746 1765
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 77e4d3c5c57b..8dfd13f724d9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5646,6 +5646,16 @@ void __init sparse_memory_present_with_active_regions(int nid)
5646 unsigned long start_pfn, end_pfn; 5646 unsigned long start_pfn, end_pfn;
5647 int i, this_nid; 5647 int i, this_nid;
5648 5648
5649#ifdef CONFIG_SPARSEMEM_EXTREME
5650 if (!mem_section) {
5651 unsigned long size, align;
5652
5653 size = sizeof(struct mem_section) * NR_SECTION_ROOTS;
5654 align = 1 << (INTERNODE_CACHE_SHIFT);
5655 mem_section = memblock_virt_alloc(size, align);
5656 }
5657#endif
5658
5649 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid) 5659 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid)
5650 memory_present(this_nid, start_pfn, end_pfn); 5660 memory_present(this_nid, start_pfn, end_pfn);
5651} 5661}
diff --git a/mm/sparse.c b/mm/sparse.c
index 4900707ae146..044138852baf 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -23,8 +23,7 @@
23 * 1) mem_section - memory sections, mem_map's for valid memory 23 * 1) mem_section - memory sections, mem_map's for valid memory
24 */ 24 */
25#ifdef CONFIG_SPARSEMEM_EXTREME 25#ifdef CONFIG_SPARSEMEM_EXTREME
26struct mem_section *mem_section[NR_SECTION_ROOTS] 26struct mem_section **mem_section;
27 ____cacheline_internodealigned_in_smp;
28#else 27#else
29struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT] 28struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
30 ____cacheline_internodealigned_in_smp; 29 ____cacheline_internodealigned_in_smp;
@@ -101,7 +100,7 @@ static inline int sparse_index_init(unsigned long section_nr, int nid)
101int __section_nr(struct mem_section* ms) 100int __section_nr(struct mem_section* ms)
102{ 101{
103 unsigned long root_nr; 102 unsigned long root_nr;
104 struct mem_section* root; 103 struct mem_section *root = NULL;
105 104
106 for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) { 105 for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
107 root = __nr_to_section(root_nr * SECTIONS_PER_ROOT); 106 root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
@@ -112,7 +111,7 @@ int __section_nr(struct mem_section* ms)
112 break; 111 break;
113 } 112 }
114 113
115 VM_BUG_ON(root_nr == NR_SECTION_ROOTS); 114 VM_BUG_ON(!root);
116 115
117 return (root_nr * SECTIONS_PER_ROOT) + (ms - root); 116 return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
118} 117}
@@ -330,11 +329,17 @@ again:
330static void __init check_usemap_section_nr(int nid, unsigned long *usemap) 329static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
331{ 330{
332 unsigned long usemap_snr, pgdat_snr; 331 unsigned long usemap_snr, pgdat_snr;
333 static unsigned long old_usemap_snr = NR_MEM_SECTIONS; 332 static unsigned long old_usemap_snr;
334 static unsigned long old_pgdat_snr = NR_MEM_SECTIONS; 333 static unsigned long old_pgdat_snr;
335 struct pglist_data *pgdat = NODE_DATA(nid); 334 struct pglist_data *pgdat = NODE_DATA(nid);
336 int usemap_nid; 335 int usemap_nid;
337 336
337 /* First call */
338 if (!old_usemap_snr) {
339 old_usemap_snr = NR_MEM_SECTIONS;
340 old_pgdat_snr = NR_MEM_SECTIONS;
341 }
342
338 usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT); 343 usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
339 pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT); 344 pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
340 if (usemap_snr == pgdat_snr) 345 if (usemap_snr == pgdat_snr)
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
index 961e3ee26c27..66e5ce5b91f0 100644
--- a/tools/testing/selftests/x86/ldt_gdt.c
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -115,7 +115,15 @@ static void check_valid_segment(uint16_t index, int ldt,
115 return; 115 return;
116 } 116 }
117 117
118 if (ar != expected_ar) { 118 /* The SDM says "bits 19:16 are undefined". Thanks. */
119 ar &= ~0xF0000;
120
121 /*
122 * NB: Different Linux versions do different things with the
123 * accessed bit in set_thread_area().
124 */
125 if (ar != expected_ar &&
126 (ldt || ar != (expected_ar | AR_ACCESSED))) {
119 printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n", 127 printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n",
120 (ldt ? "LDT" : "GDT"), index, ar, expected_ar); 128 (ldt ? "LDT" : "GDT"), index, ar, expected_ar);
121 nerrs++; 129 nerrs++;
@@ -129,30 +137,51 @@ static void check_valid_segment(uint16_t index, int ldt,
129 } 137 }
130} 138}
131 139
132static bool install_valid_mode(const struct user_desc *desc, uint32_t ar, 140static bool install_valid_mode(const struct user_desc *d, uint32_t ar,
133 bool oldmode) 141 bool oldmode, bool ldt)
134{ 142{
135 int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11, 143 struct user_desc desc = *d;
136 desc, sizeof(*desc)); 144 int ret;
137 if (ret < -1) 145
138 errno = -ret; 146 if (!ldt) {
147#ifndef __i386__
148 /* No point testing set_thread_area in a 64-bit build */
149 return false;
150#endif
151 if (!gdt_entry_num)
152 return false;
153 desc.entry_number = gdt_entry_num;
154
155 ret = syscall(SYS_set_thread_area, &desc);
156 } else {
157 ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
158 &desc, sizeof(desc));
159
160 if (ret < -1)
161 errno = -ret;
162
163 if (ret != 0 && errno == ENOSYS) {
164 printf("[OK]\tmodify_ldt returned -ENOSYS\n");
165 return false;
166 }
167 }
168
139 if (ret == 0) { 169 if (ret == 0) {
140 uint32_t limit = desc->limit; 170 uint32_t limit = desc.limit;
141 if (desc->limit_in_pages) 171 if (desc.limit_in_pages)
142 limit = (limit << 12) + 4095; 172 limit = (limit << 12) + 4095;
143 check_valid_segment(desc->entry_number, 1, ar, limit, true); 173 check_valid_segment(desc.entry_number, ldt, ar, limit, true);
144 return true; 174 return true;
145 } else if (errno == ENOSYS) {
146 printf("[OK]\tmodify_ldt returned -ENOSYS\n");
147 return false;
148 } else { 175 } else {
149 if (desc->seg_32bit) { 176 if (desc.seg_32bit) {
150 printf("[FAIL]\tUnexpected modify_ldt failure %d\n", 177 printf("[FAIL]\tUnexpected %s failure %d\n",
178 ldt ? "modify_ldt" : "set_thread_area",
151 errno); 179 errno);
152 nerrs++; 180 nerrs++;
153 return false; 181 return false;
154 } else { 182 } else {
155 printf("[OK]\tmodify_ldt rejected 16 bit segment\n"); 183 printf("[OK]\t%s rejected 16 bit segment\n",
184 ldt ? "modify_ldt" : "set_thread_area");
156 return false; 185 return false;
157 } 186 }
158 } 187 }
@@ -160,7 +189,15 @@ static bool install_valid_mode(const struct user_desc *desc, uint32_t ar,
160 189
161static bool install_valid(const struct user_desc *desc, uint32_t ar) 190static bool install_valid(const struct user_desc *desc, uint32_t ar)
162{ 191{
163 return install_valid_mode(desc, ar, false); 192 bool ret = install_valid_mode(desc, ar, false, true);
193
194 if (desc->contents <= 1 && desc->seg_32bit &&
195 !desc->seg_not_present) {
196 /* Should work in the GDT, too. */
197 install_valid_mode(desc, ar, false, false);
198 }
199
200 return ret;
164} 201}
165 202
166static void install_invalid(const struct user_desc *desc, bool oldmode) 203static void install_invalid(const struct user_desc *desc, bool oldmode)
@@ -367,9 +404,24 @@ static void do_simple_tests(void)
367 install_invalid(&desc, false); 404 install_invalid(&desc, false);
368 405
369 desc.seg_not_present = 0; 406 desc.seg_not_present = 0;
370 desc.read_exec_only = 0;
371 desc.seg_32bit = 1; 407 desc.seg_32bit = 1;
408 desc.read_exec_only = 0;
409 desc.limit = 0xfffff;
410
372 install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB); 411 install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB);
412
413 desc.limit_in_pages = 1;
414
415 install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB | AR_G);
416 desc.read_exec_only = 1;
417 install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S | AR_P | AR_DB | AR_G);
418 desc.contents = 1;
419 desc.read_exec_only = 0;
420 install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA_EXPDOWN | AR_S | AR_P | AR_DB | AR_G);
421 desc.read_exec_only = 1;
422 install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA_EXPDOWN | AR_S | AR_P | AR_DB | AR_G);
423
424 desc.limit = 0;
373 install_invalid(&desc, true); 425 install_invalid(&desc, true);
374} 426}
375 427
diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
index 555e43ca846b..7a1cc0e56d2d 100644
--- a/tools/testing/selftests/x86/protection_keys.c
+++ b/tools/testing/selftests/x86/protection_keys.c
@@ -189,17 +189,29 @@ void lots_o_noops_around_write(int *write_to_me)
189#define u64 uint64_t 189#define u64 uint64_t
190 190
191#ifdef __i386__ 191#ifdef __i386__
192#define SYS_mprotect_key 380 192
193#define SYS_pkey_alloc 381 193#ifndef SYS_mprotect_key
194#define SYS_pkey_free 382 194# define SYS_mprotect_key 380
195#endif
196#ifndef SYS_pkey_alloc
197# define SYS_pkey_alloc 381
198# define SYS_pkey_free 382
199#endif
195#define REG_IP_IDX REG_EIP 200#define REG_IP_IDX REG_EIP
196#define si_pkey_offset 0x14 201#define si_pkey_offset 0x14
202
197#else 203#else
198#define SYS_mprotect_key 329 204
199#define SYS_pkey_alloc 330 205#ifndef SYS_mprotect_key
200#define SYS_pkey_free 331 206# define SYS_mprotect_key 329
207#endif
208#ifndef SYS_pkey_alloc
209# define SYS_pkey_alloc 330
210# define SYS_pkey_free 331
211#endif
201#define REG_IP_IDX REG_RIP 212#define REG_IP_IDX REG_RIP
202#define si_pkey_offset 0x20 213#define si_pkey_offset 0x20
214
203#endif 215#endif
204 216
205void dump_mem(void *dumpme, int len_bytes) 217void dump_mem(void *dumpme, int len_bytes)