author     Linus Torvalds <torvalds@linux-foundation.org>  2016-08-12 17:31:10 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2016-08-12 17:31:10 -0400
commit     01ea443982203fcdee1250ab630ab6516f0a16e6 (patch)
tree       4bb26d3417faebe6ccdad7021184b0c1d0ccb3fb
parent     3bc6d8c155fbbbe789b6caa44b9e658a5b2995d3 (diff)
parent     d52c0569bab4edc888832df44dc7ac28517134f6 (diff)
Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Ingo Molnar:
"This is bigger than usual - the reason is partly a pent-up stream of
fixes after the merge window and partly accidental. The fixes are:
- five patches to fix a boot failure on Andy Lutomirski's laptop
- four SGI UV platform fixes
- KASAN fix
- warning fix
- documentation update
- swap entry definition fix
- pkeys fix
- irq stats fix"
* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/apic/x2apic, smp/hotplug: Don't use before alloc in x2apic_cluster_probe()
x86/efi: Allocate a trampoline if needed in efi_free_boot_services()
x86/boot: Rework reserve_real_mode() to allow multiple tries
x86/boot: Defer setup_real_mode() to early_initcall time
x86/boot: Synchronize trampoline_cr4_features and mmu_cr4_features directly
x86/boot: Run reserve_bios_regions() after we initialize the memory map
x86/irq: Do not subtract irq_tlb_count from irq_call_count
x86/mm: Fix swap entry comment and macro
x86/mm/kaslr: Fix -Wformat-security warning
x86/mm/pkeys: Fix compact mode by removing protection keys' XSAVE buffer manipulation
x86/build: Reduce the W=1 warnings noise when compiling x86 syscall tables
x86/platform/UV: Fix kernel panic running RHEL kdump kernel on UV systems
x86/platform/UV: Fix problem with UV4 BIOS providing incorrect PXM values
x86/platform/UV: Fix bug with iounmap() of the UV4 EFI System Table causing a crash
x86/platform/UV: Fix problem with UV4 Socket IDs not being contiguous
x86/entry: Clarify the RF saving/restoring situation with SYSCALL/SYSRET
x86/mm: Disable preemption during CR3 read+write
x86/mm/KASLR: Increase BRK pages for KASLR memory randomization
x86/mm/KASLR: Fix physical memory calculation on KASLR memory randomization
x86, kasan, ftrace: Put APIC interrupt handlers into .irqentry.text
-rw-r--r--  arch/x86/entry/Makefile                |   2
-rw-r--r--  arch/x86/entry/entry_64.S              |  25
-rw-r--r--  arch/x86/include/asm/hardirq.h         |   4
-rw-r--r--  arch/x86/include/asm/pgtable_64.h      |   4
-rw-r--r--  arch/x86/include/asm/realmode.h        |  10
-rw-r--r--  arch/x86/include/asm/tlbflush.h        |   7
-rw-r--r--  arch/x86/include/asm/uv/bios.h         |   5
-rw-r--r--  arch/x86/kernel/apic/x2apic_cluster.c  |  13
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c     |  42
-rw-r--r--  arch/x86/kernel/fpu/xstate.c           | 138
-rw-r--r--  arch/x86/kernel/head32.c               |   2
-rw-r--r--  arch/x86/kernel/head64.c               |   1
-rw-r--r--  arch/x86/kernel/irq.c                  |   3
-rw-r--r--  arch/x86/kernel/setup.c                |  27
-rw-r--r--  arch/x86/lib/kaslr.c                   |   2
-rw-r--r--  arch/x86/mm/init.c                     |  14
-rw-r--r--  arch/x86/mm/kaslr.c                    |   2
-rw-r--r--  arch/x86/platform/efi/quirks.c         |  21
-rw-r--r--  arch/x86/platform/uv/bios_uv.c         |   8
-rw-r--r--  arch/x86/realmode/init.c               |  47
20 files changed, 182 insertions(+), 195 deletions(-)
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index fe91c25092da..77f28ce9c646 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -5,6 +5,8 @@
 OBJECT_FILES_NON_STANDARD_entry_$(BITS).o   := y
 OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y
 
+CFLAGS_syscall_64.o += -Wno-override-init
+CFLAGS_syscall_32.o += -Wno-override-init
 obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
 obj-y += common.o
 
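For context on the flag being added: the x86 syscall tables fill every slot with a default handler via a GCC range initializer and then override individual entries with designated initializers, which is precisely the pattern -Woverride-init (enabled under W=1) complains about. A minimal sketch of that pattern, with illustrative names rather than the kernel's actual table:

    typedef void (*sys_call_ptr_t)(void);

    static void sys_ni_syscall(void) { }   /* default "not implemented" stub */
    static void sys_read_stub(void) { }    /* a real handler */

    static const sys_call_ptr_t sys_call_table[16] = {
            [0 ... 15] = sys_ni_syscall,   /* fill every slot with the stub */
            [3] = sys_read_stub,           /* -Woverride-init fires here */
    };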
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index b846875aeea6..d172c619c449 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -288,11 +288,15 @@ return_from_SYSCALL_64:
 	jne	opportunistic_sysret_failed
 
 	/*
-	 * SYSRET can't restore RF.  SYSRET can restore TF, but unlike IRET,
-	 * restoring TF results in a trap from userspace immediately after
-	 * SYSRET.  This would cause an infinite loop whenever #DB happens
-	 * with register state that satisfies the opportunistic SYSRET
-	 * conditions.  For example, single-stepping this user code:
+	 * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
+	 * restore RF properly. If the slowpath sets it for whatever reason, we
+	 * need to restore it correctly.
+	 *
+	 * SYSRET can restore TF, but unlike IRET, restoring TF results in a
+	 * trap from userspace immediately after SYSRET.  This would cause an
+	 * infinite loop whenever #DB happens with register state that satisfies
+	 * the opportunistic SYSRET conditions.  For example, single-stepping
+	 * this user code:
 	 *
 	 *           movq	$stuck_here, %rcx
 	 *           pushfq
@@ -601,9 +605,20 @@ apicinterrupt3 \num trace(\sym) smp_trace(\sym)
 .endm
 #endif
 
+/* Make sure APIC interrupt handlers end up in the irqentry section: */
+#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
+# define PUSH_SECTION_IRQENTRY	.pushsection .irqentry.text, "ax"
+# define POP_SECTION_IRQENTRY	.popsection
+#else
+# define PUSH_SECTION_IRQENTRY
+# define POP_SECTION_IRQENTRY
+#endif
+
 .macro apicinterrupt num sym do_sym
+PUSH_SECTION_IRQENTRY
 apicinterrupt3 \num \sym \do_sym
 trace_apicinterrupt \num \sym
+POP_SECTION_IRQENTRY
 .endm
 
 #ifdef CONFIG_SMP
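The reason the handlers must land in .irqentry.text: tooling that classifies a fault or a stack trace checks whether an address falls inside the interrupt-entry section, using the start/end symbols the linker script exports. A simplified sketch of that consumer-side check (the section symbols are the real exported names; the helper is a stand-in for what KASAN and the function-graph tracer do):

    /* Bounds of .irqentry.text, provided by the kernel linker script: */
    extern char __irqentry_text_start[], __irqentry_text_end[];

    static int in_irqentry_text(unsigned long addr)
    {
            return addr >= (unsigned long)__irqentry_text_start &&
                   addr <  (unsigned long)__irqentry_text_end;
    }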
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 7178043b0e1d..59405a248fc2 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -22,10 +22,6 @@ typedef struct {
 #ifdef CONFIG_SMP
 	unsigned int irq_resched_count;
 	unsigned int irq_call_count;
-	/*
-	 * irq_tlb_count is double-counted in irq_call_count, so it must be
-	 * subtracted from irq_call_count when displaying irq_call_count
-	 */
 	unsigned int irq_tlb_count;
 #endif
 #ifdef CONFIG_X86_THERMAL_VECTOR
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 7e8ec7ae10fa..1cc82ece9ac1 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -145,7 +145,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
  *
  * | ...            | 11| 10|  9|8|7|6|5| 4| 3|2|1|0| <- bit number
  * | ...            |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names
- * | OFFSET (14->63) | TYPE (10-13) |0|X|X|X| X| X|X|X|0| <- swp entry
+ * | OFFSET (14->63) | TYPE (9-13)  |0|X|X|X| X| X|X|X|0| <- swp entry
  *
  * G (8) is aliased and used as a PROT_NONE indicator for
  * !present ptes.  We need to start storing swap entries above
@@ -156,7 +156,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
 #define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
 #define SWP_TYPE_BITS 5
 /* Place the offset above the type: */
-#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS + 1)
+#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS)
 
 #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
 
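Worked through, the two hunks make the macros and the diagram agree: _PAGE_BIT_PROTNONE aliases the Global bit (bit 8), so the swap type field starts at bit 9 and, being 5 bits wide, ends at bit 13; the offset then begins at bit 14 with no gap. The old "+ 1" put the offset at bit 15, wasting a bit, while the comment claimed the type started at bit 10. A standalone check (constants copied from the patch):

    #define _PAGE_BIT_PROTNONE      8       /* aliases the Global bit */
    #define SWP_TYPE_FIRST_BIT      (_PAGE_BIT_PROTNONE + 1)        /* = 9 */
    #define SWP_TYPE_BITS           5       /* type occupies bits 9-13 */
    #define SWP_OFFSET_FIRST_BIT    (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS)

    _Static_assert(SWP_OFFSET_FIRST_BIT == 14, "offset starts at bit 14");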
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index 9c6b890d5e7a..b2988c0ed829 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -58,7 +58,15 @@ extern unsigned char boot_gdt[];
 extern unsigned char secondary_startup_64[];
 #endif
 
+static inline size_t real_mode_size_needed(void)
+{
+	if (real_mode_header)
+		return 0;	/* already allocated. */
+
+	return ALIGN(real_mode_blob_end - real_mode_blob, PAGE_SIZE);
+}
+
+void set_real_mode_mem(phys_addr_t mem, size_t size);
 void reserve_real_mode(void);
-void setup_real_mode(void);
 
 #endif /* _ARCH_X86_REALMODE_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 4e5be94e079a..6fa85944af83 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -135,7 +135,14 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
 
 static inline void __native_flush_tlb(void)
 {
+	/*
+	 * If current->mm == NULL then we borrow a mm which may change during a
+	 * task switch and therefore we must not be preempted while we write CR3
+	 * back:
+	 */
+	preempt_disable();
 	native_write_cr3(native_read_cr3());
+	preempt_enable();
 }
 
 static inline void __native_flush_tlb_global_irq_disabled(void)
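The hunk above closes a read-modify-write race: when the kernel runs on a borrowed (lazy) mm, a task switch between the CR3 read and the CR3 write can repoint CR3, and writing the stale value back would reload page tables that may already have been torn down. A userspace-flavoured sketch of the shape of the bug, with stubs standing in for the real CR3 accessors:

    static unsigned long cr3;       /* stand-in for the CR3 register */

    static unsigned long fake_read_cr3(void)    { return cr3; }
    static void fake_write_cr3(unsigned long v) { cr3 = v; }

    static void flush_tlb_racy(void)
    {
            unsigned long v = fake_read_cr3();
            /*
             * A preemption here may switch tasks and repoint CR3; writing
             * 'v' back then reloads a stale page-table root.  The patch
             * brackets exactly this window with preempt_disable()/enable().
             */
            fake_write_cr3(v);
    }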
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index c852590254d5..e652a7cc6186 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -79,7 +79,7 @@ struct uv_gam_range_entry {
 	u16	nasid;		/* HNasid */
 	u16	sockid;		/* Socket ID, high bits of APIC ID */
 	u16	pnode;		/* Index to MMR and GRU spaces */
-	u32	pxm;		/* ACPI proximity domain number */
+	u32	unused2;
 	u32	limit;		/* PA bits 56:26 (UV_GAM_RANGE_SHFT) */
 };
 
@@ -88,7 +88,8 @@ struct uv_gam_range_entry {
 #define	UV_SYSTAB_VERSION_UV4		0x400	/* UV4 BIOS base version */
 #define	UV_SYSTAB_VERSION_UV4_1		0x401	/* + gpa_shift */
 #define	UV_SYSTAB_VERSION_UV4_2		0x402	/* + TYPE_NVRAM/WINDOW/MBOX */
-#define	UV_SYSTAB_VERSION_UV4_LATEST	UV_SYSTAB_VERSION_UV4_2
+#define	UV_SYSTAB_VERSION_UV4_3		0x403	/* - GAM Range PXM Value */
+#define	UV_SYSTAB_VERSION_UV4_LATEST	UV_SYSTAB_VERSION_UV4_3
 
 #define	UV_SYSTAB_TYPE_UNUSED		0	/* End of table (offset == 0) */
 #define	UV_SYSTAB_TYPE_GAM_PARAMS	1	/* GAM PARAM conversions */
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 6368fa69d2af..54f35d988025 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -155,7 +155,7 @@ static void init_x2apic_ldr(void)
 /*
  * At CPU state changes, update the x2apic cluster sibling info.
  */
-int x2apic_prepare_cpu(unsigned int cpu)
+static int x2apic_prepare_cpu(unsigned int cpu)
 {
 	if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL))
 		return -ENOMEM;
@@ -168,7 +168,7 @@ int x2apic_prepare_cpu(unsigned int cpu)
 	return 0;
 }
 
-int x2apic_dead_cpu(unsigned int this_cpu)
+static int x2apic_dead_cpu(unsigned int this_cpu)
 {
 	int cpu;
 
@@ -186,13 +186,18 @@ int x2apic_dead_cpu(unsigned int this_cpu)
 static int x2apic_cluster_probe(void)
 {
 	int cpu = smp_processor_id();
+	int ret;
 
 	if (!x2apic_mode)
 		return 0;
 
+	ret = cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
+				x2apic_prepare_cpu, x2apic_dead_cpu);
+	if (ret < 0) {
+		pr_err("Failed to register X2APIC_PREPARE\n");
+		return 0;
+	}
 	cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
-	cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
-			  x2apic_prepare_cpu, x2apic_dead_cpu);
 	return 1;
 }
 
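The ordering is the entire fix: cpuhp_setup_state() runs the prepare callback for every CPU that is already online, and that callback is what allocates the per-CPU cluster mask. The old code set a bit in the mask before registering the callback, i.e. before the mask existed. A toy model of the corrected order (plain C, illustrative names):

    #include <stdio.h>
    #include <stdlib.h>

    #define NR_CPUS 4
    static unsigned long *cpus_in_cluster[NR_CPUS];

    static int prepare_cpu(unsigned int cpu)    /* hotplug "prepare" step */
    {
            cpus_in_cluster[cpu] = calloc(1, sizeof(unsigned long));
            return cpus_in_cluster[cpu] ? 0 : -1;
    }

    int main(void)
    {
            unsigned int cpu = 0;   /* the boot CPU */

            /* Fixed order: run the prepare callback first ... */
            if (prepare_cpu(cpu) < 0)
                    return 1;
            /* ... then touch the mask; doing this first, as the old code
             * effectively did, dereferences a NULL pointer. */
            *cpus_in_cluster[cpu] |= 1UL << cpu;
            printf("cpu%u mask = %#lx\n", cpu, *cpus_in_cluster[cpu]);
            return 0;
    }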
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 09b59adaea3f..cb0673c1e940 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -223,6 +223,11 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	if (strncmp(oem_id, "SGI", 3) != 0)
 		return 0;
 
+	if (numa_off) {
+		pr_err("UV: NUMA is off, disabling UV support\n");
+		return 0;
+	}
+
 	/* Setup early hub type field in uv_hub_info for Node 0 */
 	uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0;
 
@@ -325,7 +330,7 @@ static __init void build_uv_gr_table(void)
 	struct uv_gam_range_entry *gre = uv_gre_table;
 	struct uv_gam_range_s *grt;
 	unsigned long last_limit = 0, ram_limit = 0;
-	int bytes, i, sid, lsid = -1;
+	int bytes, i, sid, lsid = -1, indx = 0, lindx = -1;
 
 	if (!gre)
 		return;
@@ -356,11 +361,12 @@ static __init void build_uv_gr_table(void)
 		}
 		sid = gre->sockid - _min_socket;
 		if (lsid < sid) {		/* new range */
-			grt = &_gr_table[sid];
-			grt->base = lsid;
+			grt = &_gr_table[indx];
+			grt->base = lindx;
 			grt->nasid = gre->nasid;
 			grt->limit = last_limit = gre->limit;
 			lsid = sid;
+			lindx = indx++;
 			continue;
 		}
 		if (lsid == sid && !ram_limit) {	/* update range */
@@ -371,7 +377,7 @@ static __init void build_uv_gr_table(void)
 		}
 		if (!ram_limit) {		/* non-contiguous ram range */
 			grt++;
-			grt->base = sid - 1;
+			grt->base = lindx;
 			grt->nasid = gre->nasid;
 			grt->limit = last_limit = gre->limit;
 			continue;
@@ -1155,19 +1161,18 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
 	for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
 		if (!index) {
 			pr_info("UV: GAM Range Table...\n");
-			pr_info("UV:  # %20s %14s %5s %4s %5s %3s %2s %3s\n",
+			pr_info("UV:  # %20s %14s %5s %4s %5s %3s %2s\n",
 				"Range", "", "Size", "Type", "NASID",
-				"SID", "PN", "PXM");
+				"SID", "PN");
 		}
 		pr_info(
-		"UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x %3d\n",
+		"UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x\n",
 			index++,
 			(unsigned long)lgre << UV_GAM_RANGE_SHFT,
 			(unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
 			((unsigned long)(gre->limit - lgre)) >>
 				(30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
-			gre->type, gre->nasid, gre->sockid,
-			gre->pnode, gre->pxm);
+			gre->type, gre->nasid, gre->sockid, gre->pnode);
 
 		lgre = gre->limit;
 		if (sock_min > gre->sockid)
@@ -1286,7 +1291,7 @@ static void __init build_socket_tables(void)
 		_pnode_to_socket[i] = SOCK_EMPTY;
 
 	/* fill in pnode/node/addr conversion list values */
-	pr_info("UV: GAM Building socket/pnode/pxm conversion tables\n");
+	pr_info("UV: GAM Building socket/pnode conversion tables\n");
 	for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
 		if (gre->type == UV_GAM_RANGE_TYPE_HOLE)
 			continue;
@@ -1294,20 +1299,18 @@ static void __init build_socket_tables(void)
 		if (_socket_to_pnode[i] != SOCK_EMPTY)
 			continue;	/* duplicate */
 		_socket_to_pnode[i] = gre->pnode;
-		_socket_to_node[i] = gre->pxm;
 
 		i = gre->pnode - minpnode;
 		_pnode_to_socket[i] = gre->sockid;
 
 		pr_info(
-		"UV: sid:%02x type:%d nasid:%04x pn:%02x pxm:%2d pn2s:%2x\n",
+		"UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n",
 			gre->sockid, gre->type, gre->nasid,
 			_socket_to_pnode[gre->sockid - minsock],
-			_socket_to_node[gre->sockid - minsock],
 			_pnode_to_socket[gre->pnode - minpnode]);
 	}
 
-	/* check socket -> node values */
+	/* Set socket -> node values */
 	lnid = -1;
 	for_each_present_cpu(cpu) {
 		int nid = cpu_to_node(cpu);
@@ -1318,14 +1321,9 @@ static void __init build_socket_tables(void)
 		lnid = nid;
 		apicid = per_cpu(x86_cpu_to_apicid, cpu);
 		sockid = apicid >> uv_cpuid.socketid_shift;
-		i = sockid - minsock;
-
-		if (nid != _socket_to_node[i]) {
-			pr_warn(
-			"UV: %02x: type:%d socket:%02x PXM:%02x != node:%2d\n",
-			i, sockid, gre->type, _socket_to_node[i], nid);
-			_socket_to_node[i] = nid;
-		}
+		_socket_to_node[sockid - minsock] = nid;
+		pr_info("UV: sid:%02x: apicid:%04x node:%2d\n",
+			sockid, apicid, nid);
 	}
 
 	/* Setup physical blade to pnode translation from GAM Range Table */
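The indx/lindx bookkeeping in build_uv_gr_table() exists because UV4 socket IDs need not be contiguous: indexing the range table by socket ID leaves uninitialized holes that later lookups walk into, so the fix keeps a dense running index instead. A toy model of the difference (illustrative only, not the UV API):

    #include <stdio.h>

    struct range { int nasid, limit; };

    int main(void)
    {
            int sockids[] = { 0, 2, 5 };    /* non-contiguous, as on UV4 */
            struct range table[8] = { { 0, 0 } };
            int indx = 0;                   /* dense running index */

            for (int i = 0; i < 3; i++) {
                    /* Fixed: table[indx], not table[sockids[i]], which
                     * would leave holes at slots 1, 3 and 4. */
                    table[indx].nasid = sockids[i];
                    table[indx].limit = (i + 1) * 16;
                    indx++;
            }
            printf("%d entries, no holes\n", indx);
            return 0;
    }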
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 680049aa4593..01567aa87503 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -866,105 +866,17 @@ const void *get_xsave_field_ptr(int xsave_state)
 	return get_xsave_addr(&fpu->state.xsave, xsave_state);
 }
 
-
-/*
- * Set xfeatures (aka XSTATE_BV) bit for a feature that we want
- * to take out of its "init state".  This will ensure that an
- * XRSTOR actually restores the state.
- */
-static void fpu__xfeature_set_non_init(struct xregs_state *xsave,
-		int xstate_feature_mask)
-{
-	xsave->header.xfeatures |= xstate_feature_mask;
-}
-
-/*
- * This function is safe to call whether the FPU is in use or not.
- *
- * Note that this only works on the current task.
- *
- * Inputs:
- *	@xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP,
- *	XFEATURE_MASK_SSE, etc...)
- *	@xsave_state_ptr: a pointer to a copy of the state that you would
- *	like written in to the current task's FPU xsave state.  This pointer
- *	must not be located in the current tasks's xsave area.
- * Output:
- *	address of the state in the xsave area or NULL if the state
- *	is not present or is in its 'init state'.
- */
-static void fpu__xfeature_set_state(int xstate_feature_mask,
-		void *xstate_feature_src, size_t len)
-{
-	struct xregs_state *xsave = &current->thread.fpu.state.xsave;
-	struct fpu *fpu = &current->thread.fpu;
-	void *dst;
-
-	if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
-		WARN_ONCE(1, "%s() attempted with no xsave support", __func__);
-		return;
-	}
-
-	/*
-	 * Tell the FPU code that we need the FPU state to be in
-	 * 'fpu' (not in the registers), and that we need it to
-	 * be stable while we write to it.
-	 */
-	fpu__current_fpstate_write_begin();
-
-	/*
-	 * This method *WILL* *NOT* work for compact-format
-	 * buffers.  If the 'xstate_feature_mask' is unset in
-	 * xcomp_bv then we may need to move other feature state
-	 * "up" in the buffer.
-	 */
-	if (xsave->header.xcomp_bv & xstate_feature_mask) {
-		WARN_ON_ONCE(1);
-		goto out;
-	}
-
-	/* find the location in the xsave buffer of the desired state */
-	dst = __raw_xsave_addr(&fpu->state.xsave, xstate_feature_mask);
-
-	/*
-	 * Make sure that the pointer being passed in did not
-	 * come from the xsave buffer itself.
-	 */
-	WARN_ONCE(xstate_feature_src == dst, "set from xsave buffer itself");
-
-	/* put the caller-provided data in the location */
-	memcpy(dst, xstate_feature_src, len);
-
-	/*
-	 * Mark the xfeature so that the CPU knows there is state
-	 * in the buffer now.
-	 */
-	fpu__xfeature_set_non_init(xsave, xstate_feature_mask);
-out:
-	/*
-	 * We are done writing to the 'fpu'.  Reenable preeption
-	 * and (possibly) move the fpstate back in to the fpregs.
-	 */
-	fpu__current_fpstate_write_end();
-}
-
 #define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2)
 #define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1)
 
 /*
- * This will go out and modify the XSAVE buffer so that PKRU is
- * set to a particular state for access to 'pkey'.
- *
- * PKRU state does affect kernel access to user memory.  We do
- * not modfiy PKRU *itself* here, only the XSAVE state that will
- * be restored in to PKRU when we return back to userspace.
+ * This will go out and modify PKRU register to set the access
+ * rights for @pkey to @init_val.
  */
 int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 		unsigned long init_val)
 {
-	struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
-	struct pkru_state *old_pkru_state;
-	struct pkru_state new_pkru_state;
+	u32 old_pkru;
 	int pkey_shift = (pkey * PKRU_BITS_PER_PKEY);
 	u32 new_pkru_bits = 0;
 
@@ -974,6 +886,15 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 	 */
 	if (!boot_cpu_has(X86_FEATURE_OSPKE))
 		return -EINVAL;
+	/*
+	 * For most XSAVE components, this would be an arduous task:
+	 * brining fpstate up to date with fpregs, updating fpstate,
+	 * then re-populating fpregs.  But, for components that are
+	 * never lazily managed, we can just access the fpregs
+	 * directly.  PKRU is never managed lazily, so we can just
+	 * manipulate it directly.  Make sure it stays that way.
+	 */
+	WARN_ON_ONCE(!use_eager_fpu());
 
 	/* Set the bits we need in PKRU:  */
 	if (init_val & PKEY_DISABLE_ACCESS)
@@ -984,37 +905,12 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 	/* Shift the bits in to the correct place in PKRU for pkey:  */
 	new_pkru_bits <<= pkey_shift;
 
-	/* Locate old copy of the state in the xsave buffer: */
-	old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU);
-
-	/*
-	 * When state is not in the buffer, it is in the init
-	 * state, set it manually.  Otherwise, copy out the old
-	 * state.
-	 */
-	if (!old_pkru_state)
-		new_pkru_state.pkru = 0;
-	else
-		new_pkru_state.pkru = old_pkru_state->pkru;
-
-	/* Mask off any old bits in place: */
-	new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
-
-	/* Set the newly-requested bits: */
-	new_pkru_state.pkru |= new_pkru_bits;
-
-	/*
-	 * We could theoretically live without zeroing pkru.pad.
-	 * The current XSAVE feature state definition says that
-	 * only bytes 0->3 are used.  But we do not want to
-	 * chance leaking kernel stack out to userspace in case a
-	 * memcpy() of the whole xsave buffer was done.
-	 *
-	 * They're in the same cacheline anyway.
-	 */
-	new_pkru_state.pad = 0;
+	/* Get old PKRU and mask off any old bits in place: */
+	old_pkru = read_pkru();
+	old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
 
-	fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, sizeof(new_pkru_state));
+	/* Write old part along with new part: */
+	write_pkru(old_pkru | new_pkru_bits);
 
 	return 0;
 }
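PKRU packs two bits per protection key, access-disable (AD) and write-disable (WD), so the update boils down to a masked two-bit field write; and because PKRU is never lazily managed, the patch can do it directly on the register with read_pkru()/write_pkru() instead of editing the XSAVE buffer. A standalone sketch of just the bit manipulation:

    #include <stdint.h>
    #include <stdio.h>

    #define PKRU_BITS_PER_PKEY      2
    #define PKRU_AD_BIT             0x1u    /* access-disable */
    #define PKRU_WD_BIT             0x2u    /* write-disable */

    /* The same masking the patch performs, minus the rdpkru/wrpkru: */
    static uint32_t set_pkey_bits(uint32_t pkru, int pkey, uint32_t new_bits)
    {
            int shift = pkey * PKRU_BITS_PER_PKEY;

            pkru &= ~((PKRU_AD_BIT | PKRU_WD_BIT) << shift); /* clear old pair */
            return pkru | (new_bits << shift);               /* merge new pair */
    }

    int main(void)
    {
            /* Disable writes for pkey 1 in an otherwise permissive PKRU: */
            printf("%#x\n", set_pkey_bits(0, 1, PKRU_WD_BIT));      /* 0x8 */
            return 0;
    }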
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 2dda0bc4576e..f16c55bfc090 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -25,8 +25,6 @@ static void __init i386_default_early_setup(void)
 	/* Initialize 32bit specific setup functions */
 	x86_init.resources.reserve_resources = i386_reserve_resources;
 	x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc;
-
-	reserve_bios_regions();
 }
 
 asmlinkage __visible void __init i386_start_kernel(void)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 99d48e7d2974..54a2372f5dbb 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -183,7 +183,6 @@ void __init x86_64_start_reservations(char *real_mode_data)
 	copy_bootdata(__va(real_mode_data));
 
 	x86_early_init_platform_quirks();
-	reserve_bios_regions();
 
 	switch (boot_params.hdr.hardware_subarch) {
 	case X86_SUBARCH_INTEL_MID:
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 61521dc19c10..9f669fdd2010 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -102,8 +102,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 	seq_puts(p, "  Rescheduling interrupts\n");
 	seq_printf(p, "%*s: ", prec, "CAL");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->irq_call_count -
-					irq_stats(j)->irq_tlb_count);
+		seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
 	seq_puts(p, "  Function call interrupts\n");
 	seq_printf(p, "%*s: ", prec, "TLB");
 	for_each_online_cpu(j)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 991b77986d57..0fa60f5f5a16 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -936,8 +936,6 @@ void __init setup_arch(char **cmdline_p)
 
 	x86_init.oem.arch_setup();
 
-	kernel_randomize_memory();
-
 	iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
 	setup_memory_map();
 	parse_setup_data();
@@ -1055,6 +1053,12 @@ void __init setup_arch(char **cmdline_p)
 
 	max_possible_pfn = max_pfn;
 
+	/*
+	 * Define random base addresses for memory sections after max_pfn is
+	 * defined and before each memory section base is used.
+	 */
+	kernel_randomize_memory();
+
 #ifdef CONFIG_X86_32
 	/* max_low_pfn get updated here */
 	find_low_pfn_range();
@@ -1097,6 +1101,8 @@ void __init setup_arch(char **cmdline_p)
 		efi_find_mirror();
 	}
 
+	reserve_bios_regions();
+
 	/*
 	 * The EFI specification says that boot service code won't be called
 	 * after ExitBootServices(). This is, in fact, a lie.
@@ -1125,7 +1131,15 @@ void __init setup_arch(char **cmdline_p)
 
 	early_trap_pf_init();
 
-	setup_real_mode();
+	/*
+	 * Update mmu_cr4_features (and, indirectly, trampoline_cr4_features)
+	 * with the current CR4 value.  This may not be necessary, but
+	 * auditing all the early-boot CR4 manipulation would be needed to
+	 * rule it out.
+	 */
+	if (boot_cpu_data.cpuid_level >= 0)
+		/* A CPU has %cr4 if and only if it has CPUID. */
+		mmu_cr4_features = __read_cr4();
 
 	memblock_set_current_limit(get_max_mapped());
 
@@ -1174,13 +1188,6 @@ void __init setup_arch(char **cmdline_p)
 
 	kasan_init();
 
-	if (boot_cpu_data.cpuid_level >= 0) {
-		/* A CPU has %cr4 if and only if it has CPUID */
-		mmu_cr4_features = __read_cr4();
-		if (trampoline_cr4_features)
-			*trampoline_cr4_features = mmu_cr4_features;
-	}
-
 #ifdef CONFIG_X86_32
 	/* sync back kernel address range */
 	clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c
index f7dfeda83e5c..121f59c6ee54 100644
--- a/arch/x86/lib/kaslr.c
+++ b/arch/x86/lib/kaslr.c
@@ -19,7 +19,7 @@
 #include <asm/cpufeature.h>
 #include <asm/setup.h>
 
-#define debug_putstr(v) early_printk(v)
+#define debug_putstr(v) early_printk("%s", v)
 #define has_cpuflag(f) boot_cpu_has(f)
 #define get_boot_seed() kaslr_offset()
 #endif
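The one-character-looking change matters because -Wformat-security rejects calling a printf-style function with a non-literal format string: any '%' in the passed data would be parsed as a conversion specifier with no matching argument. Routing the string through a literal "%s" makes it pure data. A minimal reproduction:

    #include <stdio.h>

    static void debug_putstr(const char *v)
    {
            /* printf(v);  -- warns under -Wformat-security: 'v' is a format */
            printf("%s", v);        /* fixed: 'v' is data, never parsed */
    }

    int main(void)
    {
            debug_putstr("entropy: 100% of pool\n"); /* "% o" would misparse */
            return 0;
    }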
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 620928903be3..d28a2d741f9e 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -122,8 +122,18 @@ __ref void *alloc_low_pages(unsigned int num)
 	return __va(pfn << PAGE_SHIFT);
 }
 
-/* need 3 4k for initial PMD_SIZE,  3 4k for 0-ISA_END_ADDRESS */
-#define INIT_PGT_BUF_SIZE	(6 * PAGE_SIZE)
+/*
+ * By default need 3 4k for initial PMD_SIZE,  3 4k for 0-ISA_END_ADDRESS.
+ * With KASLR memory randomization, depending on the machine e820 memory
+ * and the PUD alignment. We may need twice more pages when KASLR memory
+ * randomization is enabled.
+ */
+#ifndef CONFIG_RANDOMIZE_MEMORY
+#define INIT_PGD_PAGE_COUNT	6
+#else
+#define INIT_PGD_PAGE_COUNT	12
+#endif
+#define INIT_PGT_BUF_SIZE	(INIT_PGD_PAGE_COUNT * PAGE_SIZE)
 RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE);
 void  __init early_alloc_pgt_buf(void)
 {
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 26dccd6c0df1..ec8654f117d8 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -97,7 +97,7 @@ void __init kernel_randomize_memory(void)
 	 * add padding if needed (especially for memory hotplug support).
 	 */
 	BUG_ON(kaslr_regions[0].base != &page_offset_base);
-	memory_tb = ((max_pfn << PAGE_SHIFT) >> TB_SHIFT) +
+	memory_tb = DIV_ROUND_UP(max_pfn << PAGE_SHIFT, 1UL << TB_SHIFT) +
 		CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING;
 
 	/* Adapt phyiscal memory region size based on available memory */
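The sizing fix replaces a truncating shift with a round-up: on a machine whose RAM is not a whole number of terabytes, the shift under-counted and the randomized region could be sized too small to cover all physical memory. A worked example:

    #include <stdio.h>

    #define TB_SHIFT 40
    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
            /* 1 TiB + 1 GiB of RAM -- not a whole number of terabytes: */
            unsigned long bytes = (1UL << TB_SHIFT) + (1UL << 30);

            printf("shift (old):    %lu TB\n", bytes >> TB_SHIFT);  /* 1 */
            printf("round-up (new): %lu TB\n",
                   DIV_ROUND_UP(bytes, 1UL << TB_SHIFT));           /* 2 */
            return 0;
    }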
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 4480c06cade7..89d1146f5a6f 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -254,6 +254,7 @@ void __init efi_free_boot_services(void)
 	for_each_efi_memory_desc(md) {
 		unsigned long long start = md->phys_addr;
 		unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
+		size_t rm_size;
 
 		if (md->type != EFI_BOOT_SERVICES_CODE &&
 		    md->type != EFI_BOOT_SERVICES_DATA)
@@ -263,6 +264,26 @@ void __init efi_free_boot_services(void)
 		if (md->attribute & EFI_MEMORY_RUNTIME)
 			continue;
 
+		/*
+		 * Nasty quirk: if all sub-1MB memory is used for boot
+		 * services, we can get here without having allocated the
+		 * real mode trampoline.  It's too late to hand boot services
+		 * memory back to the memblock allocator, so instead
+		 * try to manually allocate the trampoline if needed.
+		 *
+		 * I've seen this on a Dell XPS 13 9350 with firmware
+		 * 1.4.4 with SGX enabled booting Linux via Fedora 24's
+		 * grub2-efi on a hard disk.  (And no, I don't know why
+		 * this happened, but Linux should still try to boot rather
+		 * panicing early.)
+		 */
+		rm_size = real_mode_size_needed();
+		if (rm_size && (start + rm_size) < (1<<20) && size >= rm_size) {
+			set_real_mode_mem(start, rm_size);
+			start += rm_size;
+			size -= rm_size;
+		}
+
 		free_bootmem_late(start, size);
 	}
 
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index 0df8a0370d32..23f2f3e41c7f 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -200,12 +200,14 @@ void uv_bios_init(void)
 		return;
 	}
 
+	/* Starting with UV4 the UV systab size is variable */
 	if (uv_systab->revision >= UV_SYSTAB_VERSION_UV4) {
+		int size = uv_systab->size;
+
 		iounmap(uv_systab);
-		uv_systab = ioremap(efi.uv_systab, uv_systab->size);
+		uv_systab = ioremap(efi.uv_systab, size);
 		if (!uv_systab) {
-			pr_err("UV: UVsystab: ioremap(%d) failed!\n",
-				uv_systab->size);
+			pr_err("UV: UVsystab: ioremap(%d) failed!\n", size);
 			return;
 		}
 	}
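The bios_uv.c hunk fixes a use-after-unmap: the old code passed uv_systab->size to ioremap() after iounmap(uv_systab) had already torn the mapping down, and dereferenced it again in the error path when uv_systab was NULL. The rule the fix applies, copy out what you need before invalidating the pointer, shown in a userspace analogue with malloc/free standing in for ioremap/iounmap:

    #include <stdlib.h>

    struct systab { int size; };

    static struct systab *remap(int size)   /* stand-in for ioremap() */
    {
            return calloc(1, sizeof(struct systab) + size);
    }

    int main(void)
    {
            struct systab *tab = remap(0);
            int size;

            tab->size = 128;
            size = tab->size;       /* copy out first ... */
            free(tab);              /* ... stand-in for iounmap() ... */
            tab = remap(size);      /* reading tab->size here would be UAF */
            free(tab);
            return 0;
    }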
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index 705e3fffb4a1..5db706f14111 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -1,9 +1,11 @@
 #include <linux/io.h>
+#include <linux/slab.h>
 #include <linux/memblock.h>
 
 #include <asm/cacheflush.h>
 #include <asm/pgtable.h>
 #include <asm/realmode.h>
+#include <asm/tlbflush.h>
 
 struct real_mode_header *real_mode_header;
 u32 *trampoline_cr4_features;
@@ -11,25 +13,37 @@ u32 *trampoline_cr4_features;
 /* Hold the pgd entry used on booting additional CPUs */
 pgd_t trampoline_pgd_entry;
 
+void __init set_real_mode_mem(phys_addr_t mem, size_t size)
+{
+	void *base = __va(mem);
+
+	real_mode_header = (struct real_mode_header *) base;
+	printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
+	       base, (unsigned long long)mem, size);
+}
+
 void __init reserve_real_mode(void)
 {
 	phys_addr_t mem;
-	unsigned char *base;
-	size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
+	size_t size = real_mode_size_needed();
+
+	if (!size)
+		return;
+
+	WARN_ON(slab_is_available());
 
 	/* Has to be under 1M so we can execute real-mode AP code. */
 	mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
-	if (!mem)
-		panic("Cannot allocate trampoline\n");
+	if (!mem) {
+		pr_info("No sub-1M memory is available for the trampoline\n");
+		return;
+	}
 
-	base = __va(mem);
 	memblock_reserve(mem, size);
-	real_mode_header = (struct real_mode_header *) base;
-	printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
-	       base, (unsigned long long)mem, size);
+	set_real_mode_mem(mem, size);
 }
 
-void __init setup_real_mode(void)
+static void __init setup_real_mode(void)
 {
 	u16 real_mode_seg;
 	const u32 *rel;
@@ -84,7 +98,7 @@ void __init setup_real_mode(void)
 
 	trampoline_header->start = (u64) secondary_startup_64;
 	trampoline_cr4_features = &trampoline_header->cr4;
-	*trampoline_cr4_features = __read_cr4();
+	*trampoline_cr4_features = mmu_cr4_features;
 
 	trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
 	trampoline_pgd[0] = trampoline_pgd_entry.pgd;
@@ -100,7 +114,7 @@ void __init setup_real_mode(void)
  * need to mark it executable at do_pre_smp_initcalls() at least,
  * thus run it as a early_initcall().
  */
-static int __init set_real_mode_permissions(void)
+static void __init set_real_mode_permissions(void)
 {
 	unsigned char *base = (unsigned char *) real_mode_header;
 	size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
@@ -119,7 +133,16 @@ static int __init set_real_mode_permissions(void)
 	set_memory_nx((unsigned long) base, size >> PAGE_SHIFT);
 	set_memory_ro((unsigned long) base, ro_size >> PAGE_SHIFT);
 	set_memory_x((unsigned long) text_start, text_size >> PAGE_SHIFT);
+}
+
+static int __init init_real_mode(void)
+{
+	if (!real_mode_header)
+		panic("Real mode trampoline was not allocated");
+
+	setup_real_mode();
+	set_real_mode_permissions();
 
 	return 0;
 }
-early_initcall(set_real_mode_permissions);
+early_initcall(init_real_mode);