aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorH. Peter Anvin <hpa@linux.intel.com>2014-07-30 13:48:00 -0400
committerH. Peter Anvin <hpa@linux.intel.com>2014-07-30 13:48:00 -0400
commitc3107e3c504d3187ed8eac8179494946faff1481 (patch)
treee7615968a55fc9176ee02926ae442e9d8890d5bd /arch/x86
parent5ccb8225abf2ac51cd023a99f28366ac9823bd0d (diff)
parent594c7255dce7a13cac50cf2470cc56e2c3b0494e (diff)
Merge tag 'please-pull-apei' into x86/ras
APEI is currently implemented so that it depends on x86 hardware. The primary dependency is that GHES uses the x86 NMI for hardware error notification and MCE for memory error handling. These patches remove that dependency. Other APEI features such as error reporting via external IRQ, error serialization, or error injection, do not require changes to use them on non-x86 architectures. The following patch set eliminates the APEI Kconfig x86 dependency by making these changes: - treat NMI notification as GHES architecture - HAVE_ACPI_APEI_NMI - group and wrap around #ifdef CONFIG_HAVE_ACPI_APEI_NMI code which is used only for NMI path - identify architectural boxes and abstract it accordingly (tlb flush and MCE) - rework ioremap for both IRQ and NMI context NMI code is kept in ghes.c file since NMI and IRQ context are tightly coupled. Note, these patches introduce no functional changes for x86. The NMI notification feature is hard selected for x86. Architectures that want to use this feature should also provide NMI code infrastructure.
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig3
-rw-r--r--arch/x86/boot/header.S26
-rw-r--r--arch/x86/boot/tools/build.c38
-rw-r--r--arch/x86/crypto/sha512_ssse3_glue.c2
-rw-r--r--arch/x86/include/asm/irq.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h4
-rw-r--r--arch/x86/include/asm/ptrace.h16
-rw-r--r--arch/x86/kernel/acpi/Makefile1
-rw-r--r--arch/x86/kernel/acpi/apei.c62
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c18
-rw-r--r--arch/x86/kernel/apm_32.c1
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c9
-rw-r--r--arch/x86/kernel/entry_32.S10
-rw-r--r--arch/x86/kernel/espfix_64.c5
-rw-r--r--arch/x86/kernel/signal.c2
-rw-r--r--arch/x86/kernel/tsc.c4
-rw-r--r--arch/x86/kvm/svm.c1
-rw-r--r--arch/x86/kvm/x86.c2
-rw-r--r--arch/x86/vdso/Makefile24
-rw-r--r--arch/x86/vdso/vclock_gettime.c3
-rw-r--r--arch/x86/vdso/vdso-fakesections.c41
-rw-r--r--arch/x86/vdso/vdso-layout.lds.S64
-rw-r--r--arch/x86/vdso/vdso.lds.S2
-rw-r--r--arch/x86/vdso/vdso2c.c73
-rw-r--r--arch/x86/vdso/vdso2c.h202
-rw-r--r--arch/x86/vdso/vdso32/vdso-fakesections.c1
-rw-r--r--arch/x86/vdso/vdsox32.lds.S2
-rw-r--r--arch/x86/vdso/vma.c4
28 files changed, 471 insertions, 151 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a8f749ef0fdc..43873442dee1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -131,6 +131,9 @@ config X86
131 select HAVE_CC_STACKPROTECTOR 131 select HAVE_CC_STACKPROTECTOR
132 select GENERIC_CPU_AUTOPROBE 132 select GENERIC_CPU_AUTOPROBE
133 select HAVE_ARCH_AUDITSYSCALL 133 select HAVE_ARCH_AUDITSYSCALL
134 select ARCH_SUPPORTS_ATOMIC_RMW
135 select HAVE_ACPI_APEI if ACPI
136 select HAVE_ACPI_APEI_NMI if ACPI
134 137
135config INSTRUCTION_DECODER 138config INSTRUCTION_DECODER
136 def_bool y 139 def_bool y
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 84c223479e3c..7a6d43a554d7 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -91,10 +91,9 @@ bs_die:
91 91
92 .section ".bsdata", "a" 92 .section ".bsdata", "a"
93bugger_off_msg: 93bugger_off_msg:
94 .ascii "Direct floppy boot is not supported. " 94 .ascii "Use a boot loader.\r\n"
95 .ascii "Use a boot loader program instead.\r\n"
96 .ascii "\n" 95 .ascii "\n"
97 .ascii "Remove disk and press any key to reboot ...\r\n" 96 .ascii "Remove disk and press any key to reboot...\r\n"
98 .byte 0 97 .byte 0
99 98
100#ifdef CONFIG_EFI_STUB 99#ifdef CONFIG_EFI_STUB
@@ -108,7 +107,7 @@ coff_header:
108#else 107#else
109 .word 0x8664 # x86-64 108 .word 0x8664 # x86-64
110#endif 109#endif
111 .word 3 # nr_sections 110 .word 4 # nr_sections
112 .long 0 # TimeDateStamp 111 .long 0 # TimeDateStamp
113 .long 0 # PointerToSymbolTable 112 .long 0 # PointerToSymbolTable
114 .long 1 # NumberOfSymbols 113 .long 1 # NumberOfSymbols
@@ -250,6 +249,25 @@ section_table:
250 .word 0 # NumberOfLineNumbers 249 .word 0 # NumberOfLineNumbers
251 .long 0x60500020 # Characteristics (section flags) 250 .long 0x60500020 # Characteristics (section flags)
252 251
252 #
253 # The offset & size fields are filled in by build.c.
254 #
255 .ascii ".bss"
256 .byte 0
257 .byte 0
258 .byte 0
259 .byte 0
260 .long 0
261 .long 0x0
262 .long 0 # Size of initialized data
263 # on disk
264 .long 0x0
265 .long 0 # PointerToRelocations
266 .long 0 # PointerToLineNumbers
267 .word 0 # NumberOfRelocations
268 .word 0 # NumberOfLineNumbers
269 .long 0xc8000080 # Characteristics (section flags)
270
253#endif /* CONFIG_EFI_STUB */ 271#endif /* CONFIG_EFI_STUB */
254 272
255 # Kernel attributes; used by setup. This is part 1 of the 273 # Kernel attributes; used by setup. This is part 1 of the
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index 1a2f2121cada..a7661c430cd9 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -143,7 +143,7 @@ static void usage(void)
143 143
144#ifdef CONFIG_EFI_STUB 144#ifdef CONFIG_EFI_STUB
145 145
146static void update_pecoff_section_header(char *section_name, u32 offset, u32 size) 146static void update_pecoff_section_header_fields(char *section_name, u32 vma, u32 size, u32 datasz, u32 offset)
147{ 147{
148 unsigned int pe_header; 148 unsigned int pe_header;
149 unsigned short num_sections; 149 unsigned short num_sections;
@@ -164,10 +164,10 @@ static void update_pecoff_section_header(char *section_name, u32 offset, u32 siz
164 put_unaligned_le32(size, section + 0x8); 164 put_unaligned_le32(size, section + 0x8);
165 165
166 /* section header vma field */ 166 /* section header vma field */
167 put_unaligned_le32(offset, section + 0xc); 167 put_unaligned_le32(vma, section + 0xc);
168 168
169 /* section header 'size of initialised data' field */ 169 /* section header 'size of initialised data' field */
170 put_unaligned_le32(size, section + 0x10); 170 put_unaligned_le32(datasz, section + 0x10);
171 171
172 /* section header 'file offset' field */ 172 /* section header 'file offset' field */
173 put_unaligned_le32(offset, section + 0x14); 173 put_unaligned_le32(offset, section + 0x14);
@@ -179,6 +179,11 @@ static void update_pecoff_section_header(char *section_name, u32 offset, u32 siz
179 } 179 }
180} 180}
181 181
182static void update_pecoff_section_header(char *section_name, u32 offset, u32 size)
183{
184 update_pecoff_section_header_fields(section_name, offset, size, size, offset);
185}
186
182static void update_pecoff_setup_and_reloc(unsigned int size) 187static void update_pecoff_setup_and_reloc(unsigned int size)
183{ 188{
184 u32 setup_offset = 0x200; 189 u32 setup_offset = 0x200;
@@ -203,9 +208,6 @@ static void update_pecoff_text(unsigned int text_start, unsigned int file_sz)
203 208
204 pe_header = get_unaligned_le32(&buf[0x3c]); 209 pe_header = get_unaligned_le32(&buf[0x3c]);
205 210
206 /* Size of image */
207 put_unaligned_le32(file_sz, &buf[pe_header + 0x50]);
208
209 /* 211 /*
210 * Size of code: Subtract the size of the first sector (512 bytes) 212 * Size of code: Subtract the size of the first sector (512 bytes)
211 * which includes the header. 213 * which includes the header.
@@ -220,6 +222,22 @@ static void update_pecoff_text(unsigned int text_start, unsigned int file_sz)
220 update_pecoff_section_header(".text", text_start, text_sz); 222 update_pecoff_section_header(".text", text_start, text_sz);
221} 223}
222 224
225static void update_pecoff_bss(unsigned int file_sz, unsigned int init_sz)
226{
227 unsigned int pe_header;
228 unsigned int bss_sz = init_sz - file_sz;
229
230 pe_header = get_unaligned_le32(&buf[0x3c]);
231
232 /* Size of uninitialized data */
233 put_unaligned_le32(bss_sz, &buf[pe_header + 0x24]);
234
235 /* Size of image */
236 put_unaligned_le32(init_sz, &buf[pe_header + 0x50]);
237
238 update_pecoff_section_header_fields(".bss", file_sz, bss_sz, 0, 0);
239}
240
223static int reserve_pecoff_reloc_section(int c) 241static int reserve_pecoff_reloc_section(int c)
224{ 242{
225 /* Reserve 0x20 bytes for .reloc section */ 243 /* Reserve 0x20 bytes for .reloc section */
@@ -259,6 +277,8 @@ static void efi_stub_entry_update(void)
259static inline void update_pecoff_setup_and_reloc(unsigned int size) {} 277static inline void update_pecoff_setup_and_reloc(unsigned int size) {}
260static inline void update_pecoff_text(unsigned int text_start, 278static inline void update_pecoff_text(unsigned int text_start,
261 unsigned int file_sz) {} 279 unsigned int file_sz) {}
280static inline void update_pecoff_bss(unsigned int file_sz,
281 unsigned int init_sz) {}
262static inline void efi_stub_defaults(void) {} 282static inline void efi_stub_defaults(void) {}
263static inline void efi_stub_entry_update(void) {} 283static inline void efi_stub_entry_update(void) {}
264 284
@@ -310,7 +330,7 @@ static void parse_zoffset(char *fname)
310 330
311int main(int argc, char ** argv) 331int main(int argc, char ** argv)
312{ 332{
313 unsigned int i, sz, setup_sectors; 333 unsigned int i, sz, setup_sectors, init_sz;
314 int c; 334 int c;
315 u32 sys_size; 335 u32 sys_size;
316 struct stat sb; 336 struct stat sb;
@@ -376,7 +396,9 @@ int main(int argc, char ** argv)
376 buf[0x1f1] = setup_sectors-1; 396 buf[0x1f1] = setup_sectors-1;
377 put_unaligned_le32(sys_size, &buf[0x1f4]); 397 put_unaligned_le32(sys_size, &buf[0x1f4]);
378 398
379 update_pecoff_text(setup_sectors * 512, sz + i + ((sys_size * 16) - sz)); 399 update_pecoff_text(setup_sectors * 512, i + (sys_size * 16));
400 init_sz = get_unaligned_le32(&buf[0x260]);
401 update_pecoff_bss(i + (sys_size * 16), init_sz);
380 402
381 efi_stub_entry_update(); 403 efi_stub_entry_update();
382 404
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index f30cd10293f0..8626b03e83b7 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -141,7 +141,7 @@ static int sha512_ssse3_final(struct shash_desc *desc, u8 *out)
141 141
142 /* save number of bits */ 142 /* save number of bits */
143 bits[1] = cpu_to_be64(sctx->count[0] << 3); 143 bits[1] = cpu_to_be64(sctx->count[0] << 3);
144 bits[0] = cpu_to_be64(sctx->count[1] << 3) | sctx->count[0] >> 61; 144 bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
145 145
146 /* Pad out to 112 mod 128 and append length */ 146 /* Pad out to 112 mod 128 and append length */
147 index = sctx->count[0] & 0x7f; 147 index = sctx->count[0] & 0x7f;
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index cb6cfcd034cf..a80cbb88ea91 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -43,7 +43,7 @@ extern int vector_used_by_percpu_irq(unsigned int vector);
43extern void init_ISA_irqs(void); 43extern void init_ISA_irqs(void);
44 44
45#ifdef CONFIG_X86_LOCAL_APIC 45#ifdef CONFIG_X86_LOCAL_APIC
46void arch_trigger_all_cpu_backtrace(void); 46void arch_trigger_all_cpu_backtrace(bool);
47#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace 47#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
48#endif 48#endif
49 49
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 49314155b66c..49205d01b9ad 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -95,7 +95,7 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
95#define KVM_REFILL_PAGES 25 95#define KVM_REFILL_PAGES 25
96#define KVM_MAX_CPUID_ENTRIES 80 96#define KVM_MAX_CPUID_ENTRIES 80
97#define KVM_NR_FIXED_MTRR_REGION 88 97#define KVM_NR_FIXED_MTRR_REGION 88
98#define KVM_NR_VAR_MTRR 8 98#define KVM_NR_VAR_MTRR 10
99 99
100#define ASYNC_PF_PER_VCPU 64 100#define ASYNC_PF_PER_VCPU 64
101 101
@@ -461,7 +461,7 @@ struct kvm_vcpu_arch {
461 bool nmi_injected; /* Trying to inject an NMI this entry */ 461 bool nmi_injected; /* Trying to inject an NMI this entry */
462 462
463 struct mtrr_state_type mtrr_state; 463 struct mtrr_state_type mtrr_state;
464 u32 pat; 464 u64 pat;
465 465
466 unsigned switch_db_regs; 466 unsigned switch_db_regs;
467 unsigned long db[KVM_NR_DB_REGS]; 467 unsigned long db[KVM_NR_DB_REGS];
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 14fd6fd75a19..6205f0c434db 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -231,6 +231,22 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
231 231
232#define ARCH_HAS_USER_SINGLE_STEP_INFO 232#define ARCH_HAS_USER_SINGLE_STEP_INFO
233 233
234/*
235 * When hitting ptrace_stop(), we cannot return using SYSRET because
236 * that does not restore the full CPU state, only a minimal set. The
237 * ptracer can change arbitrary register values, which is usually okay
238 * because the usual ptrace stops run off the signal delivery path which
239 * forces IRET; however, ptrace_event() stops happen in arbitrary places
240 * in the kernel and don't force IRET path.
241 *
242 * So force IRET path after a ptrace stop.
243 */
244#define arch_ptrace_stop_needed(code, info) \
245({ \
246 set_thread_flag(TIF_NOTIFY_RESUME); \
247 false; \
248})
249
234struct user_desc; 250struct user_desc;
235extern int do_get_thread_area(struct task_struct *p, int idx, 251extern int do_get_thread_area(struct task_struct *p, int idx,
236 struct user_desc __user *info); 252 struct user_desc __user *info);
diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile
index 163b22581472..3242e591fa82 100644
--- a/arch/x86/kernel/acpi/Makefile
+++ b/arch/x86/kernel/acpi/Makefile
@@ -1,5 +1,6 @@
1obj-$(CONFIG_ACPI) += boot.o 1obj-$(CONFIG_ACPI) += boot.o
2obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o 2obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o
3obj-$(CONFIG_ACPI_APEI) += apei.o
3 4
4ifneq ($(CONFIG_ACPI_PROCESSOR),) 5ifneq ($(CONFIG_ACPI_PROCESSOR),)
5obj-y += cstate.o 6obj-y += cstate.o
diff --git a/arch/x86/kernel/acpi/apei.c b/arch/x86/kernel/acpi/apei.c
new file mode 100644
index 000000000000..c280df6b2aa2
--- /dev/null
+++ b/arch/x86/kernel/acpi/apei.c
@@ -0,0 +1,62 @@
1/*
2 * Arch-specific APEI-related functions.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 */
14
15#include <acpi/apei.h>
16
17#include <asm/mce.h>
18#include <asm/tlbflush.h>
19
20int arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, void *data)
21{
22#ifdef CONFIG_X86_MCE
23 int i;
24 struct acpi_hest_ia_corrected *cmc;
25 struct acpi_hest_ia_error_bank *mc_bank;
26
27 if (hest_hdr->type != ACPI_HEST_TYPE_IA32_CORRECTED_CHECK)
28 return 0;
29
30 cmc = (struct acpi_hest_ia_corrected *)hest_hdr;
31 if (!cmc->enabled)
32 return 0;
33
34 /*
35 * We expect HEST to provide a list of MC banks that report errors
36 * in firmware first mode. Otherwise, return non-zero value to
37 * indicate that we are done parsing HEST.
38 */
39 if (!(cmc->flags & ACPI_HEST_FIRMWARE_FIRST) ||
40 !cmc->num_hardware_banks)
41 return 1;
42
43 pr_info("HEST: Enabling Firmware First mode for corrected errors.\n");
44
45 mc_bank = (struct acpi_hest_ia_error_bank *)(cmc + 1);
46 for (i = 0; i < cmc->num_hardware_banks; i++, mc_bank++)
47 mce_disable_bank(mc_bank->bank_number);
48#endif
49 return 1;
50}
51
52void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
53{
54#ifdef CONFIG_X86_MCE
55 apei_mce_report_mem_error(sev, mem_err);
56#endif
57}
58
59void arch_apei_flush_tlb_one(unsigned long addr)
60{
61 __flush_tlb_one(addr);
62}
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index c3fcb5de5083..6a1e71bde323 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -33,31 +33,41 @@ static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
33/* "in progress" flag of arch_trigger_all_cpu_backtrace */ 33/* "in progress" flag of arch_trigger_all_cpu_backtrace */
34static unsigned long backtrace_flag; 34static unsigned long backtrace_flag;
35 35
36void arch_trigger_all_cpu_backtrace(void) 36void arch_trigger_all_cpu_backtrace(bool include_self)
37{ 37{
38 int i; 38 int i;
39 int cpu = get_cpu();
39 40
40 if (test_and_set_bit(0, &backtrace_flag)) 41 if (test_and_set_bit(0, &backtrace_flag)) {
41 /* 42 /*
42 * If there is already a trigger_all_cpu_backtrace() in progress 43 * If there is already a trigger_all_cpu_backtrace() in progress
43 * (backtrace_flag == 1), don't output double cpu dump infos. 44 * (backtrace_flag == 1), don't output double cpu dump infos.
44 */ 45 */
46 put_cpu();
45 return; 47 return;
48 }
46 49
47 cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); 50 cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
51 if (!include_self)
52 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
48 53
49 printk(KERN_INFO "sending NMI to all CPUs:\n"); 54 if (!cpumask_empty(to_cpumask(backtrace_mask))) {
50 apic->send_IPI_all(NMI_VECTOR); 55 pr_info("sending NMI to %s CPUs:\n",
56 (include_self ? "all" : "other"));
57 apic->send_IPI_mask(to_cpumask(backtrace_mask), NMI_VECTOR);
58 }
51 59
52 /* Wait for up to 10 seconds for all CPUs to do the backtrace */ 60 /* Wait for up to 10 seconds for all CPUs to do the backtrace */
53 for (i = 0; i < 10 * 1000; i++) { 61 for (i = 0; i < 10 * 1000; i++) {
54 if (cpumask_empty(to_cpumask(backtrace_mask))) 62 if (cpumask_empty(to_cpumask(backtrace_mask)))
55 break; 63 break;
56 mdelay(1); 64 mdelay(1);
65 touch_softlockup_watchdog();
57 } 66 }
58 67
59 clear_bit(0, &backtrace_flag); 68 clear_bit(0, &backtrace_flag);
60 smp_mb__after_atomic(); 69 smp_mb__after_atomic();
70 put_cpu();
61} 71}
62 72
63static int 73static int
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index f3a1f04ed4cb..584874451414 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -841,7 +841,6 @@ static int apm_do_idle(void)
841 u32 eax; 841 u32 eax;
842 u8 ret = 0; 842 u8 ret = 0;
843 int idled = 0; 843 int idled = 0;
844 int polling;
845 int err = 0; 844 int err = 0;
846 845
847 if (!need_resched()) { 846 if (!need_resched()) {
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index adb02aa62af5..07846d738bdb 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1382,6 +1382,15 @@ again:
1382 intel_pmu_lbr_read(); 1382 intel_pmu_lbr_read();
1383 1383
1384 /* 1384 /*
1385 * CondChgd bit 63 doesn't mean any overflow status. Ignore
1386 * and clear the bit.
1387 */
1388 if (__test_and_clear_bit(63, (unsigned long *)&status)) {
1389 if (!status)
1390 goto done;
1391 }
1392
1393 /*
1385 * PEBS overflow sets bit 62 in the global status register 1394 * PEBS overflow sets bit 62 in the global status register
1386 */ 1395 */
1387 if (__test_and_clear_bit(62, (unsigned long *)&status)) { 1396 if (__test_and_clear_bit(62, (unsigned long *)&status)) {
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index f0da82b8e634..dbaa23e78b36 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -423,9 +423,10 @@ sysenter_past_esp:
423 jnz sysenter_audit 423 jnz sysenter_audit
424sysenter_do_call: 424sysenter_do_call:
425 cmpl $(NR_syscalls), %eax 425 cmpl $(NR_syscalls), %eax
426 jae syscall_badsys 426 jae sysenter_badsys
427 call *sys_call_table(,%eax,4) 427 call *sys_call_table(,%eax,4)
428 movl %eax,PT_EAX(%esp) 428 movl %eax,PT_EAX(%esp)
429sysenter_after_call:
429 LOCKDEP_SYS_EXIT 430 LOCKDEP_SYS_EXIT
430 DISABLE_INTERRUPTS(CLBR_ANY) 431 DISABLE_INTERRUPTS(CLBR_ANY)
431 TRACE_IRQS_OFF 432 TRACE_IRQS_OFF
@@ -675,7 +676,12 @@ END(syscall_fault)
675 676
676syscall_badsys: 677syscall_badsys:
677 movl $-ENOSYS,PT_EAX(%esp) 678 movl $-ENOSYS,PT_EAX(%esp)
678 jmp resume_userspace 679 jmp syscall_exit
680END(syscall_badsys)
681
682sysenter_badsys:
683 movl $-ENOSYS,PT_EAX(%esp)
684 jmp sysenter_after_call
679END(syscall_badsys) 685END(syscall_badsys)
680 CFI_ENDPROC 686 CFI_ENDPROC
681 687
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index 6afbb16e9b79..94d857fb1033 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -175,7 +175,7 @@ void init_espfix_ap(void)
175 if (!pud_present(pud)) { 175 if (!pud_present(pud)) {
176 pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP); 176 pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP);
177 pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask)); 177 pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask));
178 paravirt_alloc_pud(&init_mm, __pa(pmd_p) >> PAGE_SHIFT); 178 paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT);
179 for (n = 0; n < ESPFIX_PUD_CLONES; n++) 179 for (n = 0; n < ESPFIX_PUD_CLONES; n++)
180 set_pud(&pud_p[n], pud); 180 set_pud(&pud_p[n], pud);
181 } 181 }
@@ -185,7 +185,7 @@ void init_espfix_ap(void)
185 if (!pmd_present(pmd)) { 185 if (!pmd_present(pmd)) {
186 pte_p = (pte_t *)__get_free_page(PGALLOC_GFP); 186 pte_p = (pte_t *)__get_free_page(PGALLOC_GFP);
187 pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask)); 187 pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask));
188 paravirt_alloc_pmd(&init_mm, __pa(pte_p) >> PAGE_SHIFT); 188 paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT);
189 for (n = 0; n < ESPFIX_PMD_CLONES; n++) 189 for (n = 0; n < ESPFIX_PMD_CLONES; n++)
190 set_pmd(&pmd_p[n], pmd); 190 set_pmd(&pmd_p[n], pmd);
191 } 191 }
@@ -193,7 +193,6 @@ void init_espfix_ap(void)
193 pte_p = pte_offset_kernel(&pmd, addr); 193 pte_p = pte_offset_kernel(&pmd, addr);
194 stack_page = (void *)__get_free_page(GFP_KERNEL); 194 stack_page = (void *)__get_free_page(GFP_KERNEL);
195 pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask)); 195 pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask));
196 paravirt_alloc_pte(&init_mm, __pa(stack_page) >> PAGE_SHIFT);
197 for (n = 0; n < ESPFIX_PTE_CLONES; n++) 196 for (n = 0; n < ESPFIX_PTE_CLONES; n++)
198 set_pte(&pte_p[n*PTE_STRIDE], pte); 197 set_pte(&pte_p[n*PTE_STRIDE], pte);
199 198
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index a0da58db43a8..2851d63c1202 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -363,7 +363,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
363 363
364 /* Set up to return from userspace. */ 364 /* Set up to return from userspace. */
365 restorer = current->mm->context.vdso + 365 restorer = current->mm->context.vdso +
366 selected_vdso32->sym___kernel_sigreturn; 366 selected_vdso32->sym___kernel_rt_sigreturn;
367 if (ksig->ka.sa.sa_flags & SA_RESTORER) 367 if (ksig->ka.sa.sa_flags & SA_RESTORER)
368 restorer = ksig->ka.sa.sa_restorer; 368 restorer = ksig->ka.sa.sa_restorer;
369 put_user_ex(restorer, &frame->pretcode); 369 put_user_ex(restorer, &frame->pretcode);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 57e5ce126d5a..ea030319b321 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -920,9 +920,9 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
920 tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); 920 tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
921 if (!(freq->flags & CPUFREQ_CONST_LOOPS)) 921 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
922 mark_tsc_unstable("cpufreq changes"); 922 mark_tsc_unstable("cpufreq changes");
923 }
924 923
925 set_cyc2ns_scale(tsc_khz, freq->cpu); 924 set_cyc2ns_scale(tsc_khz, freq->cpu);
925 }
926 926
927 return 0; 927 return 0;
928} 928}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ec8366c5cfea..b5e994ad0135 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1462,6 +1462,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
1462 */ 1462 */
1463 if (var->unusable) 1463 if (var->unusable)
1464 var->db = 0; 1464 var->db = 0;
1465 var->dpl = to_svm(vcpu)->vmcb->save.cpl;
1465 break; 1466 break;
1466 } 1467 }
1467} 1468}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f32a02578c0d..f6449334ec45 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1898,7 +1898,7 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1898 if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) 1898 if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
1899 break; 1899 break;
1900 gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; 1900 gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
1901 if (kvm_write_guest(kvm, data, 1901 if (kvm_write_guest(kvm, gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
1902 &tsc_ref, sizeof(tsc_ref))) 1902 &tsc_ref, sizeof(tsc_ref)))
1903 return 1; 1903 return 1;
1904 mark_page_dirty(kvm, gfn); 1904 mark_page_dirty(kvm, gfn);
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 3c0809a0631f..61b04fe36e66 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -11,7 +11,6 @@ VDSO32-$(CONFIG_COMPAT) := y
11 11
12# files to link into the vdso 12# files to link into the vdso
13vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vdso-fakesections.o 13vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vdso-fakesections.o
14vobjs-nox32 := vdso-fakesections.o
15 14
16# files to link into kernel 15# files to link into kernel
17obj-y += vma.o 16obj-y += vma.o
@@ -67,7 +66,8 @@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso2c FORCE
67# 66#
68CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ 67CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
69 $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \ 68 $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \
70 -fno-omit-frame-pointer -foptimize-sibling-calls 69 -fno-omit-frame-pointer -foptimize-sibling-calls \
70 -DDISABLE_BRANCH_PROFILING
71 71
72$(vobjs): KBUILD_CFLAGS += $(CFL) 72$(vobjs): KBUILD_CFLAGS += $(CFL)
73 73
@@ -134,7 +134,7 @@ override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
134 134
135targets += vdso32/vdso32.lds 135targets += vdso32/vdso32.lds
136targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o) 136targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o)
137targets += vdso32/vclock_gettime.o 137targets += vdso32/vclock_gettime.o vdso32/vdso-fakesections.o
138 138
139$(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%) 139$(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%)
140 140
@@ -150,11 +150,13 @@ KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic
150KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector) 150KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
151KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) 151KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
152KBUILD_CFLAGS_32 += -fno-omit-frame-pointer 152KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
153KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING
153$(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) 154$(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
154 155
155$(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \ 156$(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
156 $(obj)/vdso32/vdso32.lds \ 157 $(obj)/vdso32/vdso32.lds \
157 $(obj)/vdso32/vclock_gettime.o \ 158 $(obj)/vdso32/vclock_gettime.o \
159 $(obj)/vdso32/vdso-fakesections.o \
158 $(obj)/vdso32/note.o \ 160 $(obj)/vdso32/note.o \
159 $(obj)/vdso32/%.o 161 $(obj)/vdso32/%.o
160 $(call if_changed,vdso) 162 $(call if_changed,vdso)
@@ -169,14 +171,24 @@ quiet_cmd_vdso = VDSO $@
169 sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@' 171 sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
170 172
171VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \ 173VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \
172 -Wl,-Bsymbolic $(LTO_CFLAGS) 174 $(call cc-ldoption, -Wl$(comma)--build-id) -Wl,-Bsymbolic $(LTO_CFLAGS)
173GCOV_PROFILE := n 175GCOV_PROFILE := n
174 176
175# 177#
176# Install the unstripped copies of vdso*.so. 178# Install the unstripped copies of vdso*.so. If our toolchain supports
179# build-id, install .build-id links as well.
177# 180#
178quiet_cmd_vdso_install = INSTALL $(@:install_%=%) 181quiet_cmd_vdso_install = INSTALL $(@:install_%=%)
179 cmd_vdso_install = cp $< $(MODLIB)/vdso/$(@:install_%=%) 182define cmd_vdso_install
183 cp $< "$(MODLIB)/vdso/$(@:install_%=%)"; \
184 if readelf -n $< |grep -q 'Build ID'; then \
185 buildid=`readelf -n $< |grep 'Build ID' |sed -e 's/^.*Build ID: \(.*\)$$/\1/'`; \
186 first=`echo $$buildid | cut -b-2`; \
187 last=`echo $$buildid | cut -b3-`; \
188 mkdir -p "$(MODLIB)/vdso/.build-id/$$first"; \
189 ln -sf "../../$(@:install_%=%)" "$(MODLIB)/vdso/.build-id/$$first/$$last.debug"; \
190 fi
191endef
180 192
181vdso_img_insttargets := $(vdso_img_sodbg:%.dbg=install_%) 193vdso_img_insttargets := $(vdso_img_sodbg:%.dbg=install_%)
182 194
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index b2e4f493e5b0..9793322751e0 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -11,9 +11,6 @@
11 * Check with readelf after changing. 11 * Check with readelf after changing.
12 */ 12 */
13 13
14/* Disable profiling for userspace code: */
15#define DISABLE_BRANCH_PROFILING
16
17#include <uapi/linux/time.h> 14#include <uapi/linux/time.h>
18#include <asm/vgtod.h> 15#include <asm/vgtod.h>
19#include <asm/hpet.h> 16#include <asm/hpet.h>
diff --git a/arch/x86/vdso/vdso-fakesections.c b/arch/x86/vdso/vdso-fakesections.c
index cb8a8d72c24b..aa5fbfab20a5 100644
--- a/arch/x86/vdso/vdso-fakesections.c
+++ b/arch/x86/vdso/vdso-fakesections.c
@@ -2,31 +2,20 @@
2 * Copyright 2014 Andy Lutomirski 2 * Copyright 2014 Andy Lutomirski
3 * Subject to the GNU Public License, v.2 3 * Subject to the GNU Public License, v.2
4 * 4 *
5 * Hack to keep broken Go programs working. 5 * String table for loadable section headers. See vdso2c.h for why
6 * 6 * this exists.
7 * The Go runtime had a couple of bugs: it would read the section table to try
8 * to figure out how many dynamic symbols there were (it shouldn't have looked
9 * at the section table at all) and, if there were no SHT_SYNDYM section table
10 * entry, it would use an uninitialized value for the number of symbols. As a
11 * workaround, we supply a minimal section table. vdso2c will adjust the
12 * in-memory image so that "vdso_fake_sections" becomes the section table.
13 *
14 * The bug was introduced by:
15 * https://code.google.com/p/go/source/detail?r=56ea40aac72b (2012-08-31)
16 * and is being addressed in the Go runtime in this issue:
17 * https://code.google.com/p/go/issues/detail?id=8197
18 */ 7 */
19 8
20#ifndef __x86_64__ 9const char fake_shstrtab[] __attribute__((section(".fake_shstrtab"))) =
21#error This hack is specific to the 64-bit vDSO 10 ".hash\0"
22#endif 11 ".dynsym\0"
23 12 ".dynstr\0"
24#include <linux/elf.h> 13 ".gnu.version\0"
25 14 ".gnu.version_d\0"
26extern const __visible struct elf64_shdr vdso_fake_sections[]; 15 ".dynamic\0"
27const __visible struct elf64_shdr vdso_fake_sections[] = { 16 ".rodata\0"
28 { 17 ".fake_shstrtab\0" /* Yay, self-referential code. */
29 .sh_type = SHT_DYNSYM, 18 ".note\0"
30 .sh_entsize = sizeof(Elf64_Sym), 19 ".eh_frame_hdr\0"
31 } 20 ".eh_frame\0"
32}; 21 ".text";
diff --git a/arch/x86/vdso/vdso-layout.lds.S b/arch/x86/vdso/vdso-layout.lds.S
index 2ec72f651ebf..9197544eea9a 100644
--- a/arch/x86/vdso/vdso-layout.lds.S
+++ b/arch/x86/vdso/vdso-layout.lds.S
@@ -6,6 +6,16 @@
6 * This script controls its layout. 6 * This script controls its layout.
7 */ 7 */
8 8
9#if defined(BUILD_VDSO64)
10# define SHDR_SIZE 64
11#elif defined(BUILD_VDSO32) || defined(BUILD_VDSOX32)
12# define SHDR_SIZE 40
13#else
14# error unknown VDSO target
15#endif
16
17#define NUM_FAKE_SHDRS 13
18
9SECTIONS 19SECTIONS
10{ 20{
11 . = SIZEOF_HEADERS; 21 . = SIZEOF_HEADERS;
@@ -18,36 +28,53 @@ SECTIONS
18 .gnu.version_d : { *(.gnu.version_d) } 28 .gnu.version_d : { *(.gnu.version_d) }
19 .gnu.version_r : { *(.gnu.version_r) } 29 .gnu.version_r : { *(.gnu.version_r) }
20 30
31 .dynamic : { *(.dynamic) } :text :dynamic
32
33 .rodata : {
34 *(.rodata*)
35 *(.data*)
36 *(.sdata*)
37 *(.got.plt) *(.got)
38 *(.gnu.linkonce.d.*)
39 *(.bss*)
40 *(.dynbss*)
41 *(.gnu.linkonce.b.*)
42
43 /*
44 * Ideally this would live in a C file, but that won't
45 * work cleanly for x32 until we start building the x32
46 * C code using an x32 toolchain.
47 */
48 VDSO_FAKE_SECTION_TABLE_START = .;
49 . = . + NUM_FAKE_SHDRS * SHDR_SIZE;
50 VDSO_FAKE_SECTION_TABLE_END = .;
51 } :text
52
53 .fake_shstrtab : { *(.fake_shstrtab) } :text
54
55
21 .note : { *(.note.*) } :text :note 56 .note : { *(.note.*) } :text :note
22 57
23 .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr 58 .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
24 .eh_frame : { KEEP (*(.eh_frame)) } :text 59 .eh_frame : { KEEP (*(.eh_frame)) } :text
25 60
26 .dynamic : { *(.dynamic) } :text :dynamic
27
28 .rodata : { *(.rodata*) } :text
29 .data : {
30 *(.data*)
31 *(.sdata*)
32 *(.got.plt) *(.got)
33 *(.gnu.linkonce.d.*)
34 *(.bss*)
35 *(.dynbss*)
36 *(.gnu.linkonce.b.*)
37 }
38
39 .altinstructions : { *(.altinstructions) }
40 .altinstr_replacement : { *(.altinstr_replacement) }
41 61
42 /* 62 /*
43 * Align the actual code well away from the non-instruction data. 63 * Text is well-separated from actual data: there's plenty of
44 * This is the best thing for the I-cache. 64 * stuff that isn't used at runtime in between.
45 */ 65 */
46 . = ALIGN(0x100);
47 66
48 .text : { *(.text*) } :text =0x90909090, 67 .text : { *(.text*) } :text =0x90909090,
49 68
50 /* 69 /*
70 * At the end so that eu-elflint stays happy when vdso2c strips
71 * these. A better implementation would avoid allocating space
72 * for these.
73 */
74 .altinstructions : { *(.altinstructions) } :text
75 .altinstr_replacement : { *(.altinstr_replacement) } :text
76
77 /*
51 * The remainder of the vDSO consists of special pages that are 78 * The remainder of the vDSO consists of special pages that are
52 * shared between the kernel and userspace. It needs to be at the 79 * shared between the kernel and userspace. It needs to be at the
53 * end so that it doesn't overlap the mapping of the actual 80 * end so that it doesn't overlap the mapping of the actual
@@ -75,6 +102,7 @@ SECTIONS
75 /DISCARD/ : { 102 /DISCARD/ : {
76 *(.discard) 103 *(.discard)
77 *(.discard.*) 104 *(.discard.*)
105 *(__bug_table)
78 } 106 }
79} 107}
80 108
diff --git a/arch/x86/vdso/vdso.lds.S b/arch/x86/vdso/vdso.lds.S
index 75e3404c83b1..6807932643c2 100644
--- a/arch/x86/vdso/vdso.lds.S
+++ b/arch/x86/vdso/vdso.lds.S
@@ -6,6 +6,8 @@
6 * the DSO. 6 * the DSO.
7 */ 7 */
8 8
9#define BUILD_VDSO64
10
9#include "vdso-layout.lds.S" 11#include "vdso-layout.lds.S"
10 12
11/* 13/*
diff --git a/arch/x86/vdso/vdso2c.c b/arch/x86/vdso/vdso2c.c
index 7a6bf50f9165..238dbe82776e 100644
--- a/arch/x86/vdso/vdso2c.c
+++ b/arch/x86/vdso/vdso2c.c
@@ -23,6 +23,8 @@ enum {
23 sym_vvar_page, 23 sym_vvar_page,
24 sym_hpet_page, 24 sym_hpet_page,
25 sym_end_mapping, 25 sym_end_mapping,
26 sym_VDSO_FAKE_SECTION_TABLE_START,
27 sym_VDSO_FAKE_SECTION_TABLE_END,
26}; 28};
27 29
28const int special_pages[] = { 30const int special_pages[] = {
@@ -30,15 +32,26 @@ const int special_pages[] = {
30 sym_hpet_page, 32 sym_hpet_page,
31}; 33};
32 34
33char const * const required_syms[] = { 35struct vdso_sym {
34 [sym_vvar_page] = "vvar_page", 36 const char *name;
35 [sym_hpet_page] = "hpet_page", 37 bool export;
36 [sym_end_mapping] = "end_mapping", 38};
37 "VDSO32_NOTE_MASK", 39
38 "VDSO32_SYSENTER_RETURN", 40struct vdso_sym required_syms[] = {
39 "__kernel_vsyscall", 41 [sym_vvar_page] = {"vvar_page", true},
40 "__kernel_sigreturn", 42 [sym_hpet_page] = {"hpet_page", true},
41 "__kernel_rt_sigreturn", 43 [sym_end_mapping] = {"end_mapping", true},
44 [sym_VDSO_FAKE_SECTION_TABLE_START] = {
45 "VDSO_FAKE_SECTION_TABLE_START", false
46 },
47 [sym_VDSO_FAKE_SECTION_TABLE_END] = {
48 "VDSO_FAKE_SECTION_TABLE_END", false
49 },
50 {"VDSO32_NOTE_MASK", true},
51 {"VDSO32_SYSENTER_RETURN", true},
52 {"__kernel_vsyscall", true},
53 {"__kernel_sigreturn", true},
54 {"__kernel_rt_sigreturn", true},
42}; 55};
43 56
44__attribute__((format(printf, 1, 2))) __attribute__((noreturn)) 57__attribute__((format(printf, 1, 2))) __attribute__((noreturn))
@@ -83,37 +96,21 @@ extern void bad_put_le(void);
83 96
84#define NSYMS (sizeof(required_syms) / sizeof(required_syms[0])) 97#define NSYMS (sizeof(required_syms) / sizeof(required_syms[0]))
85 98
86#define BITS 64 99#define BITSFUNC3(name, bits) name##bits
87#define GOFUNC go64 100#define BITSFUNC2(name, bits) BITSFUNC3(name, bits)
88#define Elf_Ehdr Elf64_Ehdr 101#define BITSFUNC(name) BITSFUNC2(name, ELF_BITS)
89#define Elf_Shdr Elf64_Shdr 102
90#define Elf_Phdr Elf64_Phdr 103#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x
91#define Elf_Sym Elf64_Sym 104#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x)
92#define Elf_Dyn Elf64_Dyn 105#define ELF(x) ELF_BITS_XFORM(ELF_BITS, x)
106
107#define ELF_BITS 64
93#include "vdso2c.h" 108#include "vdso2c.h"
94#undef BITS 109#undef ELF_BITS
95#undef GOFUNC 110
96#undef Elf_Ehdr 111#define ELF_BITS 32
97#undef Elf_Shdr
98#undef Elf_Phdr
99#undef Elf_Sym
100#undef Elf_Dyn
101
102#define BITS 32
103#define GOFUNC go32
104#define Elf_Ehdr Elf32_Ehdr
105#define Elf_Shdr Elf32_Shdr
106#define Elf_Phdr Elf32_Phdr
107#define Elf_Sym Elf32_Sym
108#define Elf_Dyn Elf32_Dyn
109#include "vdso2c.h" 112#include "vdso2c.h"
110#undef BITS 113#undef ELF_BITS
111#undef GOFUNC
112#undef Elf_Ehdr
113#undef Elf_Shdr
114#undef Elf_Phdr
115#undef Elf_Sym
116#undef Elf_Dyn
117 114
118static void go(void *addr, size_t len, FILE *outfile, const char *name) 115static void go(void *addr, size_t len, FILE *outfile, const char *name)
119{ 116{
diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/vdso/vdso2c.h
index c6eefaf389b9..11b65d4f9414 100644
--- a/arch/x86/vdso/vdso2c.h
+++ b/arch/x86/vdso/vdso2c.h
@@ -4,23 +4,139 @@
4 * are built for 32-bit userspace. 4 * are built for 32-bit userspace.
5 */ 5 */
6 6
7static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name) 7/*
8 * We're writing a section table for a few reasons:
9 *
10 * The Go runtime had a couple of bugs: it would read the section
11 * table to try to figure out how many dynamic symbols there were (it
12 * shouldn't have looked at the section table at all) and, if there
13 * were no SHT_SYNDYM section table entry, it would use an
14 * uninitialized value for the number of symbols. An empty DYNSYM
15 * table would work, but I see no reason not to write a valid one (and
16 * keep full performance for old Go programs). This hack is only
17 * needed on x86_64.
18 *
19 * The bug was introduced on 2012-08-31 by:
20 * https://code.google.com/p/go/source/detail?r=56ea40aac72b
21 * and was fixed on 2014-06-13 by:
22 * https://code.google.com/p/go/source/detail?r=fc1cd5e12595
23 *
24 * Binutils has issues debugging the vDSO: it reads the section table to
25 * find SHT_NOTE; it won't look at PT_NOTE for the in-memory vDSO, which
26 * would break build-id if we removed the section table. Binutils
27 * also requires that shstrndx != 0. See:
28 * https://sourceware.org/bugzilla/show_bug.cgi?id=17064
29 *
30 * elfutils might not look for PT_NOTE if there is a section table at
31 * all. I don't know whether this matters for any practical purpose.
32 *
33 * For simplicity, rather than hacking up a partial section table, we
34 * just write a mostly complete one. We omit non-dynamic symbols,
35 * though, since they're rather large.
36 *
37 * Once binutils gets fixed, we might be able to drop this for all but
38 * the 64-bit vdso, since build-id only works in kernel RPMs, and
39 * systems that update to new enough kernel RPMs will likely update
40 * binutils in sync. build-id has never worked for home-built kernel
41 * RPMs without manual symlinking, and I suspect that no one ever does
42 * that.
43 */
44struct BITSFUNC(fake_sections)
45{
46 ELF(Shdr) *table;
47 unsigned long table_offset;
48 int count, max_count;
49
50 int in_shstrndx;
51 unsigned long shstr_offset;
52 const char *shstrtab;
53 size_t shstrtab_len;
54
55 int out_shstrndx;
56};
57
58static unsigned int BITSFUNC(find_shname)(struct BITSFUNC(fake_sections) *out,
59 const char *name)
60{
61 const char *outname = out->shstrtab;
62 while (outname - out->shstrtab < out->shstrtab_len) {
63 if (!strcmp(name, outname))
64 return (outname - out->shstrtab) + out->shstr_offset;
65 outname += strlen(outname) + 1;
66 }
67
68 if (*name)
69 printf("Warning: could not find output name \"%s\"\n", name);
70 return out->shstr_offset + out->shstrtab_len - 1; /* Use a null. */
71}
72
73static void BITSFUNC(init_sections)(struct BITSFUNC(fake_sections) *out)
74{
75 if (!out->in_shstrndx)
76 fail("didn't find the fake shstrndx\n");
77
78 memset(out->table, 0, out->max_count * sizeof(ELF(Shdr)));
79
80 if (out->max_count < 1)
81 fail("we need at least two fake output sections\n");
82
83 PUT_LE(&out->table[0].sh_type, SHT_NULL);
84 PUT_LE(&out->table[0].sh_name, BITSFUNC(find_shname)(out, ""));
85
86 out->count = 1;
87}
88
89static void BITSFUNC(copy_section)(struct BITSFUNC(fake_sections) *out,
90 int in_idx, const ELF(Shdr) *in,
91 const char *name)
92{
93 uint64_t flags = GET_LE(&in->sh_flags);
94
95 bool copy = flags & SHF_ALLOC &&
96 (GET_LE(&in->sh_size) ||
97 (GET_LE(&in->sh_type) != SHT_RELA &&
98 GET_LE(&in->sh_type) != SHT_REL)) &&
99 strcmp(name, ".altinstructions") &&
100 strcmp(name, ".altinstr_replacement");
101
102 if (!copy)
103 return;
104
105 if (out->count >= out->max_count)
106 fail("too many copied sections (max = %d)\n", out->max_count);
107
108 if (in_idx == out->in_shstrndx)
109 out->out_shstrndx = out->count;
110
111 out->table[out->count] = *in;
112 PUT_LE(&out->table[out->count].sh_name,
113 BITSFUNC(find_shname)(out, name));
114
115 /* elfutils requires that a strtab have the correct type. */
116 if (!strcmp(name, ".fake_shstrtab"))
117 PUT_LE(&out->table[out->count].sh_type, SHT_STRTAB);
118
119 out->count++;
120}
121
122static void BITSFUNC(go)(void *addr, size_t len,
123 FILE *outfile, const char *name)
8{ 124{
9 int found_load = 0; 125 int found_load = 0;
10 unsigned long load_size = -1; /* Work around bogus warning */ 126 unsigned long load_size = -1; /* Work around bogus warning */
11 unsigned long data_size; 127 unsigned long data_size;
12 Elf_Ehdr *hdr = (Elf_Ehdr *)addr; 128 ELF(Ehdr) *hdr = (ELF(Ehdr) *)addr;
13 int i; 129 int i;
14 unsigned long j; 130 unsigned long j;
15 Elf_Shdr *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr, 131 ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr,
16 *alt_sec = NULL; 132 *alt_sec = NULL;
17 Elf_Dyn *dyn = 0, *dyn_end = 0; 133 ELF(Dyn) *dyn = 0, *dyn_end = 0;
18 const char *secstrings; 134 const char *secstrings;
19 uint64_t syms[NSYMS] = {}; 135 uint64_t syms[NSYMS] = {};
20 136
21 uint64_t fake_sections_value = 0, fake_sections_size = 0; 137 struct BITSFUNC(fake_sections) fake_sections = {};
22 138
23 Elf_Phdr *pt = (Elf_Phdr *)(addr + GET_LE(&hdr->e_phoff)); 139 ELF(Phdr) *pt = (ELF(Phdr) *)(addr + GET_LE(&hdr->e_phoff));
24 140
25 /* Walk the segment table. */ 141 /* Walk the segment table. */
26 for (i = 0; i < GET_LE(&hdr->e_phnum); i++) { 142 for (i = 0; i < GET_LE(&hdr->e_phnum); i++) {
@@ -51,7 +167,7 @@ static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name)
51 for (i = 0; dyn + i < dyn_end && 167 for (i = 0; dyn + i < dyn_end &&
52 GET_LE(&dyn[i].d_tag) != DT_NULL; i++) { 168 GET_LE(&dyn[i].d_tag) != DT_NULL; i++) {
53 typeof(dyn[i].d_tag) tag = GET_LE(&dyn[i].d_tag); 169 typeof(dyn[i].d_tag) tag = GET_LE(&dyn[i].d_tag);
54 if (tag == DT_REL || tag == DT_RELSZ || 170 if (tag == DT_REL || tag == DT_RELSZ || tag == DT_RELA ||
55 tag == DT_RELENT || tag == DT_TEXTREL) 171 tag == DT_RELENT || tag == DT_TEXTREL)
56 fail("vdso image contains dynamic relocations\n"); 172 fail("vdso image contains dynamic relocations\n");
57 } 173 }
@@ -61,7 +177,7 @@ static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name)
61 GET_LE(&hdr->e_shentsize)*GET_LE(&hdr->e_shstrndx); 177 GET_LE(&hdr->e_shentsize)*GET_LE(&hdr->e_shstrndx);
62 secstrings = addr + GET_LE(&secstrings_hdr->sh_offset); 178 secstrings = addr + GET_LE(&secstrings_hdr->sh_offset);
63 for (i = 0; i < GET_LE(&hdr->e_shnum); i++) { 179 for (i = 0; i < GET_LE(&hdr->e_shnum); i++) {
64 Elf_Shdr *sh = addr + GET_LE(&hdr->e_shoff) + 180 ELF(Shdr) *sh = addr + GET_LE(&hdr->e_shoff) +
65 GET_LE(&hdr->e_shentsize) * i; 181 GET_LE(&hdr->e_shentsize) * i;
66 if (GET_LE(&sh->sh_type) == SHT_SYMTAB) 182 if (GET_LE(&sh->sh_type) == SHT_SYMTAB)
67 symtab_hdr = sh; 183 symtab_hdr = sh;
@@ -82,29 +198,63 @@ static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name)
82 i < GET_LE(&symtab_hdr->sh_size) / GET_LE(&symtab_hdr->sh_entsize); 198 i < GET_LE(&symtab_hdr->sh_size) / GET_LE(&symtab_hdr->sh_entsize);
83 i++) { 199 i++) {
84 int k; 200 int k;
85 Elf_Sym *sym = addr + GET_LE(&symtab_hdr->sh_offset) + 201 ELF(Sym) *sym = addr + GET_LE(&symtab_hdr->sh_offset) +
86 GET_LE(&symtab_hdr->sh_entsize) * i; 202 GET_LE(&symtab_hdr->sh_entsize) * i;
87 const char *name = addr + GET_LE(&strtab_hdr->sh_offset) + 203 const char *name = addr + GET_LE(&strtab_hdr->sh_offset) +
88 GET_LE(&sym->st_name); 204 GET_LE(&sym->st_name);
89 205
90 for (k = 0; k < NSYMS; k++) { 206 for (k = 0; k < NSYMS; k++) {
91 if (!strcmp(name, required_syms[k])) { 207 if (!strcmp(name, required_syms[k].name)) {
92 if (syms[k]) { 208 if (syms[k]) {
93 fail("duplicate symbol %s\n", 209 fail("duplicate symbol %s\n",
94 required_syms[k]); 210 required_syms[k].name);
95 } 211 }
96 syms[k] = GET_LE(&sym->st_value); 212 syms[k] = GET_LE(&sym->st_value);
97 } 213 }
98 } 214 }
99 215
100 if (!strcmp(name, "vdso_fake_sections")) { 216 if (!strcmp(name, "fake_shstrtab")) {
101 if (fake_sections_value) 217 ELF(Shdr) *sh;
102 fail("duplicate vdso_fake_sections\n"); 218
103 fake_sections_value = GET_LE(&sym->st_value); 219 fake_sections.in_shstrndx = GET_LE(&sym->st_shndx);
104 fake_sections_size = GET_LE(&sym->st_size); 220 fake_sections.shstrtab = addr + GET_LE(&sym->st_value);
221 fake_sections.shstrtab_len = GET_LE(&sym->st_size);
222 sh = addr + GET_LE(&hdr->e_shoff) +
223 GET_LE(&hdr->e_shentsize) *
224 fake_sections.in_shstrndx;
225 fake_sections.shstr_offset = GET_LE(&sym->st_value) -
226 GET_LE(&sh->sh_addr);
105 } 227 }
106 } 228 }
107 229
230 /* Build the output section table. */
231 if (!syms[sym_VDSO_FAKE_SECTION_TABLE_START] ||
232 !syms[sym_VDSO_FAKE_SECTION_TABLE_END])
233 fail("couldn't find fake section table\n");
234 if ((syms[sym_VDSO_FAKE_SECTION_TABLE_END] -
235 syms[sym_VDSO_FAKE_SECTION_TABLE_START]) % sizeof(ELF(Shdr)))
236 fail("fake section table size isn't a multiple of sizeof(Shdr)\n");
237 fake_sections.table = addr + syms[sym_VDSO_FAKE_SECTION_TABLE_START];
238 fake_sections.table_offset = syms[sym_VDSO_FAKE_SECTION_TABLE_START];
239 fake_sections.max_count = (syms[sym_VDSO_FAKE_SECTION_TABLE_END] -
240 syms[sym_VDSO_FAKE_SECTION_TABLE_START]) /
241 sizeof(ELF(Shdr));
242
243 BITSFUNC(init_sections)(&fake_sections);
244 for (i = 0; i < GET_LE(&hdr->e_shnum); i++) {
245 ELF(Shdr) *sh = addr + GET_LE(&hdr->e_shoff) +
246 GET_LE(&hdr->e_shentsize) * i;
247 BITSFUNC(copy_section)(&fake_sections, i, sh,
248 secstrings + GET_LE(&sh->sh_name));
249 }
250 if (!fake_sections.out_shstrndx)
251 fail("didn't generate shstrndx?!?\n");
252
253 PUT_LE(&hdr->e_shoff, fake_sections.table_offset);
254 PUT_LE(&hdr->e_shentsize, sizeof(ELF(Shdr)));
255 PUT_LE(&hdr->e_shnum, fake_sections.count);
256 PUT_LE(&hdr->e_shstrndx, fake_sections.out_shstrndx);
257
108 /* Validate mapping addresses. */ 258 /* Validate mapping addresses. */
109 for (i = 0; i < sizeof(special_pages) / sizeof(special_pages[0]); i++) { 259 for (i = 0; i < sizeof(special_pages) / sizeof(special_pages[0]); i++) {
110 if (!syms[i]) 260 if (!syms[i])
@@ -112,25 +262,17 @@ static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name)
112 262
113 if (syms[i] % 4096) 263 if (syms[i] % 4096)
114 fail("%s must be a multiple of 4096\n", 264 fail("%s must be a multiple of 4096\n",
115 required_syms[i]); 265 required_syms[i].name);
116 if (syms[i] < data_size) 266 if (syms[i] < data_size)
117 fail("%s must be after the text mapping\n", 267 fail("%s must be after the text mapping\n",
118 required_syms[i]); 268 required_syms[i].name);
119 if (syms[sym_end_mapping] < syms[i] + 4096) 269 if (syms[sym_end_mapping] < syms[i] + 4096)
120 fail("%s overruns end_mapping\n", required_syms[i]); 270 fail("%s overruns end_mapping\n",
271 required_syms[i].name);
121 } 272 }
122 if (syms[sym_end_mapping] % 4096) 273 if (syms[sym_end_mapping] % 4096)
123 fail("end_mapping must be a multiple of 4096\n"); 274 fail("end_mapping must be a multiple of 4096\n");
124 275
125 /* Remove sections or use fakes */
126 if (fake_sections_size % sizeof(Elf_Shdr))
127 fail("vdso_fake_sections size is not a multiple of %ld\n",
128 (long)sizeof(Elf_Shdr));
129 PUT_LE(&hdr->e_shoff, fake_sections_value);
130 PUT_LE(&hdr->e_shentsize, fake_sections_value ? sizeof(Elf_Shdr) : 0);
131 PUT_LE(&hdr->e_shnum, fake_sections_size / sizeof(Elf_Shdr));
132 PUT_LE(&hdr->e_shstrndx, SHN_UNDEF);
133
134 if (!name) { 276 if (!name) {
135 fwrite(addr, load_size, 1, outfile); 277 fwrite(addr, load_size, 1, outfile);
136 return; 278 return;
@@ -168,9 +310,9 @@ static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name)
168 (unsigned long)GET_LE(&alt_sec->sh_size)); 310 (unsigned long)GET_LE(&alt_sec->sh_size));
169 } 311 }
170 for (i = 0; i < NSYMS; i++) { 312 for (i = 0; i < NSYMS; i++) {
171 if (syms[i]) 313 if (required_syms[i].export && syms[i])
172 fprintf(outfile, "\t.sym_%s = 0x%" PRIx64 ",\n", 314 fprintf(outfile, "\t.sym_%s = 0x%" PRIx64 ",\n",
173 required_syms[i], syms[i]); 315 required_syms[i].name, syms[i]);
174 } 316 }
175 fprintf(outfile, "};\n"); 317 fprintf(outfile, "};\n");
176} 318}
diff --git a/arch/x86/vdso/vdso32/vdso-fakesections.c b/arch/x86/vdso/vdso32/vdso-fakesections.c
new file mode 100644
index 000000000000..541468e25265
--- /dev/null
+++ b/arch/x86/vdso/vdso32/vdso-fakesections.c
@@ -0,0 +1 @@
#include "../vdso-fakesections.c"
diff --git a/arch/x86/vdso/vdsox32.lds.S b/arch/x86/vdso/vdsox32.lds.S
index 46b991b578a8..697c11ece90c 100644
--- a/arch/x86/vdso/vdsox32.lds.S
+++ b/arch/x86/vdso/vdsox32.lds.S
@@ -6,6 +6,8 @@
6 * the DSO. 6 * the DSO.
7 */ 7 */
8 8
9#define BUILD_VDSOX32
10
9#include "vdso-layout.lds.S" 11#include "vdso-layout.lds.S"
10 12
11/* 13/*
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index e1513c47872a..5a5176de8d0a 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -62,6 +62,9 @@ struct linux_binprm;
62 Only used for the 64-bit and x32 vdsos. */ 62 Only used for the 64-bit and x32 vdsos. */
63static unsigned long vdso_addr(unsigned long start, unsigned len) 63static unsigned long vdso_addr(unsigned long start, unsigned len)
64{ 64{
65#ifdef CONFIG_X86_32
66 return 0;
67#else
65 unsigned long addr, end; 68 unsigned long addr, end;
66 unsigned offset; 69 unsigned offset;
67 end = (start + PMD_SIZE - 1) & PMD_MASK; 70 end = (start + PMD_SIZE - 1) & PMD_MASK;
@@ -83,6 +86,7 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
83 addr = align_vdso_addr(addr); 86 addr = align_vdso_addr(addr);
84 87
85 return addr; 88 return addr;
89#endif
86} 90}
87 91
88static int map_vdso(const struct vdso_image *image, bool calculate_addr) 92static int map_vdso(const struct vdso_image *image, bool calculate_addr)