author	Ingo Molnar <mingo@kernel.org>	2014-08-22 04:04:15 -0400
committer	Ingo Molnar <mingo@kernel.org>	2014-08-22 04:04:15 -0400
commit	80b304fd00e8b667775ff791121b61ecd7cd0c03 (patch)
tree	b4f2ec59fe062c43343ee4c2f10a6bcd0e4dcd1b /arch/x86
parent	fb21b84e7f809ef04b1e5aed5d463cf0d4866638 (diff)
parent	6a7519e81321343165f89abb8b616df186d3e57a (diff)
Merge tag 'efi-urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/mfleming/efi into x86/urgent
Pull EFI fixes from Matt Fleming:

 * WARN_ON(!spin_is_locked()) always triggers on non-SMP machines.
   Swap it for the more canonical lockdep_assert_held() which always
   does the right thing - Guenter Roeck

 * Assign the correct value to efi.runtime_version on arm64 so that all
   the runtime services can be invoked - Semen Protsenko

Signed-off-by: Ingo Molnar <mingo@kernel.org>
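For context on the first fix: on !CONFIG_SMP builds spin_is_locked() can unconditionally return 0, so WARN_ON(!spin_is_locked(...)) fires on every call, while lockdep_assert_held() compiles away without CONFIG_LOCKDEP and checks the actual lock holder with it. A minimal sketch of the pattern (illustrative only, with a hypothetical demo_lock; not the EFI code itself):

	static DEFINE_SPINLOCK(demo_lock);

	static void demo_assert_held(void)
	{
		/* Always triggers on UP kernels, where spin_is_locked()
		 * can return 0 unconditionally: */
		WARN_ON(!spin_is_locked(&demo_lock));

		/* Canonical replacement: a no-op without CONFIG_LOCKDEP,
		 * a real held-by-this-context check with it: */
		lockdep_assert_held(&demo_lock);
	}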
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kbuild | 4
-rw-r--r--  arch/x86/Kconfig | 35
-rw-r--r--  arch/x86/Makefile | 8
-rw-r--r--  arch/x86/include/asm/Kbuild | 3
-rw-r--r--  arch/x86/include/asm/acenv.h | 4
-rw-r--r--  arch/x86/include/asm/acpi.h | 5
-rw-r--r--  arch/x86/include/asm/alternative.h | 14
-rw-r--r--  arch/x86/include/asm/apic.h | 46
-rw-r--r--  arch/x86/include/asm/crash.h | 9
-rw-r--r--  arch/x86/include/asm/fpu-internal.h | 9
-rw-r--r--  arch/x86/include/asm/hardirq.h | 3
-rw-r--r--  arch/x86/include/asm/i8259.h | 5
-rw-r--r--  arch/x86/include/asm/io_apic.h | 56
-rw-r--r--  arch/x86/include/asm/kexec-bzimage64.h | 6
-rw-r--r--  arch/x86/include/asm/kexec.h | 45
-rw-r--r--  arch/x86/include/asm/mpspec.h | 15
-rw-r--r--  arch/x86/include/asm/page.h | 1
-rw-r--r--  arch/x86/include/asm/page_64.h | 2
-rw-r--r--  arch/x86/include/asm/platform_sst_audio.h | 78
-rw-r--r--  arch/x86/include/asm/processor.h | 4
-rw-r--r--  arch/x86/include/asm/prom.h | 2
-rw-r--r--  arch/x86/include/asm/scatterlist.h | 8
-rw-r--r--  arch/x86/include/asm/smpboot_hooks.h | 10
-rw-r--r--  arch/x86/include/asm/xsave.h | 223
-rw-r--r--  arch/x86/include/uapi/asm/msr-index.h | 3
-rw-r--r--  arch/x86/kernel/Makefile | 1
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 400
-rw-r--r--  arch/x86/kernel/apic/apic.c | 75
-rw-r--r--  arch/x86/kernel/apic/apic_flat_64.c | 16
-rw-r--r--  arch/x86/kernel/apic/apic_noop.c | 23
-rw-r--r--  arch/x86/kernel/apic/apic_numachip.c | 8
-rw-r--r--  arch/x86/kernel/apic/bigsmp_32.c | 14
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 759
-rw-r--r--  arch/x86/kernel/apic/probe_32.c | 33
-rw-r--r--  arch/x86/kernel/apic/x2apic_cluster.c | 8
-rw-r--r--  arch/x86/kernel/apic/x2apic_phys.c | 8
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c | 8
-rw-r--r--  arch/x86/kernel/cpu/common.c | 8
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 4
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c | 6
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd.c | 4
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_intel.c | 18
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.c | 10
-rw-r--r--  arch/x86/kernel/crash.c | 563
-rw-r--r--  arch/x86/kernel/devicetree.c | 207
-rw-r--r--  arch/x86/kernel/i387.c | 2
-rw-r--r--  arch/x86/kernel/iosf_mbi.c | 2
-rw-r--r--  arch/x86/kernel/irqinit.c | 12
-rw-r--r--  arch/x86/kernel/kexec-bzimage64.c | 553
-rw-r--r--  arch/x86/kernel/machine_kexec_64.c | 239
-rw-r--r--  arch/x86/kernel/mpparse.c | 111
-rw-r--r--  arch/x86/kernel/process.c | 1
-rw-r--r--  arch/x86/kernel/smpboot.c | 8
-rw-r--r--  arch/x86/kernel/tsc.c | 21
-rw-r--r--  arch/x86/kernel/vsmp_64.c | 4
-rw-r--r--  arch/x86/kernel/vsyscall_gtod.c | 23
-rw-r--r--  arch/x86/kernel/xsave.c | 118
-rw-r--r--  arch/x86/kvm/Kconfig | 1
-rw-r--r--  arch/x86/kvm/irq.c | 2
-rw-r--r--  arch/x86/kvm/lapic.c | 52
-rw-r--r--  arch/x86/kvm/mmu_audit.c | 2
-rw-r--r--  arch/x86/kvm/vmx.c | 4
-rw-r--r--  arch/x86/kvm/x86.c | 64
-rw-r--r--  arch/x86/mm/fault.c | 3
-rw-r--r--  arch/x86/mm/init_32.c | 3
-rw-r--r--  arch/x86/mm/init_64.c | 3
-rw-r--r--  arch/x86/net/bpf_jit_comp.c | 16
-rw-r--r--  arch/x86/pci/acpi.c | 6
-rw-r--r--  arch/x86/pci/intel_mid_pci.c | 27
-rw-r--r--  arch/x86/pci/irq.c | 16
-rw-r--r--  arch/x86/pci/xen.c | 7
-rw-r--r--  arch/x86/platform/ce4100/ce4100.c | 11
-rw-r--r--  arch/x86/platform/intel-mid/device_libs/platform_wdt.c | 22
-rw-r--r--  arch/x86/platform/intel-mid/sfi.c | 56
-rw-r--r--  arch/x86/platform/sfi/sfi.c | 10
-rw-r--r--  arch/x86/platform/uv/tlb_uv.c | 2
-rw-r--r--  arch/x86/purgatory/Makefile | 30
-rw-r--r--  arch/x86/purgatory/entry64.S | 101
-rw-r--r--  arch/x86/purgatory/purgatory.c | 72
-rw-r--r--  arch/x86/purgatory/setup-x86_64.S | 58
-rw-r--r--  arch/x86/purgatory/sha256.c | 283
-rw-r--r--  arch/x86/purgatory/sha256.h | 22
-rw-r--r--  arch/x86/purgatory/stack.S | 19
-rw-r--r--  arch/x86/purgatory/string.c | 13
-rw-r--r--  arch/x86/syscalls/syscall_32.tbl | 3
-rw-r--r--  arch/x86/syscalls/syscall_64.tbl | 4
-rw-r--r--  arch/x86/um/asm/elf.h | 1
-rw-r--r--  arch/x86/um/mem_64.c | 15
-rw-r--r--  arch/x86/um/signal.c | 45
-rw-r--r--  arch/x86/vdso/vdso32-setup.c | 19
-rw-r--r--  arch/x86/xen/enlighten.c | 13
-rw-r--r--  arch/x86/xen/grant-table.c | 70
-rw-r--r--  arch/x86/xen/p2m.c | 5
-rw-r--r--  arch/x86/xen/time.c | 2
94 files changed, 3547 insertions, 1400 deletions
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index e5287d8517aa..61b6d51866f8 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -16,3 +16,7 @@ obj-$(CONFIG_IA32_EMULATION) += ia32/
 
 obj-y += platform/
 obj-y += net/
+
+ifeq ($(CONFIG_X86_64),y)
+obj-$(CONFIG_KEXEC) += purgatory/
+endif
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6b71f0417293..5d0bf1aa9dcb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -21,6 +21,7 @@ config X86_64
 ### Arch settings
 config X86
 	def_bool y
+	select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
 	select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_MIGHT_HAVE_PC_SERIO
@@ -95,6 +96,7 @@ config X86
 	select IRQ_FORCED_THREADING
 	select HAVE_BPF_JIT if X86_64
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+	select ARCH_HAS_SG_CHAIN
 	select CLKEVT_I8253
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_IOMAP
@@ -108,9 +110,9 @@ config X86
 	select CLOCKSOURCE_WATCHDOG
 	select GENERIC_CLOCKEVENTS
 	select ARCH_CLOCKSOURCE_DATA
+	select CLOCKSOURCE_VALIDATE_LAST_CYCLE
 	select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
 	select GENERIC_TIME_VSYSCALL
-	select KTIME_SCALAR if X86_32
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select HAVE_CONTEXT_TRACKING if X86_64
@@ -133,6 +135,7 @@ config X86
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select HAVE_ACPI_APEI if ACPI
 	select HAVE_ACPI_APEI_NMI if ACPI
+	select ACPI_LEGACY_TABLES_LOOKUP if ACPI
 
 config INSTRUCTION_DECODER
 	def_bool y
@@ -431,6 +434,7 @@ config X86_INTEL_CE
 	bool "CE4100 TV platform"
 	depends on PCI
 	depends on PCI_GODIRECT
+	depends on X86_IO_APIC
 	depends on X86_32
 	depends on X86_EXTENDED_PLATFORM
 	select X86_REBOOTFIXUPS
@@ -837,6 +841,7 @@ config X86_IO_APIC
 	def_bool y
 	depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC || PCI_MSI
 	select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
+	select IRQ_DOMAIN
 
 config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
 	bool "Reroute for broken boot IRQs"
@@ -1538,7 +1543,8 @@ config EFI
 
 config EFI_STUB
 	bool "EFI stub support"
-	depends on EFI
+	depends on EFI && !X86_USE_3DNOW
+	select RELOCATABLE
 	---help---
 	  This kernel feature allows a bzImage to be loaded directly
 	  by EFI firmware without the use of a bootloader.
@@ -1579,6 +1585,9 @@ source kernel/Kconfig.hz
 
 config KEXEC
 	bool "kexec system call"
+	select BUILD_BIN2C
+	select CRYPTO
+	select CRYPTO_SHA256
 	---help---
 	  kexec is a system call that implements the ability to shutdown your
 	  current kernel, and to start another kernel. It is like a reboot
@@ -1593,6 +1602,28 @@ config KEXEC
 	  interface is strongly in flux, so no good recommendation can be
 	  made.
 
+config KEXEC_VERIFY_SIG
+	bool "Verify kernel signature during kexec_file_load() syscall"
+	depends on KEXEC
+	---help---
+	  This option makes kernel signature verification mandatory for
+	  the kexec_file_load() syscall. If the kernel's signature cannot
+	  be verified, kexec_file_load() will fail.
+
+	  This option enforces signature verification at the generic level.
+	  One needs to enable signature verification for the type of kernel
+	  image being loaded to make sure it works. For example, enable
+	  the bzImage signature verification option to be able to load and
+	  verify signatures of bzImage. Otherwise kernel loading will fail.
+
+config KEXEC_BZIMAGE_VERIFY_SIG
+	bool "Enable bzImage signature verification support"
+	depends on KEXEC_VERIFY_SIG
+	depends on SIGNED_PE_FILE_VERIFICATION
+	select SYSTEM_TRUSTED_KEYRING
+	---help---
+	  Enable bzImage signature verification support.
+
 config CRASH_DUMP
 	bool "kernel crash dumps"
 	depends on X86_64 || (X86_32 && HIGHMEM)
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index c65fd9650467..c1aa36887843 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -183,6 +183,14 @@ archscripts: scripts_basic
 archheaders:
 	$(Q)$(MAKE) $(build)=arch/x86/syscalls all
 
+archprepare:
+ifeq ($(CONFIG_KEXEC),y)
+# Build only for 64bit. No loaders for 32bit yet.
+ ifeq ($(CONFIG_X86_64),y)
+	$(Q)$(MAKE) $(build)=arch/x86/purgatory arch/x86/purgatory/kexec-purgatory.c
+ endif
+endif
+
 ###
 # Kernel objects
 
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 3ca9762e1649..3bf000fab0ae 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -5,6 +5,7 @@ genhdr-y += unistd_64.h
 genhdr-y += unistd_x32.h
 
 generic-y += clkdev.h
-generic-y += early_ioremap.h
 generic-y += cputime.h
+generic-y += early_ioremap.h
 generic-y += mcs_spinlock.h
+generic-y += scatterlist.h
diff --git a/arch/x86/include/asm/acenv.h b/arch/x86/include/asm/acenv.h
index 66873297e9f5..1b010a859b8b 100644
--- a/arch/x86/include/asm/acenv.h
+++ b/arch/x86/include/asm/acenv.h
@@ -18,8 +18,6 @@
 
 #define ACPI_FLUSH_CPU_CACHE()	wbinvd()
 
-#ifdef CONFIG_ACPI
-
 int __acpi_acquire_global_lock(unsigned int *lock);
 int __acpi_release_global_lock(unsigned int *lock);
 
@@ -44,6 +42,4 @@ int __acpi_release_global_lock(unsigned int *lock);
 		: "=r"(n_hi), "=r"(n_lo)	\
 		: "0"(n_hi), "1"(n_lo))
 
-#endif
-
 #endif /* _ASM_X86_ACENV_H */
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index e06225eda635..0ab4f9fd2687 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -121,6 +121,11 @@ static inline void arch_acpi_set_pdc_bits(u32 *buf)
 		buf[2] &= ~(ACPI_PDC_C_C2C3_FFH);
 }
 
+static inline bool acpi_has_cpu_in_madt(void)
+{
+	return !!acpi_lapic;
+}
+
 #else /* !CONFIG_ACPI */
 
 #define acpi_lapic 0
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 0a3f9c9f98d5..473bdbee378a 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -161,6 +161,20 @@ static inline int alternatives_text_reserved(void *start, void *end)
 	asm volatile (ALTERNATIVE(oldinstr, newinstr, feature)		\
 		: : "i" (0), ## input)
 
+/*
+ * This is similar to alternative_input. But it has two features and
+ * respective instructions.
+ *
+ * If CPU has feature2, newinstr2 is used.
+ * Otherwise, if CPU has feature1, newinstr1 is used.
+ * Otherwise, oldinstr is used.
+ */
+#define alternative_input_2(oldinstr, newinstr1, feature1, newinstr2,	\
+			    feature2, input...)				\
+	asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1,	\
+		newinstr2, feature2)					\
+		: : "i" (0), ## input)
+
 /* Like alternative_input, but with a single output argument */
 #define alternative_io(oldinstr, newinstr, feature, output, input...)	\
 	asm volatile (ALTERNATIVE(oldinstr, newinstr, feature)		\
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 79752f2bdec5..465b309af254 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -85,14 +85,6 @@ static inline bool apic_from_smp_config(void)
 #include <asm/paravirt.h>
 #endif
 
-#ifdef CONFIG_X86_64
-extern int is_vsmp_box(void);
-#else
-static inline int is_vsmp_box(void)
-{
-	return 0;
-}
-#endif
 extern int setup_profiling_timer(unsigned int);
 
 static inline void native_apic_mem_write(u32 reg, u32 v)
@@ -300,7 +292,6 @@ struct apic {
 
 	int dest_logical;
 	unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid);
-	unsigned long (*check_apicid_present)(int apicid);
 
 	void (*vector_allocation_domain)(int cpu, struct cpumask *retmask,
 					 const struct cpumask *mask);
@@ -309,21 +300,11 @@ struct apic {
 	void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap);
 
 	void (*setup_apic_routing)(void);
-	int (*multi_timer_check)(int apic, int irq);
 	int (*cpu_present_to_apicid)(int mps_cpu);
 	void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap);
-	void (*setup_portio_remap)(void);
 	int (*check_phys_apicid_present)(int phys_apicid);
-	void (*enable_apic_mode)(void);
 	int (*phys_pkg_id)(int cpuid_apic, int index_msb);
 
-	/*
-	 * When one of the next two hooks returns 1 the apic
-	 * is switched to this. Essentially they are additional
-	 * probe functions:
-	 */
-	int (*mps_oem_check)(struct mpc_table *mpc, char *oem, char *productid);
-
 	unsigned int (*get_apic_id)(unsigned long x);
 	unsigned long (*set_apic_id)(unsigned int id);
 	unsigned long apic_id_mask;
@@ -343,11 +324,7 @@ struct apic {
 	/* wakeup_secondary_cpu */
 	int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip);
 
-	int trampoline_phys_low;
-	int trampoline_phys_high;
-
 	bool wait_for_init_deassert;
-	void (*smp_callin_clear_local_apic)(void);
 	void (*inquire_remote_apic)(int apicid);
 
 	/* apic ops */
@@ -378,14 +355,6 @@ struct apic {
 	 * won't be applied properly during early boot in this case.
 	 */
 	int (*x86_32_early_logical_apicid)(int cpu);
-
-	/*
-	 * Optional method called from setup_local_APIC() after logical
-	 * apicid is guaranteed to be known to initialize apicid -> node
-	 * mapping if NUMA initialization hasn't done so already. Don't
-	 * add new users.
-	 */
-	int (*x86_32_numa_cpu_node)(int cpu);
 #endif
 };
 
@@ -496,14 +465,12 @@ static inline unsigned default_get_apic_id(unsigned long x)
 }
 
 /*
- * Warm reset vector default position:
+ * Warm reset vector position:
  */
-#define DEFAULT_TRAMPOLINE_PHYS_LOW	0x467
-#define DEFAULT_TRAMPOLINE_PHYS_HIGH	0x469
+#define TRAMPOLINE_PHYS_LOW		0x467
+#define TRAMPOLINE_PHYS_HIGH		0x469
 
 #ifdef CONFIG_X86_64
-extern int default_acpi_madt_oem_check(char *, char *);
-
 extern void apic_send_IPI_self(int vector);
 
 DECLARE_PER_CPU(int, x2apic_extra_bits);
@@ -552,6 +519,8 @@ static inline int default_apic_id_valid(int apicid)
 	return (apicid < 255);
 }
 
+extern int default_acpi_madt_oem_check(char *, char *);
+
 extern void default_setup_apic_routing(void);
 
 extern struct apic apic_noop;
@@ -635,11 +604,6 @@ static inline unsigned long default_check_apicid_used(physid_mask_t *map, int ap
 	return physid_isset(apicid, *map);
 }
 
-static inline unsigned long default_check_apicid_present(int bit)
-{
-	return physid_isset(bit, phys_cpu_present_map);
-}
-
 static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
 {
 	*retmap = *phys_map;
diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h
new file mode 100644
index 000000000000..f498411f2500
--- /dev/null
+++ b/arch/x86/include/asm/crash.h
@@ -0,0 +1,9 @@
+#ifndef _ASM_X86_CRASH_H
+#define _ASM_X86_CRASH_H
+
+int crash_load_segments(struct kimage *image);
+int crash_copy_backup_region(struct kimage *image);
+int crash_setup_memmap_entries(struct kimage *image,
+		struct boot_params *params);
+
+#endif /* _ASM_X86_CRASH_H */
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index e3b85422cf12..412ececa00b9 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -508,9 +508,12 @@ static inline void user_fpu_begin(void)
 
 static inline void __save_fpu(struct task_struct *tsk)
 {
-	if (use_xsave())
-		xsave_state(&tsk->thread.fpu.state->xsave, -1);
-	else
+	if (use_xsave()) {
+		if (unlikely(system_state == SYSTEM_BOOTING))
+			xsave_state_booting(&tsk->thread.fpu.state->xsave, -1);
+		else
+			xsave_state(&tsk->thread.fpu.state->xsave, -1);
+	} else
 		fpu_fxsave(&tsk->thread.fpu);
 }
 
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 230853da4ec0..0f5fb6b6567e 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -40,9 +40,6 @@ typedef struct {
 
 DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
 
-/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
-#define MAX_HARDIRQS_PER_CPU NR_VECTORS
-
 #define __ARCH_IRQ_STAT
 
 #define inc_irq_stat(member)	this_cpu_inc(irq_stat.member)
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index a20365953bf8..ccffa53750a8 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -67,4 +67,9 @@ struct legacy_pic {
 extern struct legacy_pic *legacy_pic;
 extern struct legacy_pic null_legacy_pic;
 
+static inline int nr_legacy_irqs(void)
+{
+	return legacy_pic->nr_legacy_irqs;
+}
+
 #endif /* _ASM_X86_I8259_H */
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 90f97b4b9347..0aeed5ca356e 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -98,6 +98,8 @@ struct IR_IO_APIC_route_entry {
 #define IOAPIC_AUTO	-1
 #define IOAPIC_EDGE	0
 #define IOAPIC_LEVEL	1
+#define IOAPIC_MAP_ALLOC	0x1
+#define IOAPIC_MAP_CHECK	0x2
 
 #ifdef CONFIG_X86_IO_APIC
 
@@ -118,9 +120,6 @@ extern int mp_irq_entries;
 /* MP IRQ source entries */
 extern struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
 
-/* non-0 if default (table-less) MP configuration */
-extern int mpc_default_type;
-
 /* Older SiS APIC requires we rewrite the index register */
 extern int sis_apic_bug;
 
@@ -133,9 +132,6 @@ extern int noioapicquirk;
 /* -1 if "noapic" boot option passed */
 extern int noioapicreroute;
 
-/* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */
-extern int timer_through_8259;
-
 /*
  * If we use the IO-APIC for IRQ routing, disable automatic
  * assignment of PCI IRQ's.
@@ -145,24 +141,17 @@ extern int timer_through_8259;
 
 struct io_apic_irq_attr;
 struct irq_cfg;
-extern int io_apic_set_pci_routing(struct device *dev, int irq,
-				   struct io_apic_irq_attr *irq_attr);
-void setup_IO_APIC_irq_extra(u32 gsi);
 extern void ioapic_insert_resources(void);
 
 extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *,
 				     unsigned int, int,
 				     struct io_apic_irq_attr *);
-extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *,
-				     unsigned int, int,
-				     struct io_apic_irq_attr *);
 extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg);
 
 extern void native_compose_msi_msg(struct pci_dev *pdev,
 				   unsigned int irq, unsigned int dest,
 				   struct msi_msg *msg, u8 hpet_id);
 extern void native_eoi_ioapic_pin(int apic, int pin, int vector);
-int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr);
 
 extern int save_ioapic_entries(void);
 extern void mask_ioapic_entries(void);
@@ -171,15 +160,40 @@ extern int restore_ioapic_entries(void);
 extern void setup_ioapic_ids_from_mpc(void);
 extern void setup_ioapic_ids_from_mpc_nocheck(void);
 
+enum ioapic_domain_type {
+	IOAPIC_DOMAIN_INVALID,
+	IOAPIC_DOMAIN_LEGACY,
+	IOAPIC_DOMAIN_STRICT,
+	IOAPIC_DOMAIN_DYNAMIC,
+};
+
+struct device_node;
+struct irq_domain;
+struct irq_domain_ops;
+
+struct ioapic_domain_cfg {
+	enum ioapic_domain_type	type;
+	const struct irq_domain_ops *ops;
+	struct device_node *dev;
+};
+
 struct mp_ioapic_gsi{
 	u32 gsi_base;
 	u32 gsi_end;
 };
-extern struct mp_ioapic_gsi mp_gsi_routing[];
 extern u32 gsi_top;
-int mp_find_ioapic(u32 gsi);
-int mp_find_ioapic_pin(int ioapic, u32 gsi);
-void __init mp_register_ioapic(int id, u32 address, u32 gsi_base);
+
+extern int mp_find_ioapic(u32 gsi);
+extern int mp_find_ioapic_pin(int ioapic, u32 gsi);
+extern u32 mp_pin_to_gsi(int ioapic, int pin);
+extern int mp_map_gsi_to_irq(u32 gsi, unsigned int flags);
+extern void mp_unmap_irq(int irq);
+extern void __init mp_register_ioapic(int id, u32 address, u32 gsi_base,
+				      struct ioapic_domain_cfg *cfg);
+extern int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq,
+			    irq_hw_number_t hwirq);
+extern void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq);
+extern int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node);
 extern void __init pre_init_apic_IRQ0(void);
 
 extern void mp_save_irq(struct mpc_intsrc *m);
@@ -217,14 +231,12 @@ extern void io_apic_eoi(unsigned int apic, unsigned int vector);
 
 #define io_apic_assign_pci_irqs 0
 #define setup_ioapic_ids_from_mpc x86_init_noop
-static const int timer_through_8259 = 0;
 static inline void ioapic_insert_resources(void) { }
 #define gsi_top (NR_IRQS_LEGACY)
 static inline int mp_find_ioapic(u32 gsi) { return 0; }
-
-struct io_apic_irq_attr;
-static inline int io_apic_set_pci_routing(struct device *dev, int irq,
-					  struct io_apic_irq_attr *irq_attr) { return 0; }
+static inline u32 mp_pin_to_gsi(int ioapic, int pin) { return UINT_MAX; }
+static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) { return gsi; }
+static inline void mp_unmap_irq(int irq) { }
 
 static inline int save_ioapic_entries(void)
 {
diff --git a/arch/x86/include/asm/kexec-bzimage64.h b/arch/x86/include/asm/kexec-bzimage64.h
new file mode 100644
index 000000000000..d1b5d194e31d
--- /dev/null
+++ b/arch/x86/include/asm/kexec-bzimage64.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_KEXEC_BZIMAGE64_H
+#define _ASM_KEXEC_BZIMAGE64_H
+
+extern struct kexec_file_ops kexec_bzImage64_ops;
+
+#endif /* _ASM_KEXEC_BZIMAGE64_H */
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 17483a492f18..d2434c1cad05 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -23,6 +23,9 @@
 
 #include <asm/page.h>
 #include <asm/ptrace.h>
+#include <asm/bootparam.h>
+
+struct kimage;
 
 /*
  * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
@@ -61,6 +64,10 @@
 # define KEXEC_ARCH KEXEC_ARCH_X86_64
 #endif
 
+/* Memory to backup during crash kdump */
+#define KEXEC_BACKUP_SRC_START	(0UL)
+#define KEXEC_BACKUP_SRC_END	(640 * 1024UL)	/* 640K */
+
 /*
  * CPU does not save ss and sp on stack if execution is already
  * running in kernel mode at the time of NMI occurrence. This code
@@ -160,6 +167,44 @@ struct kimage_arch {
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
+	/* Details of backup region */
+	unsigned long backup_src_start;
+	unsigned long backup_src_sz;
+
+	/* Physical address of backup segment */
+	unsigned long backup_load_addr;
+
+	/* Core ELF header buffer */
+	void *elf_headers;
+	unsigned long elf_headers_sz;
+	unsigned long elf_load_addr;
+};
+#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_X86_64
+/*
+ * Number of elements and order of elements in this structure should match
+ * with the ones in arch/x86/purgatory/entry64.S. If you make a change here
+ * make an appropriate change in purgatory too.
+ */
+struct kexec_entry64_regs {
+	uint64_t rax;
+	uint64_t rcx;
+	uint64_t rdx;
+	uint64_t rbx;
+	uint64_t rsp;
+	uint64_t rbp;
+	uint64_t rsi;
+	uint64_t rdi;
+	uint64_t r8;
+	uint64_t r9;
+	uint64_t r10;
+	uint64_t r11;
+	uint64_t r12;
+	uint64_t r13;
+	uint64_t r14;
+	uint64_t r15;
+	uint64_t rip;
 };
 #endif
 
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index f5a617956735..b07233b64578 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -40,8 +40,6 @@ extern int mp_bus_id_to_type[MAX_MP_BUSSES];
 extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
 
 extern unsigned int boot_cpu_physical_apicid;
-extern unsigned int max_physical_apicid;
-extern int mpc_default_type;
 extern unsigned long mp_lapic_addr;
 
 #ifdef CONFIG_X86_LOCAL_APIC
@@ -88,15 +86,6 @@ static inline void early_reserve_e820_mpc_new(void) { }
 #endif
 
 int generic_processor_info(int apicid, int version);
-#ifdef CONFIG_ACPI
-extern void mp_register_ioapic(int id, u32 address, u32 gsi_base);
-extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
-				   u32 gsi);
-extern void mp_config_acpi_legacy_irqs(void);
-struct device;
-extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level,
-			   int active_high_low);
-#endif /* CONFIG_ACPI */
 
 #define PHYSID_ARRAY_SIZE	BITS_TO_LONGS(MAX_LOCAL_APIC)
 
@@ -161,8 +150,4 @@ static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map)
 
 extern physid_mask_t phys_cpu_present_map;
 
-extern int generic_mps_oem_check(struct mpc_table *, char *, char *);
-
-extern int default_acpi_madt_oem_check(char *, char *);
-
 #endif /* _ASM_X86_MPSPEC_H */
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 775873d3be55..802dde30c928 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -70,7 +70,6 @@ extern bool __virt_addr_valid(unsigned long kaddr);
 #include <asm-generic/memory_model.h>
 #include <asm-generic/getorder.h>
 
-#define __HAVE_ARCH_GATE_AREA 1
 #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
 
 #endif	/* __KERNEL__ */
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 0f1ddee6a0ce..f408caf73430 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -39,4 +39,6 @@ void copy_page(void *to, void *from);
 
 #endif	/* !__ASSEMBLY__ */
 
+#define __HAVE_ARCH_GATE_AREA 1
+
 #endif /* _ASM_X86_PAGE_64_H */
diff --git a/arch/x86/include/asm/platform_sst_audio.h b/arch/x86/include/asm/platform_sst_audio.h
new file mode 100644
index 000000000000..0a4e140315b6
--- /dev/null
+++ b/arch/x86/include/asm/platform_sst_audio.h
@@ -0,0 +1,78 @@
+/*
+ * platform_sst_audio.h: sst audio platform data header file
+ *
+ * Copyright (C) 2012-14 Intel Corporation
+ * Author: Jeeja KP <jeeja.kp@intel.com>
+ *	Omair Mohammed Abdullah <omair.m.abdullah@intel.com>
+ *	Vinod Koul <vinod.koul@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#ifndef _PLATFORM_SST_AUDIO_H_
+#define _PLATFORM_SST_AUDIO_H_
+
+#include <linux/sfi.h>
+
+enum sst_audio_task_id_mrfld {
+	SST_TASK_ID_NONE = 0,
+	SST_TASK_ID_SBA = 1,
+	SST_TASK_ID_MEDIA = 3,
+	SST_TASK_ID_MAX = SST_TASK_ID_MEDIA,
+};
+
+/* Device IDs for Merrifield are Pipe IDs,
+ * ref: DSP spec v0.75 */
+enum sst_audio_device_id_mrfld {
+	/* Output pipeline IDs */
+	PIPE_ID_OUT_START = 0x0,
+	PIPE_CODEC_OUT0 = 0x2,
+	PIPE_CODEC_OUT1 = 0x3,
+	PIPE_SPROT_LOOP_OUT = 0x4,
+	PIPE_MEDIA_LOOP1_OUT = 0x5,
+	PIPE_MEDIA_LOOP2_OUT = 0x6,
+	PIPE_VOIP_OUT = 0xC,
+	PIPE_PCM0_OUT = 0xD,
+	PIPE_PCM1_OUT = 0xE,
+	PIPE_PCM2_OUT = 0xF,
+	PIPE_MEDIA0_OUT = 0x12,
+	PIPE_MEDIA1_OUT = 0x13,
+	/* Input pipeline IDs */
+	PIPE_ID_IN_START = 0x80,
+	PIPE_CODEC_IN0 = 0x82,
+	PIPE_CODEC_IN1 = 0x83,
+	PIPE_SPROT_LOOP_IN = 0x84,
+	PIPE_MEDIA_LOOP1_IN = 0x85,
+	PIPE_MEDIA_LOOP2_IN = 0x86,
+	PIPE_VOIP_IN = 0x8C,
+	PIPE_PCM0_IN = 0x8D,
+	PIPE_PCM1_IN = 0x8E,
+	PIPE_MEDIA0_IN = 0x8F,
+	PIPE_MEDIA1_IN = 0x90,
+	PIPE_MEDIA2_IN = 0x91,
+	PIPE_RSVD = 0xFF,
+};
+
+/* The stream map for each platform consists of an array of the below
+ * stream map structure.
+ */
+struct sst_dev_stream_map {
+	u8 dev_num;	/* device id */
+	u8 subdev_num;	/* substream */
+	u8 direction;
+	u8 device_id;	/* fw id */
+	u8 task_id;	/* fw task */
+	u8 status;
+};
+
+struct sst_platform_data {
+	/* Intel software platform id */
+	struct sst_dev_stream_map *pdev_strm_map;
+	unsigned int strm_map_size;
+};
+
+int add_sst_platform_device(void);
+#endif
+
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index ee30b9f0b91c..eb71ec794732 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -385,8 +385,8 @@ struct bndcsr_struct {
385 385
386struct xsave_hdr_struct { 386struct xsave_hdr_struct {
387 u64 xstate_bv; 387 u64 xstate_bv;
388 u64 reserved1[2]; 388 u64 xcomp_bv;
389 u64 reserved2[5]; 389 u64 reserved[6];
390} __attribute__((packed)); 390} __attribute__((packed));
391 391
392struct xsave_struct { 392struct xsave_struct {
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h
index fbeb06ed0eaa..1d081ac1cd69 100644
--- a/arch/x86/include/asm/prom.h
+++ b/arch/x86/include/asm/prom.h
@@ -26,12 +26,10 @@
 extern int of_ioapic;
 extern u64 initial_dtb;
 extern void add_dtb(u64 data);
-extern void x86_add_irq_domains(void);
 void x86_of_pci_init(void);
 void x86_dtb_init(void);
 #else
 static inline void add_dtb(u64 data) { }
-static inline void x86_add_irq_domains(void) { }
 static inline void x86_of_pci_init(void) { }
 static inline void x86_dtb_init(void) { }
 #define of_ioapic 0
diff --git a/arch/x86/include/asm/scatterlist.h b/arch/x86/include/asm/scatterlist.h
deleted file mode 100644
index 4240878b9d76..000000000000
--- a/arch/x86/include/asm/scatterlist.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _ASM_X86_SCATTERLIST_H
-#define _ASM_X86_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#define ARCH_HAS_SG_CHAIN
-
-#endif /* _ASM_X86_SCATTERLIST_H */
diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h
index 49adfd7bb4a4..0da7409f0bec 100644
--- a/arch/x86/include/asm/smpboot_hooks.h
+++ b/arch/x86/include/asm/smpboot_hooks.h
@@ -17,11 +17,11 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
 	spin_unlock_irqrestore(&rtc_lock, flags);
 	local_flush_tlb();
 	pr_debug("1.\n");
-	*((volatile unsigned short *)phys_to_virt(apic->trampoline_phys_high)) =
+	*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
 							 start_eip >> 4;
 	pr_debug("2.\n");
-	*((volatile unsigned short *)phys_to_virt(apic->trampoline_phys_low)) =
+	*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
 							 start_eip & 0xf;
 	pr_debug("3.\n");
 }
 
@@ -42,7 +42,7 @@ static inline void smpboot_restore_warm_reset_vector(void)
 	CMOS_WRITE(0, 0xf);
 	spin_unlock_irqrestore(&rtc_lock, flags);
 
-	*((volatile u32 *)phys_to_virt(apic->trampoline_phys_low)) = 0;
+	*((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
 }
 
 static inline void __init smpboot_setup_io_apic(void)
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index d949ef28c48b..7e7a79ada658 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -52,24 +52,170 @@ extern void xsave_init(void);
 extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
 extern int init_fpu(struct task_struct *child);
 
-static inline int fpu_xrstor_checking(struct xsave_struct *fx)
+/* These macros all use (%edi)/(%rdi) as the single memory argument. */
+#define XSAVE		".byte " REX_PREFIX "0x0f,0xae,0x27"
+#define XSAVEOPT	".byte " REX_PREFIX "0x0f,0xae,0x37"
+#define XSAVES		".byte " REX_PREFIX "0x0f,0xc7,0x2f"
+#define XRSTOR		".byte " REX_PREFIX "0x0f,0xae,0x2f"
+#define XRSTORS		".byte " REX_PREFIX "0x0f,0xc7,0x1f"
+
+#define xstate_fault	".section .fixup,\"ax\"\n"	\
+			"3:  movl $-1,%[err]\n"		\
+			"    jmp  2b\n"			\
+			".previous\n"			\
+			_ASM_EXTABLE(1b, 3b)		\
+			: [err] "=r" (err)
+
+/*
+ * This function is called only during boot time when x86 caps are not set
+ * up and alternative can not be used yet.
+ */
+static inline int xsave_state_booting(struct xsave_struct *fx, u64 mask)
 {
-	int err;
+	u32 lmask = mask;
+	u32 hmask = mask >> 32;
+	int err = 0;
+
+	WARN_ON(system_state != SYSTEM_BOOTING);
+
+	if (boot_cpu_has(X86_FEATURE_XSAVES))
+		asm volatile("1:"XSAVES"\n\t"
+			"2:\n\t"
+			: : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+			:   "memory");
+	else
+		asm volatile("1:"XSAVE"\n\t"
+			"2:\n\t"
+			: : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+			:   "memory");
+
+	asm volatile(xstate_fault
+		     : "0" (0)
+		     : "memory");
+
+	return err;
+}
+
+/*
+ * This function is called only during boot time when x86 caps are not set
+ * up and alternative can not be used yet.
+ */
+static inline int xrstor_state_booting(struct xsave_struct *fx, u64 mask)
+{
+	u32 lmask = mask;
+	u32 hmask = mask >> 32;
+	int err = 0;
+
+	WARN_ON(system_state != SYSTEM_BOOTING);
 
-	asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
-		     "2:\n"
-		     ".section .fixup,\"ax\"\n"
-		     "3:  movl $-1,%[err]\n"
-		     "    jmp  2b\n"
-		     ".previous\n"
-		     _ASM_EXTABLE(1b, 3b)
-		     : [err] "=r" (err)
-		     : "D" (fx), "m" (*fx), "a" (-1), "d" (-1), "0" (0)
+	if (boot_cpu_has(X86_FEATURE_XSAVES))
+		asm volatile("1:"XRSTORS"\n\t"
+			"2:\n\t"
+			: : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+			:   "memory");
+	else
+		asm volatile("1:"XRSTOR"\n\t"
+			"2:\n\t"
+			: : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+			:   "memory");
+
+	asm volatile(xstate_fault
+		     : "0" (0)
+		     : "memory");
+
+	return err;
+}
+
+/*
+ * Save processor xstate to xsave area.
+ */
+static inline int xsave_state(struct xsave_struct *fx, u64 mask)
+{
+	u32 lmask = mask;
+	u32 hmask = mask >> 32;
+	int err = 0;
+
+	/*
+	 * If xsaves is enabled, xsaves replaces xsaveopt because
+	 * it supports compact format and supervisor states in addition to
+	 * modified optimization in xsaveopt.
+	 *
+	 * Otherwise, if xsaveopt is enabled, xsaveopt replaces xsave
+	 * because xsaveopt supports modified optimization which is not
+	 * supported by xsave.
+	 *
+	 * If none of xsaves and xsaveopt is enabled, use xsave.
+	 */
+	alternative_input_2(
+		"1:"XSAVE,
+		"1:"XSAVEOPT,
+		X86_FEATURE_XSAVEOPT,
+		"1:"XSAVES,
+		X86_FEATURE_XSAVES,
+		[fx] "D" (fx), "a" (lmask), "d" (hmask) :
+		"memory");
+	asm volatile("2:\n\t"
+		     xstate_fault
+		     : "0" (0)
 		     : "memory");
 
 	return err;
 }
 
+/*
+ * Restore processor xstate from xsave area.
+ */
+static inline int xrstor_state(struct xsave_struct *fx, u64 mask)
+{
+	int err = 0;
+	u32 lmask = mask;
+	u32 hmask = mask >> 32;
+
+	/*
+	 * Use xrstors to restore context if it is enabled. xrstors supports
+	 * compacted format of xsave area which is not supported by xrstor.
+	 */
+	alternative_input(
+		"1: " XRSTOR,
+		"1: " XRSTORS,
+		X86_FEATURE_XSAVES,
+		"D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+		: "memory");
+
+	asm volatile("2:\n"
+		     xstate_fault
+		     : "0" (0)
+		     : "memory");
+
+	return err;
+}
+
+/*
+ * Save xstate context for old process during context switch.
+ */
+static inline void fpu_xsave(struct fpu *fpu)
+{
+	xsave_state(&fpu->state->xsave, -1);
+}
+
+/*
+ * Restore xstate context for new process during context switch.
+ */
+static inline int fpu_xrstor_checking(struct xsave_struct *fx)
+{
+	return xrstor_state(fx, -1);
+}
+
+/*
+ * Save xstate to user space xsave area.
+ *
+ * We don't use modified optimization because xrstor/xrstors might track
+ * a different application.
+ *
+ * We don't use compacted format xsave area for
+ * backward compatibility for old applications which don't understand
+ * compacted format of xsave area.
+ */
 static inline int xsave_user(struct xsave_struct __user *buf)
 {
 	int err;
@@ -83,69 +229,34 @@ static inline int xsave_user(struct xsave_struct __user *buf)
 		return -EFAULT;
 
 	__asm__ __volatile__(ASM_STAC "\n"
-			     "1: .byte " REX_PREFIX "0x0f,0xae,0x27\n"
+			     "1:"XSAVE"\n"
 			     "2: " ASM_CLAC "\n"
-			     ".section .fixup,\"ax\"\n"
-			     "3:  movl $-1,%[err]\n"
-			     "    jmp  2b\n"
-			     ".previous\n"
-			     _ASM_EXTABLE(1b,3b)
-			     : [err] "=r" (err)
+			     xstate_fault
 			     : "D" (buf), "a" (-1), "d" (-1), "0" (0)
 			     : "memory");
 	return err;
 }
 
+/*
+ * Restore xstate from user space xsave area.
+ */
 static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask)
 {
-	int err;
+	int err = 0;
 	struct xsave_struct *xstate = ((__force struct xsave_struct *)buf);
 	u32 lmask = mask;
 	u32 hmask = mask >> 32;
 
 	__asm__ __volatile__(ASM_STAC "\n"
-			     "1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n"
+			     "1:"XRSTOR"\n"
 			     "2: " ASM_CLAC "\n"
-			     ".section .fixup,\"ax\"\n"
-			     "3:  movl $-1,%[err]\n"
-			     "    jmp  2b\n"
-			     ".previous\n"
-			     _ASM_EXTABLE(1b,3b)
-			     : [err] "=r" (err)
+			     xstate_fault
 			     : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0)
 			     : "memory");	/* memory required? */
 	return err;
 }
 
-static inline void xrstor_state(struct xsave_struct *fx, u64 mask)
-{
-	u32 lmask = mask;
-	u32 hmask = mask >> 32;
-
-	asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
-		     : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
-		     :   "memory");
-}
-
-static inline void xsave_state(struct xsave_struct *fx, u64 mask)
-{
-	u32 lmask = mask;
-	u32 hmask = mask >> 32;
-
-	asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t"
-		     : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
-		     :   "memory");
-}
-
-static inline void fpu_xsave(struct fpu *fpu)
-{
-	/* This, however, we can work around by forcing the compiler to select
-	   an addressing mode that doesn't require extended registers. */
-	alternative_input(
-		".byte " REX_PREFIX "0x0f,0xae,0x27",
-		".byte " REX_PREFIX "0x0f,0xae,0x37",
-		X86_FEATURE_XSAVEOPT,
-		[fx] "D" (&fpu->state->xsave), "a" (-1), "d" (-1) :
-		"memory");
-}
+void *get_xsave_addr(struct xsave_struct *xsave, int xstate);
+void setup_xstate_comp(void);
+
 #endif
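The xstate_fault macro factored out above supplies both the .fixup stanza and the leading [err] output constraint, so a caller places its own constraints after it. A hedged sketch of the resulting wiring (mirroring the fpu_xrstor_checking/xsave_user hunks, not a new API): label 1: marks the instruction that may fault, 2: is the resume point, and the exception table sends a fault at 1: to the fixup code at 3:, which stores -1 in err and jumps back to 2:.

	static inline int xrstor_sketch(struct xsave_struct *fx)
	{
		int err = 0;

		/* xstate_fault expands to the .fixup section plus
		 * ": [err] "=r" (err)", so inputs follow directly: */
		asm volatile("1:" XRSTOR "\n\t"
			     "2:\n\t"
			     xstate_fault
			     : "0" (0), "D" (fx), "m" (*fx), "a" (-1), "d" (-1)
			     : "memory");
		return err;	/* 0 on success, -1 if 1: faulted */
	}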
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index eac9e92fe181..e21331ce368f 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -149,6 +149,9 @@
 
 #define MSR_CORE_C1_RES			0x00000660
 
+#define MSR_CC6_DEMOTION_POLICY_CONFIG	0x00000668
+#define MSR_MC6_DEMOTION_POLICY_CONFIG	0x00000669
+
 #define MSR_AMD64_MC0_MASK		0xc0010044
 
 #define MSR_IA32_MCx_CTL(x)		(MSR_IA32_MC0_CTL + 4*(x))
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index bde3993624f1..b5ea75c4a4b4 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -118,4 +118,5 @@ ifeq ($(CONFIG_X86_64),y)
 
 	obj-$(CONFIG_PCI_MMCONFIG)	+= mmconf-fam10h_64.o
 	obj-y				+= vsmp_64.o
+	obj-$(CONFIG_KEXEC)		+= kexec-bzimage64.o
 endif
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index a531f6564ed0..b436fc735aa4 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -31,6 +31,7 @@
 #include <linux/module.h>
 #include <linux/dmi.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/slab.h>
 #include <linux/bootmem.h>
 #include <linux/ioport.h>
@@ -43,6 +44,7 @@
 #include <asm/io.h>
 #include <asm/mpspec.h>
 #include <asm/smp.h>
+#include <asm/i8259.h>
 
 #include "sleep.h" /* To include x86_acpi_suspend_lowlevel */
 static int __initdata acpi_force = 0;
@@ -93,44 +95,7 @@ static u32 isa_irq_to_gsi[NR_IRQS_LEGACY] __read_mostly = {
 	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
 };
 
-static unsigned int gsi_to_irq(unsigned int gsi)
-{
-	unsigned int irq = gsi + NR_IRQS_LEGACY;
-	unsigned int i;
-
-	for (i = 0; i < NR_IRQS_LEGACY; i++) {
-		if (isa_irq_to_gsi[i] == gsi) {
-			return i;
-		}
-	}
-
-	/* Provide an identity mapping of gsi == irq
-	 * except on truly weird platforms that have
-	 * non isa irqs in the first 16 gsis.
-	 */
-	if (gsi >= NR_IRQS_LEGACY)
-		irq = gsi;
-	else
-		irq = gsi_top + gsi;
-
-	return irq;
-}
-
-static u32 irq_to_gsi(int irq)
-{
-	unsigned int gsi;
-
-	if (irq < NR_IRQS_LEGACY)
-		gsi = isa_irq_to_gsi[irq];
-	else if (irq < gsi_top)
-		gsi = irq;
-	else if (irq < (gsi_top + NR_IRQS_LEGACY))
-		gsi = irq - gsi_top;
-	else
-		gsi = 0xffffffff;
-
-	return gsi;
-}
+#define ACPI_INVALID_GSI		INT_MIN
 
 /*
  * This is just a simple wrapper around early_ioremap(),
@@ -341,11 +306,145 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
 #endif				/* CONFIG_X86_LOCAL_APIC */
 
 #ifdef CONFIG_X86_IO_APIC
+#define MP_ISA_BUS		0
+
+static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
+					  u32 gsi)
+{
+	int ioapic;
+	int pin;
+	struct mpc_intsrc mp_irq;
+
+	/*
+	 * Convert 'gsi' to 'ioapic.pin'.
+	 */
+	ioapic = mp_find_ioapic(gsi);
+	if (ioapic < 0)
+		return;
+	pin = mp_find_ioapic_pin(ioapic, gsi);
+
+	/*
+	 * TBD: This check is for faulty timer entries, where the override
+	 *      erroneously sets the trigger to level, resulting in a HUGE
+	 *      increase of timer interrupts!
+	 */
+	if ((bus_irq == 0) && (trigger == 3))
+		trigger = 1;
+
+	mp_irq.type = MP_INTSRC;
+	mp_irq.irqtype = mp_INT;
+	mp_irq.irqflag = (trigger << 2) | polarity;
+	mp_irq.srcbus = MP_ISA_BUS;
+	mp_irq.srcbusirq = bus_irq;	/* IRQ */
+	mp_irq.dstapic = mpc_ioapic_id(ioapic);	/* APIC ID */
+	mp_irq.dstirq = pin;	/* INTIN# */
+
+	mp_save_irq(&mp_irq);
+
+	/*
+	 * Reset default identity mapping if gsi is also a legacy IRQ,
+	 * otherwise there will be more than one entry with the same GSI
+	 * and acpi_isa_irq_to_gsi() may give wrong result.
+	 */
+	if (gsi < nr_legacy_irqs() && isa_irq_to_gsi[gsi] == gsi)
+		isa_irq_to_gsi[gsi] = ACPI_INVALID_GSI;
+	isa_irq_to_gsi[bus_irq] = gsi;
+}
+
+static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
+			      int polarity)
+{
+#ifdef CONFIG_X86_MPPARSE
+	struct mpc_intsrc mp_irq;
+	struct pci_dev *pdev;
+	unsigned char number;
+	unsigned int devfn;
+	int ioapic;
+	u8 pin;
+
+	if (!acpi_ioapic)
+		return 0;
+	if (!dev || !dev_is_pci(dev))
+		return 0;
+
+	pdev = to_pci_dev(dev);
+	number = pdev->bus->number;
+	devfn = pdev->devfn;
+	pin = pdev->pin;
+	/* print the entry should happen on mptable identically */
+	mp_irq.type = MP_INTSRC;
+	mp_irq.irqtype = mp_INT;
+	mp_irq.irqflag = (trigger == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
+				(polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
+	mp_irq.srcbus = number;
+	mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
+	ioapic = mp_find_ioapic(gsi);
+	mp_irq.dstapic = mpc_ioapic_id(ioapic);
+	mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
+
+	mp_save_irq(&mp_irq);
+#endif
+	return 0;
+}
+
+static int mp_register_gsi(struct device *dev, u32 gsi, int trigger,
+			   int polarity)
+{
+	int irq, node;
+
+	if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
+		return gsi;
+
+	/* Don't set up the ACPI SCI because it's already set up */
+	if (acpi_gbl_FADT.sci_interrupt == gsi)
+		return gsi;
+
+	trigger = trigger == ACPI_EDGE_SENSITIVE ? 0 : 1;
+	polarity = polarity == ACPI_ACTIVE_HIGH ? 0 : 1;
+	node = dev ? dev_to_node(dev) : NUMA_NO_NODE;
+	if (mp_set_gsi_attr(gsi, trigger, polarity, node)) {
+		pr_warn("Failed to set pin attr for GSI%d\n", gsi);
+		return -1;
+	}
+
+	irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC);
+	if (irq < 0)
+		return irq;
+
+	if (enable_update_mptable)
+		mp_config_acpi_gsi(dev, gsi, trigger, polarity);
+
+	return irq;
+}
+
+static void mp_unregister_gsi(u32 gsi)
+{
+	int irq;
+
+	if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
+		return;
+
+	if (acpi_gbl_FADT.sci_interrupt == gsi)
+		return;
+
+	irq = mp_map_gsi_to_irq(gsi, 0);
+	if (irq > 0)
+		mp_unmap_irq(irq);
+}
+
+static struct irq_domain_ops acpi_irqdomain_ops = {
+	.map = mp_irqdomain_map,
+	.unmap = mp_irqdomain_unmap,
+};
 
 static int __init
 acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
 {
 	struct acpi_madt_io_apic *ioapic = NULL;
+	struct ioapic_domain_cfg cfg = {
+		.type = IOAPIC_DOMAIN_DYNAMIC,
+		.ops = &acpi_irqdomain_ops,
+	};
 
 	ioapic = (struct acpi_madt_io_apic *)header;
 
@@ -354,8 +453,12 @@ acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
 
 	acpi_table_print_madt_entry(header);
 
-	mp_register_ioapic(ioapic->id,
-			   ioapic->address, ioapic->global_irq_base);
+	/* Statically assign IRQ numbers for IOAPICs hosting legacy IRQs */
+	if (ioapic->global_irq_base < nr_legacy_irqs())
+		cfg.type = IOAPIC_DOMAIN_LEGACY;
+
+	mp_register_ioapic(ioapic->id, ioapic->address, ioapic->global_irq_base,
+			   &cfg);
 
 	return 0;
 }
@@ -378,11 +481,6 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger,
 	if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK)
 		polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
 
-	/*
-	 * mp_config_acpi_legacy_irqs() already setup IRQs < 16
-	 * If GSI is < 16, this will update its flags,
-	 * else it will create a new mp_irqs[] entry.
-	 */
 	mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
 
 	/*
@@ -504,25 +602,28 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
 		outb(new >> 8, 0x4d1);
 }
 
-int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
+int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp)
 {
-	*irq = gsi_to_irq(gsi);
+	int irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK);
 
-#ifdef CONFIG_X86_IO_APIC
-	if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC)
-		setup_IO_APIC_irq_extra(gsi);
-#endif
+	if (irq >= 0) {
+		*irqp = irq;
+		return 0;
+	}
 
-	return 0;
+	return -1;
 }
 EXPORT_SYMBOL_GPL(acpi_gsi_to_irq);
 
 int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi)
 {
-	if (isa_irq >= 16)
-		return -1;
-	*gsi = irq_to_gsi(isa_irq);
-	return 0;
+	if (isa_irq < nr_legacy_irqs() &&
+	    isa_irq_to_gsi[isa_irq] != ACPI_INVALID_GSI) {
+		*gsi = isa_irq_to_gsi[isa_irq];
+		return 0;
+	}
+
+	return -1;
 }
 
 static int acpi_register_gsi_pic(struct device *dev, u32 gsi,
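With this hunk, acpi_gsi_to_irq() stops doing the old gsi_to_irq() arithmetic and instead asks the IOAPIC core to allocate a mapping (IOAPIC_MAP_ALLOC) only when a matching MP entry exists (IOAPIC_MAP_CHECK). A minimal caller-side sketch, assuming a hypothetical demo driver:

	#include <linux/acpi.h>
	#include <linux/interrupt.h>

	static irqreturn_t demo_handler(int irq, void *dev_id)
	{
		return IRQ_HANDLED;
	}

	static int demo_attach(u32 gsi)
	{
		unsigned int irq;

		if (acpi_gsi_to_irq(gsi, &irq))	/* 0 on success, -1 otherwise */
			return -ENXIO;

		return request_irq(irq, demo_handler, 0, "demo", NULL);
	}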
@@ -542,15 +643,25 @@ static int acpi_register_gsi_pic(struct device *dev, u32 gsi,
 static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
 				    int trigger, int polarity)
 {
+	int irq = gsi;
+
 #ifdef CONFIG_X86_IO_APIC
-	gsi = mp_register_gsi(dev, gsi, trigger, polarity);
+	irq = mp_register_gsi(dev, gsi, trigger, polarity);
 #endif
 
-	return gsi;
+	return irq;
+}
+
+static void acpi_unregister_gsi_ioapic(u32 gsi)
+{
+#ifdef CONFIG_X86_IO_APIC
+	mp_unregister_gsi(gsi);
+#endif
 }
 
 int (*__acpi_register_gsi)(struct device *dev, u32 gsi,
 			   int trigger, int polarity) = acpi_register_gsi_pic;
+void (*__acpi_unregister_gsi)(u32 gsi) = NULL;
 
 #ifdef CONFIG_ACPI_SLEEP
 int (*acpi_suspend_lowlevel)(void) = x86_acpi_suspend_lowlevel;
@@ -564,32 +675,22 @@ int (*acpi_suspend_lowlevel)(void);
  */
 int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
 {
-	unsigned int irq;
-	unsigned int plat_gsi = gsi;
-
-	plat_gsi = (*__acpi_register_gsi)(dev, gsi, trigger, polarity);
-	irq = gsi_to_irq(plat_gsi);
-
-	return irq;
+	return __acpi_register_gsi(dev, gsi, trigger, polarity);
 }
 EXPORT_SYMBOL_GPL(acpi_register_gsi);
 
 void acpi_unregister_gsi(u32 gsi)
 {
+	if (__acpi_unregister_gsi)
+		__acpi_unregister_gsi(gsi);
 }
 EXPORT_SYMBOL_GPL(acpi_unregister_gsi);
 
-void __init acpi_set_irq_model_pic(void)
-{
-	acpi_irq_model = ACPI_IRQ_MODEL_PIC;
-	__acpi_register_gsi = acpi_register_gsi_pic;
-	acpi_ioapic = 0;
-}
-
-void __init acpi_set_irq_model_ioapic(void)
+static void __init acpi_set_irq_model_ioapic(void)
 {
 	acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
 	__acpi_register_gsi = acpi_register_gsi_ioapic;
+	__acpi_unregister_gsi = acpi_unregister_gsi_ioapic;
 	acpi_ioapic = 1;
 }
 
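acpi_register_gsi()/acpi_unregister_gsi() now simply dispatch through the __acpi_register_gsi/__acpi_unregister_gsi hooks installed by acpi_set_irq_model_ioapic(), and return a Linux IRQ number directly instead of re-translating the GSI. A hedged usage sketch (the level/low attributes here are just an example, not prescribed by the patch):

	int irq = acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE,
				    ACPI_ACTIVE_LOW);
	if (irq >= 0) {
		/* ... request_irq(irq, ...), do work ... */
		acpi_unregister_gsi(gsi);	/* routed to mp_unregister_gsi() */
	}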
@@ -825,9 +926,8 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
 	 * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
 	 */
 
-	count =
-	    acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE,
-				  acpi_parse_lapic_addr_ovr, 0);
+	count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE,
+				      acpi_parse_lapic_addr_ovr, 0);
 	if (count < 0) {
 		printk(KERN_ERR PREFIX
 		       "Error parsing LAPIC address override entry\n");
@@ -852,9 +952,8 @@ static int __init acpi_parse_madt_lapic_entries(void)
 	 * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
 	 */
 
-	count =
-	    acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE,
-				  acpi_parse_lapic_addr_ovr, 0);
+	count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE,
+				      acpi_parse_lapic_addr_ovr, 0);
 	if (count < 0) {
 		printk(KERN_ERR PREFIX
 		       "Error parsing LAPIC address override entry\n");
@@ -882,11 +981,10 @@ static int __init acpi_parse_madt_lapic_entries(void)
 		return count;
 	}
 
-	x2count =
-	    acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC_NMI,
-			      acpi_parse_x2apic_nmi, 0);
-	count =
-	    acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI, acpi_parse_lapic_nmi, 0);
+	x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC_NMI,
+					acpi_parse_x2apic_nmi, 0);
+	count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI,
+				      acpi_parse_lapic_nmi, 0);
 	if (count < 0 || x2count < 0) {
 		printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
 		/* TBD: Cleanup to allow fallback to MPS */
@@ -897,44 +995,7 @@ static int __init acpi_parse_madt_lapic_entries(void)
 #endif	/* CONFIG_X86_LOCAL_APIC */
 
 #ifdef CONFIG_X86_IO_APIC
-#define MP_ISA_BUS		0
-
-void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
-{
-	int ioapic;
-	int pin;
-	struct mpc_intsrc mp_irq;
-
-	/*
-	 * Convert 'gsi' to 'ioapic.pin'.
-	 */
-	ioapic = mp_find_ioapic(gsi);
-	if (ioapic < 0)
-		return;
-	pin = mp_find_ioapic_pin(ioapic, gsi);
-
-	/*
-	 * TBD: This check is for faulty timer entries, where the override
-	 *      erroneously sets the trigger to level, resulting in a HUGE
-	 *      increase of timer interrupts!
-	 */
-	if ((bus_irq == 0) && (trigger == 3))
-		trigger = 1;
-
-	mp_irq.type = MP_INTSRC;
-	mp_irq.irqtype = mp_INT;
-	mp_irq.irqflag = (trigger << 2) | polarity;
-	mp_irq.srcbus = MP_ISA_BUS;
-	mp_irq.srcbusirq = bus_irq;	/* IRQ */
-	mp_irq.dstapic = mpc_ioapic_id(ioapic);	/* APIC ID */
-	mp_irq.dstirq = pin;	/* INTIN# */
-
-	mp_save_irq(&mp_irq);
-
-	isa_irq_to_gsi[bus_irq] = gsi;
-}
-
-void __init mp_config_acpi_legacy_irqs(void)
+static void __init mp_config_acpi_legacy_irqs(void)
 {
 	int i;
 	struct mpc_intsrc mp_irq;
@@ -952,7 +1013,7 @@ void __init mp_config_acpi_legacy_irqs(void)
 	 * Use the default configuration for the IRQs 0-15. Unless
 	 * overridden by (MADT) interrupt source override entries.
 	 */
-	for (i = 0; i < 16; i++) {
+	for (i = 0; i < nr_legacy_irqs(); i++) {
 		int ioapic, pin;
 		unsigned int dstapic;
 		int idx;
@@ -1000,84 +1061,6 @@ void __init mp_config_acpi_legacy_irqs(void)
 	}
 }
 
-static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
-			      int polarity)
-{
-#ifdef CONFIG_X86_MPPARSE
-	struct mpc_intsrc mp_irq;
-	struct pci_dev *pdev;
-	unsigned char number;
-	unsigned int devfn;
-	int ioapic;
-	u8 pin;
-
-	if (!acpi_ioapic)
-		return 0;
-	if (!dev || !dev_is_pci(dev))
-		return 0;
-
-	pdev = to_pci_dev(dev);
-	number = pdev->bus->number;
-	devfn = pdev->devfn;
-	pin = pdev->pin;
-	/* print the entry should happen on mptable identically */
-	mp_irq.type = MP_INTSRC;
-	mp_irq.irqtype = mp_INT;
-	mp_irq.irqflag = (trigger == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
-				(polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
-	mp_irq.srcbus = number;
-	mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
-	ioapic = mp_find_ioapic(gsi);
-	mp_irq.dstapic = mpc_ioapic_id(ioapic);
-	mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
-
-	mp_save_irq(&mp_irq);
-#endif
-	return 0;
-}
-
-int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
-{
-	int ioapic;
-	int ioapic_pin;
-	struct io_apic_irq_attr irq_attr;
-	int ret;
-
-	if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
-		return gsi;
-
-	/* Don't set up the ACPI SCI because it's already set up */
-	if (acpi_gbl_FADT.sci_interrupt == gsi)
-		return gsi;
-
-	ioapic = mp_find_ioapic(gsi);
-	if (ioapic < 0) {
-		printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
-		return gsi;
-	}
-
-	ioapic_pin = mp_find_ioapic_pin(ioapic, gsi);
-
-	if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
-		printk(KERN_ERR "Invalid reference to IOAPIC pin "
-		       "%d-%d\n", mpc_ioapic_id(ioapic),
-		       ioapic_pin);
-		return gsi;
-	}
-
-	if (enable_update_mptable)
-		mp_config_acpi_gsi(dev, gsi, trigger, polarity);
-
-	set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin,
-			     trigger == ACPI_EDGE_SENSITIVE ? 0 : 1,
-			     polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
-	ret = io_apic_set_pci_routing(dev, gsi_to_irq(gsi), &irq_attr);
-	if (ret < 0)
-		gsi = INT_MIN;
-
-	return gsi;
-}
-
 /*
  * Parse IOAPIC related entries in MADT
  * returns 0 on success, < 0 on error
@@ -1107,9 +1090,8 @@ static int __init acpi_parse_madt_ioapic_entries(void)
 		return -ENODEV;
 	}
 
-	count =
-	    acpi_table_parse_madt(ACPI_MADT_TYPE_IO_APIC, acpi_parse_ioapic,
-				  MAX_IO_APICS);
+	count = acpi_table_parse_madt(ACPI_MADT_TYPE_IO_APIC, acpi_parse_ioapic,
+				      MAX_IO_APICS);
 	if (!count) {
 		printk(KERN_ERR PREFIX "No IOAPIC entries present\n");
 		return -ENODEV;
@@ -1118,9 +1100,8 @@ static int __init acpi_parse_madt_ioapic_entries(void)
 		return count;
 	}
 
-	count =
-	    acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr,
-				  nr_irqs);
+	count = acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE,
+				      acpi_parse_int_src_ovr, nr_irqs);
 	if (count < 0) {
 		printk(KERN_ERR PREFIX
 		       "Error parsing interrupt source overrides entry\n");
@@ -1139,9 +1120,8 @@ static int __init acpi_parse_madt_ioapic_entries(void)
 	/* Fill in identity legacy mappings where no override */
 	mp_config_acpi_legacy_irqs();
 
-	count =
-	    acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src,
-				  nr_irqs);
+	count = acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE,
				      acpi_parse_nmi_src, nr_irqs);
 	if (count < 0) {
 		printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
 		/* TBD: Cleanup to allow fallback to MPS */
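The acpi_table_parse_madt() calls reflowed above all follow the same shape: one handler per MADT subtable type, invoked once per matching entry up to the given maximum. A minimal handler sketch (demo_parse_ioapic is hypothetical, not from this patch):

	#include <linux/acpi.h>

	static int __init demo_parse_ioapic(struct acpi_subtable_header *header,
					    const unsigned long end)
	{
		struct acpi_madt_io_apic *ioapic =
			(struct acpi_madt_io_apic *)header;

		if (BAD_MADT_ENTRY(ioapic, end))
			return -EINVAL;

		pr_info("IOAPIC id %u at %#x, GSI base %u\n",
			ioapic->id, ioapic->address, ioapic->global_irq_base);
		return 0;
	}

	/* walk every IO_APIC entry, at most MAX_IO_APICS of them */
	count = acpi_table_parse_madt(ACPI_MADT_TYPE_IO_APIC,
				      demo_parse_ioapic, MAX_IO_APICS);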
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index ad28db7e6bde..67760275544b 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -67,7 +67,7 @@ EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid);
 /*
  * The highest APIC ID seen during enumeration.
  */
-unsigned int max_physical_apicid;
+static unsigned int max_physical_apicid;
 
 /*
  * Bitmask of physically existing CPUs:
@@ -1342,17 +1342,6 @@ void setup_local_APIC(void)
 	/* always use the value from LDR */
 	early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
 		logical_smp_processor_id();
-
-	/*
-	 * Some NUMA implementations (NUMAQ) don't initialize apicid to
-	 * node mapping during NUMA init. Now that logical apicid is
-	 * guaranteed to be known, give it another chance. This is already
-	 * a bit too late - percpu allocation has already happened without
-	 * proper NUMA affinity.
-	 */
-	if (apic->x86_32_numa_cpu_node)
-		set_apicid_to_node(early_per_cpu(x86_cpu_to_apicid, cpu),
-				   apic->x86_32_numa_cpu_node(cpu));
 #endif
 
 	/*
@@ -2053,8 +2042,6 @@ void __init connect_bsp_APIC(void)
 		imcr_pic_to_apic();
 	}
 #endif
-	if (apic->enable_apic_mode)
-		apic->enable_apic_mode();
 }
 
 /**
@@ -2451,51 +2438,6 @@ static void apic_pm_activate(void) { }
 
 #ifdef CONFIG_X86_64
 
-static int apic_cluster_num(void)
-{
-	int i, clusters, zeros;
-	unsigned id;
-	u16 *bios_cpu_apicid;
-	DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
-
-	bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
-	bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
-
-	for (i = 0; i < nr_cpu_ids; i++) {
-		/* are we being called early in kernel startup? */
-		if (bios_cpu_apicid) {
-			id = bios_cpu_apicid[i];
-		} else if (i < nr_cpu_ids) {
-			if (cpu_present(i))
-				id = per_cpu(x86_bios_cpu_apicid, i);
-			else
-				continue;
-		} else
-			break;
-
-		if (id != BAD_APICID)
-			__set_bit(APIC_CLUSTERID(id), clustermap);
-	}
-
-	/* Problem:  Partially populated chassis may not have CPUs in some of
-	 * the APIC clusters they have been allocated.  Only present CPUs have
-	 * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
-	 * Since clusters are allocated sequentially, count zeros only if
-	 * they are bounded by ones.
-	 */
-	clusters = 0;
-	zeros = 0;
-	for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
-		if (test_bit(i, clustermap)) {
-			clusters += 1 + zeros;
-			zeros = 0;
-		} else
-			++zeros;
-	}
-
-	return clusters;
-}
-
 static int multi_checked;
 static int multi;
 
@@ -2540,20 +2482,7 @@ static void dmi_check_multi(void)
 int apic_is_clustered_box(void)
 {
 	dmi_check_multi();
-	if (multi)
-		return 1;
-
-	if (!is_vsmp_box())
-		return 0;
-
-	/*
-	 * ScaleMP vSMPowered boxes have one cluster per board and TSCs are
-	 * not guaranteed to be synced between boards
-	 */
-	if (apic_cluster_num() > 1)
-		return 1;
-
-	return 0;
+	return multi;
 }
 #endif
 
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 7c1b29479513..de918c410eae 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -168,21 +168,16 @@ static struct apic apic_flat = {
 	.disable_esr			= 0,
 	.dest_logical			= APIC_DEST_LOGICAL,
 	.check_apicid_used		= NULL,
-	.check_apicid_present		= NULL,
 
 	.vector_allocation_domain	= flat_vector_allocation_domain,
 	.init_apic_ldr			= flat_init_apic_ldr,
 
 	.ioapic_phys_id_map		= NULL,
 	.setup_apic_routing		= NULL,
-	.multi_timer_check		= NULL,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= NULL,
-	.setup_portio_remap		= NULL,
 	.check_phys_apicid_present	= default_check_phys_apicid_present,
-	.enable_apic_mode		= NULL,
 	.phys_pkg_id			= flat_phys_pkg_id,
-	.mps_oem_check			= NULL,
 
 	.get_apic_id			= flat_get_apic_id,
 	.set_apic_id			= set_apic_id,
@@ -196,10 +191,7 @@ static struct apic apic_flat = {
 	.send_IPI_all			= flat_send_IPI_all,
 	.send_IPI_self			= apic_send_IPI_self,
 
-	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
-	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
 	.wait_for_init_deassert		= false,
-	.smp_callin_clear_local_apic	= NULL,
 	.inquire_remote_apic		= default_inquire_remote_apic,
 
 	.read				= native_apic_mem_read,
@@ -283,7 +275,6 @@ static struct apic apic_physflat = {
 	.disable_esr			= 0,
 	.dest_logical			= 0,
 	.check_apicid_used		= NULL,
-	.check_apicid_present		= NULL,
 
 	.vector_allocation_domain	= default_vector_allocation_domain,
 	/* not needed, but shouldn't hurt: */
@@ -291,14 +282,10 @@ static struct apic apic_physflat = {
 
 	.ioapic_phys_id_map		= NULL,
 	.setup_apic_routing		= NULL,
-	.multi_timer_check		= NULL,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= NULL,
-	.setup_portio_remap		= NULL,
 	.check_phys_apicid_present	= default_check_phys_apicid_present,
-	.enable_apic_mode		= NULL,
 	.phys_pkg_id			= flat_phys_pkg_id,
-	.mps_oem_check			= NULL,
 
 	.get_apic_id			= flat_get_apic_id,
 	.set_apic_id			= set_apic_id,
@@ -312,10 +299,7 @@ static struct apic apic_physflat = {
 	.send_IPI_all			= physflat_send_IPI_all,
 	.send_IPI_self			= apic_send_IPI_self,
 
-	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
-	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
 	.wait_for_init_deassert		= false,
-	.smp_callin_clear_local_apic	= NULL,
 	.inquire_remote_apic		= default_inquire_remote_apic,
 
 	.read				= native_apic_mem_read,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 8c7c98249c20..b205cdbdbe6a 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -89,16 +89,6 @@ static const struct cpumask *noop_target_cpus(void)
 	return cpumask_of(0);
 }
 
-static unsigned long noop_check_apicid_used(physid_mask_t *map, int apicid)
-{
-	return physid_isset(apicid, *map);
-}
-
-static unsigned long noop_check_apicid_present(int bit)
-{
-	return physid_isset(bit, phys_cpu_present_map);
-}
-
 static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask,
 					  const struct cpumask *mask)
 {
@@ -133,27 +123,21 @@ struct apic apic_noop = {
 	.target_cpus			= noop_target_cpus,
 	.disable_esr			= 0,
 	.dest_logical			= APIC_DEST_LOGICAL,
-	.check_apicid_used		= noop_check_apicid_used,
-	.check_apicid_present		= noop_check_apicid_present,
+	.check_apicid_used		= default_check_apicid_used,
 
 	.vector_allocation_domain	= noop_vector_allocation_domain,
 	.init_apic_ldr			= noop_init_apic_ldr,
 
 	.ioapic_phys_id_map		= default_ioapic_phys_id_map,
 	.setup_apic_routing		= NULL,
-	.multi_timer_check		= NULL,
 
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= physid_set_mask_of_physid,
 
-	.setup_portio_remap		= NULL,
 	.check_phys_apicid_present	= default_check_phys_apicid_present,
-	.enable_apic_mode		= NULL,
 
 	.phys_pkg_id			= noop_phys_pkg_id,
 
-	.mps_oem_check			= NULL,
-
 	.get_apic_id			= noop_get_apic_id,
 	.set_apic_id			= NULL,
 	.apic_id_mask			= 0x0F << 24,
@@ -168,12 +152,7 @@ struct apic apic_noop = {
 
 	.wakeup_secondary_cpu		= noop_wakeup_secondary_cpu,
 
-	/* should be safe */
-	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
-	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
-
 	.wait_for_init_deassert		= false,
-	.smp_callin_clear_local_apic	= NULL,
 	.inquire_remote_apic		= NULL,
 
 	.read				= noop_apic_read,
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index a5b45df8bc88..ae915391ebec 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -217,21 +217,16 @@ static const struct apic apic_numachip __refconst = {
 	.disable_esr			= 0,
 	.dest_logical			= 0,
 	.check_apicid_used		= NULL,
-	.check_apicid_present		= NULL,
 
 	.vector_allocation_domain	= default_vector_allocation_domain,
 	.init_apic_ldr			= flat_init_apic_ldr,
 
 	.ioapic_phys_id_map		= NULL,
 	.setup_apic_routing		= NULL,
-	.multi_timer_check		= NULL,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= NULL,
-	.setup_portio_remap		= NULL,
 	.check_phys_apicid_present	= default_check_phys_apicid_present,
-	.enable_apic_mode		= NULL,
 	.phys_pkg_id			= numachip_phys_pkg_id,
-	.mps_oem_check			= NULL,
 
 	.get_apic_id			= get_apic_id,
 	.set_apic_id			= set_apic_id,
@@ -246,10 +241,7 @@ static const struct apic apic_numachip __refconst = {
 	.send_IPI_self			= numachip_send_IPI_self,
 
 	.wakeup_secondary_cpu		= numachip_wakeup_secondary,
-	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
-	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
 	.wait_for_init_deassert		= false,
-	.smp_callin_clear_local_apic	= NULL,
 	.inquire_remote_apic		= NULL,	/* REMRD not supported */
 
 	.read				= native_apic_mem_read,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index e4840aa7a255..c4a8d63f8220 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -31,11 +31,6 @@ static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid)
 	return 0;
 }
 
-static unsigned long bigsmp_check_apicid_present(int bit)
-{
-	return 1;
-}
-
 static int bigsmp_early_logical_apicid(int cpu)
 {
 	/* on bigsmp, logical apicid is the same as physical */
@@ -168,21 +163,16 @@ static struct apic apic_bigsmp = {
 	.disable_esr			= 1,
 	.dest_logical			= 0,
 	.check_apicid_used		= bigsmp_check_apicid_used,
-	.check_apicid_present		= bigsmp_check_apicid_present,
 
 	.vector_allocation_domain	= default_vector_allocation_domain,
 	.init_apic_ldr			= bigsmp_init_apic_ldr,
 
 	.ioapic_phys_id_map		= bigsmp_ioapic_phys_id_map,
 	.setup_apic_routing		= bigsmp_setup_apic_routing,
-	.multi_timer_check		= NULL,
 	.cpu_present_to_apicid		= bigsmp_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= physid_set_mask_of_physid,
-	.setup_portio_remap		= NULL,
 	.check_phys_apicid_present	= bigsmp_check_phys_apicid_present,
-	.enable_apic_mode		= NULL,
 	.phys_pkg_id			= bigsmp_phys_pkg_id,
-	.mps_oem_check			= NULL,
 
 	.get_apic_id			= bigsmp_get_apic_id,
 	.set_apic_id			= NULL,
@@ -196,11 +186,7 @@ static struct apic apic_bigsmp = {
 	.send_IPI_all			= bigsmp_send_IPI_all,
 	.send_IPI_self			= default_send_IPI_self,
 
-	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
-	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
-
 	.wait_for_init_deassert		= true,
-	.smp_callin_clear_local_apic	= NULL,
 	.inquire_remote_apic		= default_inquire_remote_apic,
 
 	.read				= native_apic_mem_read,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 81e08eff05ee..29290f554e79 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -31,6 +31,7 @@
 #include <linux/acpi.h>
 #include <linux/module.h>
 #include <linux/syscore_ops.h>
+#include <linux/irqdomain.h>
 #include <linux/msi.h>
 #include <linux/htirq.h>
 #include <linux/freezer.h>
@@ -62,6 +63,16 @@
 
 #define __apicdebuginit(type) static type __init
 
+#define	for_each_ioapic(idx)		\
+	for ((idx) = 0; (idx) < nr_ioapics; (idx)++)
+#define	for_each_ioapic_reverse(idx)	\
+	for ((idx) = nr_ioapics - 1; (idx) >= 0; (idx)--)
+#define	for_each_pin(idx, pin)		\
+	for ((pin) = 0; (pin) < ioapics[(idx)].nr_registers; (pin)++)
+#define	for_each_ioapic_pin(idx, pin)	\
+	for_each_ioapic((idx))		\
+		for_each_pin((idx), (pin))
+
 #define for_each_irq_pin(entry, head) \
 	for (entry = head; entry; entry = entry->next)
 
@@ -73,6 +84,17 @@ int sis_apic_bug = -1;
 
 static DEFINE_RAW_SPINLOCK(ioapic_lock);
 static DEFINE_RAW_SPINLOCK(vector_lock);
+static DEFINE_MUTEX(ioapic_mutex);
+static unsigned int ioapic_dynirq_base;
+static int ioapic_initialized;
+
+struct mp_pin_info {
+	int trigger;
+	int polarity;
+	int node;
+	int set;
+	u32 count;
+};
 
 static struct ioapic {
 	/*
@@ -87,7 +109,9 @@ static struct ioapic {
 	struct mpc_ioapic mp_config;
 	/* IO APIC gsi routing info */
 	struct mp_ioapic_gsi  gsi_config;
-	DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
+	struct ioapic_domain_cfg irqdomain_cfg;
+	struct irq_domain *irqdomain;
+	struct mp_pin_info *pin_info;
 } ioapics[MAX_IO_APICS];
 
 #define mpc_ioapic_ver(ioapic_idx)	ioapics[ioapic_idx].mp_config.apicver
@@ -107,6 +131,41 @@ struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx)
 	return &ioapics[ioapic_idx].gsi_config;
 }
 
+static inline int mp_ioapic_pin_count(int ioapic)
+{
+	struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic);
+
+	return gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1;
+}
+
+u32 mp_pin_to_gsi(int ioapic, int pin)
+{
+	return mp_ioapic_gsi_routing(ioapic)->gsi_base + pin;
+}
+
+/*
+ * Initialize all legacy IRQs and all pins on the first IOAPIC
+ * if we have legacy interrupt controller. Kernel boot option "pirq="
+ * may rely on non-legacy pins on the first IOAPIC.
+ */
+static inline int mp_init_irq_at_boot(int ioapic, int irq)
+{
+	if (!nr_legacy_irqs())
+		return 0;
+
+	return ioapic == 0 || (irq >= 0 && irq < nr_legacy_irqs());
+}
+
+static inline struct mp_pin_info *mp_pin_info(int ioapic_idx, int pin)
+{
+	return ioapics[ioapic_idx].pin_info + pin;
+}
+
+static inline struct irq_domain *mp_ioapic_irqdomain(int ioapic)
+{
+	return ioapics[ioapic].irqdomain;
+}
+
 int nr_ioapics;
 
 /* The one past the highest gsi number used */
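The new for_each_ioapic()/for_each_pin() iterators replace the open-coded double loops used throughout this file. A trivial sketch of how they compose (demo_count_pins is hypothetical):

	/* total number of routing entries across all IOAPICs */
	static unsigned int demo_count_pins(void)
	{
		int idx, pin;
		unsigned int n = 0;

		for_each_ioapic_pin(idx, pin)
			n++;

		return n;
	}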
@@ -118,9 +177,6 @@ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
 /* # of MP IRQ source entries */
 int mp_irq_entries;
 
-/* GSI interrupts */
-static int nr_irqs_gsi = NR_IRQS_LEGACY;
-
 #ifdef CONFIG_EISA
 int mp_bus_id_to_type[MAX_MP_BUSSES];
 #endif
@@ -149,8 +205,7 @@ static int __init parse_noapic(char *str)
 }
 early_param("noapic", parse_noapic);
 
-static int io_apic_setup_irq_pin(unsigned int irq, int node,
-				 struct io_apic_irq_attr *attr);
+static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node);
 
 /* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
 void mp_save_irq(struct mpc_intsrc *m)
@@ -182,19 +237,15 @@ static struct irq_pin_list *alloc_irq_pin_list(int node)
 	return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node);
 }
 
-
-/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
-static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY];
-
 int __init arch_early_irq_init(void)
 {
 	struct irq_cfg *cfg;
-	int count, node, i;
+	int i, node = cpu_to_node(0);
 
-	if (!legacy_pic->nr_legacy_irqs)
+	if (!nr_legacy_irqs())
 		io_apic_irqs = ~0UL;
 
-	for (i = 0; i < nr_ioapics; i++) {
+	for_each_ioapic(i) {
 		ioapics[i].saved_registers =
 			kzalloc(sizeof(struct IO_APIC_route_entry) *
 				ioapics[i].nr_registers, GFP_KERNEL);
@@ -202,28 +253,20 @@ int __init arch_early_irq_init(void)
 			pr_err("IOAPIC %d: suspend/resume impossible!\n", i);
 	}
 
-	cfg = irq_cfgx;
-	count = ARRAY_SIZE(irq_cfgx);
-	node = cpu_to_node(0);
-
-	for (i = 0; i < count; i++) {
-		irq_set_chip_data(i, &cfg[i]);
-		zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node);
-		zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node);
-		/*
-		 * For legacy IRQ's, start with assigning irq0 to irq15 to
-		 * IRQ0_VECTOR to IRQ15_VECTOR for all cpu's.
-		 */
-		if (i < legacy_pic->nr_legacy_irqs) {
-			cfg[i].vector = IRQ0_VECTOR + i;
-			cpumask_setall(cfg[i].domain);
-		}
+	/*
+	 * For legacy IRQ's, start with assigning irq0 to irq15 to
+	 * IRQ0_VECTOR to IRQ15_VECTOR for all cpu's.
+	 */
+	for (i = 0; i < nr_legacy_irqs(); i++) {
+		cfg = alloc_irq_and_cfg_at(i, node);
+		cfg->vector = IRQ0_VECTOR + i;
+		cpumask_setall(cfg->domain);
 	}
 
 	return 0;
 }
 
-static struct irq_cfg *irq_cfg(unsigned int irq)
+static inline struct irq_cfg *irq_cfg(unsigned int irq)
 {
 	return irq_get_chip_data(irq);
 }
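arch_early_irq_init() now allocates irq_cfg for the legacy range on demand instead of using the static irq_cfgx[] array, but keeps the fixed vector pre-assignment. A sketch of the resulting mapping, assuming the usual IRQ0_VECTOR of 0x30 (FIRST_EXTERNAL_VECTOR + 0x10) on x86 kernels of this era:

	/* IRQ0 -> 0x30, IRQ1 -> 0x31, ..., IRQ15 -> 0x3f */
	for (i = 0; i < nr_legacy_irqs(); i++)
		pr_debug("legacy IRQ%d -> vector %#x\n", i, IRQ0_VECTOR + i);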
@@ -265,7 +308,7 @@ static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
 	if (res < 0) {
 		if (res != -EEXIST)
 			return NULL;
-		cfg = irq_get_chip_data(at);
+		cfg = irq_cfg(at);
 		if (cfg)
 			return cfg;
 	}
@@ -425,6 +468,21 @@ static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pi
 	return 0;
 }
 
+static void __remove_pin_from_irq(struct irq_cfg *cfg, int apic, int pin)
+{
+	struct irq_pin_list **last, *entry;
+
+	last = &cfg->irq_2_pin;
+	for_each_irq_pin(entry, cfg->irq_2_pin)
+		if (entry->apic == apic && entry->pin == pin) {
+			*last = entry->next;
+			kfree(entry);
+			return;
+		} else {
+			last = &entry->next;
+		}
+}
+
 static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
 {
 	if (__add_pin_to_irq_node(cfg, node, apic, pin))
@@ -627,9 +685,8 @@ static void clear_IO_APIC (void)
 {
 	int apic, pin;
 
-	for (apic = 0; apic < nr_ioapics; apic++)
-		for (pin = 0; pin < ioapics[apic].nr_registers; pin++)
-			clear_IO_APIC_pin(apic, pin);
+	for_each_ioapic_pin(apic, pin)
+		clear_IO_APIC_pin(apic, pin);
 }
 
 #ifdef CONFIG_X86_32
@@ -678,13 +735,13 @@ int save_ioapic_entries(void)
 	int apic, pin;
 	int err = 0;
 
-	for (apic = 0; apic < nr_ioapics; apic++) {
+	for_each_ioapic(apic) {
 		if (!ioapics[apic].saved_registers) {
 			err = -ENOMEM;
 			continue;
 		}
 
-		for (pin = 0; pin < ioapics[apic].nr_registers; pin++)
+		for_each_pin(apic, pin)
 			ioapics[apic].saved_registers[pin] =
 				ioapic_read_entry(apic, pin);
 	}
@@ -699,11 +756,11 @@ void mask_ioapic_entries(void)
 {
 	int apic, pin;
 
-	for (apic = 0; apic < nr_ioapics; apic++) {
+	for_each_ioapic(apic) {
 		if (!ioapics[apic].saved_registers)
 			continue;
 
-		for (pin = 0; pin < ioapics[apic].nr_registers; pin++) {
+		for_each_pin(apic, pin) {
 			struct IO_APIC_route_entry entry;
 
 			entry = ioapics[apic].saved_registers[pin];
@@ -722,11 +779,11 @@ int restore_ioapic_entries(void)
 {
 	int apic, pin;
 
-	for (apic = 0; apic < nr_ioapics; apic++) {
+	for_each_ioapic(apic) {
 		if (!ioapics[apic].saved_registers)
 			continue;
 
-		for (pin = 0; pin < ioapics[apic].nr_registers; pin++)
+		for_each_pin(apic, pin)
 			ioapic_write_entry(apic, pin,
 					   ioapics[apic].saved_registers[pin]);
 	}
@@ -785,7 +842,7 @@ static int __init find_isa_irq_apic(int irq, int type)
 	if (i < mp_irq_entries) {
 		int ioapic_idx;
 
-		for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+		for_each_ioapic(ioapic_idx)
 			if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic)
 				return ioapic_idx;
 	}
@@ -799,7 +856,7 @@ static int __init find_isa_irq_apic(int irq, int type)
  */
 static int EISA_ELCR(unsigned int irq)
 {
-	if (irq < legacy_pic->nr_legacy_irqs) {
+	if (irq < nr_legacy_irqs()) {
 		unsigned int port = 0x4d0 + (irq >> 3);
 		return (inb(port) >> (irq & 7)) & 1;
 	}
@@ -939,29 +996,101 @@ static int irq_trigger(int idx)
 	return trigger;
 }
 
-static int pin_2_irq(int idx, int apic, int pin)
+static int alloc_irq_from_domain(struct irq_domain *domain, u32 gsi, int pin)
+{
+	int irq = -1;
+	int ioapic = (int)(long)domain->host_data;
+	int type = ioapics[ioapic].irqdomain_cfg.type;
+
+	switch (type) {
+	case IOAPIC_DOMAIN_LEGACY:
+		/*
+		 * Dynamically allocate IRQ number for non-ISA IRQs in the first 16
+		 * GSIs on some weird platforms.
+		 */
+		if (gsi < nr_legacy_irqs())
+			irq = irq_create_mapping(domain, pin);
+		else if (irq_create_strict_mappings(domain, gsi, pin, 1) == 0)
+			irq = gsi;
+		break;
+	case IOAPIC_DOMAIN_STRICT:
+		if (irq_create_strict_mappings(domain, gsi, pin, 1) == 0)
+			irq = gsi;
+		break;
+	case IOAPIC_DOMAIN_DYNAMIC:
+		irq = irq_create_mapping(domain, pin);
+		break;
+	default:
+		WARN(1, "ioapic: unknown irqdomain type %d\n", type);
+		break;
+	}
+
+	return irq > 0 ? irq : -1;
+}
+
+static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin,
+			     unsigned int flags)
 {
 	int irq;
-	int bus = mp_irqs[idx].srcbus;
-	struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(apic);
+	struct irq_domain *domain = mp_ioapic_irqdomain(ioapic);
+	struct mp_pin_info *info = mp_pin_info(ioapic, pin);
+
+	if (!domain)
+		return -1;
+
+	mutex_lock(&ioapic_mutex);
 
 	/*
-	 * Debugging check, we are in big trouble if this message pops up!
+	 * Don't use irqdomain to manage ISA IRQs because there may be
+	 * multiple IOAPIC pins sharing the same ISA IRQ number and
+	 * irqdomain only supports 1:1 mapping between IOAPIC pin and
+	 * IRQ number. A typical IOAPIC has 24 pins, pin 0-15 are used
+	 * for legacy IRQs and pin 16-23 are used for PCI IRQs (PIRQ A-H).
+	 * When ACPI is disabled, only legacy IRQ numbers (IRQ0-15) are
+	 * available, and some BIOSes may use MP Interrupt Source records
+	 * to override IRQ numbers for PIRQs instead of reprogramming
+	 * the interrupt routing logic. Thus there may be multiple pins
+	 * sharing the same legacy IRQ number when ACPI is disabled.
 	 */
-	if (mp_irqs[idx].dstirq != pin)
-		pr_err("broken BIOS or MPTABLE parser, ayiee!!\n");
-
-	if (test_bit(bus, mp_bus_not_pci)) {
+	if (idx >= 0 && test_bit(mp_irqs[idx].srcbus, mp_bus_not_pci)) {
 		irq = mp_irqs[idx].srcbusirq;
+		if (flags & IOAPIC_MAP_ALLOC) {
+			if (info->count == 0 &&
+			    mp_irqdomain_map(domain, irq, pin) != 0)
+				irq = -1;
+
+			/* special handling for timer IRQ0 */
+			if (irq == 0)
+				info->count++;
+		}
 	} else {
-		u32 gsi = gsi_cfg->gsi_base + pin;
+		irq = irq_find_mapping(domain, pin);
+		if (irq <= 0 && (flags & IOAPIC_MAP_ALLOC))
+			irq = alloc_irq_from_domain(domain, gsi, pin);
+	}
 
-		if (gsi >= NR_IRQS_LEGACY)
-			irq = gsi;
-		else
-			irq = gsi_top + gsi;
+	if (flags & IOAPIC_MAP_ALLOC) {
+		if (irq > 0)
+			info->count++;
+		else if (info->count == 0)
+			info->set = 0;
 	}
 
+	mutex_unlock(&ioapic_mutex);
+
+	return irq > 0 ? irq : -1;
+}
+
+static int pin_2_irq(int idx, int ioapic, int pin, unsigned int flags)
+{
+	u32 gsi = mp_pin_to_gsi(ioapic, pin);
+
+	/*
+	 * Debugging check, we are in big trouble if this message pops up!
+	 */
+	if (mp_irqs[idx].dstirq != pin)
+		pr_err("broken BIOS or MPTABLE parser, ayiee!!\n");
+
 #ifdef CONFIG_X86_32
 	/*
 	 * PCI IRQ command line redirection. Yes, limits are hardcoded.
@@ -972,16 +1101,58 @@ static int pin_2_irq(int idx, int apic, int pin)
 				apic_printk(APIC_VERBOSE, KERN_DEBUG
 						"disabling PIRQ%d\n", pin-16);
 			} else {
-				irq = pirq_entries[pin-16];
+				int irq = pirq_entries[pin-16];
 				apic_printk(APIC_VERBOSE, KERN_DEBUG
 						"using PIRQ%d -> IRQ %d\n",
 						pin-16, irq);
+				return irq;
 			}
 		}
 	}
 #endif
 
-	return irq;
+	return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags);
+}
+
+int mp_map_gsi_to_irq(u32 gsi, unsigned int flags)
+{
+	int ioapic, pin, idx;
+
+	ioapic = mp_find_ioapic(gsi);
+	if (ioapic < 0)
+		return -1;
+
+	pin = mp_find_ioapic_pin(ioapic, gsi);
+	idx = find_irq_entry(ioapic, pin, mp_INT);
+	if ((flags & IOAPIC_MAP_CHECK) && idx < 0)
+		return -1;
+
+	return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags);
+}
+
+void mp_unmap_irq(int irq)
+{
+	struct irq_data *data = irq_get_irq_data(irq);
+	struct mp_pin_info *info;
+	int ioapic, pin;
+
+	if (!data || !data->domain)
+		return;
+
+	ioapic = (int)(long)data->domain->host_data;
+	pin = (int)data->hwirq;
+	info = mp_pin_info(ioapic, pin);
+
+	mutex_lock(&ioapic_mutex);
+	if (--info->count == 0) {
+		info->set = 0;
+		if (irq < nr_legacy_irqs() &&
+		    ioapics[ioapic].irqdomain_cfg.type == IOAPIC_DOMAIN_LEGACY)
+			mp_irqdomain_unmap(data->domain, irq);
+		else
+			irq_dispose_mapping(irq);
+	}
+	mutex_unlock(&ioapic_mutex);
 }
 
 /*
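mp_map_pin_to_irq()/mp_unmap_irq() above form a reference-counted pair: each successful IOAPIC_MAP_ALLOC bumps info->count, and the mapping is torn down only when the count drops to zero. A hedged sketch of a balanced caller:

	int irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK);

	if (irq >= 0) {
		/* ... use the IRQ ... */
		mp_unmap_irq(irq);	/* drops the count; frees mapping at zero */
	}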
@@ -991,7 +1162,7 @@ static int pin_2_irq(int idx, int apic, int pin)
 int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
 			       struct io_apic_irq_attr *irq_attr)
 {
-	int ioapic_idx, i, best_guess = -1;
+	int irq, i, best_ioapic = -1, best_idx = -1;
 
 	apic_printk(APIC_DEBUG,
 		    "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
@@ -1001,44 +1172,56 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
 		    "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
 		return -1;
 	}
+
 	for (i = 0; i < mp_irq_entries; i++) {
 		int lbus = mp_irqs[i].srcbus;
+		int ioapic_idx, found = 0;
 
-		for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+		if (bus != lbus || mp_irqs[i].irqtype != mp_INT ||
+		    slot != ((mp_irqs[i].srcbusirq >> 2) & 0x1f))
+			continue;
+
+		for_each_ioapic(ioapic_idx)
 			if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic ||
-			    mp_irqs[i].dstapic == MP_APIC_ALL)
+			    mp_irqs[i].dstapic == MP_APIC_ALL) {
+				found = 1;
 				break;
+			}
+		if (!found)
+			continue;
 
-		if (!test_bit(lbus, mp_bus_not_pci) &&
-		    !mp_irqs[i].irqtype &&
-		    (bus == lbus) &&
-		    (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
-			int irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq);
+		/* Skip ISA IRQs */
+		irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq, 0);
+		if (irq > 0 && !IO_APIC_IRQ(irq))
+			continue;
 
-			if (!(ioapic_idx || IO_APIC_IRQ(irq)))
-				continue;
+		if (pin == (mp_irqs[i].srcbusirq & 3)) {
+			best_idx = i;
+			best_ioapic = ioapic_idx;
+			goto out;
+		}
 
-			if (pin == (mp_irqs[i].srcbusirq & 3)) {
-				set_io_apic_irq_attr(irq_attr, ioapic_idx,
-						     mp_irqs[i].dstirq,
-						     irq_trigger(i),
-						     irq_polarity(i));
-				return irq;
-			}
-			/*
-			 * Use the first all-but-pin matching entry as a
-			 * best-guess fuzzy result for broken mptables.
-			 */
-			if (best_guess < 0) {
-				set_io_apic_irq_attr(irq_attr, ioapic_idx,
-						     mp_irqs[i].dstirq,
-						     irq_trigger(i),
-						     irq_polarity(i));
-				best_guess = irq;
-			}
+		/*
+		 * Use the first all-but-pin matching entry as a
+		 * best-guess fuzzy result for broken mptables.
+		 */
+		if (best_idx < 0) {
+			best_idx = i;
+			best_ioapic = ioapic_idx;
 		}
 	}
-	return best_guess;
+	if (best_idx < 0)
+		return -1;
+
+out:
+	irq = pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq,
+			IOAPIC_MAP_ALLOC);
+	if (irq > 0)
+		set_io_apic_irq_attr(irq_attr, best_ioapic,
+				     mp_irqs[best_idx].dstirq,
+				     irq_trigger(best_idx),
+				     irq_polarity(best_idx));
+	return irq;
 }
 EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
 
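The rewritten IO_APIC_get_PCI_irq_vector() matches MP table entries on the srcbusirq encoding: the PCI slot sits in bits 6..2 and the INTx pin (0 = INTA ... 3 = INTD) in bits 1..0. Decoding it looks like this (a sketch mirroring the tests above):

	unsigned int slot = (mp_irqs[i].srcbusirq >> 2) & 0x1f;
	unsigned int intx = mp_irqs[i].srcbusirq & 3;	/* compare against pin */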
@@ -1198,7 +1381,7 @@ void __setup_vector_irq(int cpu)
 	raw_spin_lock(&vector_lock);
 	/* Mark the inuse vectors */
 	for_each_active_irq(irq) {
-		cfg = irq_get_chip_data(irq);
+		cfg = irq_cfg(irq);
 		if (!cfg)
 			continue;
 
@@ -1227,12 +1410,10 @@ static inline int IO_APIC_irq_trigger(int irq)
 {
 	int apic, idx, pin;
 
-	for (apic = 0; apic < nr_ioapics; apic++) {
-		for (pin = 0; pin < ioapics[apic].nr_registers; pin++) {
-			idx = find_irq_entry(apic, pin, mp_INT);
-			if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
-				return irq_trigger(idx);
-		}
+	for_each_ioapic_pin(apic, pin) {
+		idx = find_irq_entry(apic, pin, mp_INT);
+		if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin, 0)))
+			return irq_trigger(idx);
 	}
 	/*
 	 * nonexistent IRQs are edge default
@@ -1330,95 +1511,29 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
 	}
 
 	ioapic_register_intr(irq, cfg, attr->trigger);
-	if (irq < legacy_pic->nr_legacy_irqs)
+	if (irq < nr_legacy_irqs())
 		legacy_pic->mask(irq);
 
 	ioapic_write_entry(attr->ioapic, attr->ioapic_pin, entry);
 }
 
-static bool __init io_apic_pin_not_connected(int idx, int ioapic_idx, int pin)
-{
-	if (idx != -1)
-		return false;
-
-	apic_printk(APIC_VERBOSE, KERN_DEBUG " apic %d pin %d not connected\n",
-		    mpc_ioapic_id(ioapic_idx), pin);
-	return true;
-}
-
-static void __init __io_apic_setup_irqs(unsigned int ioapic_idx)
-{
-	int idx, node = cpu_to_node(0);
-	struct io_apic_irq_attr attr;
-	unsigned int pin, irq;
-
-	for (pin = 0; pin < ioapics[ioapic_idx].nr_registers; pin++) {
-		idx = find_irq_entry(ioapic_idx, pin, mp_INT);
-		if (io_apic_pin_not_connected(idx, ioapic_idx, pin))
-			continue;
-
-		irq = pin_2_irq(idx, ioapic_idx, pin);
-
-		if ((ioapic_idx > 0) && (irq > 16))
-			continue;
-
-		/*
-		 * Skip the timer IRQ if there's a quirk handler
-		 * installed and if it returns 1:
-		 */
-		if (apic->multi_timer_check &&
-		    apic->multi_timer_check(ioapic_idx, irq))
-			continue;
-
-		set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx),
-				     irq_polarity(idx));
-
-		io_apic_setup_irq_pin(irq, node, &attr);
-	}
-}
-
 static void __init setup_IO_APIC_irqs(void)
 {
-	unsigned int ioapic_idx;
+	unsigned int ioapic, pin;
+	int idx;
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
-	for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
-		__io_apic_setup_irqs(ioapic_idx);
-}
-
-/*
- * for the gsit that is not in first ioapic
- * but could not use acpi_register_gsi()
- * like some special sci in IBM x3330
- */
-void setup_IO_APIC_irq_extra(u32 gsi)
-{
-	int ioapic_idx = 0, pin, idx, irq, node = cpu_to_node(0);
-	struct io_apic_irq_attr attr;
-
-	/*
-	 * Convert 'gsi' to 'ioapic.pin'.
-	 */
-	ioapic_idx = mp_find_ioapic(gsi);
-	if (ioapic_idx < 0)
-		return;
-
-	pin = mp_find_ioapic_pin(ioapic_idx, gsi);
-	idx = find_irq_entry(ioapic_idx, pin, mp_INT);
-	if (idx == -1)
-		return;
-
-	irq = pin_2_irq(idx, ioapic_idx, pin);
-
-	/* Only handle the non legacy irqs on secondary ioapics */
-	if (ioapic_idx == 0 || irq < NR_IRQS_LEGACY)
-		return;
-
-	set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx),
-			     irq_polarity(idx));
-
-	io_apic_setup_irq_pin_once(irq, node, &attr);
+	for_each_ioapic_pin(ioapic, pin) {
+		idx = find_irq_entry(ioapic, pin, mp_INT);
+		if (idx < 0)
+			apic_printk(APIC_VERBOSE,
+				    KERN_DEBUG " apic %d pin %d not connected\n",
+				    mpc_ioapic_id(ioapic), pin);
+		else
+			pin_2_irq(idx, ioapic, pin,
+				  ioapic ? 0 : IOAPIC_MAP_ALLOC);
+	}
 }
 
 /*
@@ -1586,7 +1701,7 @@ __apicdebuginit(void) print_IO_APICs(void)
 	struct irq_chip *chip;
 
 	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
-	for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+	for_each_ioapic(ioapic_idx)
 		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
 		       mpc_ioapic_id(ioapic_idx),
 		       ioapics[ioapic_idx].nr_registers);
@@ -1597,7 +1712,7 @@ __apicdebuginit(void) print_IO_APICs(void)
 	 */
 	printk(KERN_INFO "testing the IO APIC.......................\n");
 
-	for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+	for_each_ioapic(ioapic_idx)
 		print_IO_APIC(ioapic_idx);
 
 	printk(KERN_DEBUG "IRQ to pin mappings:\n");
@@ -1608,7 +1723,7 @@ __apicdebuginit(void) print_IO_APICs(void)
 		if (chip != &ioapic_chip)
 			continue;
 
-		cfg = irq_get_chip_data(irq);
+		cfg = irq_cfg(irq);
 		if (!cfg)
 			continue;
 		entry = cfg->irq_2_pin;
@@ -1758,7 +1873,7 @@ __apicdebuginit(void) print_PIC(void)
 	unsigned int v;
 	unsigned long flags;
 
-	if (!legacy_pic->nr_legacy_irqs)
+	if (!nr_legacy_irqs())
 		return;
 
 	printk(KERN_DEBUG "\nprinting PIC contents\n");
@@ -1828,26 +1943,22 @@ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 void __init enable_IO_APIC(void)
 {
 	int i8259_apic, i8259_pin;
-	int apic;
+	int apic, pin;
 
-	if (!legacy_pic->nr_legacy_irqs)
+	if (!nr_legacy_irqs())
 		return;
 
-	for(apic = 0; apic < nr_ioapics; apic++) {
-		int pin;
+	for_each_ioapic_pin(apic, pin) {
 		/* See if any of the pins is in ExtINT mode */
-		for (pin = 0; pin < ioapics[apic].nr_registers; pin++) {
-			struct IO_APIC_route_entry entry;
-			entry = ioapic_read_entry(apic, pin);
+		struct IO_APIC_route_entry entry = ioapic_read_entry(apic, pin);
 
-			/* If the interrupt line is enabled and in ExtInt mode
-			 * I have found the pin where the i8259 is connected.
-			 */
-			if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
-				ioapic_i8259.apic = apic;
-				ioapic_i8259.pin  = pin;
-				goto found_i8259;
-			}
+		/* If the interrupt line is enabled and in ExtInt mode
+		 * I have found the pin where the i8259 is connected.
+		 */
+		if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
+			ioapic_i8259.apic = apic;
+			ioapic_i8259.pin  = pin;
+			goto found_i8259;
 		}
 	}
 found_i8259:
@@ -1919,7 +2030,7 @@ void disable_IO_APIC(void)
 	 */
 	clear_IO_APIC();
 
-	if (!legacy_pic->nr_legacy_irqs)
+	if (!nr_legacy_irqs())
 		return;
 
 	x86_io_apic_ops.disable();
@@ -1950,7 +2061,7 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void)
 	/*
 	 * Set the IOAPIC ID to the value stored in the MPC table.
 	 */
-	for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) {
+	for_each_ioapic(ioapic_idx) {
 		/* Read the register 0 value */
 		raw_spin_lock_irqsave(&ioapic_lock, flags);
 		reg_00.raw = io_apic_read(ioapic_idx, 0);
@@ -2123,7 +2234,7 @@ static unsigned int startup_ioapic_irq(struct irq_data *data)
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	if (irq < legacy_pic->nr_legacy_irqs) {
+	if (irq < nr_legacy_irqs()) {
 		legacy_pic->mask(irq);
 		if (legacy_pic->irq_pending(irq))
 			was_pending = 1;
@@ -2225,7 +2336,7 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
 			apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
 			goto unlock;
 		}
-		__this_cpu_write(vector_irq[vector], -1);
+		__this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED);
 unlock:
 		raw_spin_unlock(&desc->lock);
 	}
@@ -2253,7 +2364,7 @@ static void irq_complete_move(struct irq_cfg *cfg)
 
 void irq_force_complete_move(int irq)
 {
-	struct irq_cfg *cfg = irq_get_chip_data(irq);
+	struct irq_cfg *cfg = irq_cfg(irq);
 
 	if (!cfg)
 		return;
@@ -2514,26 +2625,15 @@ static inline void init_IO_APIC_traps(void)
 	struct irq_cfg *cfg;
 	unsigned int irq;
 
-	/*
-	 * NOTE! The local APIC isn't very good at handling
-	 * multiple interrupts at the same interrupt level.
-	 * As the interrupt level is determined by taking the
-	 * vector number and shifting that right by 4, we
-	 * want to spread these out a bit so that they don't
-	 * all fall in the same interrupt level.
-	 *
-	 * Also, we've got to be careful not to trash gate
-	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
-	 */
 	for_each_active_irq(irq) {
-		cfg = irq_get_chip_data(irq);
+		cfg = irq_cfg(irq);
 		if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
 			/*
 			 * Hmm.. We don't have an entry for this,
 			 * so default to an old-fashioned 8259
 			 * interrupt if we can..
 			 */
-			if (irq < legacy_pic->nr_legacy_irqs)
+			if (irq < nr_legacy_irqs())
 				legacy_pic->make_irq(irq);
 			else
 				/* Strange. Oh, well.. */
@@ -2649,8 +2749,6 @@ static int __init disable_timer_pin_setup(char *arg)
2649} 2749}
2650early_param("disable_timer_pin_1", disable_timer_pin_setup); 2750early_param("disable_timer_pin_1", disable_timer_pin_setup);
2651 2751
2652int timer_through_8259 __initdata;
2653
2654/* 2752/*
2655 * This code may look a bit paranoid, but it's supposed to cooperate with 2753 * This code may look a bit paranoid, but it's supposed to cooperate with
2656 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ 2754 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
@@ -2661,7 +2759,7 @@ int timer_through_8259 __initdata;
2661 */ 2759 */
2662static inline void __init check_timer(void) 2760static inline void __init check_timer(void)
2663{ 2761{
2664 struct irq_cfg *cfg = irq_get_chip_data(0); 2762 struct irq_cfg *cfg = irq_cfg(0);
2665 int node = cpu_to_node(0); 2763 int node = cpu_to_node(0);
2666 int apic1, pin1, apic2, pin2; 2764 int apic1, pin1, apic2, pin2;
2667 unsigned long flags; 2765 unsigned long flags;
@@ -2755,7 +2853,6 @@ static inline void __init check_timer(void)
2755 legacy_pic->unmask(0); 2853 legacy_pic->unmask(0);
2756 if (timer_irq_works()) { 2854 if (timer_irq_works()) {
2757 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); 2855 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
2758 timer_through_8259 = 1;
2759 goto out; 2856 goto out;
2760 } 2857 }
2761 /* 2858 /*
@@ -2827,15 +2924,54 @@ out:
2827 */ 2924 */
2828#define PIC_IRQS (1UL << PIC_CASCADE_IR) 2925#define PIC_IRQS (1UL << PIC_CASCADE_IR)
2829 2926
2927static int mp_irqdomain_create(int ioapic)
2928{
2929 size_t size;
2930 int hwirqs = mp_ioapic_pin_count(ioapic);
2931 struct ioapic *ip = &ioapics[ioapic];
2932 struct ioapic_domain_cfg *cfg = &ip->irqdomain_cfg;
2933 struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic);
2934
2935 size = sizeof(struct mp_pin_info) * mp_ioapic_pin_count(ioapic);
2936 ip->pin_info = kzalloc(size, GFP_KERNEL);
2937 if (!ip->pin_info)
2938 return -ENOMEM;
2939
2940 if (cfg->type == IOAPIC_DOMAIN_INVALID)
2941 return 0;
2942
2943 ip->irqdomain = irq_domain_add_linear(cfg->dev, hwirqs, cfg->ops,
2944 (void *)(long)ioapic);
 2945	if (!ip->irqdomain) {
2946 kfree(ip->pin_info);
2947 ip->pin_info = NULL;
2948 return -ENOMEM;
2949 }
2950
2951 if (cfg->type == IOAPIC_DOMAIN_LEGACY ||
2952 cfg->type == IOAPIC_DOMAIN_STRICT)
2953 ioapic_dynirq_base = max(ioapic_dynirq_base,
2954 gsi_cfg->gsi_end + 1);
2955
2956 if (gsi_cfg->gsi_base == 0)
2957 irq_set_default_host(ip->irqdomain);
2958
2959 return 0;
2960}
2961
2830void __init setup_IO_APIC(void) 2962void __init setup_IO_APIC(void)
2831{ 2963{
2964 int ioapic;
2832 2965
2833 /* 2966 /*
2834 * calling enable_IO_APIC() is moved to setup_local_APIC for BP 2967 * calling enable_IO_APIC() is moved to setup_local_APIC for BP
2835 */ 2968 */
2836 io_apic_irqs = legacy_pic->nr_legacy_irqs ? ~PIC_IRQS : ~0UL; 2969 io_apic_irqs = nr_legacy_irqs() ? ~PIC_IRQS : ~0UL;
2837 2970
2838 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); 2971 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
2972 for_each_ioapic(ioapic)
2973 BUG_ON(mp_irqdomain_create(ioapic));
2974
2839 /* 2975 /*
2840 * Set up IO-APIC IRQ routing. 2976 * Set up IO-APIC IRQ routing.
2841 */ 2977 */
@@ -2844,8 +2980,10 @@ void __init setup_IO_APIC(void)
2844 sync_Arb_IDs(); 2980 sync_Arb_IDs();
2845 setup_IO_APIC_irqs(); 2981 setup_IO_APIC_irqs();
2846 init_IO_APIC_traps(); 2982 init_IO_APIC_traps();
2847 if (legacy_pic->nr_legacy_irqs) 2983 if (nr_legacy_irqs())
2848 check_timer(); 2984 check_timer();
2985
2986 ioapic_initialized = 1;
2849} 2987}
2850 2988
2851/* 2989/*
@@ -2880,7 +3018,7 @@ static void ioapic_resume(void)
2880{ 3018{
2881 int ioapic_idx; 3019 int ioapic_idx;
2882 3020
2883 for (ioapic_idx = nr_ioapics - 1; ioapic_idx >= 0; ioapic_idx--) 3021 for_each_ioapic_reverse(ioapic_idx)
2884 resume_ioapic_id(ioapic_idx); 3022 resume_ioapic_id(ioapic_idx);
2885 3023
2886 restore_ioapic_entries(); 3024 restore_ioapic_entries();
@@ -2926,7 +3064,7 @@ int arch_setup_hwirq(unsigned int irq, int node)
2926 3064
2927void arch_teardown_hwirq(unsigned int irq) 3065void arch_teardown_hwirq(unsigned int irq)
2928{ 3066{
2929 struct irq_cfg *cfg = irq_get_chip_data(irq); 3067 struct irq_cfg *cfg = irq_cfg(irq);
2930 unsigned long flags; 3068 unsigned long flags;
2931 3069
2932 free_remapped_irq(irq); 3070 free_remapped_irq(irq);
@@ -3053,7 +3191,7 @@ int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
3053 if (!irq_offset) 3191 if (!irq_offset)
3054 write_msi_msg(irq, &msg); 3192 write_msi_msg(irq, &msg);
3055 3193
3056 setup_remapped_irq(irq, irq_get_chip_data(irq), chip); 3194 setup_remapped_irq(irq, irq_cfg(irq), chip);
3057 3195
3058 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); 3196 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
3059 3197
@@ -3192,7 +3330,7 @@ int default_setup_hpet_msi(unsigned int irq, unsigned int id)
3192 3330
3193 hpet_msi_write(irq_get_handler_data(irq), &msg); 3331 hpet_msi_write(irq_get_handler_data(irq), &msg);
3194 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); 3332 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
3195 setup_remapped_irq(irq, irq_get_chip_data(irq), chip); 3333 setup_remapped_irq(irq, irq_cfg(irq), chip);
3196 3334
3197 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); 3335 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
3198 return 0; 3336 return 0;
@@ -3303,27 +3441,6 @@ io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
3303 return ret; 3441 return ret;
3304} 3442}
3305 3443
3306int io_apic_setup_irq_pin_once(unsigned int irq, int node,
3307 struct io_apic_irq_attr *attr)
3308{
3309 unsigned int ioapic_idx = attr->ioapic, pin = attr->ioapic_pin;
3310 int ret;
3311 struct IO_APIC_route_entry orig_entry;
3312
3313 /* Avoid redundant programming */
3314 if (test_bit(pin, ioapics[ioapic_idx].pin_programmed)) {
3315 pr_debug("Pin %d-%d already programmed\n", mpc_ioapic_id(ioapic_idx), pin);
3316 orig_entry = ioapic_read_entry(attr->ioapic, pin);
3317 if (attr->trigger == orig_entry.trigger && attr->polarity == orig_entry.polarity)
3318 return 0;
3319 return -EBUSY;
3320 }
3321 ret = io_apic_setup_irq_pin(irq, node, attr);
3322 if (!ret)
3323 set_bit(pin, ioapics[ioapic_idx].pin_programmed);
3324 return ret;
3325}
3326
3327static int __init io_apic_get_redir_entries(int ioapic) 3444static int __init io_apic_get_redir_entries(int ioapic)
3328{ 3445{
3329 union IO_APIC_reg_01 reg_01; 3446 union IO_APIC_reg_01 reg_01;
@@ -3340,20 +3457,13 @@ static int __init io_apic_get_redir_entries(int ioapic)
3340 return reg_01.bits.entries + 1; 3457 return reg_01.bits.entries + 1;
3341} 3458}
3342 3459
3343static void __init probe_nr_irqs_gsi(void)
3344{
3345 int nr;
3346
3347 nr = gsi_top + NR_IRQS_LEGACY;
3348 if (nr > nr_irqs_gsi)
3349 nr_irqs_gsi = nr;
3350
3351 printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
3352}
3353
3354unsigned int arch_dynirq_lower_bound(unsigned int from) 3460unsigned int arch_dynirq_lower_bound(unsigned int from)
3355{ 3461{
3356 return from < nr_irqs_gsi ? nr_irqs_gsi : from; 3462 /*
3463 * dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use
3464 * gsi_top if ioapic_dynirq_base hasn't been initialized yet.
3465 */
3466 return ioapic_initialized ? ioapic_dynirq_base : gsi_top;
3357} 3467}
3358 3468
3359int __init arch_probe_nr_irqs(void) 3469int __init arch_probe_nr_irqs(void)
@@ -3363,33 +3473,17 @@ int __init arch_probe_nr_irqs(void)
3363 if (nr_irqs > (NR_VECTORS * nr_cpu_ids)) 3473 if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
3364 nr_irqs = NR_VECTORS * nr_cpu_ids; 3474 nr_irqs = NR_VECTORS * nr_cpu_ids;
3365 3475
3366 nr = nr_irqs_gsi + 8 * nr_cpu_ids; 3476 nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids;
3367#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ) 3477#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
3368 /* 3478 /*
3369 * for MSI and HT dyn irq 3479 * for MSI and HT dyn irq
3370 */ 3480 */
3371 nr += nr_irqs_gsi * 16; 3481 nr += gsi_top * 16;
3372#endif 3482#endif
3373 if (nr < nr_irqs) 3483 if (nr < nr_irqs)
3374 nr_irqs = nr; 3484 nr_irqs = nr;
3375 3485
3376 return NR_IRQS_LEGACY; 3486 return 0;
3377}
3378
3379int io_apic_set_pci_routing(struct device *dev, int irq,
3380 struct io_apic_irq_attr *irq_attr)
3381{
3382 int node;
3383
3384 if (!IO_APIC_IRQ(irq)) {
3385 apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
3386 irq_attr->ioapic);
3387 return -EINVAL;
3388 }
3389
3390 node = dev ? dev_to_node(dev) : cpu_to_node(0);
3391
3392 return io_apic_setup_irq_pin_once(irq, node, irq_attr);
3393} 3487}
3394 3488
3395#ifdef CONFIG_X86_32 3489#ifdef CONFIG_X86_32
@@ -3483,9 +3577,8 @@ static u8 __init io_apic_unique_id(u8 id)
3483 DECLARE_BITMAP(used, 256); 3577 DECLARE_BITMAP(used, 256);
3484 3578
3485 bitmap_zero(used, 256); 3579 bitmap_zero(used, 256);
3486 for (i = 0; i < nr_ioapics; i++) { 3580 for_each_ioapic(i)
3487 __set_bit(mpc_ioapic_id(i), used); 3581 __set_bit(mpc_ioapic_id(i), used);
3488 }
3489 if (!test_bit(id, used)) 3582 if (!test_bit(id, used))
3490 return id; 3583 return id;
3491 return find_first_zero_bit(used, 256); 3584 return find_first_zero_bit(used, 256);
@@ -3543,14 +3636,13 @@ void __init setup_ioapic_dest(void)
3543 if (skip_ioapic_setup == 1) 3636 if (skip_ioapic_setup == 1)
3544 return; 3637 return;
3545 3638
3546 for (ioapic = 0; ioapic < nr_ioapics; ioapic++) 3639 for_each_ioapic_pin(ioapic, pin) {
3547 for (pin = 0; pin < ioapics[ioapic].nr_registers; pin++) {
3548 irq_entry = find_irq_entry(ioapic, pin, mp_INT); 3640 irq_entry = find_irq_entry(ioapic, pin, mp_INT);
3549 if (irq_entry == -1) 3641 if (irq_entry == -1)
3550 continue; 3642 continue;
3551 irq = pin_2_irq(irq_entry, ioapic, pin);
3552 3643
3553 if ((ioapic > 0) && (irq > 16)) 3644 irq = pin_2_irq(irq_entry, ioapic, pin, 0);
3645 if (irq < 0 || !mp_init_irq_at_boot(ioapic, irq))
3554 continue; 3646 continue;
3555 3647
3556 idata = irq_get_irq_data(irq); 3648 idata = irq_get_irq_data(irq);
@@ -3573,29 +3665,33 @@ void __init setup_ioapic_dest(void)
3573 3665
3574static struct resource *ioapic_resources; 3666static struct resource *ioapic_resources;
3575 3667
3576static struct resource * __init ioapic_setup_resources(int nr_ioapics) 3668static struct resource * __init ioapic_setup_resources(void)
3577{ 3669{
3578 unsigned long n; 3670 unsigned long n;
3579 struct resource *res; 3671 struct resource *res;
3580 char *mem; 3672 char *mem;
3581 int i; 3673 int i, num = 0;
3582 3674
3583 if (nr_ioapics <= 0) 3675 for_each_ioapic(i)
3676 num++;
3677 if (num == 0)
3584 return NULL; 3678 return NULL;
3585 3679
3586 n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); 3680 n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
3587 n *= nr_ioapics; 3681 n *= num;
3588 3682
3589 mem = alloc_bootmem(n); 3683 mem = alloc_bootmem(n);
3590 res = (void *)mem; 3684 res = (void *)mem;
3591 3685
3592 mem += sizeof(struct resource) * nr_ioapics; 3686 mem += sizeof(struct resource) * num;
3593 3687
3594 for (i = 0; i < nr_ioapics; i++) { 3688 num = 0;
3595 res[i].name = mem; 3689 for_each_ioapic(i) {
3596 res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; 3690 res[num].name = mem;
3691 res[num].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
3597 snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i); 3692 snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
3598 mem += IOAPIC_RESOURCE_NAME_SIZE; 3693 mem += IOAPIC_RESOURCE_NAME_SIZE;
3694 num++;
3599 } 3695 }
3600 3696
3601 ioapic_resources = res; 3697 ioapic_resources = res;
@@ -3609,8 +3705,8 @@ void __init native_io_apic_init_mappings(void)
3609 struct resource *ioapic_res; 3705 struct resource *ioapic_res;
3610 int i; 3706 int i;
3611 3707
3612 ioapic_res = ioapic_setup_resources(nr_ioapics); 3708 ioapic_res = ioapic_setup_resources();
3613 for (i = 0; i < nr_ioapics; i++) { 3709 for_each_ioapic(i) {
3614 if (smp_found_config) { 3710 if (smp_found_config) {
3615 ioapic_phys = mpc_ioapic_addr(i); 3711 ioapic_phys = mpc_ioapic_addr(i);
3616#ifdef CONFIG_X86_32 3712#ifdef CONFIG_X86_32
@@ -3641,8 +3737,6 @@ fake_ioapic_page:
3641 ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1; 3737 ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
3642 ioapic_res++; 3738 ioapic_res++;
3643 } 3739 }
3644
3645 probe_nr_irqs_gsi();
3646} 3740}
3647 3741
3648void __init ioapic_insert_resources(void) 3742void __init ioapic_insert_resources(void)
@@ -3657,7 +3751,7 @@ void __init ioapic_insert_resources(void)
3657 return; 3751 return;
3658 } 3752 }
3659 3753
3660 for (i = 0; i < nr_ioapics; i++) { 3754 for_each_ioapic(i) {
3661 insert_resource(&iomem_resource, r); 3755 insert_resource(&iomem_resource, r);
3662 r++; 3756 r++;
3663 } 3757 }
@@ -3665,16 +3759,15 @@ void __init ioapic_insert_resources(void)
3665 3759
3666int mp_find_ioapic(u32 gsi) 3760int mp_find_ioapic(u32 gsi)
3667{ 3761{
3668 int i = 0; 3762 int i;
3669 3763
3670 if (nr_ioapics == 0) 3764 if (nr_ioapics == 0)
3671 return -1; 3765 return -1;
3672 3766
3673 /* Find the IOAPIC that manages this GSI. */ 3767 /* Find the IOAPIC that manages this GSI. */
3674 for (i = 0; i < nr_ioapics; i++) { 3768 for_each_ioapic(i) {
3675 struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(i); 3769 struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(i);
3676 if ((gsi >= gsi_cfg->gsi_base) 3770 if (gsi >= gsi_cfg->gsi_base && gsi <= gsi_cfg->gsi_end)
3677 && (gsi <= gsi_cfg->gsi_end))
3678 return i; 3771 return i;
3679 } 3772 }
3680 3773
@@ -3686,7 +3779,7 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi)
3686{ 3779{
3687 struct mp_ioapic_gsi *gsi_cfg; 3780 struct mp_ioapic_gsi *gsi_cfg;
3688 3781
3689 if (WARN_ON(ioapic == -1)) 3782 if (WARN_ON(ioapic < 0))
3690 return -1; 3783 return -1;
3691 3784
3692 gsi_cfg = mp_ioapic_gsi_routing(ioapic); 3785 gsi_cfg = mp_ioapic_gsi_routing(ioapic);
@@ -3729,7 +3822,8 @@ static __init int bad_ioapic_register(int idx)
3729 return 0; 3822 return 0;
3730} 3823}
3731 3824
3732void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) 3825void __init mp_register_ioapic(int id, u32 address, u32 gsi_base,
3826 struct ioapic_domain_cfg *cfg)
3733{ 3827{
3734 int idx = 0; 3828 int idx = 0;
3735 int entries; 3829 int entries;
@@ -3743,6 +3837,8 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
3743 ioapics[idx].mp_config.type = MP_IOAPIC; 3837 ioapics[idx].mp_config.type = MP_IOAPIC;
3744 ioapics[idx].mp_config.flags = MPC_APIC_USABLE; 3838 ioapics[idx].mp_config.flags = MPC_APIC_USABLE;
3745 ioapics[idx].mp_config.apicaddr = address; 3839 ioapics[idx].mp_config.apicaddr = address;
3840 ioapics[idx].irqdomain = NULL;
3841 ioapics[idx].irqdomain_cfg = *cfg;
3746 3842
3747 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); 3843 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
3748 3844
@@ -3779,6 +3875,77 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
3779 nr_ioapics++; 3875 nr_ioapics++;
3780} 3876}
3781 3877
3878int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq,
3879 irq_hw_number_t hwirq)
3880{
3881 int ioapic = (int)(long)domain->host_data;
3882 struct mp_pin_info *info = mp_pin_info(ioapic, hwirq);
3883 struct io_apic_irq_attr attr;
3884
3885 /* Get default attribute if not set by caller yet */
3886 if (!info->set) {
3887 u32 gsi = mp_pin_to_gsi(ioapic, hwirq);
3888
3889 if (acpi_get_override_irq(gsi, &info->trigger,
3890 &info->polarity) < 0) {
3891 /*
 3892	 * PCI interrupts are always active low
 3893	 * (polarity 1) and level triggered.
3894 */
3895 info->trigger = 1;
3896 info->polarity = 1;
3897 }
3898 info->node = NUMA_NO_NODE;
3899 info->set = 1;
3900 }
3901 set_io_apic_irq_attr(&attr, ioapic, hwirq, info->trigger,
3902 info->polarity);
3903
3904 return io_apic_setup_irq_pin(virq, info->node, &attr);
3905}
3906
3907void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq)
3908{
3909 struct irq_data *data = irq_get_irq_data(virq);
3910 struct irq_cfg *cfg = irq_cfg(virq);
3911 int ioapic = (int)(long)domain->host_data;
3912 int pin = (int)data->hwirq;
3913
3914 ioapic_mask_entry(ioapic, pin);
3915 __remove_pin_from_irq(cfg, ioapic, pin);
3916 WARN_ON(cfg->irq_2_pin != NULL);
3917 arch_teardown_hwirq(virq);
3918}
3919
3920int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node)
3921{
3922 int ret = 0;
3923 int ioapic, pin;
3924 struct mp_pin_info *info;
3925
3926 ioapic = mp_find_ioapic(gsi);
3927 if (ioapic < 0)
3928 return -ENODEV;
3929
3930 pin = mp_find_ioapic_pin(ioapic, gsi);
3931 info = mp_pin_info(ioapic, pin);
3932 trigger = trigger ? 1 : 0;
3933 polarity = polarity ? 1 : 0;
3934
3935 mutex_lock(&ioapic_mutex);
3936 if (!info->set) {
3937 info->trigger = trigger;
3938 info->polarity = polarity;
3939 info->node = node;
3940 info->set = 1;
3941 } else if (info->trigger != trigger || info->polarity != polarity) {
3942 ret = -EBUSY;
3943 }
3944 mutex_unlock(&ioapic_mutex);
3945
3946 return ret;
3947}
3948
3782/* Enable IOAPIC early just for system timer */ 3949/* Enable IOAPIC early just for system timer */
3783void __init pre_init_apic_IRQ0(void) 3950void __init pre_init_apic_IRQ0(void)
3784{ 3951{
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index cceb352c968c..bda488680dbc 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -88,21 +88,16 @@ static struct apic apic_default = {
88 .disable_esr = 0, 88 .disable_esr = 0,
89 .dest_logical = APIC_DEST_LOGICAL, 89 .dest_logical = APIC_DEST_LOGICAL,
90 .check_apicid_used = default_check_apicid_used, 90 .check_apicid_used = default_check_apicid_used,
91 .check_apicid_present = default_check_apicid_present,
92 91
93 .vector_allocation_domain = flat_vector_allocation_domain, 92 .vector_allocation_domain = flat_vector_allocation_domain,
94 .init_apic_ldr = default_init_apic_ldr, 93 .init_apic_ldr = default_init_apic_ldr,
95 94
96 .ioapic_phys_id_map = default_ioapic_phys_id_map, 95 .ioapic_phys_id_map = default_ioapic_phys_id_map,
97 .setup_apic_routing = setup_apic_flat_routing, 96 .setup_apic_routing = setup_apic_flat_routing,
98 .multi_timer_check = NULL,
99 .cpu_present_to_apicid = default_cpu_present_to_apicid, 97 .cpu_present_to_apicid = default_cpu_present_to_apicid,
100 .apicid_to_cpu_present = physid_set_mask_of_physid, 98 .apicid_to_cpu_present = physid_set_mask_of_physid,
101 .setup_portio_remap = NULL,
102 .check_phys_apicid_present = default_check_phys_apicid_present, 99 .check_phys_apicid_present = default_check_phys_apicid_present,
103 .enable_apic_mode = NULL,
104 .phys_pkg_id = default_phys_pkg_id, 100 .phys_pkg_id = default_phys_pkg_id,
105 .mps_oem_check = NULL,
106 101
107 .get_apic_id = default_get_apic_id, 102 .get_apic_id = default_get_apic_id,
108 .set_apic_id = NULL, 103 .set_apic_id = NULL,
@@ -116,11 +111,7 @@ static struct apic apic_default = {
116 .send_IPI_all = default_send_IPI_all, 111 .send_IPI_all = default_send_IPI_all,
117 .send_IPI_self = default_send_IPI_self, 112 .send_IPI_self = default_send_IPI_self,
118 113
119 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
120 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
121
122 .wait_for_init_deassert = true, 114 .wait_for_init_deassert = true,
123 .smp_callin_clear_local_apic = NULL,
124 .inquire_remote_apic = default_inquire_remote_apic, 115 .inquire_remote_apic = default_inquire_remote_apic,
125 116
126 .read = native_apic_mem_read, 117 .read = native_apic_mem_read,
@@ -214,29 +205,7 @@ void __init generic_apic_probe(void)
214 printk(KERN_INFO "Using APIC driver %s\n", apic->name); 205 printk(KERN_INFO "Using APIC driver %s\n", apic->name);
215} 206}
216 207
217/* These functions can switch the APIC even after the initial ->probe() */ 208/* This function can switch the APIC even after the initial ->probe() */
218
219int __init
220generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
221{
222 struct apic **drv;
223
224 for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
225 if (!((*drv)->mps_oem_check))
226 continue;
227 if (!(*drv)->mps_oem_check(mpc, oem, productid))
228 continue;
229
230 if (!cmdline_apic) {
231 apic = *drv;
232 printk(KERN_INFO "Switched to APIC driver `%s'.\n",
233 apic->name);
234 }
235 return 1;
236 }
237 return 0;
238}
239
240int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 209int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
241{ 210{
242 struct apic **drv; 211 struct apic **drv;
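
With generic_mps_oem_check() and the ->mps_oem_check() hook gone, probe_32.c keeps a single probe loop: walk the linker-assembled array of `struct apic *` between __apicdrivers and __apicdrivers_end and let the first driver that recognizes the platform win. A toy, runnable version of that loop with an ordinary array (the linker-section plumbing is deliberately omitted):

/* Toy probe loop over an array of APIC driver descriptors. */
#include <stdio.h>

struct toy_apic {
	const char *name;
	int (*probe)(void);	/* nonzero: driver claims the system */
};

static int never(void)  { return 0; }
static int always(void) { return 1; }

static struct toy_apic numachip = { "numachip", never };
static struct toy_apic fallback = { "default", always };

static struct toy_apic *drivers[] = { &numachip, &fallback };

int main(void)
{
	for (unsigned int i = 0; i < sizeof(drivers) / sizeof(drivers[0]); i++) {
		if (drivers[i]->probe()) {
			printf("Using APIC driver %s\n", drivers[i]->name);
			break;
		}
	}
	return 0;
}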
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index e66766bf1641..6ce600f9bc78 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -249,21 +249,16 @@ static struct apic apic_x2apic_cluster = {
249 .disable_esr = 0, 249 .disable_esr = 0,
250 .dest_logical = APIC_DEST_LOGICAL, 250 .dest_logical = APIC_DEST_LOGICAL,
251 .check_apicid_used = NULL, 251 .check_apicid_used = NULL,
252 .check_apicid_present = NULL,
253 252
254 .vector_allocation_domain = cluster_vector_allocation_domain, 253 .vector_allocation_domain = cluster_vector_allocation_domain,
255 .init_apic_ldr = init_x2apic_ldr, 254 .init_apic_ldr = init_x2apic_ldr,
256 255
257 .ioapic_phys_id_map = NULL, 256 .ioapic_phys_id_map = NULL,
258 .setup_apic_routing = NULL, 257 .setup_apic_routing = NULL,
259 .multi_timer_check = NULL,
260 .cpu_present_to_apicid = default_cpu_present_to_apicid, 258 .cpu_present_to_apicid = default_cpu_present_to_apicid,
261 .apicid_to_cpu_present = NULL, 259 .apicid_to_cpu_present = NULL,
262 .setup_portio_remap = NULL,
263 .check_phys_apicid_present = default_check_phys_apicid_present, 260 .check_phys_apicid_present = default_check_phys_apicid_present,
264 .enable_apic_mode = NULL,
265 .phys_pkg_id = x2apic_phys_pkg_id, 261 .phys_pkg_id = x2apic_phys_pkg_id,
266 .mps_oem_check = NULL,
267 262
268 .get_apic_id = x2apic_get_apic_id, 263 .get_apic_id = x2apic_get_apic_id,
269 .set_apic_id = x2apic_set_apic_id, 264 .set_apic_id = x2apic_set_apic_id,
@@ -277,10 +272,7 @@ static struct apic apic_x2apic_cluster = {
277 .send_IPI_all = x2apic_send_IPI_all, 272 .send_IPI_all = x2apic_send_IPI_all,
278 .send_IPI_self = x2apic_send_IPI_self, 273 .send_IPI_self = x2apic_send_IPI_self,
279 274
280 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
281 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
282 .wait_for_init_deassert = false, 275 .wait_for_init_deassert = false,
283 .smp_callin_clear_local_apic = NULL,
284 .inquire_remote_apic = NULL, 276 .inquire_remote_apic = NULL,
285 277
286 .read = native_apic_msr_read, 278 .read = native_apic_msr_read,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 6d600ebf6c12..6fae733e9194 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -103,21 +103,16 @@ static struct apic apic_x2apic_phys = {
103 .disable_esr = 0, 103 .disable_esr = 0,
104 .dest_logical = 0, 104 .dest_logical = 0,
105 .check_apicid_used = NULL, 105 .check_apicid_used = NULL,
106 .check_apicid_present = NULL,
107 106
108 .vector_allocation_domain = default_vector_allocation_domain, 107 .vector_allocation_domain = default_vector_allocation_domain,
109 .init_apic_ldr = init_x2apic_ldr, 108 .init_apic_ldr = init_x2apic_ldr,
110 109
111 .ioapic_phys_id_map = NULL, 110 .ioapic_phys_id_map = NULL,
112 .setup_apic_routing = NULL, 111 .setup_apic_routing = NULL,
113 .multi_timer_check = NULL,
114 .cpu_present_to_apicid = default_cpu_present_to_apicid, 112 .cpu_present_to_apicid = default_cpu_present_to_apicid,
115 .apicid_to_cpu_present = NULL, 113 .apicid_to_cpu_present = NULL,
116 .setup_portio_remap = NULL,
117 .check_phys_apicid_present = default_check_phys_apicid_present, 114 .check_phys_apicid_present = default_check_phys_apicid_present,
118 .enable_apic_mode = NULL,
119 .phys_pkg_id = x2apic_phys_pkg_id, 115 .phys_pkg_id = x2apic_phys_pkg_id,
120 .mps_oem_check = NULL,
121 116
122 .get_apic_id = x2apic_get_apic_id, 117 .get_apic_id = x2apic_get_apic_id,
123 .set_apic_id = x2apic_set_apic_id, 118 .set_apic_id = x2apic_set_apic_id,
@@ -131,10 +126,7 @@ static struct apic apic_x2apic_phys = {
131 .send_IPI_all = x2apic_send_IPI_all, 126 .send_IPI_all = x2apic_send_IPI_all,
132 .send_IPI_self = x2apic_send_IPI_self, 127 .send_IPI_self = x2apic_send_IPI_self,
133 128
134 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
135 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
136 .wait_for_init_deassert = false, 129 .wait_for_init_deassert = false,
137 .smp_callin_clear_local_apic = NULL,
138 .inquire_remote_apic = NULL, 130 .inquire_remote_apic = NULL,
139 131
140 .read = native_apic_msr_read, 132 .read = native_apic_msr_read,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 293b41df54ef..004f017aa7b9 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -365,21 +365,16 @@ static struct apic __refdata apic_x2apic_uv_x = {
365 .disable_esr = 0, 365 .disable_esr = 0,
366 .dest_logical = APIC_DEST_LOGICAL, 366 .dest_logical = APIC_DEST_LOGICAL,
367 .check_apicid_used = NULL, 367 .check_apicid_used = NULL,
368 .check_apicid_present = NULL,
369 368
370 .vector_allocation_domain = default_vector_allocation_domain, 369 .vector_allocation_domain = default_vector_allocation_domain,
371 .init_apic_ldr = uv_init_apic_ldr, 370 .init_apic_ldr = uv_init_apic_ldr,
372 371
373 .ioapic_phys_id_map = NULL, 372 .ioapic_phys_id_map = NULL,
374 .setup_apic_routing = NULL, 373 .setup_apic_routing = NULL,
375 .multi_timer_check = NULL,
376 .cpu_present_to_apicid = default_cpu_present_to_apicid, 374 .cpu_present_to_apicid = default_cpu_present_to_apicid,
377 .apicid_to_cpu_present = NULL, 375 .apicid_to_cpu_present = NULL,
378 .setup_portio_remap = NULL,
379 .check_phys_apicid_present = default_check_phys_apicid_present, 376 .check_phys_apicid_present = default_check_phys_apicid_present,
380 .enable_apic_mode = NULL,
381 .phys_pkg_id = uv_phys_pkg_id, 377 .phys_pkg_id = uv_phys_pkg_id,
382 .mps_oem_check = NULL,
383 378
384 .get_apic_id = x2apic_get_apic_id, 379 .get_apic_id = x2apic_get_apic_id,
385 .set_apic_id = set_apic_id, 380 .set_apic_id = set_apic_id,
@@ -394,10 +389,7 @@ static struct apic __refdata apic_x2apic_uv_x = {
394 .send_IPI_self = uv_send_IPI_self, 389 .send_IPI_self = uv_send_IPI_self,
395 390
396 .wakeup_secondary_cpu = uv_wakeup_secondary, 391 .wakeup_secondary_cpu = uv_wakeup_secondary,
397 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
398 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
399 .wait_for_init_deassert = false, 392 .wait_for_init_deassert = false,
400 .smp_callin_clear_local_apic = NULL,
401 .inquire_remote_apic = NULL, 393 .inquire_remote_apic = NULL,
402 394
403 .read = native_apic_msr_read, 395 .read = native_apic_msr_read,
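
The probe_32 and x2apic hunks above all delete `struct apic` callbacks that no longer have any users (check_apicid_present, multi_timer_check, setup_portio_remap, enable_apic_mode, mps_oem_check, smp_callin_clear_local_apic, plus the trampoline_phys_* constants). The idiom being cleaned up is an ops table whose optional hooks are NULL-checked at the call site; once every instance sets a hook to NULL, both the field and its checks can go. A small runnable sketch of that idiom, with toy names rather than the kernel's:

/* Ops table with one required and one optional (NULL-able) hook. */
#include <stdio.h>
#include <stddef.h>

struct toy_apic_ops {
	const char *name;
	void (*setup_routing)(void);		/* required */
	int (*oem_check)(const char *oem);	/* optional, may be NULL */
};

static void default_routing(void) { puts("default routing"); }

static struct toy_apic_ops apic_default = {
	.name		= "default",
	.setup_routing	= default_routing,
	.oem_check	= NULL,	/* every driver NULL -> field is dead */
};

int main(void)
{
	apic_default.setup_routing();
	if (apic_default.oem_check)	/* call-site NULL check */
		apic_default.oem_check("OEM00000");
	return 0;
}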
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 333fd5209336..e4ab2b42bd6f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -148,6 +148,7 @@ static int __init x86_xsave_setup(char *s)
148{ 148{
149 setup_clear_cpu_cap(X86_FEATURE_XSAVE); 149 setup_clear_cpu_cap(X86_FEATURE_XSAVE);
150 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); 150 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
151 setup_clear_cpu_cap(X86_FEATURE_XSAVES);
151 setup_clear_cpu_cap(X86_FEATURE_AVX); 152 setup_clear_cpu_cap(X86_FEATURE_AVX);
152 setup_clear_cpu_cap(X86_FEATURE_AVX2); 153 setup_clear_cpu_cap(X86_FEATURE_AVX2);
153 return 1; 154 return 1;
@@ -161,6 +162,13 @@ static int __init x86_xsaveopt_setup(char *s)
161} 162}
162__setup("noxsaveopt", x86_xsaveopt_setup); 163__setup("noxsaveopt", x86_xsaveopt_setup);
163 164
165static int __init x86_xsaves_setup(char *s)
166{
167 setup_clear_cpu_cap(X86_FEATURE_XSAVES);
168 return 1;
169}
170__setup("noxsaves", x86_xsaves_setup);
171
164#ifdef CONFIG_X86_32 172#ifdef CONFIG_X86_32
165static int cachesize_override = -1; 173static int cachesize_override = -1;
166static int disable_x86_serial_nr = 1; 174static int disable_x86_serial_nr = 1;
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 9c8f7394c612..c7035073dfc1 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -461,7 +461,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
461 461
462 cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); 462 cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
463 463
464 if (strict_strtoul(buf, 10, &val) < 0) 464 if (kstrtoul(buf, 10, &val) < 0)
465 return -EINVAL; 465 return -EINVAL;
466 466
467 err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val); 467 err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val);
@@ -511,7 +511,7 @@ store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
511 if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) 511 if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
512 return -EINVAL; 512 return -EINVAL;
513 513
514 if (strict_strtoul(buf, 16, &val) < 0) 514 if (kstrtoul(buf, 16, &val) < 0)
515 return -EINVAL; 515 return -EINVAL;
516 516
517 if (amd_set_subcaches(cpu, val)) 517 if (amd_set_subcaches(cpu, val))
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 4fc57975acc1..bd9ccda8087f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2136,7 +2136,7 @@ static ssize_t set_bank(struct device *s, struct device_attribute *attr,
2136{ 2136{
2137 u64 new; 2137 u64 new;
2138 2138
2139 if (strict_strtoull(buf, 0, &new) < 0) 2139 if (kstrtou64(buf, 0, &new) < 0)
2140 return -EINVAL; 2140 return -EINVAL;
2141 2141
2142 attr_to_bank(attr)->ctl = new; 2142 attr_to_bank(attr)->ctl = new;
@@ -2174,7 +2174,7 @@ static ssize_t set_ignore_ce(struct device *s,
2174{ 2174{
2175 u64 new; 2175 u64 new;
2176 2176
2177 if (strict_strtoull(buf, 0, &new) < 0) 2177 if (kstrtou64(buf, 0, &new) < 0)
2178 return -EINVAL; 2178 return -EINVAL;
2179 2179
2180 if (mca_cfg.ignore_ce ^ !!new) { 2180 if (mca_cfg.ignore_ce ^ !!new) {
@@ -2198,7 +2198,7 @@ static ssize_t set_cmci_disabled(struct device *s,
2198{ 2198{
2199 u64 new; 2199 u64 new;
2200 2200
2201 if (strict_strtoull(buf, 0, &new) < 0) 2201 if (kstrtou64(buf, 0, &new) < 0)
2202 return -EINVAL; 2202 return -EINVAL;
2203 2203
2204 if (mca_cfg.cmci_disabled ^ !!new) { 2204 if (mca_cfg.cmci_disabled ^ !!new) {
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 603df4f74640..1e49f8f41276 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -353,7 +353,7 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
353 if (!b->interrupt_capable) 353 if (!b->interrupt_capable)
354 return -EINVAL; 354 return -EINVAL;
355 355
356 if (strict_strtoul(buf, 0, &new) < 0) 356 if (kstrtoul(buf, 0, &new) < 0)
357 return -EINVAL; 357 return -EINVAL;
358 358
359 b->interrupt_enable = !!new; 359 b->interrupt_enable = !!new;
@@ -372,7 +372,7 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
372 struct thresh_restart tr; 372 struct thresh_restart tr;
373 unsigned long new; 373 unsigned long new;
374 374
375 if (strict_strtoul(buf, 0, &new) < 0) 375 if (kstrtoul(buf, 0, &new) < 0)
376 return -EINVAL; 376 return -EINVAL;
377 377
378 if (new > THRESHOLD_MAX) 378 if (new > THRESHOLD_MAX)
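
The strict_strtoul()/strict_strtoull() calls converted above were long-deprecated aliases for the kstrto*() family, which return 0 on success or a negative errno and reject trailing garbage (a single trailing newline, as produced by sysfs writes, is tolerated). A userspace approximation of those semantics — an illustration of the contract, not the kernel implementation:

/*
 * kstrtoul()-like parsing on top of strtoul(): the whole string
 * must be numeric, with at most one trailing newline.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static int kstrtoul_like(const char *s, unsigned int base,
			 unsigned long *res)
{
	char *end;

	errno = 0;
	*res = strtoul(s, &end, base);
	if (errno)
		return -errno;		/* e.g. -ERANGE on overflow */
	if (end == s)
		return -EINVAL;		/* no digits at all */
	if (*end == '\n')
		end++;			/* sysfs buffers end in '\n' */
	if (*end != '\0')
		return -EINVAL;		/* trailing garbage */
	return 0;
}

int main(void)
{
	unsigned long val;

	printf("\"42\\n\"  -> %d\n", kstrtoul_like("42\n", 10, &val));
	printf("\"42abc\" -> %d\n", kstrtoul_like("42abc", 10, &val));
	return 0;
}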
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 9a316b21df8b..3bdb95ae8c43 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -42,7 +42,7 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
42 * cmci_discover_lock protects against parallel discovery attempts 42 * cmci_discover_lock protects against parallel discovery attempts
43 * which could race against each other. 43 * which could race against each other.
44 */ 44 */
45static DEFINE_SPINLOCK(cmci_discover_lock); 45static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
46 46
47#define CMCI_THRESHOLD 1 47#define CMCI_THRESHOLD 1
48#define CMCI_POLL_INTERVAL (30 * HZ) 48#define CMCI_POLL_INTERVAL (30 * HZ)
@@ -144,14 +144,14 @@ static void cmci_storm_disable_banks(void)
144 int bank; 144 int bank;
145 u64 val; 145 u64 val;
146 146
147 spin_lock_irqsave(&cmci_discover_lock, flags); 147 raw_spin_lock_irqsave(&cmci_discover_lock, flags);
148 owned = __get_cpu_var(mce_banks_owned); 148 owned = __get_cpu_var(mce_banks_owned);
149 for_each_set_bit(bank, owned, MAX_NR_BANKS) { 149 for_each_set_bit(bank, owned, MAX_NR_BANKS) {
150 rdmsrl(MSR_IA32_MCx_CTL2(bank), val); 150 rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
151 val &= ~MCI_CTL2_CMCI_EN; 151 val &= ~MCI_CTL2_CMCI_EN;
152 wrmsrl(MSR_IA32_MCx_CTL2(bank), val); 152 wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
153 } 153 }
154 spin_unlock_irqrestore(&cmci_discover_lock, flags); 154 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
155} 155}
156 156
157static bool cmci_storm_detect(void) 157static bool cmci_storm_detect(void)
@@ -211,7 +211,7 @@ static void cmci_discover(int banks)
211 int i; 211 int i;
212 int bios_wrong_thresh = 0; 212 int bios_wrong_thresh = 0;
213 213
214 spin_lock_irqsave(&cmci_discover_lock, flags); 214 raw_spin_lock_irqsave(&cmci_discover_lock, flags);
215 for (i = 0; i < banks; i++) { 215 for (i = 0; i < banks; i++) {
216 u64 val; 216 u64 val;
217 int bios_zero_thresh = 0; 217 int bios_zero_thresh = 0;
@@ -266,7 +266,7 @@ static void cmci_discover(int banks)
266 WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); 266 WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
267 } 267 }
268 } 268 }
269 spin_unlock_irqrestore(&cmci_discover_lock, flags); 269 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
270 if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) { 270 if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
271 pr_info_once( 271 pr_info_once(
272 "bios_cmci_threshold: Some banks do not have valid thresholds set\n"); 272 "bios_cmci_threshold: Some banks do not have valid thresholds set\n");
@@ -316,10 +316,10 @@ void cmci_clear(void)
316 316
317 if (!cmci_supported(&banks)) 317 if (!cmci_supported(&banks))
318 return; 318 return;
319 spin_lock_irqsave(&cmci_discover_lock, flags); 319 raw_spin_lock_irqsave(&cmci_discover_lock, flags);
320 for (i = 0; i < banks; i++) 320 for (i = 0; i < banks; i++)
321 __cmci_disable_bank(i); 321 __cmci_disable_bank(i);
322 spin_unlock_irqrestore(&cmci_discover_lock, flags); 322 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
323} 323}
324 324
325static void cmci_rediscover_work_func(void *arg) 325static void cmci_rediscover_work_func(void *arg)
@@ -360,9 +360,9 @@ void cmci_disable_bank(int bank)
360 if (!cmci_supported(&banks)) 360 if (!cmci_supported(&banks))
361 return; 361 return;
362 362
363 spin_lock_irqsave(&cmci_discover_lock, flags); 363 raw_spin_lock_irqsave(&cmci_discover_lock, flags);
364 __cmci_disable_bank(bank); 364 __cmci_disable_bank(bank);
365 spin_unlock_irqrestore(&cmci_discover_lock, flags); 365 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
366} 366}
367 367
368static void intel_init_cmci(void) 368static void intel_init_cmci(void)
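
The cmci_discover_lock conversion matters on PREEMPT_RT, where a plain spinlock_t becomes a sleeping lock: CMCI discovery and storm handling run in contexts that must not sleep, so the lock has to stay a true spinning lock, which is what raw_spinlock_t is on every configuration. The converted pattern reduced to its skeleton (kernel-only primitives, so this is a sketch rather than standalone-compilable code):

/* Skeleton of the raw-spinlock pattern used throughout mce_intel.c. */
static DEFINE_RAW_SPINLOCK(cmci_discover_lock);

static void cmci_critical_section(void)
{
	unsigned long flags;

	/* raw_spin_lock_irqsave() never sleeps, even on PREEMPT_RT */
	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	/* ... manipulate per-bank CMCI state / MSRs ... */
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}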
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index cfc6f9dfcd90..0939f86f543d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -945,7 +945,7 @@ static struct intel_uncore_type *snbep_pci_uncores[] = {
945 NULL, 945 NULL,
946}; 946};
947 947
948static DEFINE_PCI_DEVICE_TABLE(snbep_uncore_pci_ids) = { 948static const struct pci_device_id snbep_uncore_pci_ids[] = {
949 { /* Home Agent */ 949 { /* Home Agent */
950 PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA), 950 PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA),
951 .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_HA, 0), 951 .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_HA, 0),
@@ -1510,7 +1510,7 @@ static struct intel_uncore_type *ivt_pci_uncores[] = {
1510 NULL, 1510 NULL,
1511}; 1511};
1512 1512
1513static DEFINE_PCI_DEVICE_TABLE(ivt_uncore_pci_ids) = { 1513static const struct pci_device_id ivt_uncore_pci_ids[] = {
1514 { /* Home Agent 0 */ 1514 { /* Home Agent 0 */
1515 PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe30), 1515 PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe30),
1516 .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_HA, 0), 1516 .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_HA, 0),
@@ -1985,7 +1985,7 @@ static struct intel_uncore_type *snb_pci_uncores[] = {
1985 NULL, 1985 NULL,
1986}; 1986};
1987 1987
1988static DEFINE_PCI_DEVICE_TABLE(snb_uncore_pci_ids) = { 1988static const struct pci_device_id snb_uncore_pci_ids[] = {
1989 { /* IMC */ 1989 { /* IMC */
1990 PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC), 1990 PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC),
1991 .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), 1991 .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
@@ -1993,7 +1993,7 @@ static DEFINE_PCI_DEVICE_TABLE(snb_uncore_pci_ids) = {
1993 { /* end: all zeroes */ }, 1993 { /* end: all zeroes */ },
1994}; 1994};
1995 1995
1996static DEFINE_PCI_DEVICE_TABLE(ivb_uncore_pci_ids) = { 1996static const struct pci_device_id ivb_uncore_pci_ids[] = {
1997 { /* IMC */ 1997 { /* IMC */
1998 PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC), 1998 PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC),
1999 .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), 1999 .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
@@ -2001,7 +2001,7 @@ static DEFINE_PCI_DEVICE_TABLE(ivb_uncore_pci_ids) = {
2001 { /* end: all zeroes */ }, 2001 { /* end: all zeroes */ },
2002}; 2002};
2003 2003
2004static DEFINE_PCI_DEVICE_TABLE(hsw_uncore_pci_ids) = { 2004static const struct pci_device_id hsw_uncore_pci_ids[] = {
2005 { /* IMC */ 2005 { /* IMC */
2006 PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC), 2006 PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC),
2007 .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), 2007 .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
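
DEFINE_PCI_DEVICE_TABLE, removed throughout the uncore driver above, was a deprecated wrapper around spelling out `const struct pci_device_id name[]` directly, and checkpatch flags its use. The shape of such an open-coded table, with a placeholder device ID (0x1234 is illustrative, not a real uncore device; kernel types assumed):

/* Sketch of an open-coded PCI ID table; 0x1234 is a placeholder. */
static const struct pci_device_id demo_uncore_pci_ids[] = {
	{ /* IMC */
		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x1234),
		.driver_data = 0,
	},
	{ /* end: all zeroes */ },
};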
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 507de8066594..0553a34fa0df 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -4,9 +4,14 @@
4 * Created by: Hariprasad Nellitheertha (hari@in.ibm.com) 4 * Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
5 * 5 *
6 * Copyright (C) IBM Corporation, 2004. All rights reserved. 6 * Copyright (C) IBM Corporation, 2004. All rights reserved.
7 * Copyright (C) Red Hat Inc., 2014. All rights reserved.
8 * Authors:
9 * Vivek Goyal <vgoyal@redhat.com>
7 * 10 *
8 */ 11 */
9 12
13#define pr_fmt(fmt) "kexec: " fmt
14
10#include <linux/types.h> 15#include <linux/types.h>
11#include <linux/kernel.h> 16#include <linux/kernel.h>
12#include <linux/smp.h> 17#include <linux/smp.h>
@@ -16,6 +21,7 @@
16#include <linux/elf.h> 21#include <linux/elf.h>
17#include <linux/elfcore.h> 22#include <linux/elfcore.h>
18#include <linux/module.h> 23#include <linux/module.h>
24#include <linux/slab.h>
19 25
20#include <asm/processor.h> 26#include <asm/processor.h>
21#include <asm/hardirq.h> 27#include <asm/hardirq.h>
@@ -28,6 +34,45 @@
28#include <asm/reboot.h> 34#include <asm/reboot.h>
29#include <asm/virtext.h> 35#include <asm/virtext.h>
30 36
37/* Alignment required for elf header segment */
38#define ELF_CORE_HEADER_ALIGN 4096
39
40/* This primarily represents number of split ranges due to exclusion */
41#define CRASH_MAX_RANGES 16
42
43struct crash_mem_range {
44 u64 start, end;
45};
46
47struct crash_mem {
48 unsigned int nr_ranges;
49 struct crash_mem_range ranges[CRASH_MAX_RANGES];
50};
51
52/* Misc data about ram ranges needed to prepare elf headers */
53struct crash_elf_data {
54 struct kimage *image;
55 /*
56 * Total number of ram ranges we have after various adjustments for
57 * GART, crash reserved region etc.
58 */
59 unsigned int max_nr_ranges;
60 unsigned long gart_start, gart_end;
61
62 /* Pointer to elf header */
63 void *ehdr;
64 /* Pointer to next phdr */
65 void *bufp;
66 struct crash_mem mem;
67};
68
69/* Used while preparing memory map entries for second kernel */
70struct crash_memmap_data {
71 struct boot_params *params;
72 /* Type of memory */
73 unsigned int type;
74};
75
31int in_crash_kexec; 76int in_crash_kexec;
32 77
33/* 78/*
@@ -39,6 +84,7 @@ int in_crash_kexec;
39 */ 84 */
40crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL; 85crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
41EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss); 86EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
87unsigned long crash_zero_bytes;
42 88
43static inline void cpu_crash_vmclear_loaded_vmcss(void) 89static inline void cpu_crash_vmclear_loaded_vmcss(void)
44{ 90{
@@ -135,3 +181,520 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
135#endif 181#endif
136 crash_save_cpu(regs, safe_smp_processor_id()); 182 crash_save_cpu(regs, safe_smp_processor_id());
137} 183}
184
185#ifdef CONFIG_X86_64
186
187static int get_nr_ram_ranges_callback(unsigned long start_pfn,
188 unsigned long nr_pfn, void *arg)
189{
190 int *nr_ranges = arg;
191
192 (*nr_ranges)++;
193 return 0;
194}
195
196static int get_gart_ranges_callback(u64 start, u64 end, void *arg)
197{
198 struct crash_elf_data *ced = arg;
199
200 ced->gart_start = start;
201 ced->gart_end = end;
202
203 /* Not expecting more than 1 gart aperture */
204 return 1;
205}
206
207
208/* Gather all the required information to prepare elf headers for ram regions */
209static void fill_up_crash_elf_data(struct crash_elf_data *ced,
210 struct kimage *image)
211{
212 unsigned int nr_ranges = 0;
213
214 ced->image = image;
215
216 walk_system_ram_range(0, -1, &nr_ranges,
217 get_nr_ram_ranges_callback);
218
219 ced->max_nr_ranges = nr_ranges;
220
221 /*
 222	 * We don't create ELF headers for the GART aperture, as attempting
 223	 * to dump this memory in the second kernel leads to a hang/crash.
 224	 * If a GART aperture is present, that region needs to be excluded,
 225	 * which may require an extra phdr.
226 */
227 walk_iomem_res("GART", IORESOURCE_MEM, 0, -1,
228 ced, get_gart_ranges_callback);
229
230 /*
 231	 * If we have a GART region, excluding it could split
 232	 * a memory range, resulting in an extra header. Account for that.
233 */
234 if (ced->gart_end)
235 ced->max_nr_ranges++;
236
237 /* Exclusion of crash region could split memory ranges */
238 ced->max_nr_ranges++;
239
240 /* If crashk_low_res is not 0, another range split possible */
241 if (crashk_low_res.end != 0)
242 ced->max_nr_ranges++;
243}
244
245static int exclude_mem_range(struct crash_mem *mem,
246 unsigned long long mstart, unsigned long long mend)
247{
248 int i, j;
249 unsigned long long start, end;
250 struct crash_mem_range temp_range = {0, 0};
251
252 for (i = 0; i < mem->nr_ranges; i++) {
253 start = mem->ranges[i].start;
254 end = mem->ranges[i].end;
255
256 if (mstart > end || mend < start)
257 continue;
258
259 /* Truncate any area outside of range */
260 if (mstart < start)
261 mstart = start;
262 if (mend > end)
263 mend = end;
264
265 /* Found completely overlapping range */
266 if (mstart == start && mend == end) {
267 mem->ranges[i].start = 0;
268 mem->ranges[i].end = 0;
269 if (i < mem->nr_ranges - 1) {
270 /* Shift rest of the ranges to left */
271 for (j = i; j < mem->nr_ranges - 1; j++) {
272 mem->ranges[j].start =
273 mem->ranges[j+1].start;
274 mem->ranges[j].end =
275 mem->ranges[j+1].end;
276 }
277 }
278 mem->nr_ranges--;
279 return 0;
280 }
281
282 if (mstart > start && mend < end) {
283 /* Split original range */
284 mem->ranges[i].end = mstart - 1;
285 temp_range.start = mend + 1;
286 temp_range.end = end;
287 } else if (mstart != start)
288 mem->ranges[i].end = mstart - 1;
289 else
290 mem->ranges[i].start = mend + 1;
291 break;
292 }
293
 294	/* If a split happened, add the split to the array */
295 if (!temp_range.end)
296 return 0;
297
298 /* Split happened */
299 if (i == CRASH_MAX_RANGES - 1) {
300 pr_err("Too many crash ranges after split\n");
301 return -ENOMEM;
302 }
303
304 /* Location where new range should go */
305 j = i + 1;
306 if (j < mem->nr_ranges) {
307 /* Move over all ranges one slot towards the end */
308 for (i = mem->nr_ranges - 1; i >= j; i--)
309 mem->ranges[i + 1] = mem->ranges[i];
310 }
311
312 mem->ranges[j].start = temp_range.start;
313 mem->ranges[j].end = temp_range.end;
314 mem->nr_ranges++;
315 return 0;
316}
317
318/*
 319 * Look for any unwanted ranges between mstart and mend and remove them. This
 320 * may split ranges; the resulting split ranges are put in the ced->mem.ranges[] array
321 */
322static int elf_header_exclude_ranges(struct crash_elf_data *ced,
323 unsigned long long mstart, unsigned long long mend)
324{
325 struct crash_mem *cmem = &ced->mem;
326 int ret = 0;
327
328 memset(cmem->ranges, 0, sizeof(cmem->ranges));
329
330 cmem->ranges[0].start = mstart;
331 cmem->ranges[0].end = mend;
332 cmem->nr_ranges = 1;
333
334 /* Exclude crashkernel region */
335 ret = exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
336 if (ret)
337 return ret;
338
339 ret = exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end);
340 if (ret)
341 return ret;
342
343 /* Exclude GART region */
344 if (ced->gart_end) {
345 ret = exclude_mem_range(cmem, ced->gart_start, ced->gart_end);
346 if (ret)
347 return ret;
348 }
349
350 return ret;
351}
352
353static int prepare_elf64_ram_headers_callback(u64 start, u64 end, void *arg)
354{
355 struct crash_elf_data *ced = arg;
356 Elf64_Ehdr *ehdr;
357 Elf64_Phdr *phdr;
358 unsigned long mstart, mend;
359 struct kimage *image = ced->image;
360 struct crash_mem *cmem;
361 int ret, i;
362
363 ehdr = ced->ehdr;
364
365 /* Exclude unwanted mem ranges */
366 ret = elf_header_exclude_ranges(ced, start, end);
367 if (ret)
368 return ret;
369
370 /* Go through all the ranges in ced->mem.ranges[] and prepare phdr */
371 cmem = &ced->mem;
372
373 for (i = 0; i < cmem->nr_ranges; i++) {
374 mstart = cmem->ranges[i].start;
375 mend = cmem->ranges[i].end;
376
377 phdr = ced->bufp;
378 ced->bufp += sizeof(Elf64_Phdr);
379
380 phdr->p_type = PT_LOAD;
381 phdr->p_flags = PF_R|PF_W|PF_X;
382 phdr->p_offset = mstart;
383
384 /*
385 * If a range matches backup region, adjust offset to backup
386 * segment.
387 */
388 if (mstart == image->arch.backup_src_start &&
389 (mend - mstart + 1) == image->arch.backup_src_sz)
390 phdr->p_offset = image->arch.backup_load_addr;
391
392 phdr->p_paddr = mstart;
393 phdr->p_vaddr = (unsigned long long) __va(mstart);
394 phdr->p_filesz = phdr->p_memsz = mend - mstart + 1;
395 phdr->p_align = 0;
396 ehdr->e_phnum++;
397 pr_debug("Crash PT_LOAD elf header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n",
398 phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz,
399 ehdr->e_phnum, phdr->p_offset);
400 }
401
402 return ret;
403}
404
405static int prepare_elf64_headers(struct crash_elf_data *ced,
406 void **addr, unsigned long *sz)
407{
408 Elf64_Ehdr *ehdr;
409 Elf64_Phdr *phdr;
410 unsigned long nr_cpus = num_possible_cpus(), nr_phdr, elf_sz;
411 unsigned char *buf, *bufp;
412 unsigned int cpu;
413 unsigned long long notes_addr;
414 int ret;
415
416 /* extra phdr for vmcoreinfo elf note */
417 nr_phdr = nr_cpus + 1;
418 nr_phdr += ced->max_nr_ranges;
419
420 /*
421 * kexec-tools creates an extra PT_LOAD phdr for kernel text mapping
422 * area on x86_64 (ffffffff80000000 - ffffffffa0000000).
 423	 * I think this is required by tools like gdb, so the same physical
 424	 * memory will be mapped in two elf headers: one with kernel text
 425	 * virtual addresses, the other with __va(physical) addresses.
426 */
427
428 nr_phdr++;
429 elf_sz = sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr);
430 elf_sz = ALIGN(elf_sz, ELF_CORE_HEADER_ALIGN);
431
432 buf = vzalloc(elf_sz);
433 if (!buf)
434 return -ENOMEM;
435
436 bufp = buf;
437 ehdr = (Elf64_Ehdr *)bufp;
438 bufp += sizeof(Elf64_Ehdr);
439 memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
440 ehdr->e_ident[EI_CLASS] = ELFCLASS64;
441 ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
442 ehdr->e_ident[EI_VERSION] = EV_CURRENT;
443 ehdr->e_ident[EI_OSABI] = ELF_OSABI;
444 memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD);
445 ehdr->e_type = ET_CORE;
446 ehdr->e_machine = ELF_ARCH;
447 ehdr->e_version = EV_CURRENT;
448 ehdr->e_phoff = sizeof(Elf64_Ehdr);
449 ehdr->e_ehsize = sizeof(Elf64_Ehdr);
450 ehdr->e_phentsize = sizeof(Elf64_Phdr);
451
452 /* Prepare one phdr of type PT_NOTE for each present cpu */
453 for_each_present_cpu(cpu) {
454 phdr = (Elf64_Phdr *)bufp;
455 bufp += sizeof(Elf64_Phdr);
456 phdr->p_type = PT_NOTE;
457 notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu));
458 phdr->p_offset = phdr->p_paddr = notes_addr;
459 phdr->p_filesz = phdr->p_memsz = sizeof(note_buf_t);
460 (ehdr->e_phnum)++;
461 }
462
463 /* Prepare one PT_NOTE header for vmcoreinfo */
464 phdr = (Elf64_Phdr *)bufp;
465 bufp += sizeof(Elf64_Phdr);
466 phdr->p_type = PT_NOTE;
467 phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
468 phdr->p_filesz = phdr->p_memsz = sizeof(vmcoreinfo_note);
469 (ehdr->e_phnum)++;
470
471#ifdef CONFIG_X86_64
472 /* Prepare PT_LOAD type program header for kernel text region */
473 phdr = (Elf64_Phdr *)bufp;
474 bufp += sizeof(Elf64_Phdr);
475 phdr->p_type = PT_LOAD;
476 phdr->p_flags = PF_R|PF_W|PF_X;
477 phdr->p_vaddr = (Elf64_Addr)_text;
478 phdr->p_filesz = phdr->p_memsz = _end - _text;
479 phdr->p_offset = phdr->p_paddr = __pa_symbol(_text);
480 (ehdr->e_phnum)++;
481#endif
482
483 /* Prepare PT_LOAD headers for system ram chunks. */
484 ced->ehdr = ehdr;
485 ced->bufp = bufp;
486 ret = walk_system_ram_res(0, -1, ced,
487 prepare_elf64_ram_headers_callback);
488 if (ret < 0)
489 return ret;
490
491 *addr = buf;
492 *sz = elf_sz;
493 return 0;
494}
495
496/* Prepare elf headers. Return addr and size */
497static int prepare_elf_headers(struct kimage *image, void **addr,
498 unsigned long *sz)
499{
500 struct crash_elf_data *ced;
501 int ret;
502
503 ced = kzalloc(sizeof(*ced), GFP_KERNEL);
504 if (!ced)
505 return -ENOMEM;
506
507 fill_up_crash_elf_data(ced, image);
508
509 /* By default prepare 64bit headers */
510 ret = prepare_elf64_headers(ced, addr, sz);
511 kfree(ced);
512 return ret;
513}
514
515static int add_e820_entry(struct boot_params *params, struct e820entry *entry)
516{
517 unsigned int nr_e820_entries;
518
519 nr_e820_entries = params->e820_entries;
520 if (nr_e820_entries >= E820MAX)
521 return 1;
522
523 memcpy(&params->e820_map[nr_e820_entries], entry,
524 sizeof(struct e820entry));
525 params->e820_entries++;
526 return 0;
527}
528
529static int memmap_entry_callback(u64 start, u64 end, void *arg)
530{
531 struct crash_memmap_data *cmd = arg;
532 struct boot_params *params = cmd->params;
533 struct e820entry ei;
534
535 ei.addr = start;
536 ei.size = end - start + 1;
537 ei.type = cmd->type;
538 add_e820_entry(params, &ei);
539
540 return 0;
541}
542
543static int memmap_exclude_ranges(struct kimage *image, struct crash_mem *cmem,
544 unsigned long long mstart,
545 unsigned long long mend)
546{
547 unsigned long start, end;
548 int ret = 0;
549
550 cmem->ranges[0].start = mstart;
551 cmem->ranges[0].end = mend;
552 cmem->nr_ranges = 1;
553
554 /* Exclude Backup region */
555 start = image->arch.backup_load_addr;
556 end = start + image->arch.backup_src_sz - 1;
557 ret = exclude_mem_range(cmem, start, end);
558 if (ret)
559 return ret;
560
561 /* Exclude elf header region */
562 start = image->arch.elf_load_addr;
563 end = start + image->arch.elf_headers_sz - 1;
564 return exclude_mem_range(cmem, start, end);
565}
566
567/* Prepare memory map for crash dump kernel */
568int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
569{
570 int i, ret = 0;
571 unsigned long flags;
572 struct e820entry ei;
573 struct crash_memmap_data cmd;
574 struct crash_mem *cmem;
575
576 cmem = vzalloc(sizeof(struct crash_mem));
577 if (!cmem)
578 return -ENOMEM;
579
580 memset(&cmd, 0, sizeof(struct crash_memmap_data));
581 cmd.params = params;
582
583 /* Add first 640K segment */
584 ei.addr = image->arch.backup_src_start;
585 ei.size = image->arch.backup_src_sz;
586 ei.type = E820_RAM;
587 add_e820_entry(params, &ei);
588
589 /* Add ACPI tables */
590 cmd.type = E820_ACPI;
591 flags = IORESOURCE_MEM | IORESOURCE_BUSY;
592 walk_iomem_res("ACPI Tables", flags, 0, -1, &cmd,
593 memmap_entry_callback);
594
595 /* Add ACPI Non-volatile Storage */
596 cmd.type = E820_NVS;
597 walk_iomem_res("ACPI Non-volatile Storage", flags, 0, -1, &cmd,
598 memmap_entry_callback);
599
600 /* Add crashk_low_res region */
601 if (crashk_low_res.end) {
602 ei.addr = crashk_low_res.start;
603 ei.size = crashk_low_res.end - crashk_low_res.start + 1;
604 ei.type = E820_RAM;
605 add_e820_entry(params, &ei);
606 }
607
608 /* Exclude some ranges from crashk_res and add rest to memmap */
609 ret = memmap_exclude_ranges(image, cmem, crashk_res.start,
610 crashk_res.end);
611 if (ret)
612 goto out;
613
614 for (i = 0; i < cmem->nr_ranges; i++) {
615 ei.size = cmem->ranges[i].end - cmem->ranges[i].start + 1;
616
617 /* If entry is less than a page, skip it */
618 if (ei.size < PAGE_SIZE)
619 continue;
620 ei.addr = cmem->ranges[i].start;
621 ei.type = E820_RAM;
622 add_e820_entry(params, &ei);
623 }
624
625out:
626 vfree(cmem);
627 return ret;
628}
629
630static int determine_backup_region(u64 start, u64 end, void *arg)
631{
632 struct kimage *image = arg;
633
634 image->arch.backup_src_start = start;
635 image->arch.backup_src_sz = end - start + 1;
636
637 /* Expecting only one range for backup region */
638 return 1;
639}
640
641int crash_load_segments(struct kimage *image)
642{
643 unsigned long src_start, src_sz, elf_sz;
644 void *elf_addr;
645 int ret;
646
647 /*
 648	 * Determine and load a segment for the backup area. The first 640K
 649	 * of RAM is the backup source.
650 */
651
652 ret = walk_system_ram_res(KEXEC_BACKUP_SRC_START, KEXEC_BACKUP_SRC_END,
653 image, determine_backup_region);
654
 655	/* Zero or positive return values are ok */
656 if (ret < 0)
657 return ret;
658
659 src_start = image->arch.backup_src_start;
660 src_sz = image->arch.backup_src_sz;
661
662 /* Add backup segment. */
663 if (src_sz) {
664 /*
 665		 * Ideally there is no source for the backup segment. It is
 666		 * copied over in purgatory after the crash. Just add a zero-filled
 667		 * segment for now so that the checksum logic works.
668 */
669 ret = kexec_add_buffer(image, (char *)&crash_zero_bytes,
670 sizeof(crash_zero_bytes), src_sz,
671 PAGE_SIZE, 0, -1, 0,
672 &image->arch.backup_load_addr);
673 if (ret)
674 return ret;
675 pr_debug("Loaded backup region at 0x%lx backup_start=0x%lx memsz=0x%lx\n",
676 image->arch.backup_load_addr, src_start, src_sz);
677 }
678
679 /* Prepare elf headers and add a segment */
680 ret = prepare_elf_headers(image, &elf_addr, &elf_sz);
681 if (ret)
682 return ret;
683
684 image->arch.elf_headers = elf_addr;
685 image->arch.elf_headers_sz = elf_sz;
686
687 ret = kexec_add_buffer(image, (char *)elf_addr, elf_sz, elf_sz,
688 ELF_CORE_HEADER_ALIGN, 0, -1, 0,
689 &image->arch.elf_load_addr);
690 if (ret) {
691 vfree((void *)image->arch.elf_headers);
692 return ret;
693 }
694 pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
695 image->arch.elf_load_addr, elf_sz, elf_sz);
696
697 return ret;
698}
699
700#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 7db54b5d5f86..3d3503351242 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -21,6 +21,7 @@
21#include <asm/apic.h> 21#include <asm/apic.h>
22#include <asm/pci_x86.h> 22#include <asm/pci_x86.h>
23#include <asm/setup.h> 23#include <asm/setup.h>
24#include <asm/i8259.h>
24 25
25__initdata u64 initial_dtb; 26__initdata u64 initial_dtb;
26char __initdata cmd_line[COMMAND_LINE_SIZE]; 27char __initdata cmd_line[COMMAND_LINE_SIZE];
@@ -165,82 +166,6 @@ static void __init dtb_lapic_setup(void)
165#ifdef CONFIG_X86_IO_APIC 166#ifdef CONFIG_X86_IO_APIC
166static unsigned int ioapic_id; 167static unsigned int ioapic_id;
167 168
168static void __init dtb_add_ioapic(struct device_node *dn)
169{
170 struct resource r;
171 int ret;
172
173 ret = of_address_to_resource(dn, 0, &r);
174 if (ret) {
175 printk(KERN_ERR "Can't obtain address from node %s.\n",
176 dn->full_name);
177 return;
178 }
179 mp_register_ioapic(++ioapic_id, r.start, gsi_top);
180}
181
182static void __init dtb_ioapic_setup(void)
183{
184 struct device_node *dn;
185
186 for_each_compatible_node(dn, NULL, "intel,ce4100-ioapic")
187 dtb_add_ioapic(dn);
188
189 if (nr_ioapics) {
190 of_ioapic = 1;
191 return;
192 }
193 printk(KERN_ERR "Error: No information about IO-APIC in OF.\n");
194}
195#else
196static void __init dtb_ioapic_setup(void) {}
197#endif
198
199static void __init dtb_apic_setup(void)
200{
201 dtb_lapic_setup();
202 dtb_ioapic_setup();
203}
204
205#ifdef CONFIG_OF_FLATTREE
206static void __init x86_flattree_get_config(void)
207{
208 u32 size, map_len;
209 void *dt;
210
211 if (!initial_dtb)
212 return;
213
214 map_len = max(PAGE_SIZE - (initial_dtb & ~PAGE_MASK), (u64)128);
215
216 initial_boot_params = dt = early_memremap(initial_dtb, map_len);
217 size = of_get_flat_dt_size();
218 if (map_len < size) {
219 early_iounmap(dt, map_len);
220 initial_boot_params = dt = early_memremap(initial_dtb, size);
221 map_len = size;
222 }
223
224 unflatten_and_copy_device_tree();
225 early_iounmap(dt, map_len);
226}
227#else
228static inline void x86_flattree_get_config(void) { }
229#endif
230
231void __init x86_dtb_init(void)
232{
233 x86_flattree_get_config();
234
235 if (!of_have_populated_dt())
236 return;
237
238 dtb_setup_hpet();
239 dtb_apic_setup();
240}
241
242#ifdef CONFIG_X86_IO_APIC
243
244struct of_ioapic_type { 169struct of_ioapic_type {
245 u32 out_type; 170 u32 out_type;
246 u32 trigger; 171 u32 trigger;
@@ -276,10 +201,8 @@ static int ioapic_xlate(struct irq_domain *domain,
276 const u32 *intspec, u32 intsize, 201 const u32 *intspec, u32 intsize,
277 irq_hw_number_t *out_hwirq, u32 *out_type) 202 irq_hw_number_t *out_hwirq, u32 *out_type)
278{ 203{
279 struct io_apic_irq_attr attr;
280 struct of_ioapic_type *it; 204 struct of_ioapic_type *it;
281 u32 line, idx; 205 u32 line, idx, gsi;
282 int rc;
283 206
284 if (WARN_ON(intsize < 2)) 207 if (WARN_ON(intsize < 2))
285 return -EINVAL; 208 return -EINVAL;
@@ -291,13 +214,10 @@ static int ioapic_xlate(struct irq_domain *domain,
291 214
292 it = &of_ioapic_type[intspec[1]]; 215 it = &of_ioapic_type[intspec[1]];
293 216
294 idx = (u32) domain->host_data; 217 idx = (u32)(long)domain->host_data;
295 set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity); 218 gsi = mp_pin_to_gsi(idx, line);
296 219 if (mp_set_gsi_attr(gsi, it->trigger, it->polarity, cpu_to_node(0)))
297 rc = io_apic_setup_irq_pin_once(irq_find_mapping(domain, line), 220 return -EBUSY;
298 cpu_to_node(0), &attr);
299 if (rc)
300 return rc;
301 221
302 *out_hwirq = line; 222 *out_hwirq = line;
303 *out_type = it->out_type; 223 *out_type = it->out_type;
@@ -305,81 +225,86 @@ static int ioapic_xlate(struct irq_domain *domain,
305} 225}
306 226
307const struct irq_domain_ops ioapic_irq_domain_ops = { 227const struct irq_domain_ops ioapic_irq_domain_ops = {
228 .map = mp_irqdomain_map,
229 .unmap = mp_irqdomain_unmap,
308 .xlate = ioapic_xlate, 230 .xlate = ioapic_xlate,
309}; 231};
310 232
311static void dt_add_ioapic_domain(unsigned int ioapic_num, 233static void __init dtb_add_ioapic(struct device_node *dn)
312 struct device_node *np)
313{ 234{
314 struct irq_domain *id; 235 struct resource r;
315 struct mp_ioapic_gsi *gsi_cfg;
316 int ret; 236 int ret;
317 int num; 237 struct ioapic_domain_cfg cfg = {
318 238 .type = IOAPIC_DOMAIN_DYNAMIC,
319 gsi_cfg = mp_ioapic_gsi_routing(ioapic_num); 239 .ops = &ioapic_irq_domain_ops,
320 num = gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1; 240 .dev = dn,
321 241 };
322 id = irq_domain_add_linear(np, num, &ioapic_irq_domain_ops, 242
323 (void *)ioapic_num); 243 ret = of_address_to_resource(dn, 0, &r);
324 BUG_ON(!id); 244 if (ret) {
325 if (gsi_cfg->gsi_base == 0) { 245 printk(KERN_ERR "Can't obtain address from node %s.\n",
326 /* 246 dn->full_name);
327 * The first NR_IRQS_LEGACY irq descs are allocated in 247 return;
328 * early_irq_init() and need just a mapping. The
329 * remaining irqs need both. All of them are preallocated
330 * and assigned so we can keep the 1:1 mapping which the ioapic
331 * is having.
332 */
333 irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY);
334
335 if (num > NR_IRQS_LEGACY) {
336 ret = irq_create_strict_mappings(id, NR_IRQS_LEGACY,
337 NR_IRQS_LEGACY, num - NR_IRQS_LEGACY);
338 if (ret)
339 pr_err("Error creating mapping for the "
340 "remaining IRQs: %d\n", ret);
341 }
342 irq_set_default_host(id);
343 } else {
344 ret = irq_create_strict_mappings(id, gsi_cfg->gsi_base, 0, num);
345 if (ret)
346 pr_err("Error creating IRQ mapping: %d\n", ret);
347 } 248 }
249 mp_register_ioapic(++ioapic_id, r.start, gsi_top, &cfg);
348} 250}
349 251
350static void __init ioapic_add_ofnode(struct device_node *np) 252static void __init dtb_ioapic_setup(void)
351{ 253{
352 struct resource r; 254 struct device_node *dn;
353 int i, ret;
354 255
355 ret = of_address_to_resource(np, 0, &r); 256 for_each_compatible_node(dn, NULL, "intel,ce4100-ioapic")
356 if (ret) { 257 dtb_add_ioapic(dn);
357 printk(KERN_ERR "Failed to obtain address for %s\n", 258
358 np->full_name); 259 if (nr_ioapics) {
260 of_ioapic = 1;
359 return; 261 return;
360 } 262 }
263 printk(KERN_ERR "Error: No information about IO-APIC in OF.\n");
264}
265#else
266static void __init dtb_ioapic_setup(void) {}
267#endif
361 268
362 for (i = 0; i < nr_ioapics; i++) { 269static void __init dtb_apic_setup(void)
363 if (r.start == mpc_ioapic_addr(i)) { 270{
364 dt_add_ioapic_domain(i, np); 271 dtb_lapic_setup();
365 return; 272 dtb_ioapic_setup();
366 }
367 }
368 printk(KERN_ERR "IOxAPIC at %s is not registered.\n", np->full_name);
369} 273}
370 274
371void __init x86_add_irq_domains(void) 275#ifdef CONFIG_OF_FLATTREE
276static void __init x86_flattree_get_config(void)
372{ 277{
373 struct device_node *dp; 278 u32 size, map_len;
279 void *dt;
374 280
375 if (!of_have_populated_dt()) 281 if (!initial_dtb)
376 return; 282 return;
377 283
378 for_each_node_with_property(dp, "interrupt-controller") { 284 map_len = max(PAGE_SIZE - (initial_dtb & ~PAGE_MASK), (u64)128);
379 if (of_device_is_compatible(dp, "intel,ce4100-ioapic")) 285
380 ioapic_add_ofnode(dp); 286 initial_boot_params = dt = early_memremap(initial_dtb, map_len);
287 size = of_get_flat_dt_size();
288 if (map_len < size) {
289 early_iounmap(dt, map_len);
290 initial_boot_params = dt = early_memremap(initial_dtb, size);
291 map_len = size;
381 } 292 }
293
294 unflatten_and_copy_device_tree();
295 early_iounmap(dt, map_len);
382} 296}
383#else 297#else
384void __init x86_add_irq_domains(void) { } 298static inline void x86_flattree_get_config(void) { }
385#endif 299#endif
300
301void __init x86_dtb_init(void)
302{
303 x86_flattree_get_config();
304
305 if (!of_have_populated_dt())
306 return;
307
308 dtb_setup_hpet();
309 dtb_apic_setup();
310}
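Note: the net effect of this devicetree.c rework is that the IO-APIC irq domain is no longer built by hand with irq_domain_add_linear() and irq_create_strict_mappings(); instead a struct ioapic_domain_cfg carrying the domain ops is handed to mp_register_ioapic(), which now owns domain creation. The registration pattern, exactly as the patch uses it:

        struct ioapic_domain_cfg cfg = {
                .type = IOAPIC_DOMAIN_DYNAMIC,
                .ops  = &ioapic_irq_domain_ops,   /* .map/.unmap from the core */
                .dev  = dn,
        };

        mp_register_ioapic(++ioapic_id, r.start, gsi_top, &cfg);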
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index d5dd80814419..a9a4229f6161 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -375,7 +375,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
375 /* 375 /*
376 * These bits must be zero. 376 * These bits must be zero.
377 */ 377 */
378 xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; 378 memset(xsave_hdr->reserved, 0, 48);
379 379
380 return ret; 380 return ret;
381} 381}
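Note: the xsave header's reserved area is 48 bytes, so zeroing only reserved1[0..1] left most of it unchecked; this hunk clears the whole field. Assuming `reserved` is declared as a fixed 48-byte array, as the new name suggests, an equivalent and slightly more robust spelling would be:

        memset(xsave_hdr->reserved, 0, sizeof(xsave_hdr->reserved));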
diff --git a/arch/x86/kernel/iosf_mbi.c b/arch/x86/kernel/iosf_mbi.c
index d30acdc1229d..9030e83db6ee 100644
--- a/arch/x86/kernel/iosf_mbi.c
+++ b/arch/x86/kernel/iosf_mbi.c
@@ -202,7 +202,7 @@ static int iosf_mbi_probe(struct pci_dev *pdev,
202 return 0; 202 return 0;
203} 203}
204 204
205static DEFINE_PCI_DEVICE_TABLE(iosf_mbi_pci_ids) = { 205static const struct pci_device_id iosf_mbi_pci_ids[] = {
206 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_BAYTRAIL) }, 206 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_BAYTRAIL) },
207 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_QUARK_X1000) }, 207 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_QUARK_X1000) },
208 { 0, }, 208 { 0, },
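Note: DEFINE_PCI_DEVICE_TABLE was deprecated in favor of a plain const array, as this hunk shows. For a table that must also feed module autoloading, the usual idiom adds MODULE_DEVICE_TABLE; a sketch with a hypothetical device ID:

        static const struct pci_device_id example_ids[] = {
                { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x0f00) },  /* hypothetical ID */
                { 0, },
        };
        MODULE_DEVICE_TABLE(pci, example_ids);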
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 7f50156542fb..1e6cff5814fa 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -78,7 +78,7 @@ void __init init_ISA_irqs(void)
78#endif 78#endif
79 legacy_pic->init(0); 79 legacy_pic->init(0);
80 80
81 for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) 81 for (i = 0; i < nr_legacy_irqs(); i++)
82 irq_set_chip_and_handler_name(i, chip, handle_level_irq, name); 82 irq_set_chip_and_handler_name(i, chip, handle_level_irq, name);
83} 83}
84 84
@@ -87,12 +87,6 @@ void __init init_IRQ(void)
87 int i; 87 int i;
88 88
89 /* 89 /*
90 * We probably need a better place for this, but it works for
91 * now ...
92 */
93 x86_add_irq_domains();
94
95 /*
96 * On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15. 90 * On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15.
97 * If these IRQ's are handled by legacy interrupt-controllers like PIC, 91 * If these IRQ's are handled by legacy interrupt-controllers like PIC,
98 * then this configuration will likely be static after the boot. If 92 * then this configuration will likely be static after the boot. If
@@ -100,7 +94,7 @@ void __init init_IRQ(void)
100 * then this vector space can be freed and re-used dynamically as the 94 * then this vector space can be freed and re-used dynamically as the
101 * irq's migrate etc. 95 * irq's migrate etc.
102 */ 96 */
103 for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) 97 for (i = 0; i < nr_legacy_irqs(); i++)
104 per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i; 98 per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i;
105 99
106 x86_init.irqs.intr_init(); 100 x86_init.irqs.intr_init();
@@ -121,7 +115,7 @@ void setup_vector_irq(int cpu)
121 * legacy PIC, for the new cpu that is coming online, setup the static 115 * legacy PIC, for the new cpu that is coming online, setup the static
122 * legacy vector to irq mapping: 116 * legacy vector to irq mapping:
123 */ 117 */
124 for (irq = 0; irq < legacy_pic->nr_legacy_irqs; irq++) 118 for (irq = 0; irq < nr_legacy_irqs(); irq++)
125 per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq; 119 per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq;
126#endif 120#endif
127 121
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
new file mode 100644
index 000000000000..9642b9b33655
--- /dev/null
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -0,0 +1,553 @@
1/*
2 * Kexec bzImage loader
3 *
4 * Copyright (C) 2014 Red Hat Inc.
5 * Authors:
6 * Vivek Goyal <vgoyal@redhat.com>
7 *
8 * This source code is licensed under the GNU General Public License,
9 * Version 2. See the file COPYING for more details.
10 */
11
12#define pr_fmt(fmt) "kexec-bzImage64: " fmt
13
14#include <linux/string.h>
15#include <linux/printk.h>
16#include <linux/errno.h>
17#include <linux/slab.h>
18#include <linux/kexec.h>
19#include <linux/kernel.h>
20#include <linux/mm.h>
21#include <linux/efi.h>
22#include <linux/verify_pefile.h>
23#include <keys/system_keyring.h>
24
25#include <asm/bootparam.h>
26#include <asm/setup.h>
27#include <asm/crash.h>
28#include <asm/efi.h>
29
30#define MAX_ELFCOREHDR_STR_LEN 30 /* elfcorehdr=0x<64bit-value> */
31
32/*
33 * Defines the lowest physical address for various segments. Not sure
34 * exactly where these limits came from. The current bzimage64 loader in
35 * kexec-tools uses them, so I am retaining them. They can be changed over
36 * time as we gain more insight.
37 */
38#define MIN_PURGATORY_ADDR 0x3000
39#define MIN_BOOTPARAM_ADDR 0x3000
40#define MIN_KERNEL_LOAD_ADDR 0x100000
41#define MIN_INITRD_LOAD_ADDR 0x1000000
42
43/*
44 * This is a placeholder for all bootloader-specific data structures that
45 * get allocated in one call but are freed much later, at cleanup
46 * time. Right now there is only one field, but it can grow as needed.
47 */
48struct bzimage64_data {
49 /*
50 * Temporary buffer holding the boot params. It should be
51 * freed once the bootparam segment has been loaded.
52 */
53 void *bootparams_buf;
54};
55
56static int setup_initrd(struct boot_params *params,
57 unsigned long initrd_load_addr, unsigned long initrd_len)
58{
59 params->hdr.ramdisk_image = initrd_load_addr & 0xffffffffUL;
60 params->hdr.ramdisk_size = initrd_len & 0xffffffffUL;
61
62 params->ext_ramdisk_image = initrd_load_addr >> 32;
63 params->ext_ramdisk_size = initrd_len >> 32;
64
65 return 0;
66}
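Note: setup_initrd() splits the 64-bit initrd address and size into the legacy 32-bit header fields plus the ext_* extensions. A worked example with illustrative numbers:

        /* initrd_load_addr = 0x123456000:
         *   params->hdr.ramdisk_image = 0x23456000   (low 32 bits)
         *   params->ext_ramdisk_image = 0x1          (high 32 bits)
         * A consumer honoring XLF_CAN_BE_LOADED_ABOVE_4G recombines them as
         * ((u64)ext_ramdisk_image << 32) | ramdisk_image.
         */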
67
68static int setup_cmdline(struct kimage *image, struct boot_params *params,
69 unsigned long bootparams_load_addr,
70 unsigned long cmdline_offset, char *cmdline,
71 unsigned long cmdline_len)
72{
73 char *cmdline_ptr = ((char *)params) + cmdline_offset;
74 unsigned long cmdline_ptr_phys, len;
75 uint32_t cmdline_low_32, cmdline_ext_32;
76
77 memcpy(cmdline_ptr, cmdline, cmdline_len);
78 if (image->type == KEXEC_TYPE_CRASH) {
79 len = sprintf(cmdline_ptr + cmdline_len - 1,
80 " elfcorehdr=0x%lx", image->arch.elf_load_addr);
81 cmdline_len += len;
82 }
83 cmdline_ptr[cmdline_len - 1] = '\0';
84
85 pr_debug("Final command line is: %s\n", cmdline_ptr);
86 cmdline_ptr_phys = bootparams_load_addr + cmdline_offset;
87 cmdline_low_32 = cmdline_ptr_phys & 0xffffffffUL;
88 cmdline_ext_32 = cmdline_ptr_phys >> 32;
89
90 params->hdr.cmd_line_ptr = cmdline_low_32;
91 if (cmdline_ext_32)
92 params->ext_cmd_line_ptr = cmdline_ext_32;
93
94 return 0;
95}
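Note on setup_cmdline(): cmdline_len includes the trailing NUL, so the sprintf() at cmdline_len - 1 overwrites that NUL, and the final re-termination lands exactly on the NUL sprintf() itself wrote. A worked example with illustrative values:

        /* cmdline = "root=/dev/vda1", cmdline_len = 15 (with NUL).
         * For a crash kernel, sprintf() writes " elfcorehdr=0x..." starting
         * at offset 14, returns len, and cmdline_len grows to 15 + len; the
         * byte at cmdline_len - 1 is then the string's terminating '\0'.
         */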
96
97static int setup_e820_entries(struct boot_params *params)
98{
99 unsigned int nr_e820_entries;
100
101 nr_e820_entries = e820_saved.nr_map;
102
103 /* TODO: Pass entries more than E820MAX in bootparams setup data */
104 if (nr_e820_entries > E820MAX)
105 nr_e820_entries = E820MAX;
106
107 params->e820_entries = nr_e820_entries;
108 memcpy(&params->e820_map, &e820_saved.map,
109 nr_e820_entries * sizeof(struct e820entry));
110
111 return 0;
112}
113
114#ifdef CONFIG_EFI
115static int setup_efi_info_memmap(struct boot_params *params,
116 unsigned long params_load_addr,
117 unsigned int efi_map_offset,
118 unsigned int efi_map_sz)
119{
120 void *efi_map = (void *)params + efi_map_offset;
121 unsigned long efi_map_phys_addr = params_load_addr + efi_map_offset;
122 struct efi_info *ei = &params->efi_info;
123
124 if (!efi_map_sz)
125 return 0;
126
127 efi_runtime_map_copy(efi_map, efi_map_sz);
128
129 ei->efi_memmap = efi_map_phys_addr & 0xffffffff;
130 ei->efi_memmap_hi = efi_map_phys_addr >> 32;
131 ei->efi_memmap_size = efi_map_sz;
132
133 return 0;
134}
135
136static int
137prepare_add_efi_setup_data(struct boot_params *params,
138 unsigned long params_load_addr,
139 unsigned int efi_setup_data_offset)
140{
141 unsigned long setup_data_phys;
142 struct setup_data *sd = (void *)params + efi_setup_data_offset;
143 struct efi_setup_data *esd = (void *)sd + sizeof(struct setup_data);
144
145 esd->fw_vendor = efi.fw_vendor;
146 esd->runtime = efi.runtime;
147 esd->tables = efi.config_table;
148 esd->smbios = efi.smbios;
149
150 sd->type = SETUP_EFI;
151 sd->len = sizeof(struct efi_setup_data);
152
153 /* Add setup data */
154 setup_data_phys = params_load_addr + efi_setup_data_offset;
155 sd->next = params->hdr.setup_data;
156 params->hdr.setup_data = setup_data_phys;
157
158 return 0;
159}
160
161static int
162setup_efi_state(struct boot_params *params, unsigned long params_load_addr,
163 unsigned int efi_map_offset, unsigned int efi_map_sz,
164 unsigned int efi_setup_data_offset)
165{
166 struct efi_info *current_ei = &boot_params.efi_info;
167 struct efi_info *ei = &params->efi_info;
168
169 if (!current_ei->efi_memmap_size)
170 return 0;
171
172 /*
173 * If the 1:1 mapping is not enabled, the second kernel cannot set up
174 * EFI and use the EFI runtime services. User space will have to pass
175 * acpi_rsdp=<addr> on the kernel command line to make the second kernel
176 * boot without EFI.
177 */
178 if (efi_enabled(EFI_OLD_MEMMAP))
179 return 0;
180
181 ei->efi_loader_signature = current_ei->efi_loader_signature;
182 ei->efi_systab = current_ei->efi_systab;
183 ei->efi_systab_hi = current_ei->efi_systab_hi;
184
185 ei->efi_memdesc_version = current_ei->efi_memdesc_version;
186 ei->efi_memdesc_size = efi_get_runtime_map_desc_size();
187
188 setup_efi_info_memmap(params, params_load_addr, efi_map_offset,
189 efi_map_sz);
190 prepare_add_efi_setup_data(params, params_load_addr,
191 efi_setup_data_offset);
192 return 0;
193}
194#endif /* CONFIG_EFI */
195
196static int
197setup_boot_parameters(struct kimage *image, struct boot_params *params,
198 unsigned long params_load_addr,
199 unsigned int efi_map_offset, unsigned int efi_map_sz,
200 unsigned int efi_setup_data_offset)
201{
202 unsigned int nr_e820_entries;
203 unsigned long long mem_k, start, end;
204 int i, ret = 0;
205
206 /* Get subarch from existing bootparams */
207 params->hdr.hardware_subarch = boot_params.hdr.hardware_subarch;
208
209 /* Copying screen_info will do? */
210 memcpy(&params->screen_info, &boot_params.screen_info,
211 sizeof(struct screen_info));
212
213 /* Fill in memsize later */
214 params->screen_info.ext_mem_k = 0;
215 params->alt_mem_k = 0;
216
217 /* Default APM info */
218 memset(&params->apm_bios_info, 0, sizeof(params->apm_bios_info));
219
220 /* Default drive info */
221 memset(&params->hd0_info, 0, sizeof(params->hd0_info));
222 memset(&params->hd1_info, 0, sizeof(params->hd1_info));
223
224 /* Default sysdesc table */
225 params->sys_desc_table.length = 0;
226
227 if (image->type == KEXEC_TYPE_CRASH) {
228 ret = crash_setup_memmap_entries(image, params);
229 if (ret)
230 return ret;
231 } else
232 setup_e820_entries(params);
233
234 nr_e820_entries = params->e820_entries;
235
236 for (i = 0; i < nr_e820_entries; i++) {
237 if (params->e820_map[i].type != E820_RAM)
238 continue;
239 start = params->e820_map[i].addr;
240 end = params->e820_map[i].addr + params->e820_map[i].size - 1;
241
242 if ((start <= 0x100000) && end > 0x100000) {
243 mem_k = (end >> 10) - (0x100000 >> 10);
244 params->screen_info.ext_mem_k = mem_k;
245 params->alt_mem_k = mem_k;
246 if (mem_k > 0xfc00)
247 params->screen_info.ext_mem_k = 0xfc00; /* 64M */
248 if (mem_k > 0xffffffff)
249 params->alt_mem_k = 0xffffffff;
250 }
251 }
252
253#ifdef CONFIG_EFI
254 /* Setup EFI state */
255 setup_efi_state(params, params_load_addr, efi_map_offset, efi_map_sz,
256 efi_setup_data_offset);
257#endif
258
259 /* Setup EDD info */
260 memcpy(params->eddbuf, boot_params.eddbuf,
261 EDDMAXNR * sizeof(struct edd_info));
262 params->eddbuf_entries = boot_params.eddbuf_entries;
263
264 memcpy(params->edd_mbr_sig_buffer, boot_params.edd_mbr_sig_buffer,
265 EDD_MBR_SIG_MAX * sizeof(unsigned int));
266
267 return ret;
268}
269
270int bzImage64_probe(const char *buf, unsigned long len)
271{
272 int ret = -ENOEXEC;
273 struct setup_header *header;
274
275 /* kernel should be at least two sectors long */
276 if (len < 2 * 512) {
277 pr_err("File is too short to be a bzImage\n");
278 return ret;
279 }
280
281 header = (struct setup_header *)(buf + offsetof(struct boot_params, hdr));
282 if (memcmp((char *)&header->header, "HdrS", 4) != 0) {
283 pr_err("Not a bzImage\n");
284 return ret;
285 }
286
287 if (header->boot_flag != 0xAA55) {
288 pr_err("No x86 boot sector present\n");
289 return ret;
290 }
291
292 if (header->version < 0x020C) {
293 pr_err("Must be at least protocol version 2.12\n");
294 return ret;
295 }
296
297 if (!(header->loadflags & LOADED_HIGH)) {
298 pr_err("zImage not a bzImage\n");
299 return ret;
300 }
301
302 if (!(header->xloadflags & XLF_KERNEL_64)) {
303 pr_err("Not a bzImage64. XLF_KERNEL_64 is not set.\n");
304 return ret;
305 }
306
307 if (!(header->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G)) {
308 pr_err("XLF_CAN_BE_LOADED_ABOVE_4G is not set.\n");
309 return ret;
310 }
311
312 /*
313 * Can't handle 32bit EFI as it does not allow loading kernel
314 * above 4G. This should be handled by 32bit bzImage loader
315 */
316 if (efi_enabled(EFI_RUNTIME_SERVICES) && !efi_enabled(EFI_64BIT)) {
317 pr_debug("EFI is 32 bit. Can't load kernel above 4G.\n");
318 return ret;
319 }
320
321 /* I've got a bzImage */
322 pr_debug("It's a relocatable bzImage64\n");
323 ret = 0;
324
325 return ret;
326}
327
328void *bzImage64_load(struct kimage *image, char *kernel,
329 unsigned long kernel_len, char *initrd,
330 unsigned long initrd_len, char *cmdline,
331 unsigned long cmdline_len)
332{
333
334 struct setup_header *header;
335 int setup_sects, kern16_size, ret = 0;
336 unsigned long setup_header_size, params_cmdline_sz, params_misc_sz;
337 struct boot_params *params;
338 unsigned long bootparam_load_addr, kernel_load_addr, initrd_load_addr;
339 unsigned long purgatory_load_addr;
340 unsigned long kernel_bufsz, kernel_memsz, kernel_align;
341 char *kernel_buf;
342 struct bzimage64_data *ldata;
343 struct kexec_entry64_regs regs64;
344 void *stack;
345 unsigned int setup_hdr_offset = offsetof(struct boot_params, hdr);
346 unsigned int efi_map_offset, efi_map_sz, efi_setup_data_offset;
347
348 header = (struct setup_header *)(kernel + setup_hdr_offset);
349 setup_sects = header->setup_sects;
350 if (setup_sects == 0)
351 setup_sects = 4;
352
353 kern16_size = (setup_sects + 1) * 512;
354 if (kernel_len < kern16_size) {
355 pr_err("bzImage truncated\n");
356 return ERR_PTR(-ENOEXEC);
357 }
358
359 if (cmdline_len > header->cmdline_size) {
360 pr_err("Kernel command line too long\n");
361 return ERR_PTR(-EINVAL);
362 }
363
364 /*
365 * In case of a crash dump, we will append elfcorehdr=<addr> to the
366 * command line. Make sure it does not overflow.
367 */
368 if (cmdline_len + MAX_ELFCOREHDR_STR_LEN > header->cmdline_size) {
369 pr_debug("Appending elfcorehdr=<addr> to command line exceeds maximum allowed length\n");
370 return ERR_PTR(-EINVAL);
371 }
372
373 /* Allocate and load backup region */
374 if (image->type == KEXEC_TYPE_CRASH) {
375 ret = crash_load_segments(image);
376 if (ret)
377 return ERR_PTR(ret);
378 }
379
380 /*
381 * Load purgatory. For 64bit entry point, purgatory code can be
382 * anywhere.
383 */
384 ret = kexec_load_purgatory(image, MIN_PURGATORY_ADDR, ULONG_MAX, 1,
385 &purgatory_load_addr);
386 if (ret) {
387 pr_err("Loading purgatory failed\n");
388 return ERR_PTR(ret);
389 }
390
391 pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr);
392
393
394 /*
395 * Load boot params, the cmdline, and space for the EFI data.
396 *
397 * Allocate memory together for multiple data structures so
398 * that they can all go in a single area/segment and we don't
399 * have to create a separate segment for each. Keeps things a
400 * little simpler.
401 */
402 efi_map_sz = efi_get_runtime_map_size();
403 efi_map_sz = ALIGN(efi_map_sz, 16);
404 params_cmdline_sz = sizeof(struct boot_params) + cmdline_len +
405 MAX_ELFCOREHDR_STR_LEN;
406 params_cmdline_sz = ALIGN(params_cmdline_sz, 16);
407 params_misc_sz = params_cmdline_sz + efi_map_sz +
408 sizeof(struct setup_data) +
409 sizeof(struct efi_setup_data);
410
411 params = kzalloc(params_misc_sz, GFP_KERNEL);
412 if (!params)
413 return ERR_PTR(-ENOMEM);
414 efi_map_offset = params_cmdline_sz;
415 efi_setup_data_offset = efi_map_offset + efi_map_sz;
416
417 /* Copy setup header onto bootparams. See Documentation/x86/boot.txt. */
418 setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset;
419
420 /* Is there a limit on setup header size? */
421 memcpy(&params->hdr, (kernel + setup_hdr_offset), setup_header_size);
422
423 ret = kexec_add_buffer(image, (char *)params, params_misc_sz,
424 params_misc_sz, 16, MIN_BOOTPARAM_ADDR,
425 ULONG_MAX, 1, &bootparam_load_addr);
426 if (ret)
427 goto out_free_params;
428 pr_debug("Loaded boot_param, command line and misc at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
429 bootparam_load_addr, params_misc_sz, params_misc_sz);
430
431 /* Load kernel */
432 kernel_buf = kernel + kern16_size;
433 kernel_bufsz = kernel_len - kern16_size;
434 kernel_memsz = PAGE_ALIGN(header->init_size);
435 kernel_align = header->kernel_alignment;
436
437 ret = kexec_add_buffer(image, kernel_buf,
438 kernel_bufsz, kernel_memsz, kernel_align,
439 MIN_KERNEL_LOAD_ADDR, ULONG_MAX, 1,
440 &kernel_load_addr);
441 if (ret)
442 goto out_free_params;
443
444 pr_debug("Loaded 64bit kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
445 kernel_load_addr, kernel_memsz, kernel_memsz);
446
447 /* Load initrd high */
448 if (initrd) {
449 ret = kexec_add_buffer(image, initrd, initrd_len, initrd_len,
450 PAGE_SIZE, MIN_INITRD_LOAD_ADDR,
451 ULONG_MAX, 1, &initrd_load_addr);
452 if (ret)
453 goto out_free_params;
454
455 pr_debug("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
456 initrd_load_addr, initrd_len, initrd_len);
457
458 setup_initrd(params, initrd_load_addr, initrd_len);
459 }
460
461 setup_cmdline(image, params, bootparam_load_addr,
462 sizeof(struct boot_params), cmdline, cmdline_len);
463
464 /* bootloader info. Do we need a separate ID for kexec kernel loader? */
465 params->hdr.type_of_loader = 0x0D << 4;
466 params->hdr.loadflags = 0;
467
468 /* Setup purgatory regs for entry */
469 ret = kexec_purgatory_get_set_symbol(image, "entry64_regs", &regs64,
470 sizeof(regs64), 1);
471 if (ret)
472 goto out_free_params;
473
474 regs64.rbx = 0; /* Bootstrap Processor */
475 regs64.rsi = bootparam_load_addr;
476 regs64.rip = kernel_load_addr + 0x200;
477 stack = kexec_purgatory_get_symbol_addr(image, "stack_end");
478 if (IS_ERR(stack)) {
479 pr_err("Could not find address of symbol stack_end\n");
480 ret = -EINVAL;
481 goto out_free_params;
482 }
483
484 regs64.rsp = (unsigned long)stack;
485 ret = kexec_purgatory_get_set_symbol(image, "entry64_regs", &regs64,
486 sizeof(regs64), 0);
487 if (ret)
488 goto out_free_params;
489
490 ret = setup_boot_parameters(image, params, bootparam_load_addr,
491 efi_map_offset, efi_map_sz,
492 efi_setup_data_offset);
493 if (ret)
494 goto out_free_params;
495
496 /* Allocate loader specific data */
497 ldata = kzalloc(sizeof(struct bzimage64_data), GFP_KERNEL);
498 if (!ldata) {
499 ret = -ENOMEM;
500 goto out_free_params;
501 }
502
503 /*
504 * Store a pointer to params so that it can be freed after the
505 * params segment has been loaded and its contents have been copied
506 * somewhere else.
507 */
508 ldata->bootparams_buf = params;
509 return ldata;
510
511out_free_params:
512 kfree(params);
513 return ERR_PTR(ret);
514}
515
516/* This cleanup function is called after various segments have been loaded */
517int bzImage64_cleanup(void *loader_data)
518{
519 struct bzimage64_data *ldata = loader_data;
520
521 if (!ldata)
522 return 0;
523
524 kfree(ldata->bootparams_buf);
525 ldata->bootparams_buf = NULL;
526
527 return 0;
528}
529
530#ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG
531int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len)
532{
533 bool trusted;
534 int ret;
535
536 ret = verify_pefile_signature(kernel, kernel_len,
537 system_trusted_keyring, &trusted);
538 if (ret < 0)
539 return ret;
540 if (!trusted)
541 return -EKEYREJECTED;
542 return 0;
543}
544#endif
545
546struct kexec_file_ops kexec_bzImage64_ops = {
547 .probe = bzImage64_probe,
548 .load = bzImage64_load,
549 .cleanup = bzImage64_cleanup,
550#ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG
551 .verify_sig = bzImage64_verify_sig,
552#endif
553};
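Note: kexec_bzImage64_ops is the only externally visible symbol in this file; everything else is static. It is consumed by the loader array added to machine_kexec_64.c below, and extending kexec file loading to a second image format would be a one-line change there, e.g.:

        static struct kexec_file_ops *kexec_file_loaders[] = {
                &kexec_bzImage64_ops,
                /* a hypothetical future loader, e.g. &kexec_elf64_ops */
        };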
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 679cef0791cd..8b04018e5d1f 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -6,6 +6,8 @@
6 * Version 2. See the file COPYING for more details. 6 * Version 2. See the file COPYING for more details.
7 */ 7 */
8 8
9#define pr_fmt(fmt) "kexec: " fmt
10
9#include <linux/mm.h> 11#include <linux/mm.h>
10#include <linux/kexec.h> 12#include <linux/kexec.h>
11#include <linux/string.h> 13#include <linux/string.h>
@@ -21,6 +23,11 @@
21#include <asm/tlbflush.h> 23#include <asm/tlbflush.h>
22#include <asm/mmu_context.h> 24#include <asm/mmu_context.h>
23#include <asm/debugreg.h> 25#include <asm/debugreg.h>
26#include <asm/kexec-bzimage64.h>
27
28static struct kexec_file_ops *kexec_file_loaders[] = {
29 &kexec_bzImage64_ops,
30};
24 31
25static void free_transition_pgtable(struct kimage *image) 32static void free_transition_pgtable(struct kimage *image)
26{ 33{
@@ -171,6 +178,38 @@ static void load_segments(void)
171 ); 178 );
172} 179}
173 180
181/* Update purgatory as needed after various image segments have been prepared */
182static int arch_update_purgatory(struct kimage *image)
183{
184 int ret = 0;
185
186 if (!image->file_mode)
187 return 0;
188
189 /* Setup copying of backup region */
190 if (image->type == KEXEC_TYPE_CRASH) {
191 ret = kexec_purgatory_get_set_symbol(image, "backup_dest",
192 &image->arch.backup_load_addr,
193 sizeof(image->arch.backup_load_addr), 0);
194 if (ret)
195 return ret;
196
197 ret = kexec_purgatory_get_set_symbol(image, "backup_src",
198 &image->arch.backup_src_start,
199 sizeof(image->arch.backup_src_start), 0);
200 if (ret)
201 return ret;
202
203 ret = kexec_purgatory_get_set_symbol(image, "backup_sz",
204 &image->arch.backup_src_sz,
205 sizeof(image->arch.backup_src_sz), 0);
206 if (ret)
207 return ret;
208 }
209
210 return ret;
211}
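Note: the final argument of kexec_purgatory_get_set_symbol() selects the direction: 1 reads the purgatory symbol into the buffer, 0 writes the buffer into the symbol. arch_update_purgatory() only writes; the full read-modify-write pattern appears in bzImage64_load() above:

        /* read current entry64_regs, patch a few fields, write it back */
        kexec_purgatory_get_set_symbol(image, "entry64_regs", &regs64,
                                       sizeof(regs64), 1);   /* get */
        regs64.rsi = bootparam_load_addr;
        kexec_purgatory_get_set_symbol(image, "entry64_regs", &regs64,
                                       sizeof(regs64), 0);   /* set */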
212
174int machine_kexec_prepare(struct kimage *image) 213int machine_kexec_prepare(struct kimage *image)
175{ 214{
176 unsigned long start_pgtable; 215 unsigned long start_pgtable;
@@ -184,6 +223,11 @@ int machine_kexec_prepare(struct kimage *image)
184 if (result) 223 if (result)
185 return result; 224 return result;
186 225
226 /* update purgatory as needed */
227 result = arch_update_purgatory(image);
228 if (result)
229 return result;
230
187 return 0; 231 return 0;
188} 232}
189 233
@@ -283,3 +327,198 @@ void arch_crash_save_vmcoreinfo(void)
283 (unsigned long)&_text - __START_KERNEL); 327 (unsigned long)&_text - __START_KERNEL);
284} 328}
285 329
330/* arch-dependent functionality related to kexec file-based syscall */
331
332int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
333 unsigned long buf_len)
334{
335 int i, ret = -ENOEXEC;
336 struct kexec_file_ops *fops;
337
338 for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) {
339 fops = kexec_file_loaders[i];
340 if (!fops || !fops->probe)
341 continue;
342
343 ret = fops->probe(buf, buf_len);
344 if (!ret) {
345 image->fops = fops;
346 return ret;
347 }
348 }
349
350 return ret;
351}
352
353void *arch_kexec_kernel_image_load(struct kimage *image)
354{
355 vfree(image->arch.elf_headers);
356 image->arch.elf_headers = NULL;
357
358 if (!image->fops || !image->fops->load)
359 return ERR_PTR(-ENOEXEC);
360
361 return image->fops->load(image, image->kernel_buf,
362 image->kernel_buf_len, image->initrd_buf,
363 image->initrd_buf_len, image->cmdline_buf,
364 image->cmdline_buf_len);
365}
366
367int arch_kimage_file_post_load_cleanup(struct kimage *image)
368{
369 if (!image->fops || !image->fops->cleanup)
370 return 0;
371
372 return image->fops->cleanup(image->image_loader_data);
373}
374
375int arch_kexec_kernel_verify_sig(struct kimage *image, void *kernel,
376 unsigned long kernel_len)
377{
378 if (!image->fops || !image->fops->verify_sig) {
379 pr_debug("kernel loader does not support signature verification.\n");
380 return -EKEYREJECTED;
381 }
382
383 return image->fops->verify_sig(kernel, kernel_len);
384}
385
386/*
387 * Apply purgatory relocations.
388 *
389 * ehdr: Pointer to elf headers
390 * sechdrs: Pointer to section headers.
391 * relsec: section index of SHT_RELA section.
392 *
393 * TODO: Some of this code belongs in generic code. Move it to kexec.c.
394 */
395int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr,
396 Elf64_Shdr *sechdrs, unsigned int relsec)
397{
398 unsigned int i;
399 Elf64_Rela *rel;
400 Elf64_Sym *sym;
401 void *location;
402 Elf64_Shdr *section, *symtabsec;
403 unsigned long address, sec_base, value;
404 const char *strtab, *name, *shstrtab;
405
406 /*
407 * ->sh_offset has been modified to keep the pointer to the section
408 * contents in memory
409 */
410 rel = (void *)sechdrs[relsec].sh_offset;
411
412 /* Section to which relocations apply */
413 section = &sechdrs[sechdrs[relsec].sh_info];
414
415 pr_debug("Applying relocate section %u to %u\n", relsec,
416 sechdrs[relsec].sh_info);
417
418 /* Associated symbol table */
419 symtabsec = &sechdrs[sechdrs[relsec].sh_link];
420
421 /* String table */
422 if (symtabsec->sh_link >= ehdr->e_shnum) {
423 /* Invalid strtab section number */
424 pr_err("Invalid string table section index %d\n",
425 symtabsec->sh_link);
426 return -ENOEXEC;
427 }
428
429 strtab = (char *)sechdrs[symtabsec->sh_link].sh_offset;
430
431 /* section header string table */
432 shstrtab = (char *)sechdrs[ehdr->e_shstrndx].sh_offset;
433
434 for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
435
436 /*
437 * rel[i].r_offset contains the byte offset from the beginning
438 * of the section to the storage unit affected.
439 *
440 * This is the location to update (->sh_offset), a temporary
441 * buffer where the section is currently loaded. It will finally
442 * be loaded to a different address later, pointed to by
443 * ->sh_addr. kexec takes care of moving it
444 * (kexec_load_segment()).
445 */
446 location = (void *)(section->sh_offset + rel[i].r_offset);
447
448 /* Final address of the location */
449 address = section->sh_addr + rel[i].r_offset;
450
451 /*
452 * rel[i].r_info contains the symbol table index against which
453 * the relocation must be made and the type of relocation to
454 * apply. The ELF64_R_SYM() and ELF64_R_TYPE() macros extract
455 * these respectively.
456 */
457 sym = (Elf64_Sym *)symtabsec->sh_offset +
458 ELF64_R_SYM(rel[i].r_info);
459
460 if (sym->st_name)
461 name = strtab + sym->st_name;
462 else
463 name = shstrtab + sechdrs[sym->st_shndx].sh_name;
464
465 pr_debug("Symbol: %s info: %02x shndx: %02x value=%llx size: %llx\n",
466 name, sym->st_info, sym->st_shndx, sym->st_value,
467 sym->st_size);
468
469 if (sym->st_shndx == SHN_UNDEF) {
470 pr_err("Undefined symbol: %s\n", name);
471 return -ENOEXEC;
472 }
473
474 if (sym->st_shndx == SHN_COMMON) {
475 pr_err("symbol '%s' in common section\n", name);
476 return -ENOEXEC;
477 }
478
479 if (sym->st_shndx == SHN_ABS)
480 sec_base = 0;
481 else if (sym->st_shndx >= ehdr->e_shnum) {
482 pr_err("Invalid section %d for symbol %s\n",
483 sym->st_shndx, name);
484 return -ENOEXEC;
485 } else
486 sec_base = sechdrs[sym->st_shndx].sh_addr;
487
488 value = sym->st_value;
489 value += sec_base;
490 value += rel[i].r_addend;
491
492 switch (ELF64_R_TYPE(rel[i].r_info)) {
493 case R_X86_64_NONE:
494 break;
495 case R_X86_64_64:
496 *(u64 *)location = value;
497 break;
498 case R_X86_64_32:
499 *(u32 *)location = value;
500 if (value != *(u32 *)location)
501 goto overflow;
502 break;
503 case R_X86_64_32S:
504 *(s32 *)location = value;
505 if ((s64)value != *(s32 *)location)
506 goto overflow;
507 break;
508 case R_X86_64_PC32:
509 value -= (u64)address;
510 *(u32 *)location = value;
511 break;
512 default:
513 pr_err("Unknown rela relocation: %llu\n",
514 ELF64_R_TYPE(rel[i].r_info));
515 return -ENOEXEC;
516 }
517 }
518 return 0;
519
520overflow:
521 pr_err("Overflow in relocation type %d value 0x%lx\n",
522 (int)ELF64_R_TYPE(rel[i].r_info), value);
523 return -ENOEXEC;
524}
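Note: the relocation arithmetic follows the standard ELF formulas. The absolute types compute S + A (symbol value plus section base plus addend), and R_X86_64_PC32 additionally subtracts the final location P. A worked example with illustrative numbers:

        /* S = sym->st_value = 0x100, sec_base = 0x4000, A = r_addend = -4,
         * P = section->sh_addr + rel[i].r_offset = 0x5000.
         * value = 0x100 + 0x4000 - 4 = 0x40fc
         * R_X86_64_PC32: value -= P  ->  -0xf04, stored as 0xfffff0fc,
         * i.e. the 32-bit PC-relative displacement written at 'location'.
         */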
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index d2b56489d70f..2d2a237f2c73 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -19,6 +19,7 @@
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/smp.h> 20#include <linux/smp.h>
21#include <linux/pci.h> 21#include <linux/pci.h>
22#include <linux/irqdomain.h>
22 23
23#include <asm/mtrr.h> 24#include <asm/mtrr.h>
24#include <asm/mpspec.h> 25#include <asm/mpspec.h>
@@ -67,7 +68,7 @@ static void __init MP_processor_info(struct mpc_cpu *m)
67 boot_cpu_physical_apicid = m->apicid; 68 boot_cpu_physical_apicid = m->apicid;
68 } 69 }
69 70
70 printk(KERN_INFO "Processor #%d%s\n", m->apicid, bootup_cpu); 71 pr_info("Processor #%d%s\n", m->apicid, bootup_cpu);
71 generic_processor_info(apicid, m->apicver); 72 generic_processor_info(apicid, m->apicver);
72} 73}
73 74
@@ -87,9 +88,8 @@ static void __init MP_bus_info(struct mpc_bus *m)
87 88
88#if MAX_MP_BUSSES < 256 89#if MAX_MP_BUSSES < 256
89 if (m->busid >= MAX_MP_BUSSES) { 90 if (m->busid >= MAX_MP_BUSSES) {
90 printk(KERN_WARNING "MP table busid value (%d) for bustype %s " 91 pr_warn("MP table busid value (%d) for bustype %s is too large, max. supported is %d\n",
91 " is too large, max. supported is %d\n", 92 m->busid, str, MAX_MP_BUSSES - 1);
92 m->busid, str, MAX_MP_BUSSES - 1);
93 return; 93 return;
94 } 94 }
95#endif 95#endif
@@ -110,19 +110,29 @@ static void __init MP_bus_info(struct mpc_bus *m)
110 mp_bus_id_to_type[m->busid] = MP_BUS_EISA; 110 mp_bus_id_to_type[m->busid] = MP_BUS_EISA;
111#endif 111#endif
112 } else 112 } else
113 printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); 113 pr_warn("Unknown bustype %s - ignoring\n", str);
114} 114}
115 115
116static struct irq_domain_ops mp_ioapic_irqdomain_ops = {
117 .map = mp_irqdomain_map,
118 .unmap = mp_irqdomain_unmap,
119};
120
116static void __init MP_ioapic_info(struct mpc_ioapic *m) 121static void __init MP_ioapic_info(struct mpc_ioapic *m)
117{ 122{
123 struct ioapic_domain_cfg cfg = {
124 .type = IOAPIC_DOMAIN_LEGACY,
125 .ops = &mp_ioapic_irqdomain_ops,
126 };
127
118 if (m->flags & MPC_APIC_USABLE) 128 if (m->flags & MPC_APIC_USABLE)
119 mp_register_ioapic(m->apicid, m->apicaddr, gsi_top); 129 mp_register_ioapic(m->apicid, m->apicaddr, gsi_top, &cfg);
120} 130}
121 131
122static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq) 132static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq)
123{ 133{
124 apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," 134 apic_printk(APIC_VERBOSE,
125 " IRQ %02x, APIC ID %x, APIC INT %02x\n", 135 "Int: type %d, pol %d, trig %d, bus %02x, IRQ %02x, APIC ID %x, APIC INT %02x\n",
126 mp_irq->irqtype, mp_irq->irqflag & 3, 136 mp_irq->irqtype, mp_irq->irqflag & 3,
127 (mp_irq->irqflag >> 2) & 3, mp_irq->srcbus, 137 (mp_irq->irqflag >> 2) & 3, mp_irq->srcbus,
128 mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq); 138 mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq);
@@ -135,8 +145,8 @@ static inline void __init MP_ioapic_info(struct mpc_ioapic *m) {}
135 145
136static void __init MP_lintsrc_info(struct mpc_lintsrc *m) 146static void __init MP_lintsrc_info(struct mpc_lintsrc *m)
137{ 147{
138 apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x," 148 apic_printk(APIC_VERBOSE,
139 " IRQ %02x, APIC ID %x, APIC LINT %02x\n", 149 "Lint: type %d, pol %d, trig %d, bus %02x, IRQ %02x, APIC ID %x, APIC LINT %02x\n",
140 m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbusid, 150 m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbusid,
141 m->srcbusirq, m->destapic, m->destapiclint); 151 m->srcbusirq, m->destapic, m->destapiclint);
142} 152}
@@ -148,34 +158,33 @@ static int __init smp_check_mpc(struct mpc_table *mpc, char *oem, char *str)
148{ 158{
149 159
150 if (memcmp(mpc->signature, MPC_SIGNATURE, 4)) { 160 if (memcmp(mpc->signature, MPC_SIGNATURE, 4)) {
151 printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n", 161 pr_err("MPTABLE: bad signature [%c%c%c%c]!\n",
152 mpc->signature[0], mpc->signature[1], 162 mpc->signature[0], mpc->signature[1],
153 mpc->signature[2], mpc->signature[3]); 163 mpc->signature[2], mpc->signature[3]);
154 return 0; 164 return 0;
155 } 165 }
156 if (mpf_checksum((unsigned char *)mpc, mpc->length)) { 166 if (mpf_checksum((unsigned char *)mpc, mpc->length)) {
157 printk(KERN_ERR "MPTABLE: checksum error!\n"); 167 pr_err("MPTABLE: checksum error!\n");
158 return 0; 168 return 0;
159 } 169 }
160 if (mpc->spec != 0x01 && mpc->spec != 0x04) { 170 if (mpc->spec != 0x01 && mpc->spec != 0x04) {
161 printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n", 171 pr_err("MPTABLE: bad table version (%d)!!\n", mpc->spec);
162 mpc->spec);
163 return 0; 172 return 0;
164 } 173 }
165 if (!mpc->lapic) { 174 if (!mpc->lapic) {
166 printk(KERN_ERR "MPTABLE: null local APIC address!\n"); 175 pr_err("MPTABLE: null local APIC address!\n");
167 return 0; 176 return 0;
168 } 177 }
169 memcpy(oem, mpc->oem, 8); 178 memcpy(oem, mpc->oem, 8);
170 oem[8] = 0; 179 oem[8] = 0;
171 printk(KERN_INFO "MPTABLE: OEM ID: %s\n", oem); 180 pr_info("MPTABLE: OEM ID: %s\n", oem);
172 181
173 memcpy(str, mpc->productid, 12); 182 memcpy(str, mpc->productid, 12);
174 str[12] = 0; 183 str[12] = 0;
175 184
176 printk(KERN_INFO "MPTABLE: Product ID: %s\n", str); 185 pr_info("MPTABLE: Product ID: %s\n", str);
177 186
178 printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->lapic); 187 pr_info("MPTABLE: APIC at: 0x%X\n", mpc->lapic);
179 188
180 return 1; 189 return 1;
181} 190}
@@ -188,8 +197,8 @@ static void skip_entry(unsigned char **ptr, int *count, int size)
188 197
189static void __init smp_dump_mptable(struct mpc_table *mpc, unsigned char *mpt) 198static void __init smp_dump_mptable(struct mpc_table *mpc, unsigned char *mpt)
190{ 199{
191 printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n" 200 pr_err("Your mptable is wrong, contact your HW vendor!\n");
192 "type %x\n", *mpt); 201 pr_cont("type %x\n", *mpt);
193 print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16, 202 print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16,
194 1, mpc, mpc->length, 1); 203 1, mpc, mpc->length, 1);
195} 204}
@@ -207,9 +216,6 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
207 if (!smp_check_mpc(mpc, oem, str)) 216 if (!smp_check_mpc(mpc, oem, str))
208 return 0; 217 return 0;
209 218
210#ifdef CONFIG_X86_32
211 generic_mps_oem_check(mpc, oem, str);
212#endif
213 /* Initialize the lapic mapping */ 219 /* Initialize the lapic mapping */
214 if (!acpi_lapic) 220 if (!acpi_lapic)
215 register_lapic_address(mpc->lapic); 221 register_lapic_address(mpc->lapic);
@@ -259,7 +265,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
259 } 265 }
260 266
261 if (!num_processors) 267 if (!num_processors)
262 printk(KERN_ERR "MPTABLE: no processors registered!\n"); 268 pr_err("MPTABLE: no processors registered!\n");
263 return num_processors; 269 return num_processors;
264} 270}
265 271
@@ -295,16 +301,13 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
295 * If it does, we assume it's valid. 301 * If it does, we assume it's valid.
296 */ 302 */
297 if (mpc_default_type == 5) { 303 if (mpc_default_type == 5) {
298 printk(KERN_INFO "ISA/PCI bus type with no IRQ information... " 304 pr_info("ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
299 "falling back to ELCR\n");
300 305
301 if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || 306 if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) ||
302 ELCR_trigger(13)) 307 ELCR_trigger(13))
303 printk(KERN_ERR "ELCR contains invalid data... " 308 pr_err("ELCR contains invalid data... not using ELCR\n");
304 "not using ELCR\n");
305 else { 309 else {
306 printk(KERN_INFO 310 pr_info("Using ELCR to identify PCI interrupts\n");
307 "Using ELCR to identify PCI interrupts\n");
308 ELCR_fallback = 1; 311 ELCR_fallback = 1;
309 } 312 }
310 } 313 }
@@ -353,7 +356,7 @@ static void __init construct_ioapic_table(int mpc_default_type)
353 bus.busid = 0; 356 bus.busid = 0;
354 switch (mpc_default_type) { 357 switch (mpc_default_type) {
355 default: 358 default:
356 printk(KERN_ERR "???\nUnknown standard configuration %d\n", 359 pr_err("???\nUnknown standard configuration %d\n",
357 mpc_default_type); 360 mpc_default_type);
358 /* fall through */ 361 /* fall through */
359 case 1: 362 case 1:
@@ -462,8 +465,8 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early)
462#ifdef CONFIG_X86_LOCAL_APIC 465#ifdef CONFIG_X86_LOCAL_APIC
463 smp_found_config = 0; 466 smp_found_config = 0;
464#endif 467#endif
465 printk(KERN_ERR "BIOS bug, MP table errors detected!...\n" 468 pr_err("BIOS bug, MP table errors detected!...\n");
466 "... disabling SMP support. (tell your hw vendor)\n"); 469 pr_cont("... disabling SMP support. (tell your hw vendor)\n");
467 early_iounmap(mpc, size); 470 early_iounmap(mpc, size);
468 return -1; 471 return -1;
469 } 472 }
@@ -481,8 +484,7 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early)
481 if (!mp_irq_entries) { 484 if (!mp_irq_entries) {
482 struct mpc_bus bus; 485 struct mpc_bus bus;
483 486
484 printk(KERN_ERR "BIOS bug, no explicit IRQ entries, " 487 pr_err("BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
485 "using default mptable. (tell your hw vendor)\n");
486 488
487 bus.type = MP_BUS; 489 bus.type = MP_BUS;
488 bus.busid = 0; 490 bus.busid = 0;
@@ -516,14 +518,14 @@ void __init default_get_smp_config(unsigned int early)
516 if (acpi_lapic && acpi_ioapic) 518 if (acpi_lapic && acpi_ioapic)
517 return; 519 return;
518 520
519 printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", 521 pr_info("Intel MultiProcessor Specification v1.%d\n",
520 mpf->specification); 522 mpf->specification);
521#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) 523#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
522 if (mpf->feature2 & (1 << 7)) { 524 if (mpf->feature2 & (1 << 7)) {
523 printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); 525 pr_info(" IMCR and PIC compatibility mode.\n");
524 pic_mode = 1; 526 pic_mode = 1;
525 } else { 527 } else {
526 printk(KERN_INFO " Virtual Wire compatibility mode.\n"); 528 pr_info(" Virtual Wire compatibility mode.\n");
527 pic_mode = 0; 529 pic_mode = 0;
528 } 530 }
529#endif 531#endif
@@ -539,8 +541,7 @@ void __init default_get_smp_config(unsigned int early)
539 return; 541 return;
540 } 542 }
541 543
542 printk(KERN_INFO "Default MP configuration #%d\n", 544 pr_info("Default MP configuration #%d\n", mpf->feature1);
543 mpf->feature1);
544 construct_default_ISA_mptable(mpf->feature1); 545 construct_default_ISA_mptable(mpf->feature1);
545 546
546 } else if (mpf->physptr) { 547 } else if (mpf->physptr) {
@@ -550,7 +551,7 @@ void __init default_get_smp_config(unsigned int early)
550 BUG(); 551 BUG();
551 552
552 if (!early) 553 if (!early)
553 printk(KERN_INFO "Processors: %d\n", num_processors); 554 pr_info("Processors: %d\n", num_processors);
554 /* 555 /*
555 * Only use the first configuration found. 556 * Only use the first configuration found.
556 */ 557 */
@@ -583,10 +584,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
583#endif 584#endif
584 mpf_found = mpf; 585 mpf_found = mpf;
585 586
586 printk(KERN_INFO "found SMP MP-table at [mem %#010llx-%#010llx] mapped at [%p]\n", 587 pr_info("found SMP MP-table at [mem %#010llx-%#010llx] mapped at [%p]\n",
587 (unsigned long long) virt_to_phys(mpf), 588 (unsigned long long) virt_to_phys(mpf),
588 (unsigned long long) virt_to_phys(mpf) + 589 (unsigned long long) virt_to_phys(mpf) +
589 sizeof(*mpf) - 1, mpf); 590 sizeof(*mpf) - 1, mpf);
590 591
591 mem = virt_to_phys(mpf); 592 mem = virt_to_phys(mpf);
592 memblock_reserve(mem, sizeof(*mpf)); 593 memblock_reserve(mem, sizeof(*mpf));
@@ -735,7 +736,7 @@ static int __init replace_intsrc_all(struct mpc_table *mpc,
735 int nr_m_spare = 0; 736 int nr_m_spare = 0;
736 unsigned char *mpt = ((unsigned char *)mpc) + count; 737 unsigned char *mpt = ((unsigned char *)mpc) + count;
737 738
738 printk(KERN_INFO "mpc_length %x\n", mpc->length); 739 pr_info("mpc_length %x\n", mpc->length);
739 while (count < mpc->length) { 740 while (count < mpc->length) {
740 switch (*mpt) { 741 switch (*mpt) {
741 case MP_PROCESSOR: 742 case MP_PROCESSOR:
@@ -862,13 +863,13 @@ static int __init update_mp_table(void)
862 if (!smp_check_mpc(mpc, oem, str)) 863 if (!smp_check_mpc(mpc, oem, str))
863 return 0; 864 return 0;
864 865
865 printk(KERN_INFO "mpf: %llx\n", (u64)virt_to_phys(mpf)); 866 pr_info("mpf: %llx\n", (u64)virt_to_phys(mpf));
866 printk(KERN_INFO "physptr: %x\n", mpf->physptr); 867 pr_info("physptr: %x\n", mpf->physptr);
867 868
868 if (mpc_new_phys && mpc->length > mpc_new_length) { 869 if (mpc_new_phys && mpc->length > mpc_new_length) {
869 mpc_new_phys = 0; 870 mpc_new_phys = 0;
870 printk(KERN_INFO "mpc_new_length is %ld, please use alloc_mptable=8k\n", 871 pr_info("mpc_new_length is %ld, please use alloc_mptable=8k\n",
871 mpc_new_length); 872 mpc_new_length);
872 } 873 }
873 874
874 if (!mpc_new_phys) { 875 if (!mpc_new_phys) {
@@ -879,10 +880,10 @@ static int __init update_mp_table(void)
879 mpc->checksum = 0xff; 880 mpc->checksum = 0xff;
880 new = mpf_checksum((unsigned char *)mpc, mpc->length); 881 new = mpf_checksum((unsigned char *)mpc, mpc->length);
881 if (old == new) { 882 if (old == new) {
882 printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n"); 883 pr_info("mpc is readonly, please try alloc_mptable instead\n");
883 return 0; 884 return 0;
884 } 885 }
885 printk(KERN_INFO "use in-position replacing\n"); 886 pr_info("use in-position replacing\n");
886 } else { 887 } else {
887 mpf->physptr = mpc_new_phys; 888 mpf->physptr = mpc_new_phys;
888 mpc_new = phys_to_virt(mpc_new_phys); 889 mpc_new = phys_to_virt(mpc_new_phys);
@@ -892,7 +893,7 @@ static int __init update_mp_table(void)
892 if (mpc_new_phys - mpf->physptr) { 893 if (mpc_new_phys - mpf->physptr) {
893 struct mpf_intel *mpf_new; 894 struct mpf_intel *mpf_new;
894 /* steal 16 bytes from [0, 1k) */ 895 /* steal 16 bytes from [0, 1k) */
895 printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); 896 pr_info("mpf new: %x\n", 0x400 - 16);
896 mpf_new = phys_to_virt(0x400 - 16); 897 mpf_new = phys_to_virt(0x400 - 16);
897 memcpy(mpf_new, mpf, 16); 898 memcpy(mpf_new, mpf, 16);
898 mpf = mpf_new; 899 mpf = mpf_new;
@@ -900,7 +901,7 @@ static int __init update_mp_table(void)
900 } 901 }
901 mpf->checksum = 0; 902 mpf->checksum = 0;
902 mpf->checksum -= mpf_checksum((unsigned char *)mpf, 16); 903 mpf->checksum -= mpf_checksum((unsigned char *)mpf, 16);
903 printk(KERN_INFO "physptr new: %x\n", mpf->physptr); 904 pr_info("physptr new: %x\n", mpf->physptr);
904 } 905 }
905 906
906 /* 907 /*
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 4505e2a950d8..f804dc935d2a 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -93,6 +93,7 @@ void arch_task_cache_init(void)
93 kmem_cache_create("task_xstate", xstate_size, 93 kmem_cache_create("task_xstate", xstate_size,
94 __alignof__(union thread_xstate), 94 __alignof__(union thread_xstate),
95 SLAB_PANIC | SLAB_NOTRACK, NULL); 95 SLAB_PANIC | SLAB_NOTRACK, NULL);
96 setup_xstate_comp();
96} 97}
97 98
98/* 99/*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 5492798930ef..2d872e08fab9 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -168,10 +168,6 @@ static void smp_callin(void)
168 * CPU, first the APIC. (this is probably redundant on most 168 * CPU, first the APIC. (this is probably redundant on most
169 * boards) 169 * boards)
170 */ 170 */
171
172 pr_debug("CALLIN, before setup_local_APIC()\n");
173 if (apic->smp_callin_clear_local_apic)
174 apic->smp_callin_clear_local_apic();
175 setup_local_APIC(); 171 setup_local_APIC();
176 end_local_APIC_setup(); 172 end_local_APIC_setup();
177 173
@@ -1143,10 +1139,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1143 enable_IO_APIC(); 1139 enable_IO_APIC();
1144 1140
1145 bsp_end_local_APIC_setup(); 1141 bsp_end_local_APIC_setup();
1146
1147 if (apic->setup_portio_remap)
1148 apic->setup_portio_remap();
1149
1150 smpboot_setup_io_apic(); 1142 smpboot_setup_io_apic();
1151 /* 1143 /*
1152 * Set up local APIC timer on boot CPU. 1144 * Set up local APIC timer on boot CPU.
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 56b0c338061e..b6025f9e36c6 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -950,7 +950,7 @@ core_initcall(cpufreq_tsc);
950static struct clocksource clocksource_tsc; 950static struct clocksource clocksource_tsc;
951 951
952/* 952/*
953 * We compare the TSC to the cycle_last value in the clocksource 953 * We used to compare the TSC to the cycle_last value in the clocksource
954 * structure to avoid a nasty time-warp. This can be observed in a 954 * structure to avoid a nasty time-warp. This can be observed in a
955 * very small window right after one CPU updated cycle_last under 955 * very small window right after one CPU updated cycle_last under
956 * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which 956 * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which
@@ -960,26 +960,23 @@ static struct clocksource clocksource_tsc;
960 * due to the unsigned delta calculation of the time keeping core 960 * due to the unsigned delta calculation of the time keeping core
961 * code, which is necessary to support wrapping clocksources like pm 961 * code, which is necessary to support wrapping clocksources like pm
962 * timer. 962 * timer.
963 *
964 * This sanity check is now done in the core timekeeping code.
965 * checking the result of read_tsc() - cycle_last for being negative.
966 * That works because CLOCKSOURCE_MASK(64) does not mask out any bit.
963 */ 967 */
964static cycle_t read_tsc(struct clocksource *cs) 968static cycle_t read_tsc(struct clocksource *cs)
965{ 969{
966 cycle_t ret = (cycle_t)get_cycles(); 970 return (cycle_t)get_cycles();
967
968 return ret >= clocksource_tsc.cycle_last ?
969 ret : clocksource_tsc.cycle_last;
970}
971
972static void resume_tsc(struct clocksource *cs)
973{
974 if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
975 clocksource_tsc.cycle_last = 0;
976} 971}
977 972
973/*
974 * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc()
975 */
978static struct clocksource clocksource_tsc = { 976static struct clocksource clocksource_tsc = {
979 .name = "tsc", 977 .name = "tsc",
980 .rating = 300, 978 .rating = 300,
981 .read = read_tsc, 979 .read = read_tsc,
982 .resume = resume_tsc,
983 .mask = CLOCKSOURCE_MASK(64), 980 .mask = CLOCKSOURCE_MASK(64),
984 .flags = CLOCK_SOURCE_IS_CONTINUOUS | 981 .flags = CLOCK_SOURCE_IS_CONTINUOUS |
985 CLOCK_SOURCE_MUST_VERIFY, 982 CLOCK_SOURCE_MUST_VERIFY,
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index b99b9ad8540c..ee22c1d93ae5 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -152,7 +152,7 @@ static void __init detect_vsmp_box(void)
152 is_vsmp = 1; 152 is_vsmp = 1;
153} 153}
154 154
155int is_vsmp_box(void) 155static int is_vsmp_box(void)
156{ 156{
157 if (is_vsmp != -1) 157 if (is_vsmp != -1)
158 return is_vsmp; 158 return is_vsmp;
@@ -166,7 +166,7 @@ int is_vsmp_box(void)
166static void __init detect_vsmp_box(void) 166static void __init detect_vsmp_box(void)
167{ 167{
168} 168}
169int is_vsmp_box(void) 169static int is_vsmp_box(void)
170{ 170{
171 return 0; 171 return 0;
172} 172}
diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c
index 9531fbb123ba..c7d791f32b98 100644
--- a/arch/x86/kernel/vsyscall_gtod.c
+++ b/arch/x86/kernel/vsyscall_gtod.c
@@ -31,29 +31,30 @@ void update_vsyscall(struct timekeeper *tk)
31 gtod_write_begin(vdata); 31 gtod_write_begin(vdata);
32 32
33 /* copy vsyscall data */ 33 /* copy vsyscall data */
34 vdata->vclock_mode = tk->clock->archdata.vclock_mode; 34 vdata->vclock_mode = tk->tkr.clock->archdata.vclock_mode;
35 vdata->cycle_last = tk->clock->cycle_last; 35 vdata->cycle_last = tk->tkr.cycle_last;
36 vdata->mask = tk->clock->mask; 36 vdata->mask = tk->tkr.mask;
37 vdata->mult = tk->mult; 37 vdata->mult = tk->tkr.mult;
38 vdata->shift = tk->shift; 38 vdata->shift = tk->tkr.shift;
39 39
40 vdata->wall_time_sec = tk->xtime_sec; 40 vdata->wall_time_sec = tk->xtime_sec;
41 vdata->wall_time_snsec = tk->xtime_nsec; 41 vdata->wall_time_snsec = tk->tkr.xtime_nsec;
42 42
43 vdata->monotonic_time_sec = tk->xtime_sec 43 vdata->monotonic_time_sec = tk->xtime_sec
44 + tk->wall_to_monotonic.tv_sec; 44 + tk->wall_to_monotonic.tv_sec;
45 vdata->monotonic_time_snsec = tk->xtime_nsec 45 vdata->monotonic_time_snsec = tk->tkr.xtime_nsec
46 + ((u64)tk->wall_to_monotonic.tv_nsec 46 + ((u64)tk->wall_to_monotonic.tv_nsec
47 << tk->shift); 47 << tk->tkr.shift);
48 while (vdata->monotonic_time_snsec >= 48 while (vdata->monotonic_time_snsec >=
49 (((u64)NSEC_PER_SEC) << tk->shift)) { 49 (((u64)NSEC_PER_SEC) << tk->tkr.shift)) {
50 vdata->monotonic_time_snsec -= 50 vdata->monotonic_time_snsec -=
51 ((u64)NSEC_PER_SEC) << tk->shift; 51 ((u64)NSEC_PER_SEC) << tk->tkr.shift;
52 vdata->monotonic_time_sec++; 52 vdata->monotonic_time_sec++;
53 } 53 }
54 54
55 vdata->wall_time_coarse_sec = tk->xtime_sec; 55 vdata->wall_time_coarse_sec = tk->xtime_sec;
56 vdata->wall_time_coarse_nsec = (long)(tk->xtime_nsec >> tk->shift); 56 vdata->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >>
57 tk->tkr.shift);
57 58
58 vdata->monotonic_time_coarse_sec = 59 vdata->monotonic_time_coarse_sec =
59 vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec; 60 vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
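
The vsyscall hunk above stores sub-second time as shifted nanoseconds (tk->tkr.xtime_nsec holds ns << shift), so carrying overflow into the seconds field compares against NSEC_PER_SEC shifted the same way. A stand-alone sketch of that normalization, with an arbitrary shift and a made-up 1.5s value:

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

int main(void)
{
	uint32_t shift = 8;					/* arbitrary */
	uint64_t snsec = (3 * NSEC_PER_SEC / 2) << shift;	/* 1.5s, shifted */
	uint64_t sec = 0;

	/* The same carry the hunk performs on monotonic_time_snsec. */
	while (snsec >= (NSEC_PER_SEC << shift)) {
		snsec -= NSEC_PER_SEC << shift;
		sec++;
	}
	printf("sec=%llu nsec=%llu\n", (unsigned long long)sec,
	       (unsigned long long)(snsec >> shift));
	return 0;
}
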
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index a4b451c6addf..940b142cc11f 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -8,6 +8,7 @@
8 8
9#include <linux/bootmem.h> 9#include <linux/bootmem.h>
10#include <linux/compat.h> 10#include <linux/compat.h>
11#include <linux/cpu.h>
11#include <asm/i387.h> 12#include <asm/i387.h>
12#include <asm/fpu-internal.h> 13#include <asm/fpu-internal.h>
13#include <asm/sigframe.h> 14#include <asm/sigframe.h>
@@ -24,7 +25,9 @@ u64 pcntxt_mask;
24struct xsave_struct *init_xstate_buf; 25struct xsave_struct *init_xstate_buf;
25 26
26static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; 27static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32;
27static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; 28static unsigned int *xstate_offsets, *xstate_sizes;
29static unsigned int xstate_comp_offsets[sizeof(pcntxt_mask)*8];
30static unsigned int xstate_features;
28 31
29/* 32/*
 30 * If a processor implementation discerns that a processor state component is 33 * If a processor implementation discerns that a processor state component is
@@ -283,7 +286,7 @@ sanitize_restored_xstate(struct task_struct *tsk,
283 286
284 if (use_xsave()) { 287 if (use_xsave()) {
285 /* These bits must be zero. */ 288 /* These bits must be zero. */
286 xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; 289 memset(xsave_hdr->reserved, 0, 48);
287 290
288 /* 291 /*
289 * Init the state that is not present in the memory 292 * Init the state that is not present in the memory
@@ -479,6 +482,52 @@ static void __init setup_xstate_features(void)
479} 482}
480 483
481/* 484/*
 485 * This function sets up offsets and sizes of all extended states in
 486 * the xsave area. This supports both the standard format and the
 487 * compacted format of the xsave area.
488 *
489 * Input: void
490 * Output: void
491 */
492void setup_xstate_comp(void)
493{
494 unsigned int xstate_comp_sizes[sizeof(pcntxt_mask)*8];
495 int i;
496
497 /*
498 * The FP xstates and SSE xstates are legacy states. They are always
499 * in the fixed offsets in the xsave area in either compacted form
500 * or standard form.
501 */
502 xstate_comp_offsets[0] = 0;
503 xstate_comp_offsets[1] = offsetof(struct i387_fxsave_struct, xmm_space);
504
505 if (!cpu_has_xsaves) {
506 for (i = 2; i < xstate_features; i++) {
507 if (test_bit(i, (unsigned long *)&pcntxt_mask)) {
508 xstate_comp_offsets[i] = xstate_offsets[i];
509 xstate_comp_sizes[i] = xstate_sizes[i];
510 }
511 }
512 return;
513 }
514
515 xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE;
516
517 for (i = 2; i < xstate_features; i++) {
518 if (test_bit(i, (unsigned long *)&pcntxt_mask))
519 xstate_comp_sizes[i] = xstate_sizes[i];
520 else
521 xstate_comp_sizes[i] = 0;
522
523 if (i > 2)
524 xstate_comp_offsets[i] = xstate_comp_offsets[i-1]
525 + xstate_comp_sizes[i-1];
526
527 }
528}
529
530/*
482 * setup the xstate image representing the init state 531 * setup the xstate image representing the init state
483 */ 532 */
484static void __init setup_init_fpu_buf(void) 533static void __init setup_init_fpu_buf(void)
@@ -496,15 +545,21 @@ static void __init setup_init_fpu_buf(void)
496 545
497 setup_xstate_features(); 546 setup_xstate_features();
498 547
548 if (cpu_has_xsaves) {
549 init_xstate_buf->xsave_hdr.xcomp_bv =
550 (u64)1 << 63 | pcntxt_mask;
551 init_xstate_buf->xsave_hdr.xstate_bv = pcntxt_mask;
552 }
553
499 /* 554 /*
 500 * Init all the feature states with header_bv being 0x0 555 * Init all the feature states with header_bv being 0x0
501 */ 556 */
502 xrstor_state(init_xstate_buf, -1); 557 xrstor_state_booting(init_xstate_buf, -1);
503 /* 558 /*
504 * Dump the init state again. This is to identify the init state 559 * Dump the init state again. This is to identify the init state
 505 * of any feature which is not represented by all zeros. 560 * of any feature which is not represented by all zeros.
506 */ 561 */
507 xsave_state(init_xstate_buf, -1); 562 xsave_state_booting(init_xstate_buf, -1);
508} 563}
509 564
510static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; 565static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO;
@@ -520,6 +575,30 @@ static int __init eager_fpu_setup(char *s)
520} 575}
521__setup("eagerfpu=", eager_fpu_setup); 576__setup("eagerfpu=", eager_fpu_setup);
522 577
578
579/*
580 * Calculate total size of enabled xstates in XCR0/pcntxt_mask.
581 */
582static void __init init_xstate_size(void)
583{
584 unsigned int eax, ebx, ecx, edx;
585 int i;
586
587 if (!cpu_has_xsaves) {
588 cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
589 xstate_size = ebx;
590 return;
591 }
592
593 xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
594 for (i = 2; i < 64; i++) {
595 if (test_bit(i, (unsigned long *)&pcntxt_mask)) {
596 cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
597 xstate_size += eax;
598 }
599 }
600}
601
523/* 602/*
524 * Enable and initialize the xsave feature. 603 * Enable and initialize the xsave feature.
525 */ 604 */
@@ -551,8 +630,7 @@ static void __init xstate_enable_boot_cpu(void)
551 /* 630 /*
552 * Recompute the context size for enabled features 631 * Recompute the context size for enabled features
553 */ 632 */
554 cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); 633 init_xstate_size();
555 xstate_size = ebx;
556 634
557 update_regset_xstate_info(xstate_size, pcntxt_mask); 635 update_regset_xstate_info(xstate_size, pcntxt_mask);
558 prepare_fx_sw_frame(); 636 prepare_fx_sw_frame();
@@ -572,8 +650,9 @@ static void __init xstate_enable_boot_cpu(void)
572 } 650 }
573 } 651 }
574 652
575 pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", 653 pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x using %s\n",
576 pcntxt_mask, xstate_size); 654 pcntxt_mask, xstate_size,
655 cpu_has_xsaves ? "compacted form" : "standard form");
577} 656}
578 657
579/* 658/*
@@ -635,3 +714,26 @@ void eager_fpu_init(void)
635 else 714 else
636 fxrstor_checking(&init_xstate_buf->i387); 715 fxrstor_checking(&init_xstate_buf->i387);
637} 716}
717
718/*
719 * Given the xsave area and a state inside, this function returns the
720 * address of the state.
721 *
 722 * This is the API that is called to get the address of an xstate in
 723 * either the standard or the compacted format of the xsave area.
724 *
725 * Inputs:
726 * xsave: base address of the xsave area;
727 * xstate: state which is defined in xsave.h (e.g. XSTATE_FP, XSTATE_SSE,
728 * etc.)
729 * Output:
730 * address of the state in the xsave area.
731 */
732void *get_xsave_addr(struct xsave_struct *xsave, int xstate)
733{
734 int feature = fls64(xstate) - 1;
735 if (!test_bit(feature, (unsigned long *)&pcntxt_mask))
736 return NULL;
737
738 return (void *)xsave + xstate_comp_offsets[feature];
739}
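
In the compacted format that setup_xstate_comp() handles above, the enabled features from index 2 upwards are packed back-to-back after the 512-byte legacy FXSAVE image and the 64-byte xsave header, and get_xsave_addr() resolves a feature to base plus that offset. A user-space sketch of the same offset walk, with invented feature sizes and an invented enable mask:

#include <stdint.h>
#include <stdio.h>

#define FXSAVE_SIZE	512
#define XSAVE_HDR_SIZE	64

int main(void)
{
	/* hypothetical per-feature sizes, as CPUID(0xD, i).eax would report */
	unsigned int size[5]   = { 160, 256, 256, 128, 64 };
	unsigned int offset[5] = { 0 };
	uint64_t mask = 0x17;	/* made up: features 0, 1, 2 and 4 enabled */
	int i;

	offset[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE;
	for (i = 2; i < 5; i++) {
		if (!(mask & (1ULL << i)))
			size[i] = 0;	/* a disabled feature takes no room */
		if (i > 2)
			offset[i] = offset[i - 1] + size[i - 1];
		printf("feature %d: offset %u, size %u\n", i, offset[i], size[i]);
	}
	return 0;
}
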
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 287e4c85fff9..f9d16ff56c6b 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -27,6 +27,7 @@ config KVM
27 select MMU_NOTIFIER 27 select MMU_NOTIFIER
28 select ANON_INODES 28 select ANON_INODES
29 select HAVE_KVM_IRQCHIP 29 select HAVE_KVM_IRQCHIP
30 select HAVE_KVM_IRQFD
30 select HAVE_KVM_IRQ_ROUTING 31 select HAVE_KVM_IRQ_ROUTING
31 select HAVE_KVM_EVENTFD 32 select HAVE_KVM_EVENTFD
32 select KVM_APIC_ARCHITECTURE 33 select KVM_APIC_ARCHITECTURE
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index bd0da433e6d7..a1ec6a50a05a 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -108,7 +108,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
108 108
109 vector = kvm_cpu_get_extint(v); 109 vector = kvm_cpu_get_extint(v);
110 110
111 if (kvm_apic_vid_enabled(v->kvm) || vector != -1) 111 if (vector != -1)
112 return vector; /* PIC */ 112 return vector; /* PIC */
113 113
114 return kvm_get_apic_interrupt(v); /* APIC */ 114 return kvm_get_apic_interrupt(v); /* APIC */
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3855103f71fd..08e8a899e005 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -352,25 +352,46 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
352 352
353static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) 353static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
354{ 354{
355 apic->irr_pending = false; 355 struct kvm_vcpu *vcpu;
356
357 vcpu = apic->vcpu;
358
356 apic_clear_vector(vec, apic->regs + APIC_IRR); 359 apic_clear_vector(vec, apic->regs + APIC_IRR);
357 if (apic_search_irr(apic) != -1) 360 if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
358 apic->irr_pending = true; 361 /* try to update RVI */
362 kvm_make_request(KVM_REQ_EVENT, vcpu);
363 else {
364 vec = apic_search_irr(apic);
365 apic->irr_pending = (vec != -1);
366 }
359} 367}
360 368
361static inline void apic_set_isr(int vec, struct kvm_lapic *apic) 369static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
362{ 370{
363 /* Note that we never get here with APIC virtualization enabled. */ 371 struct kvm_vcpu *vcpu;
372
373 if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
374 return;
375
376 vcpu = apic->vcpu;
364 377
365 if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
366 ++apic->isr_count;
367 BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
368 /* 378 /*
369 * ISR (in service register) bit is set when injecting an interrupt. 379 * With APIC virtualization enabled, all caching is disabled
370 * The highest vector is injected. Thus the latest bit set matches 380 * because the processor can modify ISR under the hood. Instead
371 * the highest bit in ISR. 381 * just set SVI.
372 */ 382 */
373 apic->highest_isr_cache = vec; 383 if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
384 kvm_x86_ops->hwapic_isr_update(vcpu->kvm, vec);
385 else {
386 ++apic->isr_count;
387 BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
388 /*
389 * ISR (in service register) bit is set when injecting an interrupt.
390 * The highest vector is injected. Thus the latest bit set matches
391 * the highest bit in ISR.
392 */
393 apic->highest_isr_cache = vec;
394 }
374} 395}
375 396
376static inline int apic_find_highest_isr(struct kvm_lapic *apic) 397static inline int apic_find_highest_isr(struct kvm_lapic *apic)
@@ -1627,11 +1648,16 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
1627 int vector = kvm_apic_has_interrupt(vcpu); 1648 int vector = kvm_apic_has_interrupt(vcpu);
1628 struct kvm_lapic *apic = vcpu->arch.apic; 1649 struct kvm_lapic *apic = vcpu->arch.apic;
1629 1650
1630 /* Note that we never get here with APIC virtualization enabled. */
1631
1632 if (vector == -1) 1651 if (vector == -1)
1633 return -1; 1652 return -1;
1634 1653
1654 /*
1655 * We get here even with APIC virtualization enabled, if doing
1656 * nested virtualization and L1 runs with the "acknowledge interrupt
1657 * on exit" mode. Then we cannot inject the interrupt via RVI,
 1658 * because the processor would deliver it through the IDT.
1659 */
1660
1635 apic_set_isr(vector, apic); 1661 apic_set_isr(vector, apic);
1636 apic_update_ppr(apic); 1662 apic_update_ppr(apic);
1637 apic_clear_irr(vector, apic); 1663 apic_clear_irr(vector, apic);
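
With virtual interrupt delivery disabled, apic_clear_irr() above recomputes irr_pending by searching the 256-bit IRR for the highest pending vector. The stand-in below shows the shape of that search over eight 32-bit words; the real apic_search_irr() is implemented differently but answers the same question:

#include <stdint.h>
#include <stdio.h>

static int find_highest_vector(const uint32_t *reg)
{
	int word, bit;

	/* scan the bitmap from the top, highest vector wins */
	for (word = 7; word >= 0; word--)
		for (bit = 31; bit >= 0; bit--)
			if (reg[word] & (1U << bit))
				return word * 32 + bit;
	return -1;
}

int main(void)
{
	uint32_t irr[8] = { 0 };

	irr[1] |= 1U << 7;	/* pretend vector 39 is pending */
	printf("highest pending vector = %d\n", find_highest_vector(irr));
	return 0;
}
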
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index 1185fe7a7f47..9ade5cfb5a4c 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -273,7 +273,7 @@ static int mmu_audit_set(const char *val, const struct kernel_param *kp)
273 int ret; 273 int ret;
274 unsigned long enable; 274 unsigned long enable;
275 275
276 ret = strict_strtoul(val, 10, &enable); 276 ret = kstrtoul(val, 10, &enable);
277 if (ret < 0) 277 if (ret < 0)
278 return -EINVAL; 278 return -EINVAL;
279 279
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e618f34bde2d..bfe11cf124a1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8754,6 +8754,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
8754 prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info, 8754 prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
8755 exit_qualification); 8755 exit_qualification);
8756 8756
8757 vmx_load_vmcs01(vcpu);
8758
8757 if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) 8759 if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
8758 && nested_exit_intr_ack_set(vcpu)) { 8760 && nested_exit_intr_ack_set(vcpu)) {
8759 int irq = kvm_cpu_get_interrupt(vcpu); 8761 int irq = kvm_cpu_get_interrupt(vcpu);
@@ -8769,8 +8771,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
8769 vmcs12->vm_exit_intr_error_code, 8771 vmcs12->vm_exit_intr_error_code,
8770 KVM_ISA_VMX); 8772 KVM_ISA_VMX);
8771 8773
8772 vmx_load_vmcs01(vcpu);
8773
8774 vm_entry_controls_init(vmx, vmcs_read32(VM_ENTRY_CONTROLS)); 8774 vm_entry_controls_init(vmx, vmcs_read32(VM_ENTRY_CONTROLS));
8775 vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS)); 8775 vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS));
8776 vmx_segment_cache_clear(vmx); 8776 vmx_segment_cache_clear(vmx);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b86d329b953a..8f1e22d3b286 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1020,9 +1020,8 @@ struct pvclock_gtod_data {
1020 u32 shift; 1020 u32 shift;
1021 } clock; 1021 } clock;
1022 1022
1023 /* open coded 'struct timespec' */ 1023 u64 boot_ns;
1024 u64 monotonic_time_snsec; 1024 u64 nsec_base;
1025 time_t monotonic_time_sec;
1026}; 1025};
1027 1026
1028static struct pvclock_gtod_data pvclock_gtod_data; 1027static struct pvclock_gtod_data pvclock_gtod_data;
@@ -1030,27 +1029,21 @@ static struct pvclock_gtod_data pvclock_gtod_data;
1030static void update_pvclock_gtod(struct timekeeper *tk) 1029static void update_pvclock_gtod(struct timekeeper *tk)
1031{ 1030{
1032 struct pvclock_gtod_data *vdata = &pvclock_gtod_data; 1031 struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
1032 u64 boot_ns;
1033
1034 boot_ns = ktime_to_ns(ktime_add(tk->tkr.base_mono, tk->offs_boot));
1033 1035
1034 write_seqcount_begin(&vdata->seq); 1036 write_seqcount_begin(&vdata->seq);
1035 1037
1036 /* copy pvclock gtod data */ 1038 /* copy pvclock gtod data */
1037 vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode; 1039 vdata->clock.vclock_mode = tk->tkr.clock->archdata.vclock_mode;
1038 vdata->clock.cycle_last = tk->clock->cycle_last; 1040 vdata->clock.cycle_last = tk->tkr.cycle_last;
1039 vdata->clock.mask = tk->clock->mask; 1041 vdata->clock.mask = tk->tkr.mask;
1040 vdata->clock.mult = tk->mult; 1042 vdata->clock.mult = tk->tkr.mult;
1041 vdata->clock.shift = tk->shift; 1043 vdata->clock.shift = tk->tkr.shift;
1042 1044
1043 vdata->monotonic_time_sec = tk->xtime_sec 1045 vdata->boot_ns = boot_ns;
1044 + tk->wall_to_monotonic.tv_sec; 1046 vdata->nsec_base = tk->tkr.xtime_nsec;
1045 vdata->monotonic_time_snsec = tk->xtime_nsec
1046 + (tk->wall_to_monotonic.tv_nsec
1047 << tk->shift);
1048 while (vdata->monotonic_time_snsec >=
1049 (((u64)NSEC_PER_SEC) << tk->shift)) {
1050 vdata->monotonic_time_snsec -=
1051 ((u64)NSEC_PER_SEC) << tk->shift;
1052 vdata->monotonic_time_sec++;
1053 }
1054 1047
1055 write_seqcount_end(&vdata->seq); 1048 write_seqcount_end(&vdata->seq);
1056} 1049}
@@ -1145,11 +1138,7 @@ static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz,
1145 1138
1146static inline u64 get_kernel_ns(void) 1139static inline u64 get_kernel_ns(void)
1147{ 1140{
1148 struct timespec ts; 1141 return ktime_get_boot_ns();
1149
1150 ktime_get_ts(&ts);
1151 monotonic_to_bootbased(&ts);
1152 return timespec_to_ns(&ts);
1153} 1142}
1154 1143
1155#ifdef CONFIG_X86_64 1144#ifdef CONFIG_X86_64
@@ -1414,23 +1403,22 @@ static inline u64 vgettsc(cycle_t *cycle_now)
1414 return v * gtod->clock.mult; 1403 return v * gtod->clock.mult;
1415} 1404}
1416 1405
1417static int do_monotonic(struct timespec *ts, cycle_t *cycle_now) 1406static int do_monotonic_boot(s64 *t, cycle_t *cycle_now)
1418{ 1407{
1408 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
1419 unsigned long seq; 1409 unsigned long seq;
1420 u64 ns;
1421 int mode; 1410 int mode;
1422 struct pvclock_gtod_data *gtod = &pvclock_gtod_data; 1411 u64 ns;
1423 1412
1424 ts->tv_nsec = 0;
1425 do { 1413 do {
1426 seq = read_seqcount_begin(&gtod->seq); 1414 seq = read_seqcount_begin(&gtod->seq);
1427 mode = gtod->clock.vclock_mode; 1415 mode = gtod->clock.vclock_mode;
1428 ts->tv_sec = gtod->monotonic_time_sec; 1416 ns = gtod->nsec_base;
1429 ns = gtod->monotonic_time_snsec;
1430 ns += vgettsc(cycle_now); 1417 ns += vgettsc(cycle_now);
1431 ns >>= gtod->clock.shift; 1418 ns >>= gtod->clock.shift;
1419 ns += gtod->boot_ns;
1432 } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); 1420 } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
1433 timespec_add_ns(ts, ns); 1421 *t = ns;
1434 1422
1435 return mode; 1423 return mode;
1436} 1424}
@@ -1438,19 +1426,11 @@ static int do_monotonic(struct timespec *ts, cycle_t *cycle_now)
1438/* returns true if host is using tsc clocksource */ 1426/* returns true if host is using tsc clocksource */
1439static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now) 1427static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now)
1440{ 1428{
1441 struct timespec ts;
1442
1443 /* checked again under seqlock below */ 1429 /* checked again under seqlock below */
1444 if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC) 1430 if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
1445 return false; 1431 return false;
1446 1432
1447 if (do_monotonic(&ts, cycle_now) != VCLOCK_TSC) 1433 return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC;
1448 return false;
1449
1450 monotonic_to_bootbased(&ts);
1451 *kernel_ns = timespec_to_ns(&ts);
1452
1453 return true;
1454} 1434}
1455#endif 1435#endif
1456 1436
@@ -2656,7 +2636,7 @@ out:
2656 return r; 2636 return r;
2657} 2637}
2658 2638
2659int kvm_dev_ioctl_check_extension(long ext) 2639int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
2660{ 2640{
2661 int r; 2641 int r;
2662 2642
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 1dbade870f90..a24194681513 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1218,7 +1218,8 @@ good_area:
1218 /* 1218 /*
1219 * If for any reason at all we couldn't handle the fault, 1219 * If for any reason at all we couldn't handle the fault,
1220 * make sure we exit gracefully rather than endlessly redo 1220 * make sure we exit gracefully rather than endlessly redo
1221 * the fault: 1221 * the fault. Since we never set FAULT_FLAG_RETRY_NOWAIT, if
1222 * we get VM_FAULT_RETRY back, the mmap_sem has been unlocked.
1222 */ 1223 */
1223 fault = handle_mm_fault(mm, vma, address, flags); 1224 fault = handle_mm_fault(mm, vma, address, flags);
1224 1225
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index e39504878aec..7d05565ba781 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -825,7 +825,8 @@ void __init mem_init(void)
825int arch_add_memory(int nid, u64 start, u64 size) 825int arch_add_memory(int nid, u64 start, u64 size)
826{ 826{
827 struct pglist_data *pgdata = NODE_DATA(nid); 827 struct pglist_data *pgdata = NODE_DATA(nid);
828 struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM; 828 struct zone *zone = pgdata->node_zones +
829 zone_for_memory(nid, start, size, ZONE_HIGHMEM);
829 unsigned long start_pfn = start >> PAGE_SHIFT; 830 unsigned long start_pfn = start >> PAGE_SHIFT;
830 unsigned long nr_pages = size >> PAGE_SHIFT; 831 unsigned long nr_pages = size >> PAGE_SHIFT;
831 832
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index df1a9927ad29..5621c47d7a1a 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -691,7 +691,8 @@ static void update_end_of_memory_vars(u64 start, u64 size)
691int arch_add_memory(int nid, u64 start, u64 size) 691int arch_add_memory(int nid, u64 start, u64 size)
692{ 692{
693 struct pglist_data *pgdat = NODE_DATA(nid); 693 struct pglist_data *pgdat = NODE_DATA(nid);
694 struct zone *zone = pgdat->node_zones + ZONE_NORMAL; 694 struct zone *zone = pgdat->node_zones +
695 zone_for_memory(nid, start, size, ZONE_NORMAL);
695 unsigned long start_pfn = start >> PAGE_SHIFT; 696 unsigned long start_pfn = start >> PAGE_SHIFT;
696 unsigned long nr_pages = size >> PAGE_SHIFT; 697 unsigned long nr_pages = size >> PAGE_SHIFT;
697 int ret; 698 int ret;
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 99bef86ed6df..5c8cb8043c5a 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -211,10 +211,10 @@ struct jit_context {
211 bool seen_ld_abs; 211 bool seen_ld_abs;
212}; 212};
213 213
214static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, 214static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
215 int oldproglen, struct jit_context *ctx) 215 int oldproglen, struct jit_context *ctx)
216{ 216{
217 struct sock_filter_int *insn = bpf_prog->insnsi; 217 struct bpf_insn *insn = bpf_prog->insnsi;
218 int insn_cnt = bpf_prog->len; 218 int insn_cnt = bpf_prog->len;
219 u8 temp[64]; 219 u8 temp[64];
220 int i; 220 int i;
@@ -235,7 +235,7 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
235 /* mov qword ptr [rbp-X],rbx */ 235 /* mov qword ptr [rbp-X],rbx */
236 EMIT3_off32(0x48, 0x89, 0x9D, -stacksize); 236 EMIT3_off32(0x48, 0x89, 0x9D, -stacksize);
237 237
238 /* sk_convert_filter() maps classic BPF register X to R7 and uses R8 238 /* bpf_convert_filter() maps classic BPF register X to R7 and uses R8
239 * as temporary, so all tcpdump filters need to spill/fill R7(r13) and 239 * as temporary, so all tcpdump filters need to spill/fill R7(r13) and
240 * R8(r14). R9(r15) spill could be made conditional, but there is only 240 * R8(r14). R9(r15) spill could be made conditional, but there is only
241 * one 'bpf_error' return path out of helper functions inside bpf_jit.S 241 * one 'bpf_error' return path out of helper functions inside bpf_jit.S
@@ -841,7 +841,7 @@ common_load: ctx->seen_ld_abs = true;
841 /* By design x64 JIT should support all BPF instructions 841 /* By design x64 JIT should support all BPF instructions
842 * This error will be seen if new instruction was added 842 * This error will be seen if new instruction was added
843 * to interpreter, but not to JIT 843 * to interpreter, but not to JIT
844 * or if there is junk in sk_filter 844 * or if there is junk in bpf_prog
845 */ 845 */
846 pr_err("bpf_jit: unknown opcode %02x\n", insn->code); 846 pr_err("bpf_jit: unknown opcode %02x\n", insn->code);
847 return -EINVAL; 847 return -EINVAL;
@@ -862,11 +862,11 @@ common_load: ctx->seen_ld_abs = true;
862 return proglen; 862 return proglen;
863} 863}
864 864
865void bpf_jit_compile(struct sk_filter *prog) 865void bpf_jit_compile(struct bpf_prog *prog)
866{ 866{
867} 867}
868 868
869void bpf_int_jit_compile(struct sk_filter *prog) 869void bpf_int_jit_compile(struct bpf_prog *prog)
870{ 870{
871 struct bpf_binary_header *header = NULL; 871 struct bpf_binary_header *header = NULL;
872 int proglen, oldproglen = 0; 872 int proglen, oldproglen = 0;
@@ -932,7 +932,7 @@ out:
932 932
933static void bpf_jit_free_deferred(struct work_struct *work) 933static void bpf_jit_free_deferred(struct work_struct *work)
934{ 934{
935 struct sk_filter *fp = container_of(work, struct sk_filter, work); 935 struct bpf_prog *fp = container_of(work, struct bpf_prog, work);
936 unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; 936 unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
937 struct bpf_binary_header *header = (void *)addr; 937 struct bpf_binary_header *header = (void *)addr;
938 938
@@ -941,7 +941,7 @@ static void bpf_jit_free_deferred(struct work_struct *work)
941 kfree(fp); 941 kfree(fp);
942} 942}
943 943
944void bpf_jit_free(struct sk_filter *fp) 944void bpf_jit_free(struct bpf_prog *fp)
945{ 945{
946 if (fp->jited) { 946 if (fp->jited) {
947 INIT_WORK(&fp->work, bpf_jit_free_deferred); 947 INIT_WORK(&fp->work, bpf_jit_free_deferred);
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 5075371ab593..cfd1b132b8e3 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -448,7 +448,7 @@ static void probe_pci_root_info(struct pci_root_info *info,
448 return; 448 return;
449 449
450 size = sizeof(*info->res) * info->res_num; 450 size = sizeof(*info->res) * info->res_num;
451 info->res = kzalloc(size, GFP_KERNEL); 451 info->res = kzalloc_node(size, GFP_KERNEL, info->sd.node);
452 if (!info->res) { 452 if (!info->res) {
453 info->res_num = 0; 453 info->res_num = 0;
454 return; 454 return;
@@ -456,7 +456,7 @@ static void probe_pci_root_info(struct pci_root_info *info,
456 456
457 size = sizeof(*info->res_offset) * info->res_num; 457 size = sizeof(*info->res_offset) * info->res_num;
458 info->res_num = 0; 458 info->res_num = 0;
459 info->res_offset = kzalloc(size, GFP_KERNEL); 459 info->res_offset = kzalloc_node(size, GFP_KERNEL, info->sd.node);
460 if (!info->res_offset) { 460 if (!info->res_offset) {
461 kfree(info->res); 461 kfree(info->res);
462 info->res = NULL; 462 info->res = NULL;
@@ -499,7 +499,7 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
499 if (node != NUMA_NO_NODE && !node_online(node)) 499 if (node != NUMA_NO_NODE && !node_online(node))
500 node = NUMA_NO_NODE; 500 node = NUMA_NO_NODE;
501 501
502 info = kzalloc(sizeof(*info), GFP_KERNEL); 502 info = kzalloc_node(sizeof(*info), GFP_KERNEL, node);
503 if (!info) { 503 if (!info) {
504 printk(KERN_WARNING "pci_bus %04x:%02x: " 504 printk(KERN_WARNING "pci_bus %04x:%02x: "
505 "ignored (out of memory)\n", domain, busnum); 505 "ignored (out of memory)\n", domain, busnum);
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index 84b9d672843d..3865116c51fb 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -208,27 +208,31 @@ static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
208 208
209static int intel_mid_pci_irq_enable(struct pci_dev *dev) 209static int intel_mid_pci_irq_enable(struct pci_dev *dev)
210{ 210{
211 u8 pin; 211 int polarity;
212 struct io_apic_irq_attr irq_attr;
213 212
214 pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); 213 if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER)
214 polarity = 0; /* active high */
215 else
216 polarity = 1; /* active low */
215 217
216 /* 218 /*
217 * MRST only have IOAPIC, the PCI irq lines are 1:1 mapped to 219 * MRST only have IOAPIC, the PCI irq lines are 1:1 mapped to
218 * IOAPIC RTE entries, so we just enable RTE for the device. 220 * IOAPIC RTE entries, so we just enable RTE for the device.
219 */ 221 */
220 irq_attr.ioapic = mp_find_ioapic(dev->irq); 222 if (mp_set_gsi_attr(dev->irq, 1, polarity, dev_to_node(&dev->dev)))
221 irq_attr.ioapic_pin = dev->irq; 223 return -EBUSY;
222 irq_attr.trigger = 1; /* level */ 224 if (mp_map_gsi_to_irq(dev->irq, IOAPIC_MAP_ALLOC) < 0)
223 if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER) 225 return -EBUSY;
224 irq_attr.polarity = 0; /* active high */
225 else
226 irq_attr.polarity = 1; /* active low */
227 io_apic_set_pci_routing(&dev->dev, dev->irq, &irq_attr);
228 226
229 return 0; 227 return 0;
230} 228}
231 229
230static void intel_mid_pci_irq_disable(struct pci_dev *dev)
231{
232 if (!dev->dev.power.is_prepared && dev->irq > 0)
233 mp_unmap_irq(dev->irq);
234}
235
232struct pci_ops intel_mid_pci_ops = { 236struct pci_ops intel_mid_pci_ops = {
233 .read = pci_read, 237 .read = pci_read,
234 .write = pci_write, 238 .write = pci_write,
@@ -245,6 +249,7 @@ int __init intel_mid_pci_init(void)
245 pr_info("Intel MID platform detected, using MID PCI ops\n"); 249 pr_info("Intel MID platform detected, using MID PCI ops\n");
246 pci_mmcfg_late_init(); 250 pci_mmcfg_late_init();
247 pcibios_enable_irq = intel_mid_pci_irq_enable; 251 pcibios_enable_irq = intel_mid_pci_irq_enable;
252 pcibios_disable_irq = intel_mid_pci_irq_disable;
248 pci_root_ops = intel_mid_pci_ops; 253 pci_root_ops = intel_mid_pci_ops;
249 pci_soc_mode = 1; 254 pci_soc_mode = 1;
250 /* Continue with standard init */ 255 /* Continue with standard init */
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 84112f55dd7a..bc1a2c341891 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -26,6 +26,7 @@ static int acer_tm360_irqrouting;
26static struct irq_routing_table *pirq_table; 26static struct irq_routing_table *pirq_table;
27 27
28static int pirq_enable_irq(struct pci_dev *dev); 28static int pirq_enable_irq(struct pci_dev *dev);
29static void pirq_disable_irq(struct pci_dev *dev);
29 30
30/* 31/*
31 * Never use: 0, 1, 2 (timer, keyboard, and cascade) 32 * Never use: 0, 1, 2 (timer, keyboard, and cascade)
@@ -53,7 +54,7 @@ struct irq_router_handler {
53}; 54};
54 55
55int (*pcibios_enable_irq)(struct pci_dev *dev) = pirq_enable_irq; 56int (*pcibios_enable_irq)(struct pci_dev *dev) = pirq_enable_irq;
56void (*pcibios_disable_irq)(struct pci_dev *dev) = NULL; 57void (*pcibios_disable_irq)(struct pci_dev *dev) = pirq_disable_irq;
57 58
58/* 59/*
59 * Check passed address for the PCI IRQ Routing Table signature 60 * Check passed address for the PCI IRQ Routing Table signature
@@ -1186,7 +1187,7 @@ void pcibios_penalize_isa_irq(int irq, int active)
1186 1187
1187static int pirq_enable_irq(struct pci_dev *dev) 1188static int pirq_enable_irq(struct pci_dev *dev)
1188{ 1189{
1189 u8 pin; 1190 u8 pin = 0;
1190 1191
1191 pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); 1192 pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
1192 if (pin && !pcibios_lookup_irq(dev, 1)) { 1193 if (pin && !pcibios_lookup_irq(dev, 1)) {
@@ -1227,8 +1228,6 @@ static int pirq_enable_irq(struct pci_dev *dev)
1227 } 1228 }
1228 dev = temp_dev; 1229 dev = temp_dev;
1229 if (irq >= 0) { 1230 if (irq >= 0) {
1230 io_apic_set_pci_routing(&dev->dev, irq,
1231 &irq_attr);
1232 dev->irq = irq; 1231 dev->irq = irq;
1233 dev_info(&dev->dev, "PCI->APIC IRQ transform: " 1232 dev_info(&dev->dev, "PCI->APIC IRQ transform: "
1234 "INT %c -> IRQ %d\n", 'A' + pin - 1, irq); 1233 "INT %c -> IRQ %d\n", 'A' + pin - 1, irq);
@@ -1254,3 +1253,12 @@ static int pirq_enable_irq(struct pci_dev *dev)
1254 } 1253 }
1255 return 0; 1254 return 0;
1256} 1255}
1256
1257static void pirq_disable_irq(struct pci_dev *dev)
1258{
1259 if (io_apic_assign_pci_irqs && !dev->dev.power.is_prepared &&
1260 dev->irq) {
1261 mp_unmap_irq(dev->irq);
1262 dev->irq = 0;
1263 }
1264}
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 905956f16465..093f5f4272d3 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -23,6 +23,7 @@
23#include <xen/features.h> 23#include <xen/features.h>
24#include <xen/events.h> 24#include <xen/events.h>
25#include <asm/xen/pci.h> 25#include <asm/xen/pci.h>
26#include <asm/i8259.h>
26 27
27static int xen_pcifront_enable_irq(struct pci_dev *dev) 28static int xen_pcifront_enable_irq(struct pci_dev *dev)
28{ 29{
@@ -40,7 +41,7 @@ static int xen_pcifront_enable_irq(struct pci_dev *dev)
40 /* In PV DomU the Xen PCI backend puts the PIRQ in the interrupt line.*/ 41 /* In PV DomU the Xen PCI backend puts the PIRQ in the interrupt line.*/
41 pirq = gsi; 42 pirq = gsi;
42 43
43 if (gsi < NR_IRQS_LEGACY) 44 if (gsi < nr_legacy_irqs())
44 share = 0; 45 share = 0;
45 46
46 rc = xen_bind_pirq_gsi_to_irq(gsi, pirq, share, "pcifront"); 47 rc = xen_bind_pirq_gsi_to_irq(gsi, pirq, share, "pcifront");
@@ -511,7 +512,7 @@ int __init pci_xen_initial_domain(void)
511 xen_setup_acpi_sci(); 512 xen_setup_acpi_sci();
512 __acpi_register_gsi = acpi_register_gsi_xen; 513 __acpi_register_gsi = acpi_register_gsi_xen;
513 /* Pre-allocate legacy irqs */ 514 /* Pre-allocate legacy irqs */
514 for (irq = 0; irq < NR_IRQS_LEGACY; irq++) { 515 for (irq = 0; irq < nr_legacy_irqs(); irq++) {
515 int trigger, polarity; 516 int trigger, polarity;
516 517
517 if (acpi_get_override_irq(irq, &trigger, &polarity) == -1) 518 if (acpi_get_override_irq(irq, &trigger, &polarity) == -1)
@@ -522,7 +523,7 @@ int __init pci_xen_initial_domain(void)
522 true /* Map GSI to PIRQ */); 523 true /* Map GSI to PIRQ */);
523 } 524 }
524 if (0 == nr_ioapics) { 525 if (0 == nr_ioapics) {
525 for (irq = 0; irq < NR_IRQS_LEGACY; irq++) 526 for (irq = 0; irq < nr_legacy_irqs(); irq++)
526 xen_bind_pirq_gsi_to_irq(irq, irq, 0, "xt-pic"); 527 xen_bind_pirq_gsi_to_irq(irq, irq, 0, "xt-pic");
527 } 528 }
528 return 0; 529 return 0;
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
index 8244f5ec2f4c..701fd5843c87 100644
--- a/arch/x86/platform/ce4100/ce4100.c
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -135,14 +135,10 @@ static void __init sdv_arch_setup(void)
135 sdv_serial_fixup(); 135 sdv_serial_fixup();
136} 136}
137 137
138#ifdef CONFIG_X86_IO_APIC
139static void sdv_pci_init(void) 138static void sdv_pci_init(void)
140{ 139{
141 x86_of_pci_init(); 140 x86_of_pci_init();
142 /* We can't set this earlier, because we need to calibrate the timer */
143 legacy_pic = &null_legacy_pic;
144} 141}
145#endif
146 142
147/* 143/*
148 * CE4100 specific x86_init function overrides and early setup 144 * CE4100 specific x86_init function overrides and early setup
@@ -155,7 +151,9 @@ void __init x86_ce4100_early_setup(void)
155 x86_init.resources.probe_roms = x86_init_noop; 151 x86_init.resources.probe_roms = x86_init_noop;
156 x86_init.mpparse.get_smp_config = x86_init_uint_noop; 152 x86_init.mpparse.get_smp_config = x86_init_uint_noop;
157 x86_init.mpparse.find_smp_config = x86_init_noop; 153 x86_init.mpparse.find_smp_config = x86_init_noop;
154 x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc_nocheck;
158 x86_init.pci.init = ce4100_pci_init; 155 x86_init.pci.init = ce4100_pci_init;
156 x86_init.pci.init_irq = sdv_pci_init;
159 157
160 /* 158 /*
161 * By default, the reboot method is ACPI which is supported by the 159 * By default, the reboot method is ACPI which is supported by the
@@ -166,10 +164,5 @@ void __init x86_ce4100_early_setup(void)
166 */ 164 */
167 reboot_type = BOOT_KBD; 165 reboot_type = BOOT_KBD;
168 166
169#ifdef CONFIG_X86_IO_APIC
170 x86_init.pci.init_irq = sdv_pci_init;
171 x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc_nocheck;
172#endif
173
174 pm_power_off = ce4100_power_off; 167 pm_power_off = ce4100_power_off;
175} 168}
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_wdt.c
index 973cf3bfa9fd..0b283d4d0ad7 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_wdt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_wdt.c
@@ -26,28 +26,18 @@ static struct platform_device wdt_dev = {
26 26
27static int tangier_probe(struct platform_device *pdev) 27static int tangier_probe(struct platform_device *pdev)
28{ 28{
29 int ioapic; 29 int gsi;
30 int irq;
31 struct intel_mid_wdt_pdata *pdata = pdev->dev.platform_data; 30 struct intel_mid_wdt_pdata *pdata = pdev->dev.platform_data;
32 struct io_apic_irq_attr irq_attr = { 0 };
33 31
34 if (!pdata) 32 if (!pdata)
35 return -EINVAL; 33 return -EINVAL;
36 34
37 irq = pdata->irq; 35 /* IOAPIC builds identity mapping between GSI and IRQ on MID */
38 ioapic = mp_find_ioapic(irq); 36 gsi = pdata->irq;
39 if (ioapic >= 0) { 37 if (mp_set_gsi_attr(gsi, 1, 0, cpu_to_node(0)) ||
40 int ret; 38 mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC) <= 0) {
41 irq_attr.ioapic = ioapic;
42 irq_attr.ioapic_pin = irq;
43 irq_attr.trigger = 1;
44 /* irq_attr.polarity = 0; -> Active high */
45 ret = io_apic_set_pci_routing(NULL, irq, &irq_attr);
46 if (ret)
47 return ret;
48 } else {
49 dev_warn(&pdev->dev, "cannot find interrupt %d in ioapic\n", 39 dev_warn(&pdev->dev, "cannot find interrupt %d in ioapic\n",
50 irq); 40 gsi);
51 return -EINVAL; 41 return -EINVAL;
52 } 42 }
53 43
diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c
index 994c40bd7cb7..3c53a90fdb18 100644
--- a/arch/x86/platform/intel-mid/sfi.c
+++ b/arch/x86/platform/intel-mid/sfi.c
@@ -432,9 +432,8 @@ static int __init sfi_parse_devs(struct sfi_table_header *table)
432 struct sfi_table_simple *sb; 432 struct sfi_table_simple *sb;
433 struct sfi_device_table_entry *pentry; 433 struct sfi_device_table_entry *pentry;
434 struct devs_id *dev = NULL; 434 struct devs_id *dev = NULL;
435 int num, i; 435 int num, i, ret;
436 int ioapic; 436 int polarity;
437 struct io_apic_irq_attr irq_attr;
438 437
439 sb = (struct sfi_table_simple *)table; 438 sb = (struct sfi_table_simple *)table;
440 num = SFI_GET_NUM_ENTRIES(sb, struct sfi_device_table_entry); 439 num = SFI_GET_NUM_ENTRIES(sb, struct sfi_device_table_entry);
@@ -448,35 +447,30 @@ static int __init sfi_parse_devs(struct sfi_table_header *table)
448 * devices, but they have separate RTE entry in IOAPIC 447 * devices, but they have separate RTE entry in IOAPIC
449 * so we have to enable them one by one here 448 * so we have to enable them one by one here
450 */ 449 */
451 ioapic = mp_find_ioapic(irq); 450 if (intel_mid_identify_cpu() ==
452 if (ioapic >= 0) { 451 INTEL_MID_CPU_CHIP_TANGIER) {
453 irq_attr.ioapic = ioapic; 452 if (!strncmp(pentry->name, "r69001-ts-i2c", 13))
454 irq_attr.ioapic_pin = irq; 453 /* active low */
455 irq_attr.trigger = 1; 454 polarity = 1;
456 if (intel_mid_identify_cpu() == 455 else if (!strncmp(pentry->name,
457 INTEL_MID_CPU_CHIP_TANGIER) { 456 "synaptics_3202", 14))
458 if (!strncmp(pentry->name, 457 /* active low */
459 "r69001-ts-i2c", 13)) 458 polarity = 1;
460 /* active low */ 459 else if (irq == 41)
461 irq_attr.polarity = 1; 460 /* fast_int_1 */
462 else if (!strncmp(pentry->name, 461 polarity = 1;
463 "synaptics_3202", 14)) 462 else
464 /* active low */ 463 /* active high */
465 irq_attr.polarity = 1; 464 polarity = 0;
466 else if (irq == 41) 465 } else {
467 /* fast_int_1 */ 466 /* PNW and CLV go with active low */
468 irq_attr.polarity = 1; 467 polarity = 1;
469 else
470 /* active high */
471 irq_attr.polarity = 0;
472 } else {
473 /* PNW and CLV go with active low */
474 irq_attr.polarity = 1;
475 }
476 io_apic_set_pci_routing(NULL, irq, &irq_attr);
477 } 468 }
478 } else { 469
479 irq = 0; /* No irq */ 470 ret = mp_set_gsi_attr(irq, 1, polarity, NUMA_NO_NODE);
471 if (ret == 0)
472 ret = mp_map_gsi_to_irq(irq, IOAPIC_MAP_ALLOC);
473 WARN_ON(ret < 0);
480 } 474 }
481 475
482 dev = get_device_id(pentry->type, pentry->name); 476 dev = get_device_id(pentry->type, pentry->name);
diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c
index bcd1a703e3e6..2a8a74f3bd76 100644
--- a/arch/x86/platform/sfi/sfi.c
+++ b/arch/x86/platform/sfi/sfi.c
@@ -25,6 +25,7 @@
25#include <linux/init.h> 25#include <linux/init.h>
26#include <linux/sfi.h> 26#include <linux/sfi.h>
27#include <linux/io.h> 27#include <linux/io.h>
28#include <linux/irqdomain.h>
28 29
29#include <asm/io_apic.h> 30#include <asm/io_apic.h>
30#include <asm/mpspec.h> 31#include <asm/mpspec.h>
@@ -70,19 +71,26 @@ static int __init sfi_parse_cpus(struct sfi_table_header *table)
70#endif /* CONFIG_X86_LOCAL_APIC */ 71#endif /* CONFIG_X86_LOCAL_APIC */
71 72
72#ifdef CONFIG_X86_IO_APIC 73#ifdef CONFIG_X86_IO_APIC
74static struct irq_domain_ops sfi_ioapic_irqdomain_ops = {
75 .map = mp_irqdomain_map,
76};
73 77
74static int __init sfi_parse_ioapic(struct sfi_table_header *table) 78static int __init sfi_parse_ioapic(struct sfi_table_header *table)
75{ 79{
76 struct sfi_table_simple *sb; 80 struct sfi_table_simple *sb;
77 struct sfi_apic_table_entry *pentry; 81 struct sfi_apic_table_entry *pentry;
78 int i, num; 82 int i, num;
83 struct ioapic_domain_cfg cfg = {
84 .type = IOAPIC_DOMAIN_STRICT,
85 .ops = &sfi_ioapic_irqdomain_ops,
86 };
79 87
80 sb = (struct sfi_table_simple *)table; 88 sb = (struct sfi_table_simple *)table;
81 num = SFI_GET_NUM_ENTRIES(sb, struct sfi_apic_table_entry); 89 num = SFI_GET_NUM_ENTRIES(sb, struct sfi_apic_table_entry);
82 pentry = (struct sfi_apic_table_entry *)sb->pentry; 90 pentry = (struct sfi_apic_table_entry *)sb->pentry;
83 91
84 for (i = 0; i < num; i++) { 92 for (i = 0; i < num; i++) {
85 mp_register_ioapic(i, pentry->phys_addr, gsi_top); 93 mp_register_ioapic(i, pentry->phys_addr, gsi_top, &cfg);
86 pentry++; 94 pentry++;
87 } 95 }
88 96
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index ed161c6e278b..3968d67d366b 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1479,7 +1479,7 @@ static ssize_t ptc_proc_write(struct file *file, const char __user *user,
1479 return count; 1479 return count;
1480 } 1480 }
1481 1481
1482 if (strict_strtol(optstr, 10, &input_arg) < 0) { 1482 if (kstrtol(optstr, 10, &input_arg) < 0) {
1483 printk(KERN_DEBUG "%s is invalid\n", optstr); 1483 printk(KERN_DEBUG "%s is invalid\n", optstr);
1484 return -EINVAL; 1484 return -EINVAL;
1485 } 1485 }
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
new file mode 100644
index 000000000000..7fde9ee438a4
--- /dev/null
+++ b/arch/x86/purgatory/Makefile
@@ -0,0 +1,30 @@
1purgatory-y := purgatory.o stack.o setup-x86_$(BITS).o sha256.o entry64.o string.o
2
3targets += $(purgatory-y)
4PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
5
6LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib
7targets += purgatory.ro
8
 9# Default KBUILD_CFLAGS can have the -pg option set when FTRACE is enabled,
10# which in turn leaves undefined symbols like __fentry__ in purgatory that
11# we have no way to relocate. Like kexec-tools, use custom flags instead.
12
13KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -MD -Os -mcmodel=large
14
15$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
16 $(call if_changed,ld)
17
18targets += kexec-purgatory.c
19
20quiet_cmd_bin2c = BIN2C $@
21 cmd_bin2c = cat $(obj)/purgatory.ro | $(objtree)/scripts/basic/bin2c kexec_purgatory > $(obj)/kexec-purgatory.c
22
23$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro FORCE
24 $(call if_changed,bin2c)
25
26
27# No loaders for 32-bit yet.
28ifeq ($(CONFIG_X86_64),y)
29 obj-$(CONFIG_KEXEC) += kexec-purgatory.o
30endif
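
The bin2c step turns purgatory.ro into a C array that the kernel embeds and relocates at kexec load time. The generated kexec-purgatory.c is roughly of the following shape; this is illustrative only, not verbatim bin2c output, and the bytes and size are made up:

/* Illustrative shape only. */
const char kexec_purgatory[] =
	"\x7f\x45\x4c\x46\x02\x01\x01\x00"	/* start of purgatory.ro */
	/* ...remaining bytes of the relocatable object... */
	;
const unsigned int kexec_purgatory_size = sizeof(kexec_purgatory) - 1;
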
diff --git a/arch/x86/purgatory/entry64.S b/arch/x86/purgatory/entry64.S
new file mode 100644
index 000000000000..d1a4291d3568
--- /dev/null
+++ b/arch/x86/purgatory/entry64.S
@@ -0,0 +1,101 @@
1/*
2 * Copyright (C) 2003,2004 Eric Biederman (ebiederm@xmission.com)
3 * Copyright (C) 2014 Red Hat Inc.
4
5 * Author(s): Vivek Goyal <vgoyal@redhat.com>
6 *
7 * This code has been taken from kexec-tools.
8 *
9 * This source code is licensed under the GNU General Public License,
10 * Version 2. See the file COPYING for more details.
11 */
12
13 .text
14 .balign 16
15 .code64
16 .globl entry64, entry64_regs
17
18
19entry64:
20 /* Setup a gdt that should be preserved */
21 lgdt gdt(%rip)
22
23 /* load the data segments */
24 movl $0x18, %eax /* data segment */
25 movl %eax, %ds
26 movl %eax, %es
27 movl %eax, %ss
28 movl %eax, %fs
29 movl %eax, %gs
30
31 /* Setup new stack */
32 leaq stack_init(%rip), %rsp
33 pushq $0x10 /* CS */
34 leaq new_cs_exit(%rip), %rax
35 pushq %rax
36 lretq
37new_cs_exit:
38
39 /* Load the registers */
40 movq rax(%rip), %rax
41 movq rbx(%rip), %rbx
42 movq rcx(%rip), %rcx
43 movq rdx(%rip), %rdx
44 movq rsi(%rip), %rsi
45 movq rdi(%rip), %rdi
46 movq rsp(%rip), %rsp
47 movq rbp(%rip), %rbp
48 movq r8(%rip), %r8
49 movq r9(%rip), %r9
50 movq r10(%rip), %r10
51 movq r11(%rip), %r11
52 movq r12(%rip), %r12
53 movq r13(%rip), %r13
54 movq r14(%rip), %r14
55 movq r15(%rip), %r15
56
57 /* Jump to the new code... */
58 jmpq *rip(%rip)
59
60 .section ".rodata"
61 .balign 4
62entry64_regs:
63rax: .quad 0x0
64rcx: .quad 0x0
65rdx: .quad 0x0
66rbx: .quad 0x0
67rsp: .quad 0x0
68rbp: .quad 0x0
69rsi: .quad 0x0
70rdi: .quad 0x0
71r8: .quad 0x0
72r9: .quad 0x0
73r10: .quad 0x0
74r11: .quad 0x0
75r12: .quad 0x0
76r13: .quad 0x0
77r14: .quad 0x0
78r15: .quad 0x0
79rip: .quad 0x0
80 .size entry64_regs, . - entry64_regs
81
82 /* GDT */
83 .section ".rodata"
84 .balign 16
85gdt:
86 /* 0x00 unusable segment
87 * 0x08 unused
88 * so use them as gdt ptr
89 */
90 .word gdt_end - gdt - 1
91 .quad gdt
92 .word 0, 0, 0
93
94 /* 0x10 4GB flat code segment */
95 .word 0xFFFF, 0x0000, 0x9A00, 0x00AF
96
97 /* 0x18 4GB flat data segment */
98 .word 0xFFFF, 0x0000, 0x9200, 0x00CF
99gdt_end:
100stack: .quad 0, 0
101stack_init:
diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c
new file mode 100644
index 000000000000..25e068ba3382
--- /dev/null
+++ b/arch/x86/purgatory/purgatory.c
@@ -0,0 +1,72 @@
1/*
2 * purgatory: Runs between two kernels
3 *
4 * Copyright (C) 2014 Red Hat Inc.
5 *
6 * Author:
7 * Vivek Goyal <vgoyal@redhat.com>
8 *
9 * This source code is licensed under the GNU General Public License,
10 * Version 2. See the file COPYING for more details.
11 */
12
13#include "sha256.h"
14#include "../boot/string.h"
15
16struct sha_region {
17 unsigned long start;
18 unsigned long len;
19};
20
21unsigned long backup_dest = 0;
22unsigned long backup_src = 0;
23unsigned long backup_sz = 0;
24
25u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 };
26
27struct sha_region sha_regions[16] = {};
28
29/*
30 * On x86, the second kernel requires the first 640K of memory to boot.
31 * Copy the first 640K to a backup region in the reserved memory range so
32 * that the second kernel can use it.
33 */
34static int copy_backup_region(void)
35{
36 if (backup_dest)
37 memcpy((void *)backup_dest, (void *)backup_src, backup_sz);
38
39 return 0;
40}
41
42int verify_sha256_digest(void)
43{
44 struct sha_region *ptr, *end;
45 u8 digest[SHA256_DIGEST_SIZE];
46 struct sha256_state sctx;
47
48 sha256_init(&sctx);
49 end = &sha_regions[sizeof(sha_regions)/sizeof(sha_regions[0])];
50 for (ptr = sha_regions; ptr < end; ptr++)
51 sha256_update(&sctx, (uint8_t *)(ptr->start), ptr->len);
52
53 sha256_final(&sctx, digest);
54
55 if (memcmp(digest, sha256_digest, sizeof(digest)))
56 return 1;
57
58 return 0;
59}
60
61void purgatory(void)
62{
63 int ret;
64
65 ret = verify_sha256_digest();
66 if (ret) {
67 /* loop forever */
68 for (;;)
69 ;
70 }
71 copy_backup_region();
72}
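
The kexec loader is expected to fill sha_regions[] and sha256_digest before control reaches purgatory(), and verify_sha256_digest() recomputes the hash over the same regions. A user-space mirror of that check using the sha256_{init,update,final} API from this directory (region contents are made up; building outside the kernel tree would need small shims for the u8/u64 types sha256.h pulls in):

#include <stdio.h>
#include "sha256.h"	/* the API declared in this directory */

int main(void)
{
	struct sha256_state sctx;
	u8 digest[SHA256_DIGEST_SIZE];
	const u8 region_a[] = "stand-in for the backed-up first 640K";
	const u8 region_b[] = "stand-in for a loaded kernel segment";

	/* hash the regions in order, just as verify_sha256_digest() does */
	sha256_init(&sctx);
	sha256_update(&sctx, region_a, sizeof(region_a));
	sha256_update(&sctx, region_b, sizeof(region_b));
	sha256_final(&sctx, digest);

	/*
	 * In purgatory a mismatch against the digest the loader stored
	 * means the image was corrupted, and the CPU is parked in the
	 * infinite loop above.
	 */
	printf("digest[0..3] = %02x%02x%02x%02x\n",
	       digest[0], digest[1], digest[2], digest[3]);
	return 0;
}
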
diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S
new file mode 100644
index 000000000000..fe3c91ba1bd0
--- /dev/null
+++ b/arch/x86/purgatory/setup-x86_64.S
@@ -0,0 +1,58 @@
1/*
2 * purgatory: setup code
3 *
4 * Copyright (C) 2003,2004 Eric Biederman (ebiederm@xmission.com)
5 * Copyright (C) 2014 Red Hat Inc.
6 *
7 * This code has been taken from kexec-tools.
8 *
9 * This source code is licensed under the GNU General Public License,
10 * Version 2. See the file COPYING for more details.
11 */
12
13 .text
14 .globl purgatory_start
15 .balign 16
16purgatory_start:
17 .code64
18
19 /* Load a gdt so I know what the segment registers are */
20 lgdt gdt(%rip)
21
22 /* load the data segments */
23 movl $0x18, %eax /* data segment */
24 movl %eax, %ds
25 movl %eax, %es
26 movl %eax, %ss
27 movl %eax, %fs
28 movl %eax, %gs
29
30 /* Setup a stack */
31 leaq lstack_end(%rip), %rsp
32
33 /* Call the C code */
34 call purgatory
35 jmp entry64
36
37 .section ".rodata"
38 .balign 16
39gdt: /* 0x00 unusable segment
40 * 0x08 unused
41 * so use them as the gdt ptr
42 */
43 .word gdt_end - gdt - 1
44 .quad gdt
45 .word 0, 0, 0
46
47 /* 0x10 4GB flat code segment */
48 .word 0xFFFF, 0x0000, 0x9A00, 0x00AF
49
50 /* 0x18 4GB flat data segment */
51 .word 0xFFFF, 0x0000, 0x9200, 0x00CF
52gdt_end:
53
54 .bss
55 .balign 4096
56lstack:
57 .skip 4096
58lstack_end:
diff --git a/arch/x86/purgatory/sha256.c b/arch/x86/purgatory/sha256.c
new file mode 100644
index 000000000000..548ca675a14a
--- /dev/null
+++ b/arch/x86/purgatory/sha256.c
@@ -0,0 +1,283 @@
1/*
2 * SHA-256, as specified in
3 * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
4 *
5 * SHA-256 code by Jean-Luc Cooke <jlcooke@certainkey.com>.
6 *
7 * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
8 * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
9 * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
10 * Copyright (c) 2014 Red Hat Inc.
11 *
12 * This program is free software; you can redistribute it and/or modify it
13 * under the terms of the GNU General Public License as published by the Free
14 * Software Foundation; either version 2 of the License, or (at your option)
15 * any later version.
16 */
17
18#include <linux/bitops.h>
19#include <asm/byteorder.h>
20#include "sha256.h"
21#include "../boot/string.h"
22
23static inline u32 Ch(u32 x, u32 y, u32 z)
24{
25 return z ^ (x & (y ^ z));
26}
27
28static inline u32 Maj(u32 x, u32 y, u32 z)
29{
30 return (x & y) | (z & (x | y));
31}
32
33#define e0(x) (ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22))
34#define e1(x) (ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25))
35#define s0(x) (ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3))
36#define s1(x) (ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10))
37
38static inline void LOAD_OP(int I, u32 *W, const u8 *input)
39{
40 W[I] = __be32_to_cpu(((__be32 *)(input))[I]);
41}
42
43static inline void BLEND_OP(int I, u32 *W)
44{
45 W[I] = s1(W[I-2]) + W[I-7] + s0(W[I-15]) + W[I-16];
46}
47
48static void sha256_transform(u32 *state, const u8 *input)
49{
50 u32 a, b, c, d, e, f, g, h, t1, t2;
51 u32 W[64];
52 int i;
53
54 /* load the input */
55 for (i = 0; i < 16; i++)
56 LOAD_OP(i, W, input);
57
58 /* now blend */
59 for (i = 16; i < 64; i++)
60 BLEND_OP(i, W);
61
62 /* load the state into our registers */
63 a = state[0]; b = state[1]; c = state[2]; d = state[3];
64 e = state[4]; f = state[5]; g = state[6]; h = state[7];
65
66 /* now iterate */
67 t1 = h + e1(e) + Ch(e, f, g) + 0x428a2f98 + W[0];
68 t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2;
69 t1 = g + e1(d) + Ch(d, e, f) + 0x71374491 + W[1];
70 t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2;
71 t1 = f + e1(c) + Ch(c, d, e) + 0xb5c0fbcf + W[2];
72 t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2;
73 t1 = e + e1(b) + Ch(b, c, d) + 0xe9b5dba5 + W[3];
74 t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2;
75 t1 = d + e1(a) + Ch(a, b, c) + 0x3956c25b + W[4];
76 t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2;
77 t1 = c + e1(h) + Ch(h, a, b) + 0x59f111f1 + W[5];
78 t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2;
79 t1 = b + e1(g) + Ch(g, h, a) + 0x923f82a4 + W[6];
80 t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2;
81 t1 = a + e1(f) + Ch(f, g, h) + 0xab1c5ed5 + W[7];
82 t2 = e0(b) + Maj(b, c, d); e += t1; a = t1 + t2;
83
84 t1 = h + e1(e) + Ch(e, f, g) + 0xd807aa98 + W[8];
85 t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2;
86 t1 = g + e1(d) + Ch(d, e, f) + 0x12835b01 + W[9];
87 t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2;
88 t1 = f + e1(c) + Ch(c, d, e) + 0x243185be + W[10];
89 t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2;
90 t1 = e + e1(b) + Ch(b, c, d) + 0x550c7dc3 + W[11];
91 t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2;
92 t1 = d + e1(a) + Ch(a, b, c) + 0x72be5d74 + W[12];
93 t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2;
94 t1 = c + e1(h) + Ch(h, a, b) + 0x80deb1fe + W[13];
95 t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2;
96 t1 = b + e1(g) + Ch(g, h, a) + 0x9bdc06a7 + W[14];
97 t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2;
98 t1 = a + e1(f) + Ch(f, g, h) + 0xc19bf174 + W[15];
99 t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
100
101 t1 = h + e1(e) + Ch(e, f, g) + 0xe49b69c1 + W[16];
102 t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
103 t1 = g + e1(d) + Ch(d, e, f) + 0xefbe4786 + W[17];
104 t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
105 t1 = f + e1(c) + Ch(c, d, e) + 0x0fc19dc6 + W[18];
106 t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
107 t1 = e + e1(b) + Ch(b, c, d) + 0x240ca1cc + W[19];
108 t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
109 t1 = d + e1(a) + Ch(a, b, c) + 0x2de92c6f + W[20];
110 t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
111 t1 = c + e1(h) + Ch(h, a, b) + 0x4a7484aa + W[21];
112 t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
113 t1 = b + e1(g) + Ch(g, h, a) + 0x5cb0a9dc + W[22];
114 t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
115 t1 = a + e1(f) + Ch(f, g, h) + 0x76f988da + W[23];
116 t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
117
118 t1 = h + e1(e) + Ch(e, f, g) + 0x983e5152 + W[24];
119 t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
120 t1 = g + e1(d) + Ch(d, e, f) + 0xa831c66d + W[25];
	t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
	t1 = f + e1(c) + Ch(c, d, e) + 0xb00327c8 + W[26];
	t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
	t1 = e + e1(b) + Ch(b, c, d) + 0xbf597fc7 + W[27];
	t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
	t1 = d + e1(a) + Ch(a, b, c) + 0xc6e00bf3 + W[28];
	t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
	t1 = c + e1(h) + Ch(h, a, b) + 0xd5a79147 + W[29];
	t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
	t1 = b + e1(g) + Ch(g, h, a) + 0x06ca6351 + W[30];
	t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
	t1 = a + e1(f) + Ch(f, g, h) + 0x14292967 + W[31];
	t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;

	t1 = h + e1(e) + Ch(e, f, g) + 0x27b70a85 + W[32];
	t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
	t1 = g + e1(d) + Ch(d, e, f) + 0x2e1b2138 + W[33];
	t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
	t1 = f + e1(c) + Ch(c, d, e) + 0x4d2c6dfc + W[34];
	t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
	t1 = e + e1(b) + Ch(b, c, d) + 0x53380d13 + W[35];
	t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
	t1 = d + e1(a) + Ch(a, b, c) + 0x650a7354 + W[36];
	t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
	t1 = c + e1(h) + Ch(h, a, b) + 0x766a0abb + W[37];
	t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
	t1 = b + e1(g) + Ch(g, h, a) + 0x81c2c92e + W[38];
	t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
	t1 = a + e1(f) + Ch(f, g, h) + 0x92722c85 + W[39];
	t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;

	t1 = h + e1(e) + Ch(e, f, g) + 0xa2bfe8a1 + W[40];
	t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
	t1 = g + e1(d) + Ch(d, e, f) + 0xa81a664b + W[41];
	t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
	t1 = f + e1(c) + Ch(c, d, e) + 0xc24b8b70 + W[42];
	t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
	t1 = e + e1(b) + Ch(b, c, d) + 0xc76c51a3 + W[43];
	t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
	t1 = d + e1(a) + Ch(a, b, c) + 0xd192e819 + W[44];
	t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
	t1 = c + e1(h) + Ch(h, a, b) + 0xd6990624 + W[45];
	t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
	t1 = b + e1(g) + Ch(g, h, a) + 0xf40e3585 + W[46];
	t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
	t1 = a + e1(f) + Ch(f, g, h) + 0x106aa070 + W[47];
	t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;

	t1 = h + e1(e) + Ch(e, f, g) + 0x19a4c116 + W[48];
	t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
	t1 = g + e1(d) + Ch(d, e, f) + 0x1e376c08 + W[49];
	t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
	t1 = f + e1(c) + Ch(c, d, e) + 0x2748774c + W[50];
	t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
	t1 = e + e1(b) + Ch(b, c, d) + 0x34b0bcb5 + W[51];
	t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
	t1 = d + e1(a) + Ch(a, b, c) + 0x391c0cb3 + W[52];
	t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
	t1 = c + e1(h) + Ch(h, a, b) + 0x4ed8aa4a + W[53];
	t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
	t1 = b + e1(g) + Ch(g, h, a) + 0x5b9cca4f + W[54];
	t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
	t1 = a + e1(f) + Ch(f, g, h) + 0x682e6ff3 + W[55];
	t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;

	t1 = h + e1(e) + Ch(e, f, g) + 0x748f82ee + W[56];
	t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
	t1 = g + e1(d) + Ch(d, e, f) + 0x78a5636f + W[57];
	t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
	t1 = f + e1(c) + Ch(c, d, e) + 0x84c87814 + W[58];
	t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
	t1 = e + e1(b) + Ch(b, c, d) + 0x8cc70208 + W[59];
	t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
	t1 = d + e1(a) + Ch(a, b, c) + 0x90befffa + W[60];
	t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
	t1 = c + e1(h) + Ch(h, a, b) + 0xa4506ceb + W[61];
	t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
	t1 = b + e1(g) + Ch(g, h, a) + 0xbef9a3f7 + W[62];
	t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
	t1 = a + e1(f) + Ch(f, g, h) + 0xc67178f2 + W[63];
	t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;

	state[0] += a; state[1] += b; state[2] += c; state[3] += d;
	state[4] += e; state[5] += f; state[6] += g; state[7] += h;

	/* clear any sensitive info... */
	a = b = c = d = e = f = g = h = t1 = t2 = 0;
	memset(W, 0, 64 * sizeof(u32));
}

int sha256_init(struct sha256_state *sctx)
{
	sctx->state[0] = SHA256_H0;
	sctx->state[1] = SHA256_H1;
	sctx->state[2] = SHA256_H2;
	sctx->state[3] = SHA256_H3;
	sctx->state[4] = SHA256_H4;
	sctx->state[5] = SHA256_H5;
	sctx->state[6] = SHA256_H6;
	sctx->state[7] = SHA256_H7;
	sctx->count = 0;

	return 0;
}

int sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len)
{
	unsigned int partial, done;
	const u8 *src;

	partial = sctx->count & 0x3f;
	sctx->count += len;
	done = 0;
	src = data;

	if ((partial + len) > 63) {
		if (partial) {
			done = -partial;
			memcpy(sctx->buf + partial, data, done + 64);
			src = sctx->buf;
		}

		do {
			sha256_transform(sctx->state, src);
			done += 64;
			src = data + done;
		} while (done + 63 < len);

		partial = 0;
	}
	memcpy(sctx->buf + partial, src, len - done);

	return 0;
}

int sha256_final(struct sha256_state *sctx, u8 *out)
{
	__be32 *dst = (__be32 *)out;
	__be64 bits;
	unsigned int index, pad_len;
	int i;
	static const u8 padding[64] = { 0x80, };

	/* Save number of bits */
	bits = cpu_to_be64(sctx->count << 3);

	/* Pad out to 56 mod 64. */
	index = sctx->count & 0x3f;
	pad_len = (index < 56) ? (56 - index) : ((64+56) - index);
	sha256_update(sctx, padding, pad_len);

	/* Append length (before padding) */
	sha256_update(sctx, (const u8 *)&bits, sizeof(bits));

	/* Store state in digest */
	for (i = 0; i < 8; i++)
		dst[i] = cpu_to_be32(sctx->state[i]);

	/* Zeroize sensitive information. */
	memset(sctx, 0, sizeof(*sctx));

	return 0;
}
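Taken together, sha256_init/sha256_update/sha256_final are the usual streaming-hash triple: update buffers any partial 64-byte block in sctx->buf (the done = -partial idiom lets the first memcpy top the buffer up to exactly one block before transforming it), and final pads to 56 mod 64 and appends the bit count. As a minimal, purely illustrative sketch of how a caller drives this API (the helper name and digest buffer are not part of the patch):

	#include "sha256.h"	/* declares the API; struct sha256_state comes via <crypto/sha.h> */

	/* Illustrative helper, not in the patch: hash an arbitrary region. */
	static void hash_region(const u8 *buf, unsigned int len,
				u8 digest[SHA256_DIGEST_SIZE])
	{
		struct sha256_state sctx;

		sha256_init(&sctx);		/* load the H0..H7 constants */
		sha256_update(&sctx, buf, len);	/* whole blocks transformed, tail buffered */
		sha256_final(&sctx, digest);	/* pad, append bit length, emit big-endian words */
	}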
diff --git a/arch/x86/purgatory/sha256.h b/arch/x86/purgatory/sha256.h
new file mode 100644
index 000000000000..bd15a4127735
--- /dev/null
+++ b/arch/x86/purgatory/sha256.h
@@ -0,0 +1,22 @@
/*
 * Copyright (C) 2014 Red Hat Inc.
 *
 * Author: Vivek Goyal <vgoyal@redhat.com>
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2. See the file COPYING for more details.
 */

#ifndef SHA256_H
#define SHA256_H


#include <linux/types.h>
#include <crypto/sha.h>

extern int sha256_init(struct sha256_state *sctx);
extern int sha256_update(struct sha256_state *sctx, const u8 *input,
			 unsigned int length);
extern int sha256_final(struct sha256_state *sctx, u8 *hash);

#endif /* SHA256_H */
diff --git a/arch/x86/purgatory/stack.S b/arch/x86/purgatory/stack.S
new file mode 100644
index 000000000000..3cefba1fefc8
--- /dev/null
+++ b/arch/x86/purgatory/stack.S
@@ -0,0 +1,19 @@
/*
 * purgatory: stack
 *
 * Copyright (C) 2014 Red Hat Inc.
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2. See the file COPYING for more details.
 */

	/* A stack for the loaded kernel.
	 * Separate and in the data section so it can be prepopulated.
	 */
	.data
	.balign 4096
	.globl stack, stack_end

stack:
	.skip 4096
stack_end:
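The 4096-byte stack lives in .data rather than .bss precisely so the kexec loader can seed it with initial contents before the purgatory code runs. Consumers only need the two exported symbols; a hypothetical C-side view (a sketch, not part of the patch) would be:

	/* Sketch only: the symbols exported by stack.S above. */
	extern char stack[], stack_end[];

	/* x86 stacks grow downward, so the initial stack pointer is stack_end. */
	static inline unsigned long purgatory_stack_top(void)
	{
		return (unsigned long)stack_end;
	}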
diff --git a/arch/x86/purgatory/string.c b/arch/x86/purgatory/string.c
new file mode 100644
index 000000000000..d886b1fa36f0
--- /dev/null
+++ b/arch/x86/purgatory/string.c
@@ -0,0 +1,13 @@
/*
 * Simple string functions.
 *
 * Copyright (C) 2014 Red Hat Inc.
 *
 * Author:
 *       Vivek Goyal <vgoyal@redhat.com>
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2. See the file COPYING for more details.
 */

#include "../boot/string.c"
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index d6b867921612..028b78168d85 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -360,3 +360,6 @@
 351	i386	sched_setattr		sys_sched_setattr
 352	i386	sched_getattr		sys_sched_getattr
 353	i386	renameat2		sys_renameat2
+354	i386	seccomp			sys_seccomp
+355	i386	getrandom		sys_getrandom
+356	i386	memfd_create		sys_memfd_create
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index ec255a1646d2..35dd922727b9 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -323,6 +323,10 @@
 314	common	sched_setattr		sys_sched_setattr
 315	common	sched_getattr		sys_sched_getattr
 316	common	renameat2		sys_renameat2
+317	common	seccomp			sys_seccomp
+318	common	getrandom		sys_getrandom
+319	common	memfd_create		sys_memfd_create
+320	common	kexec_file_load		sys_kexec_file_load
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
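The new calls get consecutive numbers on both tables: seccomp is 354 on i386 and 317 on x86-64, getrandom 355/318, memfd_create 356/319, and kexec_file_load 320 on 64-bit only. Since no glibc wrappers existed when these numbers landed, userspace reached them through syscall(2); a small, illustrative x86-64 test for getrandom might look like this (the fallback define mirrors the table entry above):

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>

	#ifndef SYS_getrandom
	#define SYS_getrandom 318	/* x86-64 number from the table above */
	#endif

	int main(void)
	{
		unsigned char buf[16];
		long n = syscall(SYS_getrandom, buf, sizeof(buf), 0 /* flags */);

		printf("getrandom returned %ld\n", n);
		return n != (long)sizeof(buf);
	}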
diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h
index 0feee2fd5077..25a1022dd793 100644
--- a/arch/x86/um/asm/elf.h
+++ b/arch/x86/um/asm/elf.h
@@ -216,6 +216,5 @@ extern long elf_aux_hwcap;
 #define ELF_HWCAP		(elf_aux_hwcap)
 
 #define SET_PERSONALITY(ex) do ; while(0)
-#define __HAVE_ARCH_GATE_AREA 1
 
 #endif
diff --git a/arch/x86/um/mem_64.c b/arch/x86/um/mem_64.c
index c6492e75797b..f8fecaddcc0d 100644
--- a/arch/x86/um/mem_64.c
+++ b/arch/x86/um/mem_64.c
@@ -9,18 +9,3 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 
 	return NULL;
 }
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-	return NULL;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long addr)
-{
-	return 0;
-}
-
-int in_gate_area_no_mm(unsigned long addr)
-{
-	return 0;
-}
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index 5e04a1c899fa..79d824551c1a 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -370,13 +370,12 @@ struct rt_sigframe
 	char retcode[8];
 };
 
-int setup_signal_stack_sc(unsigned long stack_top, int sig,
-			  struct k_sigaction *ka, struct pt_regs *regs,
-			  sigset_t *mask)
+int setup_signal_stack_sc(unsigned long stack_top, struct ksignal *ksig,
+			  struct pt_regs *regs, sigset_t *mask)
 {
 	struct sigframe __user *frame;
 	void __user *restorer;
-	int err = 0;
+	int err = 0, sig = ksig->sig;
 
 	/* This is the same calculation as i386 - ((sp + 4) & 15) == 0 */
 	stack_top = ((stack_top + 4) & -16UL) - 4;
@@ -385,8 +384,8 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig,
 		return 1;
 
 	restorer = frame->retcode;
-	if (ka->sa.sa_flags & SA_RESTORER)
-		restorer = ka->sa.sa_restorer;
+	if (ksig->ka.sa.sa_flags & SA_RESTORER)
+		restorer = ksig->ka.sa.sa_restorer;
 
 	err |= __put_user(restorer, &frame->pretcode);
 	err |= __put_user(sig, &frame->sig);
@@ -410,20 +409,19 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig,
 		return err;
 
 	PT_REGS_SP(regs) = (unsigned long) frame;
-	PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler;
+	PT_REGS_IP(regs) = (unsigned long) ksig->ka.sa.sa_handler;
 	PT_REGS_AX(regs) = (unsigned long) sig;
 	PT_REGS_DX(regs) = (unsigned long) 0;
 	PT_REGS_CX(regs) = (unsigned long) 0;
 	return 0;
 }
 
-int setup_signal_stack_si(unsigned long stack_top, int sig,
-			  struct k_sigaction *ka, struct pt_regs *regs,
-			  siginfo_t *info, sigset_t *mask)
+int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig,
+			  struct pt_regs *regs, sigset_t *mask)
 {
 	struct rt_sigframe __user *frame;
 	void __user *restorer;
-	int err = 0;
+	int err = 0, sig = ksig->sig;
 
 	stack_top &= -8UL;
 	frame = (struct rt_sigframe __user *) stack_top - 1;
@@ -431,14 +429,14 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 		return 1;
 
 	restorer = frame->retcode;
-	if (ka->sa.sa_flags & SA_RESTORER)
-		restorer = ka->sa.sa_restorer;
+	if (ksig->ka.sa.sa_flags & SA_RESTORER)
+		restorer = ksig->ka.sa.sa_restorer;
 
 	err |= __put_user(restorer, &frame->pretcode);
 	err |= __put_user(sig, &frame->sig);
 	err |= __put_user(&frame->info, &frame->pinfo);
 	err |= __put_user(&frame->uc, &frame->puc);
-	err |= copy_siginfo_to_user(&frame->info, info);
+	err |= copy_siginfo_to_user(&frame->info, &ksig->info);
 	err |= copy_ucontext_to_user(&frame->uc, &frame->fpstate, mask,
 				     PT_REGS_SP(regs));
 
@@ -457,7 +455,7 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 		return err;
 
 	PT_REGS_SP(regs) = (unsigned long) frame;
-	PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler;
+	PT_REGS_IP(regs) = (unsigned long) ksig->ka.sa.sa_handler;
 	PT_REGS_AX(regs) = (unsigned long) sig;
 	PT_REGS_DX(regs) = (unsigned long) &frame->info;
 	PT_REGS_CX(regs) = (unsigned long) &frame->uc;
@@ -502,12 +500,11 @@ struct rt_sigframe
 	struct _fpstate fpstate;
 };
 
-int setup_signal_stack_si(unsigned long stack_top, int sig,
-			  struct k_sigaction *ka, struct pt_regs * regs,
-			  siginfo_t *info, sigset_t *set)
+int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig,
+			  struct pt_regs *regs, sigset_t *set)
 {
 	struct rt_sigframe __user *frame;
-	int err = 0;
+	int err = 0, sig = ksig->sig;
 
 	frame = (struct rt_sigframe __user *)
 		round_down(stack_top - sizeof(struct rt_sigframe), 16);
@@ -517,8 +514,8 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		goto out;
 
-	if (ka->sa.sa_flags & SA_SIGINFO) {
-		err |= copy_siginfo_to_user(&frame->info, info);
+	if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
+		err |= copy_siginfo_to_user(&frame->info, &ksig->info);
 		if (err)
 			goto out;
 	}
@@ -543,8 +540,8 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 	 * already in userspace.
 	 */
 	/* x86-64 should always use SA_RESTORER. */
-	if (ka->sa.sa_flags & SA_RESTORER)
-		err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
+	if (ksig->ka.sa.sa_flags & SA_RESTORER)
+		err |= __put_user(ksig->ka.sa.sa_restorer, &frame->pretcode);
 	else
 		/* could use a vstub here */
 		return err;
@@ -570,7 +567,7 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 	 */
 	PT_REGS_SI(regs) = (unsigned long) &frame->info;
 	PT_REGS_DX(regs) = (unsigned long) &frame->uc;
-	PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler;
+	PT_REGS_IP(regs) = (unsigned long) ksig->ka.sa.sa_handler;
 out:
 	return err;
 }
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index e4f7781ee162..e904c270573b 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -115,23 +115,6 @@ static __init int ia32_binfmt_init(void)
 	return 0;
 }
 __initcall(ia32_binfmt_init);
-#endif
-
-#else /* CONFIG_X86_32 */
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-	return NULL;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long addr)
-{
-	return 0;
-}
-
-int in_gate_area_no_mm(unsigned long addr)
-{
-	return 0;
-}
+#endif /* CONFIG_SYSCTL */
 
 #endif /* CONFIG_X86_64 */
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 94813515fdd6..c0cb11fb5008 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1828,8 +1828,19 @@ static void __init xen_hvm_guest_init(void)
 	xen_hvm_init_mmu_ops();
 }
 
+static bool xen_nopv = false;
+static __init int xen_parse_nopv(char *arg)
+{
+	xen_nopv = true;
+	return 0;
+}
+early_param("xen_nopv", xen_parse_nopv);
+
 static uint32_t __init xen_hvm_platform(void)
 {
+	if (xen_nopv)
+		return 0;
+
 	if (xen_pv_domain())
 		return 0;
 
@@ -1838,6 +1849,8 @@ static uint32_t __init xen_hvm_platform(void)
 
 bool xen_hvm_need_lapic(void)
 {
+	if (xen_nopv)
+		return false;
 	if (xen_pv_domain())
 		return false;
 	if (!xen_hvm_domain())
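In effect this introduces a xen_nopv kernel command-line parameter: booting an HVM guest with xen_nopv makes xen_hvm_platform() report no Xen platform and xen_hvm_need_lapic() return false, so the guest ignores the Xen PV extensions and runs purely on emulated hardware (presumably useful for testing and comparing the emulated paths). Note that early_param() matches the bare parameter name, so plain "xen_nopv" on the command line is enough.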
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index ebfa9b2c871d..1580e7a5a4cf 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -49,7 +49,7 @@
 static struct gnttab_vm_area {
 	struct vm_struct *area;
 	pte_t **ptes;
-} gnttab_shared_vm_area, gnttab_status_vm_area;
+} gnttab_shared_vm_area;
 
 int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
 			   unsigned long max_nr_gframes,
@@ -73,43 +73,16 @@ int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
 	return 0;
 }
 
-int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
-			   unsigned long max_nr_gframes,
-			   grant_status_t **__shared)
-{
-	grant_status_t *shared = *__shared;
-	unsigned long addr;
-	unsigned long i;
-
-	if (shared == NULL)
-		*__shared = shared = gnttab_status_vm_area.area->addr;
-
-	addr = (unsigned long)shared;
-
-	for (i = 0; i < nr_gframes; i++) {
-		set_pte_at(&init_mm, addr, gnttab_status_vm_area.ptes[i],
-			   mfn_pte(frames[i], PAGE_KERNEL));
-		addr += PAGE_SIZE;
-	}
-
-	return 0;
-}
-
 void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
 {
-	pte_t **ptes;
 	unsigned long addr;
 	unsigned long i;
 
-	if (shared == gnttab_status_vm_area.area->addr)
-		ptes = gnttab_status_vm_area.ptes;
-	else
-		ptes = gnttab_shared_vm_area.ptes;
-
 	addr = (unsigned long)shared;
 
 	for (i = 0; i < nr_gframes; i++) {
-		set_pte_at(&init_mm, addr, ptes[i], __pte(0));
+		set_pte_at(&init_mm, addr, gnttab_shared_vm_area.ptes[i],
+			   __pte(0));
 		addr += PAGE_SIZE;
 	}
 }
@@ -129,35 +102,12 @@ static int arch_gnttab_valloc(struct gnttab_vm_area *area, unsigned nr_frames)
 	return 0;
 }
 
-static void arch_gnttab_vfree(struct gnttab_vm_area *area)
+int arch_gnttab_init(unsigned long nr_shared)
 {
-	free_vm_area(area->area);
-	kfree(area->ptes);
-}
-
-int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status)
-{
-	int ret;
-
 	if (!xen_pv_domain())
 		return 0;
 
-	ret = arch_gnttab_valloc(&gnttab_shared_vm_area, nr_shared);
-	if (ret < 0)
-		return ret;
-
-	/*
-	 * Always allocate the space for the status frames in case
-	 * we're migrated to a host with V2 support.
-	 */
-	ret = arch_gnttab_valloc(&gnttab_status_vm_area, nr_status);
-	if (ret < 0)
-		goto err;
-
-	return 0;
- err:
-	arch_gnttab_vfree(&gnttab_shared_vm_area);
-	return -ENOMEM;
+	return arch_gnttab_valloc(&gnttab_shared_vm_area, nr_shared);
 }
 
 #ifdef CONFIG_XEN_PVH
@@ -168,6 +118,7 @@ static int __init xlated_setup_gnttab_pages(void)
 {
 	struct page **pages;
 	xen_pfn_t *pfns;
+	void *vaddr;
 	int rc;
 	unsigned int i;
 	unsigned long nr_grant_frames = gnttab_max_grant_frames();
@@ -193,21 +144,20 @@ static int __init xlated_setup_gnttab_pages(void)
 	for (i = 0; i < nr_grant_frames; i++)
 		pfns[i] = page_to_pfn(pages[i]);
 
-	rc = arch_gnttab_map_shared(pfns, nr_grant_frames, nr_grant_frames,
-				    &xen_auto_xlat_grant_frames.vaddr);
-
-	if (rc) {
+	vaddr = vmap(pages, nr_grant_frames, 0, PAGE_KERNEL);
+	if (!vaddr) {
 		pr_warn("%s Couldn't map %ld pfns rc:%d\n", __func__,
 			nr_grant_frames, rc);
 		free_xenballooned_pages(nr_grant_frames, pages);
 		kfree(pages);
 		kfree(pfns);
-		return rc;
+		return -ENOMEM;
 	}
 	kfree(pages);
 
 	xen_auto_xlat_grant_frames.pfn = pfns;
 	xen_auto_xlat_grant_frames.count = nr_grant_frames;
+	xen_auto_xlat_grant_frames.vaddr = vaddr;
 
 	return 0;
 }
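The switch to vmap() works because this path only runs for auto-translated (PVH) guests, where the grant frames are ordinary ballooned RAM pages already owned by the guest: there are no machine frames to rewrite into PTEs, so mapping the pages contiguously and recording the resulting vaddr alongside the pfn array is sufficient. One caveat worth flagging: in the new !vaddr error path the pr_warn() still prints rc, which the vmap() call no longer sets, so it appears to report a stale value from the earlier allocation.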
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 9bb3d82ffec8..3172692381ae 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -841,10 +841,9 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s,
 		pfn = ALIGN(pfn, P2M_PER_PAGE);
 	}
 
-	if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s),
-		"Identity mapping failed. We are %ld short of 1-1 mappings!\n",
-		(pfn_e - pfn_s) - (pfn - pfn_s)))
-		printk(KERN_DEBUG "1-1 mapping on %lx->%lx\n", pfn_s, pfn);
+	WARN((pfn - pfn_s) != (pfn_e - pfn_s),
+		"Identity mapping failed. We are %ld short of 1-1 mappings!\n",
+		(pfn_e - pfn_s) - (pfn - pfn_s));
 
 	return pfn - pfn_s;
 }
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 7b78f88c1707..5718b0b58b60 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -444,7 +444,7 @@ void xen_setup_timer(int cpu)
 
 	irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
 				      IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER|
-				      IRQF_FORCE_RESUME,
+				      IRQF_FORCE_RESUME|IRQF_EARLY_RESUME,
 				      name, NULL);
 	(void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX);
 