Diffstat (limited to 'arch/x86')
127 files changed, 1213 insertions, 1030 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f8958b01b975..140e254fe546 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -123,7 +123,7 @@ config NEED_SG_DMA_LENGTH
 	def_bool y
 
 config GENERIC_ISA_DMA
-	def_bool y
+	def_bool ISA_DMA_API
 
 config GENERIC_IOMAP
 	def_bool y
@@ -143,7 +143,7 @@ config GENERIC_GPIO
 	bool
 
 config ARCH_MAY_HAVE_PC_FDC
-	def_bool y
+	def_bool ISA_DMA_API
 
 config RWSEM_GENERIC_SPINLOCK
 	def_bool !X86_XADD
@@ -221,10 +221,6 @@ config X86_HT
 	def_bool y
 	depends on SMP
 
-config X86_TRAMPOLINE
-	def_bool y
-	depends on SMP || (64BIT && ACPI_SLEEP)
-
 config X86_32_LAZY_GS
 	def_bool y
 	depends on X86_32 && !CC_STACKPROTECTOR
@@ -2006,9 +2002,13 @@ source "drivers/pci/pcie/Kconfig"
 
 source "drivers/pci/Kconfig"
 
-# x86_64 have no ISA slots, but do have ISA-style DMA.
+# x86_64 have no ISA slots, but can have ISA-style DMA.
 config ISA_DMA_API
-	def_bool y
+	bool "ISA-style DMA support" if (X86_64 && EXPERT)
+	default y
+	help
+	  Enables ISA-style DMA support for devices requiring such controllers.
+	  If unsure, say Y.
 
 if X86_32
 
@@ -2096,6 +2096,16 @@ source "drivers/pcmcia/Kconfig"
 
 source "drivers/pci/hotplug/Kconfig"
 
+config RAPIDIO
+	bool "RapidIO support"
+	depends on PCI
+	default n
+	help
+	  If you say Y here, the kernel will include drivers and
+	  infrastructure code to support RapidIO interconnect devices.
+
+source "drivers/rapidio/Kconfig"
+
 endmenu
 
 
@@ -2130,6 +2140,11 @@ config SYSVIPC_COMPAT
 	def_bool y
 	depends on COMPAT && SYSVIPC
 
+config KEYS_COMPAT
+	bool
+	depends on COMPAT && KEYS
+	default y
+
 endmenu
 
 
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index ed47e6e1747f..d161e939df62 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -326,7 +326,7 @@ config X86_PPRO_FENCE
 	  Old PentiumPro multiprocessor systems had errata that could cause
 	  memory operations to violate the x86 ordering standard in rare cases.
 	  Enabling this option will attempt to work around some (but not all)
-	  occurances of this problem, at the cost of much heavier spinlock and
+	  occurrences of this problem, at the cost of much heavier spinlock and
 	  memory barrier operations.
 
 	  If unsure, say n here. Even distro kernels should think twice before
@@ -366,7 +366,7 @@ config X86_INTEL_USERCOPY
 
 config X86_USE_PPRO_CHECKSUM
 	def_bool y
-	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
+	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
 
 config X86_USE_3DNOW
 	def_bool y
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 8fe2a4966b7a..adcf794b22e2 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -1346,7 +1346,7 @@ _zero_cipher_left_decrypt:
 	and $15, %r13			# %r13 = arg4 (mod 16)
 	je _multiple_of_16_bytes_decrypt
 
-	# Handle the last <16 byte block seperately
+	# Handle the last <16 byte block separately
 
 	paddd ONE(%rip), %xmm0		# increment CNT to get Yn
 	movdqa SHUF_MASK(%rip), %xmm10
@@ -1355,7 +1355,7 @@ _zero_cipher_left_decrypt:
 	ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1	# E(K, Yn)
 	sub $16, %r11
 	add %r13, %r11
-	movdqu (%arg3,%r11,1), %xmm1	# recieve the last <16 byte block
+	movdqu (%arg3,%r11,1), %xmm1	# receive the last <16 byte block
 	lea SHIFT_MASK+16(%rip), %r12
 	sub %r13, %r12
 	# adjust the shuffle mask pointer to be able to shift 16-%r13 bytes
@@ -1607,7 +1607,7 @@ _zero_cipher_left_encrypt:
 	and $15, %r13			# %r13 = arg4 (mod 16)
 	je _multiple_of_16_bytes_encrypt
 
-	# Handle the last <16 Byte block seperately
+	# Handle the last <16 Byte block separately
 	paddd ONE(%rip), %xmm0		# INCR CNT to get Yn
 	movdqa SHUF_MASK(%rip), %xmm10
 	PSHUFB_XMM %xmm10, %xmm0
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index e1e60c7d5813..e0e6340c8dad 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -873,22 +873,18 @@ rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len)
 	crypto_ablkcipher_clear_flags(ctr_tfm, ~0);
 
 	ret = crypto_ablkcipher_setkey(ctr_tfm, key, key_len);
-	if (ret) {
-		crypto_free_ablkcipher(ctr_tfm);
-		return ret;
-	}
+	if (ret)
+		goto out_free_ablkcipher;
 
+	ret = -ENOMEM;
 	req = ablkcipher_request_alloc(ctr_tfm, GFP_KERNEL);
-	if (!req) {
-		crypto_free_ablkcipher(ctr_tfm);
-		return -EINVAL;
-	}
+	if (!req)
+		goto out_free_ablkcipher;
 
 	req_data = kmalloc(sizeof(*req_data), GFP_KERNEL);
-	if (!req_data) {
-		crypto_free_ablkcipher(ctr_tfm);
-		return -ENOMEM;
-	}
+	if (!req_data)
+		goto out_free_request;
+
 	memset(req_data->iv, 0, sizeof(req_data->iv));
 
 	/* Clear the data in the hash sub key container to zero.*/
@@ -913,8 +909,10 @@ rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len)
 		if (!ret)
 			ret = req_data->result.err;
 	}
-	ablkcipher_request_free(req);
 	kfree(req_data);
+out_free_request:
+	ablkcipher_request_free(req);
+out_free_ablkcipher:
 	crypto_free_ablkcipher(ctr_tfm);
 	return ret;
 }
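The hunk above replaces three duplicated cleanup paths with the kernel's usual centralized-exit idiom: resources are released in reverse order of acquisition under labels at the end of the function, and each failure site jumps to the label that frees exactly what has been acquired so far (success falls through the same unwind, since this function keeps none of the temporaries). A minimal userspace analogue of the same shape, with illustrative names not taken from the patch:

    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical two-resource setup using the goto-unwind idiom. */
    static int setup(void)
    {
            int ret = -1;
            char *buf;
            FILE *f;

            buf = malloc(4096);
            if (!buf)
                    goto out;               /* nothing acquired yet */

            f = fopen("/tmp/example", "w");
            if (!f)
                    goto out_free_buf;      /* free only what we hold */

            if (fwrite("ok", 1, 2, f) != 2)
                    goto out_close;

            ret = 0;                        /* success unwinds too */
    out_close:
            fclose(f);
    out_free_buf:
            free(buf);
    out:
            return ret;
    }

    int main(void)
    {
            return setup() ? 1 : 0;
    }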
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 2d93bdbc9ac0..fd843877e841 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -298,6 +298,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	/* OK, This is the point of no return */
 	set_personality(PER_LINUX);
 	set_thread_flag(TIF_IA32);
+	current->mm->context.ia32_compat = 1;
 
 	setup_new_exec(bprm);
 
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 430312ba6e3f..849a9d23c71d 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -847,4 +847,5 @@ ia32_sys_call_table:
 	.quad sys_name_to_handle_at
 	.quad compat_sys_open_by_handle_at
 	.quad compat_sys_clock_adjtime
+	.quad sys_syncfs
 ia32_syscall_end:
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index b964ec457546..12e0e7dd869c 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -29,6 +29,7 @@
 #include <asm/processor.h>
 #include <asm/mmu.h>
 #include <asm/mpspec.h>
+#include <asm/trampoline.h>
 
 #define COMPILER_DEPENDENT_INT64   long long
 #define COMPILER_DEPENDENT_UINT64  unsigned long long
@@ -113,11 +114,11 @@ static inline void acpi_disable_pci(void)
 	acpi_noirq_set();
 }
 
-/* routines for saving/restoring kernel state */
-extern int acpi_save_state_mem(void);
-extern void acpi_restore_state_mem(void);
+/* Low-level suspend routine. */
+extern int acpi_suspend_lowlevel(void);
 
-extern unsigned long acpi_wakeup_address;
+extern const unsigned char acpi_wakeup_code[];
+#define acpi_wakeup_address (__pa(TRAMPOLINE_SYM(acpi_wakeup_code)))
 
 /* early initialization routine */
 extern void acpi_reserve_wakeup_memory(void);
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index e264ae5a1443..331682231bb4 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -13,7 +13,7 @@ extern const struct pci_device_id amd_nb_misc_ids[];
 extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[];
 struct bootnode;
 
-extern int early_is_amd_nb(u32 value);
+extern bool early_is_amd_nb(u32 value);
 extern int amd_cache_northbridges(void);
 extern void amd_flush_garts(void);
 extern int amd_numa_init(void);
@@ -32,18 +32,18 @@ struct amd_northbridge_info {
 };
 extern struct amd_northbridge_info amd_northbridges;
 
-#define AMD_NB_GART			0x1
-#define AMD_NB_L3_INDEX_DISABLE		0x2
-#define AMD_NB_L3_PARTITIONING		0x4
+#define AMD_NB_GART			BIT(0)
+#define AMD_NB_L3_INDEX_DISABLE		BIT(1)
+#define AMD_NB_L3_PARTITIONING		BIT(2)
 
 #ifdef CONFIG_AMD_NB
 
-static inline int amd_nb_num(void)
+static inline u16 amd_nb_num(void)
 {
 	return amd_northbridges.num;
 }
 
-static inline int amd_nb_has_feature(int feature)
+static inline bool amd_nb_has_feature(unsigned feature)
 {
 	return ((amd_northbridges.flags & feature) == feature);
 }
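BIT(n), from the kernel's bitops headers, expands to (1UL << (n)); writing the feature flags this way makes the single-bit intent explicit and keeps the mask test in amd_nb_has_feature() readable. A standalone sketch of the pattern, with BIT defined locally so it compiles outside the kernel:

    #include <stdbool.h>
    #include <stdio.h>

    #define BIT(n)				(1UL << (n))

    #define AMD_NB_GART			BIT(0)
    #define AMD_NB_L3_INDEX_DISABLE		BIT(1)
    #define AMD_NB_L3_PARTITIONING		BIT(2)

    static unsigned long flags = AMD_NB_GART | AMD_NB_L3_PARTITIONING;

    /* Mirrors amd_nb_has_feature(): true only if every requested bit is set. */
    static bool has_feature(unsigned feature)
    {
            return (flags & feature) == feature;
    }

    int main(void)
    {
            printf("GART: %d\n", has_feature(AMD_NB_GART));
            printf("GART + L3 partitioning: %d\n",
                   has_feature(AMD_NB_GART | AMD_NB_L3_PARTITIONING));
            printf("L3 index disable: %d\n", has_feature(AMD_NB_L3_INDEX_DISABLE));
            return 0;
    }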
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 903683b07e42..69d58131bc8e 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -456,14 +456,12 @@ static inline int fls(int x)
 
 #ifdef __KERNEL__
 
-#include <asm-generic/bitops/ext2-non-atomic.h>
+#include <asm-generic/bitops/le.h>
 
 #define ext2_set_bit_atomic(lock, nr, addr)			\
 	test_and_set_bit((nr), (unsigned long *)(addr))
 #define ext2_clear_bit_atomic(lock, nr, addr)			\
 	test_and_clear_bit((nr), (unsigned long *)(addr))
 
-#include <asm-generic/bitops/minix.h>
-
 #endif /* __KERNEL__ */
 #endif /* _ASM_X86_BITOPS_H */
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index 62f084478f7e..4e12668711e5 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -71,7 +71,7 @@ static inline void set_page_memtype(struct page *pg, unsigned long memtype) { }
  * Read/Write  : ReadOnly, ReadWrite
  * Presence    : NotPresent
  *
- * Within a catagory, the attributes are mutually exclusive.
+ * Within a category, the attributes are mutually exclusive.
 *
 * The implementation of this API will take care of various aspects that
 * are associated with changing such attributes, such as:
diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h
index ca1098a7e580..97b6d8114a43 100644
--- a/arch/x86/include/asm/dma.h
+++ b/arch/x86/include/asm/dma.h
@@ -151,6 +151,7 @@
 #define DMA_AUTOINIT		0x10
 
 
+#ifdef CONFIG_ISA_DMA_API
 extern spinlock_t  dma_spin_lock;
 
 static inline unsigned long claim_dma_lock(void)
@@ -164,6 +165,7 @@ static inline void release_dma_lock(unsigned long flags)
 {
 	spin_unlock_irqrestore(&dma_spin_lock, flags);
 }
+#endif /* CONFIG_ISA_DMA_API */
 
 /* enable/disable a specific DMA channel */
 static inline void enable_dma(unsigned int dmanr)
@@ -303,9 +305,11 @@ static inline int get_dma_residue(unsigned int dmanr)
 }
 
 
-/* These are in kernel/dma.c: */
+/* These are in kernel/dma.c because x86 uses CONFIG_GENERIC_ISA_DMA */
+#ifdef CONFIG_ISA_DMA_API
 extern int request_dma(unsigned int dmanr, const char *device_id);
 extern void free_dma(unsigned int dmanr);
+#endif
 
 /* From PCI */
 
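With these guards, the legacy 8237 lock and channel-allocation helpers exist only when CONFIG_ISA_DMA_API is set, so any driver needing them must depend on ISA_DMA_API in its Kconfig entry. A hedged, kernel-context sketch of how a hypothetical ISA driver would use the API shown in this header (channel number and driver name are illustrative, not from the patch):

    #ifdef CONFIG_ISA_DMA_API
    static int example_setup_isa_dma(void)
    {
            unsigned long flags;

            if (request_dma(3, "example-isa"))       /* channel 3 is illustrative */
                    return -EBUSY;

            flags = claim_dma_lock();               /* serializes 8237 register access */
            enable_dma(3);
            release_dma_lock(flags);
            return 0;
    }

    static void example_teardown_isa_dma(void)
    {
            free_dma(3);
    }
    #endif /* CONFIG_ISA_DMA_API */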
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 518bbbb9ee59..fe2cc6e105fa 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -26,7 +26,7 @@ extern void die(const char *, struct pt_regs *,long);
 extern int __must_check __die(const char *, struct pt_regs *, long);
 extern void show_registers(struct pt_regs *regs);
 extern void show_trace(struct task_struct *t, struct pt_regs *regs,
-		       unsigned long *sp);
+		       unsigned long *sp, unsigned long bp);
 extern void __show_regs(struct pt_regs *regs, int all);
 extern void show_regs(struct pt_regs *regs);
 extern unsigned long oops_begin(void);
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 8e37deb1eb38..0f5213564326 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -142,9 +142,9 @@ struct x86_emulate_ops {
 	int (*pio_out_emulated)(int size, unsigned short port, const void *val,
 				unsigned int count, struct kvm_vcpu *vcpu);
 
-	bool (*get_cached_descriptor)(struct desc_struct *desc,
+	bool (*get_cached_descriptor)(struct desc_struct *desc, u32 *base3,
 				      int seg, struct kvm_vcpu *vcpu);
-	void (*set_cached_descriptor)(struct desc_struct *desc,
+	void (*set_cached_descriptor)(struct desc_struct *desc, u32 base3,
 				      int seg, struct kvm_vcpu *vcpu);
 	u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu);
 	void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu);
@@ -239,6 +239,7 @@ struct x86_emulate_ctxt {
 	int interruptibility;
 
 	bool perm_ok; /* do not check permissions if true */
+	bool only_vendor_specific_insn;
 
 	bool have_exception;
 	struct x86_exception exception;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ffd7f8d29187..c8af0991fdf0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -85,7 +85,7 @@
 
 #define ASYNC_PF_PER_VCPU 64
 
-extern spinlock_t kvm_lock;
+extern raw_spinlock_t kvm_lock;
 extern struct list_head vm_list;
 
 struct kvm_vcpu;
@@ -255,6 +255,8 @@ struct kvm_mmu {
 	int (*sync_page)(struct kvm_vcpu *vcpu,
 			 struct kvm_mmu_page *sp);
 	void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva);
+	void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+			   u64 *spte, const void *pte, unsigned long mmu_seq);
 	hpa_t root_hpa;
 	int root_level;
 	int shadow_root_level;
@@ -335,12 +337,6 @@ struct kvm_vcpu_arch {
 	u64 *last_pte_updated;
 	gfn_t last_pte_gfn;
 
-	struct {
-		gfn_t gfn;	/* presumed gfn during guest pte update */
-		pfn_t pfn;	/* pfn corresponding to that gfn */
-		unsigned long mmu_seq;
-	} update_pte;
-
 	struct fpu guest_fpu;
 	u64 xcr0;
 
@@ -448,7 +444,7 @@ struct kvm_arch {
 
 	unsigned long irq_sources_bitmap;
 	s64 kvmclock_offset;
-	spinlock_t tsc_write_lock;
+	raw_spinlock_t tsc_write_lock;
 	u64 last_tsc_nsec;
 	u64 last_tsc_offset;
 	u64 last_tsc_write;
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 80a1dee5bea5..aeff3e89b222 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -13,6 +13,12 @@ typedef struct {
 	int size;
 	struct mutex lock;
 	void *vdso;
+
+#ifdef CONFIG_X86_64
+	/* True if mm supports a task running in 32 bit compatibility mode. */
+	unsigned short ia32_compat;
+#endif
+
 } mm_context_t;
 
 #ifdef CONFIG_SMP
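This field gives 64-bit kernels a per-mm record of whether the address space was set up for a 32-bit binary; the ia32_aout.c hunk earlier in this series is one of the sites that sets it. A hedged sketch of what a consumer would look like (the helper name is illustrative; only the field itself comes from the patch):

    #ifdef CONFIG_X86_64
    /* Sketch: distinguish a 32-bit compat mm on a 64-bit kernel. */
    static inline bool mm_is_ia32_compat(struct mm_struct *mm)
    {
            return mm && mm->context.ia32_compat;
    }
    #endif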
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 823d48223400..fd5a1f365c95 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -43,6 +43,7 @@
 
 #define MSR_MTRRcap			0x000000fe
 #define MSR_IA32_BBL_CR_CTL		0x00000119
+#define MSR_IA32_BBL_CR_CTL3		0x0000011e
 
 #define MSR_IA32_SYSENTER_CS		0x00000174
 #define MSR_IA32_SYSENTER_ESP		0x00000175
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 07f46016d3ff..4886a68f267e 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -29,8 +29,8 @@ void arch_trigger_all_cpu_backtrace(void);
  * external nmis, because the local ones are more frequent.
  *
  * Also setup some default high/normal/low settings for
- * subsystems to registers with.  Using 4 bits to seperate
- * the priorities.  This can go alot higher if needed be.
+ * subsystems to registers with.  Using 4 bits to separate
+ * the priorities.  This can go a lot higher if needed be.
 */
 
 #define NMI_LOCAL_SHIFT		16	/* randomly picked */
diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h
index 6d8723a766cc..af788496020b 100644
--- a/arch/x86/include/asm/nops.h
+++ b/arch/x86/include/asm/nops.h
@@ -38,7 +38,7 @@
 #define K8_NOP8	K8_NOP4 K8_NOP4
 
 /* K7 nops
-   uses eax dependencies (arbitary choice)
+   uses eax dependencies (arbitrary choice)
    1: nop
    2: movl %eax,%eax
    3: leal (,%eax,1),%eax
diff --git a/arch/x86/include/asm/olpc.h b/arch/x86/include/asm/olpc.h
index f482010350fb..5ca6801b75f3 100644
--- a/arch/x86/include/asm/olpc.h
+++ b/arch/x86/include/asm/olpc.h
@@ -20,7 +20,7 @@ extern struct olpc_platform_t olpc_platform_info;
 
 /*
  * OLPC board IDs contain the major build number within the mask 0x0ff0,
- * and the minor build number withing 0x000f.  Pre-builds have a minor
+ * and the minor build number within 0x000f.  Pre-builds have a minor
 * number less than 8, and normal builds start at 8.  For example, 0x0B10
 * is a PreB1, and 0x0C18 is a C1.
 */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 7e172955ee57..a09e1f052d84 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -451,6 +451,26 @@ do {									\
 #define irqsafe_cpu_cmpxchg_4(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
 #endif /* !CONFIG_M386 */
 
+#ifdef CONFIG_X86_CMPXCHG64
+#define percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)			\
+({									\
+	char __ret;							\
+	typeof(o1) __o1 = o1;						\
+	typeof(o1) __n1 = n1;						\
+	typeof(o2) __o2 = o2;						\
+	typeof(o2) __n2 = n2;						\
+	typeof(o2) __dummy = n2;					\
+	asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t"	\
+		    : "=a"(__ret), "=m" (pcp1), "=d"(__dummy)		\
+		    : "b"(__n1), "c"(__n2), "a"(__o1), "d"(__o2));	\
+	__ret;								\
+})
+
+#define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2)		percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
+#define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2)		percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
+#define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2)	percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
+#endif /* CONFIG_X86_CMPXCHG64 */
+
 /*
  * Per cpu atomic 64 bit operations are only available under 64 bit.
  * 32 bit must fall back to generic operations.
@@ -480,6 +500,34 @@ do {									\
 #define irqsafe_cpu_xor_8(pcp, val)	percpu_to_op("xor", (pcp), val)
 #define irqsafe_cpu_xchg_8(pcp, nval)	percpu_xchg_op(pcp, nval)
 #define irqsafe_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
+
+/*
+ * Pretty complex macro to generate cmpxchg16 instruction.  The instruction
+ * is not supported on early AMD64 processors so we must be able to emulate
+ * it in software.  The address used in the cmpxchg16 instruction must be
+ * aligned to a 16 byte boundary.
+ */
+#define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)			\
+({									\
+	char __ret;							\
+	typeof(o1) __o1 = o1;						\
+	typeof(o1) __n1 = n1;						\
+	typeof(o2) __o2 = o2;						\
+	typeof(o2) __n2 = n2;						\
+	typeof(o2) __dummy;						\
+	alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4,	\
+		       "cmpxchg16b %%gs:(%%rsi)\n\tsetz %0\n\t",	\
+		       X86_FEATURE_CX16,				\
+		       ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)),		\
+		       "S" (&pcp1), "b"(__n1), "c"(__n2),		\
+		       "a"(__o1), "d"(__o2));				\
+	__ret;								\
+})
+
+#define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2)	percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
+#define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2)	percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
+#define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2)	percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
+
 #endif
 
 /* This is not atomic against other CPUs -- CPU preemption needs to be off */
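Both additions back the generic this_cpu_cmpxchg_double() operation: atomically replace a pair of adjacent per-CPU words if and only if both still hold their expected values, returning nonzero on success. The 64-bit variant patches itself at boot between the cmpxchg16b instruction and a software emulation call, because early AMD64 CPUs lack X86_FEATURE_CX16, and the pair must be 16-byte aligned. A hedged, kernel-context sketch of a caller (the struct and helper names are illustrative; the field adjacency and alignment requirements are real):

    /* Illustrative per-CPU pair; field order and alignment are required. */
    struct example_pair {
            void *ptr;              /* first word of the pair */
            unsigned long seq;      /* second word, immediately adjacent */
    } __attribute__((aligned(2 * sizeof(void *))));

    static DEFINE_PER_CPU(struct example_pair, example_pair);

    static void example_publish(void *new_ptr)
    {
            void *old_ptr;
            unsigned long old_seq;

            do {
                    old_ptr = this_cpu_read(example_pair.ptr);
                    old_seq = this_cpu_read(example_pair.seq);
                    /* Succeeds only if both words still match (old_ptr, old_seq). */
            } while (!this_cpu_cmpxchg_double(example_pair.ptr, example_pair.seq,
                                              old_ptr, old_seq,
                                              new_ptr, old_seq + 1));
    }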
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index cc29086e30cd..56fd9e3abbda 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -1,5 +1,5 @@
 /*
- * Netburst Perfomance Events (P4, old Xeon)
+ * Netburst Performance Events (P4, old Xeon)
 */
 
 #ifndef PERF_EVENT_P4_H
@@ -9,7 +9,7 @@
 #include <linux/bitops.h>
 
 /*
- * NetBurst has perfomance MSRs shared between
+ * NetBurst has performance MSRs shared between
 * threads if HT is turned on, ie for both logical
 * processors (mem: in turn in Atom with HT support
 * perf-MSRs are not shared and every thread has its
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 94b979d1b58d..effff47a3c82 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -69,8 +69,6 @@ static inline void native_pmd_clear(pmd_t *pmd)
 
 static inline void pud_clear(pud_t *pudp)
 {
-	unsigned long pgd;
-
 	set_pud(pudp, __pud(0));
 
 	/*
@@ -79,13 +77,10 @@ static inline void pud_clear(pud_t *pudp)
 	 * section 8.1: in PAE mode we explicitly have to flush the
 	 * TLB via cr3 if the top-level pgd is changed...
 	 *
-	 * Make sure the pud entry we're updating is within the
-	 * current pgd to avoid unnecessary TLB flushes.
+	 * Currently all places where pud_clear() is called either have
+	 * flush_tlb_mm() followed or don't need TLB flush (x86_64 code or
+	 * pud_clear_bad()), so we don't need TLB flush here.
 	 */
-	pgd = read_cr3();
-	if (__pa(pudp) >= pgd && __pa(pudp) <
-	    (pgd + sizeof(pgd_t)*PTRS_PER_PGD))
-		write_cr3(pgd);
 }
 
 #ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index 7a3e836eb2a9..a898a2b6e10c 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -7,7 +7,7 @@
 */
 #define X86_EFLAGS_CF	0x00000001 /* Carry Flag */
 #define X86_EFLAGS_PF	0x00000004 /* Parity Flag */
-#define X86_EFLAGS_AF	0x00000010 /* Auxillary carry Flag */
+#define X86_EFLAGS_AF	0x00000010 /* Auxiliary carry Flag */
 #define X86_EFLAGS_ZF	0x00000040 /* Zero Flag */
 #define X86_EFLAGS_SF	0x00000080 /* Sign Flag */
 #define X86_EFLAGS_TF	0x00000100 /* Trap Flag */
diff --git a/arch/x86/include/asm/ptrace-abi.h b/arch/x86/include/asm/ptrace-abi.h
index 52b098a6eebb..7b0a55a88851 100644
--- a/arch/x86/include/asm/ptrace-abi.h
+++ b/arch/x86/include/asm/ptrace-abi.h
@@ -31,7 +31,7 @@
 #define R12 24
 #define RBP 32
 #define RBX 40
-/* arguments: interrupts/non tracing syscalls only save upto here*/
+/* arguments: interrupts/non tracing syscalls only save up to here*/
 #define R11 48
 #define R10 56
 #define R9 64
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 78cd1ea94500..1babf8adecdf 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -73,7 +73,7 @@ struct pt_regs {
 	unsigned long r12;
 	unsigned long rbp;
 	unsigned long rbx;
-/* arguments: non interrupts/non tracing syscalls only save upto here*/
+/* arguments: non interrupts/non tracing syscalls only save up to here*/
 	unsigned long r11;
 	unsigned long r10;
 	unsigned long r9;
@@ -103,7 +103,7 @@ struct pt_regs {
 	unsigned long r12;
 	unsigned long bp;
 	unsigned long bx;
-/* arguments: non interrupts/non tracing syscalls only save upto here*/
+/* arguments: non interrupts/non tracing syscalls only save up to here*/
 	unsigned long r11;
 	unsigned long r10;
 	unsigned long r9;
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index 562d4fd31ba8..3250e3d605d9 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -18,7 +18,10 @@ extern struct machine_ops machine_ops;
 
 void native_machine_crash_shutdown(struct pt_regs *regs);
 void native_machine_shutdown(void);
-void machine_real_restart(const unsigned char *code, int length);
+void machine_real_restart(unsigned int type);
+/* These must match dispatch_table in reboot_32.S */
+#define MRR_BIOS	0
+#define MRR_APM		1
 
 typedef void (*nmi_shootdown_cb)(int, struct die_args*);
 void nmi_shootdown_cpus(nmi_shootdown_cb callback);
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 231f1c1d6607..cd84f7208f76 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -1,14 +1,16 @@
 #ifndef _ASM_X86_SEGMENT_H
 #define _ASM_X86_SEGMENT_H
 
+#include <linux/const.h>
+
 /* Constructor for a conventional segment GDT (or LDT) entry */
 /* This is a macro so it can be used in initializers */
 #define GDT_ENTRY(flags, base, limit)			\
-	((((base)  & 0xff000000ULL) << (56-24)) |	\
-	 (((flags) & 0x0000f0ffULL) << 40) |		\
-	 (((limit) & 0x000f0000ULL) << (48-16)) |	\
-	 (((base)  & 0x00ffffffULL) << 16) |		\
-	 (((limit) & 0x0000ffffULL)))
+	((((base)  & _AC(0xff000000,ULL)) << (56-24)) |	\
+	 (((flags) & _AC(0x0000f0ff,ULL)) << 40) |	\
+	 (((limit) & _AC(0x000f0000,ULL)) << (48-16)) |	\
+	 (((base)  & _AC(0x00ffffff,ULL)) << 16) |	\
+	 (((limit) & _AC(0x0000ffff,ULL))))
 
 /* Simple and small GDT entries for booting only */
 
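_AC() from <linux/const.h> makes a constant usable from both C and assembler: compiled as C it pastes the type suffix on (yielding 0xff000000ULL), while under __ASSEMBLY__ it drops the suffix, which assemblers do not understand. A standalone illustration of the mechanism, reproducing the const.h definitions locally for demonstration:

    #include <stdio.h>

    /* Reproduction of the <linux/const.h> mechanism, for demonstration. */
    #ifdef __ASSEMBLY__
    #define _AC(X, Y)       X               /* assembler: bare constant */
    #else
    #define __AC(X, Y)      (X##Y)          /* C: paste the type suffix on */
    #define _AC(X, Y)       __AC(X, Y)
    #endif

    #define BASE_HI_MASK    _AC(0xff000000, ULL)    /* 0xff000000ULL in C */

    int main(void)
    {
            /* The ULL suffix keeps the shift from overflowing a 32-bit int. */
            printf("%llx\n", (unsigned long long)(BASE_HI_MASK << (56 - 24)));
            return 0;
    }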
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 52b5c7ed3608..d7e89c83645d 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -47,7 +47,7 @@ struct stacktrace_ops {
 };
 
 void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
-		unsigned long *stack,
+		unsigned long *stack, unsigned long bp,
 		const struct stacktrace_ops *ops, void *data);
 
 #ifdef CONFIG_X86_32
@@ -86,11 +86,11 @@ stack_frame(struct task_struct *task, struct pt_regs *regs)
 
 extern void
 show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		   unsigned long *stack, char *log_lvl);
+		   unsigned long *stack, unsigned long bp, char *log_lvl);
 
 extern void
 show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		   unsigned long *sp, char *log_lvl);
+		   unsigned long *sp, unsigned long bp, char *log_lvl);
 
 extern unsigned int code_bytes;
 
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index f0b6e5dbc5a0..1f2e61e28981 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -161,8 +161,14 @@ struct thread_info {
 
 #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
 
-#define alloc_thread_info(tsk)						\
-	((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER))
+#define alloc_thread_info_node(tsk, node)				\
+({									\
+	struct page *page = alloc_pages_node(node, THREAD_FLAGS,	\
+					     THREAD_ORDER);		\
+	struct thread_info *ret = page ? page_address(page) : NULL;	\
+									\
+	ret;								\
+})
 
 #ifdef CONFIG_X86_32
 
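The replacement macro is a GNU C statement expression, ({ ... }), whose value is its last expression; that is what lets it allocate on a specific NUMA node and still evaluate to NULL cleanly when alloc_pages_node() fails, something the old single-expression cast could not do. A standalone illustration of the construct (names here are illustrative):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Statement expression: runs the block, evaluates to the last expression. */
    #define alloc_zeroed_or_null(size)                              \
    ({                                                              \
            void *__p = malloc(size);                               \
            void *__ret = __p ? memset(__p, 0, (size)) : NULL;      \
            __ret;                                                  \
    })

    int main(void)
    {
            char *buf = alloc_zeroed_or_null(64);

            printf("%s\n", buf ? "allocated" : "failed");
            free(buf);
            return 0;
    }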
diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h
index f4500fb3b485..feca3118a73b 100644
--- a/arch/x86/include/asm/trampoline.h
+++ b/arch/x86/include/asm/trampoline.h
@@ -3,25 +3,36 @@
 
 #ifndef __ASSEMBLY__
 
-#ifdef CONFIG_X86_TRAMPOLINE
+#include <linux/types.h>
+#include <asm/io.h>
+
 /*
- * Trampoline 80x86 program as an array.
+ * Trampoline 80x86 program as an array.  These are in the init rodata
+ * segment, but that's okay, because we only care about the relative
+ * addresses of the symbols.
 */
-extern const unsigned char trampoline_data [];
-extern const unsigned char trampoline_end [];
-extern unsigned char *trampoline_base;
+extern const unsigned char x86_trampoline_start [];
+extern const unsigned char x86_trampoline_end [];
+extern unsigned char *x86_trampoline_base;
 
 extern unsigned long init_rsp;
 extern unsigned long initial_code;
 extern unsigned long initial_gs;
 
-#define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE)
+extern void __init setup_trampolines(void);
+
+extern const unsigned char trampoline_data[];
+extern const unsigned char trampoline_status[];
+
+#define TRAMPOLINE_SYM(x)						\
+	((void *)(x86_trampoline_base +					\
+		  ((const unsigned char *)(x) - x86_trampoline_start)))
 
-extern unsigned long setup_trampoline(void);
-extern void __init reserve_trampoline_memory(void);
-#else
-static inline void reserve_trampoline_memory(void) {}
-#endif /* CONFIG_X86_TRAMPOLINE */
+/* Address of the SMP trampoline */
+static inline unsigned long trampoline_address(void)
+{
+	return virt_to_phys(TRAMPOLINE_SYM(trampoline_data));
+}
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 1ca132fc0d03..83e2efd181e2 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -35,7 +35,7 @@ static inline cycles_t get_cycles(void)
 static __always_inline cycles_t vget_cycles(void)
 {
 	/*
-	 * We only do VDSOs on TSC capable CPUs, so this shouldnt
+	 * We only do VDSOs on TSC capable CPUs, so this shouldn't
 	 * access boot_cpu_data (which is not VDSO-safe):
 	 */
 #ifndef CONFIG_X86_TSC
diff --git a/arch/x86/include/asm/types.h b/arch/x86/include/asm/types.h
index df1da20f4534..8e8c23fef08c 100644
--- a/arch/x86/include/asm/types.h
+++ b/arch/x86/include/asm/types.h
@@ -1,22 +1,6 @@
 #ifndef _ASM_X86_TYPES_H
 #define _ASM_X86_TYPES_H
 
-#define dma_addr_t	dma_addr_t
-
 #include <asm-generic/types.h>
 
-#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
-
-typedef u64 dma64_addr_t;
-#if defined(CONFIG_X86_64) || defined(CONFIG_HIGHMEM64G)
-/* DMA addresses come in 32-bit and 64-bit flavours. */
-typedef u64 dma_addr_t;
-#else
-typedef u32 dma_addr_t;
-#endif
-
-#endif /* __ASSEMBLY__ */
-#endif /* __KERNEL__ */
-
 #endif /* _ASM_X86_TYPES_H */
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index ffaf183c619a..a755ef5e5977 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -349,10 +349,11 @@
 #define __NR_name_to_handle_at	341
 #define __NR_open_by_handle_at  342
 #define __NR_clock_adjtime	343
+#define __NR_syncfs		344
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 344
+#define NR_syscalls 345
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 5466bea670e7..160fa76bd578 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -675,6 +675,8 @@ __SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
 __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
 #define __NR_clock_adjtime			305
 __SYSCALL(__NR_clock_adjtime, sys_clock_adjtime)
+#define __NR_syncfs				306
+__SYSCALL(__NR_syncfs, sys_syncfs)
 
 #ifndef __NO_STUBS
 #define __ARCH_WANT_OLD_READDIR
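syncfs() flushes only the filesystem backing the given file descriptor, unlike sync(), which flushes everything; the hunks above wire it up as number 344 on 32-bit and 306 on 64-bit x86. Before the C library grew a wrapper, a program would reach it through syscall(2), as in this minimal caller:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(int argc, char **argv)
    {
            int fd = open(argc > 1 ? argv[1] : ".", O_RDONLY);

            if (fd < 0)
                    return 1;
            /* __NR_syncfs comes from the unistd headers patched above. */
            if (syscall(__NR_syncfs, fd) < 0) {
                    perror("syncfs");
                    return 1;
            }
            close(fd);
            return 0;
    }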
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index 1c10c88ee4e1..5d4922ad4b9b 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -86,7 +86,7 @@ DEFINE_GUEST_HANDLE(void);
 * The privilege level specifies which modes may enter a trap via a software
 * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate
 * privilege levels as follows:
- *  Level == 0: Noone may enter
+ *  Level == 0: No one may enter
 *  Level == 1: Kernel may enter
 *  Level == 2: Kernel may enter
 *  Level == 3: Everyone may enter
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 62445ba2f8a8..7338ef2218bc 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -41,13 +41,13 @@ obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)	+= syscall_64.o vsyscall_64.o
 obj-y			+= bootflag.o e820.o
-obj-y			+= pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
+obj-y			+= pci-dma.o quirks.o topology.o kdebugfs.o
 obj-y			+= alternative.o i8253.o pci-nommu.o hw_breakpoint.o
 obj-y			+= tsc.o io_delay.o rtc.o
 obj-y			+= pci-iommu_table.o
 obj-y			+= resource.o
 
-obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
+obj-y				+= trampoline.o trampoline_$(BITS).o
 obj-y				+= process.o
 obj-y				+= i387.o xsave.o
 obj-y				+= ptrace.o
@@ -55,10 +55,12 @@ obj-$(CONFIG_X86_32)	+= tls.o
 obj-$(CONFIG_IA32_EMULATION)	+= tls.o
 obj-y				+= step.o
 obj-$(CONFIG_INTEL_TXT)		+= tboot.o
+obj-$(CONFIG_ISA_DMA_API)	+= i8237.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-y				+= cpu/
 obj-y				+= acpi/
 obj-y				+= reboot.o
+obj-$(CONFIG_X86_32)		+= reboot_32.o
 obj-$(CONFIG_MCA)		+= mca_32.o
 obj-$(CONFIG_X86_MSR)		+= msr.o
 obj-$(CONFIG_X86_CPUID)		+= cpuid.o
@@ -69,7 +71,6 @@ obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_SMP)		+= smpboot.o
 obj-$(CONFIG_SMP)		+= tsc_sync.o
 obj-$(CONFIG_SMP)		+= setup_percpu.o
-obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline_$(BITS).o
 obj-$(CONFIG_X86_MPPARSE)	+= mpparse.o
 obj-y				+= apic/
 obj-$(CONFIG_X86_REBOOTFIXUPS)	+= reboot_fixups_32.o
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S
index 28595d6df47c..ead21b663117 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.S
+++ b/arch/x86/kernel/acpi/realmode/wakeup.S
@@ -6,11 +6,17 @@
 #include <asm/page_types.h>
 #include <asm/pgtable_types.h>
 #include <asm/processor-flags.h>
+#include "wakeup.h"
 
 	.code16
-	.section ".header", "a"
+	.section ".jump", "ax"
+	.globl	_start
+_start:
+	cli
+	jmp	wakeup_code
 
 /* This should match the structure in wakeup.h */
+	.section ".header", "a"
 	.globl	wakeup_header
 wakeup_header:
 video_mode:	.short	0	/* Video mode number */
@@ -30,14 +36,11 @@ wakeup_jmp:	.byte	0xea	/* ljmpw */
 wakeup_jmp_off:	.word	3f
 wakeup_jmp_seg:	.word	0
 wakeup_gdt:	.quad	0, 0, 0
-signature:	.long	0x51ee1111
+signature:	.long	WAKEUP_HEADER_SIGNATURE
 
 	.text
-	.globl	_start
 	.code16
 wakeup_code:
-_start:
-	cli
 	cld
 
 	/* Apparently some dimwit BIOS programmers don't know how to
@@ -77,12 +80,12 @@ _start:
 
 	/* Check header signature... */
 	movl	signature, %eax
-	cmpl	$0x51ee1111, %eax
+	cmpl	$WAKEUP_HEADER_SIGNATURE, %eax
 	jne	bogus_real_magic
 
 	/* Check we really have everything... */
 	movl	end_signature, %eax
-	cmpl	$0x65a22c82, %eax
+	cmpl	$WAKEUP_END_SIGNATURE, %eax
 	jne	bogus_real_magic
 
 	/* Call the C code */
@@ -147,3 +150,7 @@ wakeup_heap:
 wakeup_stack:
 	.space	2048
 wakeup_stack_end:
+
+	.section ".signature","a"
+end_signature:
+	.long	WAKEUP_END_SIGNATURE
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.h b/arch/x86/kernel/acpi/realmode/wakeup.h
index 69d38d0b2b64..e1828c07e79c 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.h
+++ b/arch/x86/kernel/acpi/realmode/wakeup.h
@@ -35,7 +35,8 @@ struct wakeup_header {
 extern struct wakeup_header wakeup_header;
 #endif
 
-#define HEADER_OFFSET 0x3f00
-#define WAKEUP_SIZE 0x4000
+#define WAKEUP_HEADER_OFFSET	8
+#define WAKEUP_HEADER_SIGNATURE	0x51ee1111
+#define WAKEUP_END_SIGNATURE	0x65a22c82
 
 #endif /* ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H */
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S
index 060fff8f5c5b..d4f8010a5b1b 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S
+++ b/arch/x86/kernel/acpi/realmode/wakeup.lds.S
@@ -13,9 +13,19 @@ ENTRY(_start)
 SECTIONS
 {
 	. = 0;
+	.jump	: {
+		*(.jump)
+	} = 0x90909090
+
+	. = WAKEUP_HEADER_OFFSET;
+	.header : {
+		*(.header)
+	}
+
+	. = ALIGN(16);
 	.text : {
 		 *(.text*)
-	}
+	} = 0x90909090
 
 	. = ALIGN(16);
 	.rodata : {
@@ -33,11 +43,6 @@ SECTIONS
 		*(.data*)
 	}
 
-	.signature : {
-		end_signature = .;
-		LONG(0x65a22c82)
-	}
-
 	. = ALIGN(16);
 	.bss :	{
 		__bss_start = .;
@@ -45,20 +50,13 @@ SECTIONS
 		__bss_end = .;
 	}
 
-	. = HEADER_OFFSET;
-	.header : {
-		*(.header)
+	.signature : {
+		*(.signature)
 	}
 
-	. = ALIGN(16);
 	_end = .;
 
 	/DISCARD/ : {
 		*(.note*)
 	}
-
-	/*
-	 * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
-	 */
-	. = ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!");
 }
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 68d1537b8c81..ff93bc1b09c3 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c | |||
@@ -18,37 +18,28 @@ | |||
18 | #include "realmode/wakeup.h" | 18 | #include "realmode/wakeup.h" |
19 | #include "sleep.h" | 19 | #include "sleep.h" |
20 | 20 | ||
21 | unsigned long acpi_wakeup_address; | ||
22 | unsigned long acpi_realmode_flags; | 21 | unsigned long acpi_realmode_flags; |
23 | 22 | ||
24 | /* address in low memory of the wakeup routine. */ | ||
25 | static unsigned long acpi_realmode; | ||
26 | |||
27 | #if defined(CONFIG_SMP) && defined(CONFIG_64BIT) | 23 | #if defined(CONFIG_SMP) && defined(CONFIG_64BIT) |
28 | static char temp_stack[4096]; | 24 | static char temp_stack[4096]; |
29 | #endif | 25 | #endif |
30 | 26 | ||
31 | /** | 27 | /** |
32 | * acpi_save_state_mem - save kernel state | 28 | * acpi_suspend_lowlevel - save kernel state |
33 | * | 29 | * |
34 | * Create an identity mapped page table and copy the wakeup routine to | 30 | * Create an identity mapped page table and copy the wakeup routine to |
35 | * low memory. | 31 | * low memory. |
36 | * | ||
37 | * Note that this is too late to change acpi_wakeup_address. | ||
38 | */ | 32 | */ |
39 | int acpi_save_state_mem(void) | 33 | int acpi_suspend_lowlevel(void) |
40 | { | 34 | { |
41 | struct wakeup_header *header; | 35 | struct wakeup_header *header; |
36 | /* address in low memory of the wakeup routine. */ | ||
37 | char *acpi_realmode; | ||
42 | 38 | ||
43 | if (!acpi_realmode) { | 39 | acpi_realmode = TRAMPOLINE_SYM(acpi_wakeup_code); |
44 | printk(KERN_ERR "Could not allocate memory during boot, " | ||
45 | "S3 disabled\n"); | ||
46 | return -ENOMEM; | ||
47 | } | ||
48 | memcpy((void *)acpi_realmode, &wakeup_code_start, WAKEUP_SIZE); | ||
49 | 40 | ||
50 | header = (struct wakeup_header *)(acpi_realmode + HEADER_OFFSET); | 41 | header = (struct wakeup_header *)(acpi_realmode + WAKEUP_HEADER_OFFSET); |
51 | if (header->signature != 0x51ee1111) { | 42 | if (header->signature != WAKEUP_HEADER_SIGNATURE) { |
52 | printk(KERN_ERR "wakeup header does not match\n"); | 43 | printk(KERN_ERR "wakeup header does not match\n"); |
53 | return -EINVAL; | 44 | return -EINVAL; |
54 | } | 45 | } |
@@ -68,9 +59,7 @@ int acpi_save_state_mem(void) | |||
68 | /* GDT[0]: GDT self-pointer */ | 59 | /* GDT[0]: GDT self-pointer */ |
69 | header->wakeup_gdt[0] = | 60 | header->wakeup_gdt[0] = |
70 | (u64)(sizeof(header->wakeup_gdt) - 1) + | 61 | (u64)(sizeof(header->wakeup_gdt) - 1) + |
71 | ((u64)(acpi_wakeup_address + | 62 | ((u64)__pa(&header->wakeup_gdt) << 16); |
72 | ((char *)&header->wakeup_gdt - (char *)acpi_realmode)) | ||
73 | << 16); | ||
74 | /* GDT[1]: big real mode-like code segment */ | 63 | /* GDT[1]: big real mode-like code segment */ |
75 | header->wakeup_gdt[1] = | 64 | header->wakeup_gdt[1] = |
76 | GDT_ENTRY(0x809b, acpi_wakeup_address, 0xfffff); | 65 | GDT_ENTRY(0x809b, acpi_wakeup_address, 0xfffff); |
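Note: the GDT[0] self-pointer above packs a real-mode GDT pseudo-descriptor (16-bit limit in bits 0..15, base address in bits 16..47) into a single descriptor slot; the rewrite works because __pa(&header->wakeup_gdt) now yields the physical base directly, where the old code had to add the header's offset within the blob to acpi_wakeup_address by hand. A minimal sketch of the packing, with gdt_phys and gdt_bytes as illustrative stand-ins:

	#include <stdint.h>

	/* Hedged sketch: pack limit (bits 0..15) and base (bits 16..47)
	 * into one u64, as the hunk does with __pa(&header->wakeup_gdt). */
	static uint64_t wakeup_gdt_self_pointer(uint64_t gdt_phys,
						uint16_t gdt_bytes)
	{
		return (uint64_t)(gdt_bytes - 1) | (gdt_phys << 16);
	}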
@@ -96,7 +85,7 @@ int acpi_save_state_mem(void) | |||
96 | header->pmode_cr3 = (u32)__pa(&initial_page_table); | 85 | header->pmode_cr3 = (u32)__pa(&initial_page_table); |
97 | saved_magic = 0x12345678; | 86 | saved_magic = 0x12345678; |
98 | #else /* CONFIG_64BIT */ | 87 | #else /* CONFIG_64BIT */ |
99 | header->trampoline_segment = setup_trampoline() >> 4; | 88 | header->trampoline_segment = trampoline_address() >> 4; |
100 | #ifdef CONFIG_SMP | 89 | #ifdef CONFIG_SMP |
101 | stack_start = (unsigned long)temp_stack + sizeof(temp_stack); | 90 | stack_start = (unsigned long)temp_stack + sizeof(temp_stack); |
102 | early_gdt_descr.address = | 91 | early_gdt_descr.address = |
@@ -107,56 +96,10 @@ int acpi_save_state_mem(void) | |||
107 | saved_magic = 0x123456789abcdef0L; | 96 | saved_magic = 0x123456789abcdef0L; |
108 | #endif /* CONFIG_64BIT */ | 97 | #endif /* CONFIG_64BIT */ |
109 | 98 | ||
99 | do_suspend_lowlevel(); | ||
110 | return 0; | 100 | return 0; |
111 | } | 101 | } |
112 | 102 | ||
113 | /* | ||
114 | * acpi_restore_state - undo effects of acpi_save_state_mem | ||
115 | */ | ||
116 | void acpi_restore_state_mem(void) | ||
117 | { | ||
118 | } | ||
119 | |||
120 | |||
121 | /** | ||
122 | * acpi_reserve_wakeup_memory - do _very_ early ACPI initialisation | ||
123 | * | ||
124 | * We allocate a page from the first 1MB of memory for the wakeup | ||
125 | * routine for when we come back from a sleep state. The | ||
126 | * runtime allocator allows specification of <16MB pages, but not | ||
127 | * <1MB pages. | ||
128 | */ | ||
129 | void __init acpi_reserve_wakeup_memory(void) | ||
130 | { | ||
131 | phys_addr_t mem; | ||
132 | |||
133 | if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) { | ||
134 | printk(KERN_ERR | ||
135 | "ACPI: Wakeup code way too big, S3 disabled.\n"); | ||
136 | return; | ||
137 | } | ||
138 | |||
139 | mem = memblock_find_in_range(0, 1<<20, WAKEUP_SIZE, PAGE_SIZE); | ||
140 | |||
141 | if (mem == MEMBLOCK_ERROR) { | ||
142 | printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); | ||
143 | return; | ||
144 | } | ||
145 | acpi_realmode = (unsigned long) phys_to_virt(mem); | ||
146 | acpi_wakeup_address = mem; | ||
147 | memblock_x86_reserve_range(mem, mem + WAKEUP_SIZE, "ACPI WAKEUP"); | ||
148 | } | ||
149 | |||
150 | int __init acpi_configure_wakeup_memory(void) | ||
151 | { | ||
152 | if (acpi_realmode) | ||
153 | set_memory_x(acpi_realmode, WAKEUP_SIZE >> PAGE_SHIFT); | ||
154 | |||
155 | return 0; | ||
156 | } | ||
157 | arch_initcall(acpi_configure_wakeup_memory); | ||
158 | |||
159 | |||
160 | static int __init acpi_sleep_setup(char *str) | 103 | static int __init acpi_sleep_setup(char *str) |
161 | { | 104 | { |
162 | while ((str != NULL) && (*str != '\0')) { | 105 | while ((str != NULL) && (*str != '\0')) { |
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h index adbcbaa6f1df..416d4be13fef 100644 --- a/arch/x86/kernel/acpi/sleep.h +++ b/arch/x86/kernel/acpi/sleep.h | |||
@@ -4,13 +4,12 @@ | |||
4 | 4 | ||
5 | #include <asm/trampoline.h> | 5 | #include <asm/trampoline.h> |
6 | 6 | ||
7 | extern char wakeup_code_start, wakeup_code_end; | ||
8 | |||
9 | extern unsigned long saved_video_mode; | 7 | extern unsigned long saved_video_mode; |
10 | extern long saved_magic; | 8 | extern long saved_magic; |
11 | 9 | ||
12 | extern int wakeup_pmode_return; | 10 | extern int wakeup_pmode_return; |
13 | extern char swsusp_pg_dir[PAGE_SIZE]; | ||
14 | 11 | ||
15 | extern unsigned long acpi_copy_wakeup_routine(unsigned long); | 12 | extern unsigned long acpi_copy_wakeup_routine(unsigned long); |
16 | extern void wakeup_long64(void); | 13 | extern void wakeup_long64(void); |
14 | |||
15 | extern void do_suspend_lowlevel(void); | ||
diff --git a/arch/x86/kernel/acpi/wakeup_rm.S b/arch/x86/kernel/acpi/wakeup_rm.S index 6ff3b5730575..63b8ab524f2c 100644 --- a/arch/x86/kernel/acpi/wakeup_rm.S +++ b/arch/x86/kernel/acpi/wakeup_rm.S | |||
@@ -2,9 +2,11 @@ | |||
2 | * Wrapper script for the realmode binary as a transport object | 2 | * Wrapper script for the realmode binary as a transport object |
3 | * before copying to low memory. | 3 | * before copying to low memory. |
4 | */ | 4 | */ |
5 | .section ".rodata","a" | 5 | #include <asm/page_types.h> |
6 | .globl wakeup_code_start, wakeup_code_end | 6 | |
7 | wakeup_code_start: | 7 | .section ".x86_trampoline","a" |
8 | .balign PAGE_SIZE | ||
9 | .globl acpi_wakeup_code | ||
10 | acpi_wakeup_code: | ||
8 | .incbin "arch/x86/kernel/acpi/realmode/wakeup.bin" | 11 | .incbin "arch/x86/kernel/acpi/realmode/wakeup.bin" |
9 | wakeup_code_end: | 12 | .size acpi_wakeup_code, .-acpi_wakeup_code |
10 | .size wakeup_code_start, .-wakeup_code_start | ||
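Note: carrying the blob in the page-aligned .x86_trampoline section means it is relocated to low memory together with the other trampolines at boot, which is what lets the hand-rolled reserve/copy path in sleep.c go away. A minimal consumer-side sketch, assuming only the TRAMPOLINE_SYM() helper and the acpi_wakeup_code symbol used in the sleep.c hunk above:

	#include <asm/trampoline.h>

	extern const char acpi_wakeup_code[];	/* start of the .incbin blob */

	/* Hedged sketch: translate the link-time symbol into its relocated
	 * low-memory address, as acpi_suspend_lowlevel() now does. */
	static char *wakeup_code_low(void)
	{
		return TRAMPOLINE_SYM(acpi_wakeup_code);
	}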
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 7038b95d363f..4a234677e213 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -199,7 +199,7 @@ void *text_poke_early(void *addr, const void *opcode, size_t len); | |||
199 | 199 | ||
200 | /* Replace instructions with better alternatives for this CPU type. | 200 | /* Replace instructions with better alternatives for this CPU type. |
201 | This runs before SMP is initialized to avoid SMP problems with | 201 | This runs before SMP is initialized to avoid SMP problems with |
202 | self modifying code. This implies that assymetric systems where | 202 | self modifying code. This implies that asymmetric systems where |
203 | APs have fewer capabilities than the boot processor are not handled. | 203 | APs have fewer capabilities than the boot processor are not handled. |
204 | Tough. Make sure you disable such features by hand. */ | 204 | Tough. Make sure you disable such features by hand. */ |
205 | 205 | ||
@@ -620,7 +620,12 @@ static int __kprobes stop_machine_text_poke(void *data) | |||
620 | flush_icache_range((unsigned long)p->addr, | 620 | flush_icache_range((unsigned long)p->addr, |
621 | (unsigned long)p->addr + p->len); | 621 | (unsigned long)p->addr + p->len); |
622 | } | 622 | } |
623 | 623 | /* | |
624 | * Intel Architecture Software Developer's Manual section 7.1.3 specifies | ||
625 | * that a core serializing instruction such as "cpuid" should be | ||
626 | * executed on _each_ core before the new instruction is made visible. | ||
627 | */ | ||
628 | sync_core(); | ||
624 | return 0; | 629 | return 0; |
625 | } | 630 | } |
626 | 631 | ||
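Note: on x86, sync_core() is built around cpuid precisely because cpuid is architecturally serializing. A hedged sketch of the idea (the kernel's real sync_core() additionally copes with 32-bit PIC builds and other details):

	/* Hedged sketch: execute cpuid as a serializing instruction so the
	 * core re-fetches instruction bytes after cross-modifying code.
	 * Leaf 1 is an arbitrary, always-valid leaf. */
	static inline void sync_core_sketch(void)
	{
		unsigned int eax = 1, ebx, ecx, edx;

		asm volatile("cpuid"
			     : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
			     : "0" (eax)
			     : "memory");
	}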
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index ed3c2e5b714a..6801959a8b2a 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c | |||
@@ -15,7 +15,7 @@ static u32 *flush_words; | |||
15 | const struct pci_device_id amd_nb_misc_ids[] = { | 15 | const struct pci_device_id amd_nb_misc_ids[] = { |
16 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, | 16 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, |
17 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, | 17 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, |
18 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) }, | 18 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) }, |
19 | {} | 19 | {} |
20 | }; | 20 | }; |
21 | EXPORT_SYMBOL(amd_nb_misc_ids); | 21 | EXPORT_SYMBOL(amd_nb_misc_ids); |
@@ -48,7 +48,7 @@ static struct pci_dev *next_northbridge(struct pci_dev *dev, | |||
48 | 48 | ||
49 | int amd_cache_northbridges(void) | 49 | int amd_cache_northbridges(void) |
50 | { | 50 | { |
51 | int i = 0; | 51 | u16 i = 0; |
52 | struct amd_northbridge *nb; | 52 | struct amd_northbridge *nb; |
53 | struct pci_dev *misc, *link; | 53 | struct pci_dev *misc, *link; |
54 | 54 | ||
@@ -103,9 +103,11 @@ int amd_cache_northbridges(void) | |||
103 | } | 103 | } |
104 | EXPORT_SYMBOL_GPL(amd_cache_northbridges); | 104 | EXPORT_SYMBOL_GPL(amd_cache_northbridges); |
105 | 105 | ||
106 | /* Ignores subdevice/subvendor but as far as I can figure out | 106 | /* |
107 | they're useless anyways */ | 107 | * Ignores subdevice/subvendor but as far as I can figure out |
108 | int __init early_is_amd_nb(u32 device) | 108 | * they're useless anyways |
109 | */ | ||
110 | bool __init early_is_amd_nb(u32 device) | ||
109 | { | 111 | { |
110 | const struct pci_device_id *id; | 112 | const struct pci_device_id *id; |
111 | u32 vendor = device & 0xffff; | 113 | u32 vendor = device & 0xffff; |
@@ -113,8 +115,8 @@ int __init early_is_amd_nb(u32 device) | |||
113 | device >>= 16; | 115 | device >>= 16; |
114 | for (id = amd_nb_misc_ids; id->vendor; id++) | 116 | for (id = amd_nb_misc_ids; id->vendor; id++) |
115 | if (vendor == id->vendor && device == id->device) | 117 | if (vendor == id->vendor && device == id->device) |
116 | return 1; | 118 | return true; |
117 | return 0; | 119 | return false; |
118 | } | 120 | } |
119 | 121 | ||
120 | int amd_get_subcaches(int cpu) | 122 | int amd_get_subcaches(int cpu) |
@@ -176,9 +178,9 @@ int amd_set_subcaches(int cpu, int mask) | |||
176 | return 0; | 178 | return 0; |
177 | } | 179 | } |
178 | 180 | ||
179 | int amd_cache_gart(void) | 181 | static int amd_cache_gart(void) |
180 | { | 182 | { |
181 | int i; | 183 | u16 i; |
182 | 184 | ||
183 | if (!amd_nb_has_feature(AMD_NB_GART)) | 185 | if (!amd_nb_has_feature(AMD_NB_GART)) |
184 | return 0; | 186 | return 0; |
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 7b1e8e10b89c..86d1ad4962a7 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
@@ -73,7 +73,7 @@ static u32 __init allocate_aperture(void) | |||
73 | /* | 73 | /* |
74 | * using 512M as goal, in case kexec will load kernel_big | 74 | * using 512M as goal, in case kexec will load kernel_big |
75 | * that will do the on position decompress, and could overlap with | 75 | * that will do the on position decompress, and could overlap with |
76 | * that positon with gart that is used. | 76 | * that position with gart that is used. |
77 | * sequence: | 77 | * sequence: |
78 | * kernel_small | 78 | * kernel_small |
79 | * ==> kexec (with kdump trigger path or previous doesn't shutdown gart) | 79 | * ==> kexec (with kdump trigger path or previous doesn't shutdown gart) |
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 4b5ebd26f565..180ca240e03c 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -1886,7 +1886,7 @@ void disable_IO_APIC(void) | |||
1886 | * | 1886 | * |
1887 | * With interrupt-remapping, for now we will use virtual wire A mode, | 1887 | * With interrupt-remapping, for now we will use virtual wire A mode, |
1888 | * as virtual wire B is a little complex (need to configure both | 1888 | * as virtual wire B is a little complex (need to configure both |
1889 | * IOAPIC RTE aswell as interrupt-remapping table entry). | 1889 | * IOAPIC RTE as well as interrupt-remapping table entry). |
1890 | * As this gets called during crash dump, keep this simple for now. | 1890 | * As this gets called during crash dump, keep this simple for now. |
1891 | */ | 1891 | */ |
1892 | if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) { | 1892 | if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) { |
@@ -2905,7 +2905,7 @@ void __init setup_IO_APIC(void) | |||
2905 | } | 2905 | } |
2906 | 2906 | ||
2907 | /* | 2907 | /* |
2908 | * Called after all the initialization is done. If we didnt find any | 2908 | * Called after all the initialization is done. If we didn't find any |
2909 | * APIC bugs then we can allow the modify fast path | 2909 | * APIC bugs then we can allow the modify fast path |
2910 | */ | 2910 | */ |
2911 | 2911 | ||
@@ -3983,7 +3983,7 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi) | |||
3983 | static __init int bad_ioapic(unsigned long address) | 3983 | static __init int bad_ioapic(unsigned long address) |
3984 | { | 3984 | { |
3985 | if (nr_ioapics >= MAX_IO_APICS) { | 3985 | if (nr_ioapics >= MAX_IO_APICS) { |
3986 | printk(KERN_WARNING "WARING: Max # of I/O APICs (%d) exceeded " | 3986 | printk(KERN_WARNING "WARNING: Max # of I/O APICs (%d) exceeded " |
3987 | "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics); | 3987 | "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics); |
3988 | return 1; | 3988 | return 1; |
3989 | } | 3989 | } |
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 0e4f24c2a746..0b4be431c620 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -66,7 +66,7 @@ | |||
66 | * 1.5: Fix segment register reloading (in case of bad segments saved | 66 | * 1.5: Fix segment register reloading (in case of bad segments saved |
67 | * across BIOS call). | 67 | * across BIOS call). |
68 | * Stephen Rothwell | 68 | * Stephen Rothwell |
69 | * 1.6: Cope with complier/assembler differences. | 69 | * 1.6: Cope with compiler/assembler differences. |
70 | * Only try to turn off the first display device. | 70 | * Only try to turn off the first display device. |
71 | * Fix OOPS at power off with no APM BIOS by Jan Echternach | 71 | * Fix OOPS at power off with no APM BIOS by Jan Echternach |
72 | * <echter@informatik.uni-rostock.de> | 72 | * <echter@informatik.uni-rostock.de> |
@@ -227,6 +227,7 @@ | |||
227 | #include <linux/suspend.h> | 227 | #include <linux/suspend.h> |
228 | #include <linux/kthread.h> | 228 | #include <linux/kthread.h> |
229 | #include <linux/jiffies.h> | 229 | #include <linux/jiffies.h> |
230 | #include <linux/acpi.h> | ||
230 | 231 | ||
231 | #include <asm/system.h> | 232 | #include <asm/system.h> |
232 | #include <asm/uaccess.h> | 233 | #include <asm/uaccess.h> |
@@ -975,20 +976,10 @@ recalc: | |||
975 | 976 | ||
976 | static void apm_power_off(void) | 977 | static void apm_power_off(void) |
977 | { | 978 | { |
978 | unsigned char po_bios_call[] = { | ||
979 | 0xb8, 0x00, 0x10, /* movw $0x1000,ax */ | ||
980 | 0x8e, 0xd0, /* movw ax,ss */ | ||
981 | 0xbc, 0x00, 0xf0, /* movw $0xf000,sp */ | ||
982 | 0xb8, 0x07, 0x53, /* movw $0x5307,ax */ | ||
983 | 0xbb, 0x01, 0x00, /* movw $0x0001,bx */ | ||
984 | 0xb9, 0x03, 0x00, /* movw $0x0003,cx */ | ||
985 | 0xcd, 0x15 /* int $0x15 */ | ||
986 | }; | ||
987 | |||
988 | /* Some bioses don't like being called from CPU != 0 */ | 979 | /* Some bioses don't like being called from CPU != 0 */ |
989 | if (apm_info.realmode_power_off) { | 980 | if (apm_info.realmode_power_off) { |
990 | set_cpus_allowed_ptr(current, cpumask_of(0)); | 981 | set_cpus_allowed_ptr(current, cpumask_of(0)); |
991 | machine_real_restart(po_bios_call, sizeof(po_bios_call)); | 982 | machine_real_restart(MRR_APM); |
992 | } else { | 983 | } else { |
993 | (void)set_system_power_state(APM_STATE_OFF); | 984 | (void)set_system_power_state(APM_STATE_OFF); |
994 | } | 985 | } |
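Note: the dropped po_bios_call bytes decode, per their own inline comments, to loading a scratch real-mode stack and issuing the APM power-off BIOS service (int 0x15 with AX=0x5307, BX=0x0001 for all devices, CX=0x0003 for "off"). Power-off now goes through the shared real-mode stub, selected by a request code; a hedged sketch of the reworked interface (MRR_APM is taken from this hunk, the companion name and both numeric values are assumptions):

	/* Hedged sketch: machine_real_restart() now takes a request code
	 * instead of caller-supplied real-mode machine code; see also the
	 * reboot.c hunk near the end of this series. */
	#define MRR_BIOS	0	/* jump to the BIOS reset vector (assumed) */
	#define MRR_APM		1	/* APM "set power state: off" (assumed) */

	void machine_real_restart(unsigned int type);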
@@ -2331,12 +2322,11 @@ static int __init apm_init(void) | |||
2331 | apm_info.disabled = 1; | 2322 | apm_info.disabled = 1; |
2332 | return -ENODEV; | 2323 | return -ENODEV; |
2333 | } | 2324 | } |
2334 | if (pm_flags & PM_ACPI) { | 2325 | if (!acpi_disabled) { |
2335 | printk(KERN_NOTICE "apm: overridden by ACPI.\n"); | 2326 | printk(KERN_NOTICE "apm: overridden by ACPI.\n"); |
2336 | apm_info.disabled = 1; | 2327 | apm_info.disabled = 1; |
2337 | return -ENODEV; | 2328 | return -ENODEV; |
2338 | } | 2329 | } |
2339 | pm_flags |= PM_APM; | ||
2340 | 2330 | ||
2341 | /* | 2331 | /* |
2342 | * Set up the long jump entry point to the APM BIOS, which is called | 2332 | * Set up the long jump entry point to the APM BIOS, which is called |
@@ -2428,7 +2418,6 @@ static void __exit apm_exit(void) | |||
2428 | kthread_stop(kapmd_task); | 2418 | kthread_stop(kapmd_task); |
2429 | kapmd_task = NULL; | 2419 | kapmd_task = NULL; |
2430 | } | 2420 | } |
2431 | pm_flags &= ~PM_APM; | ||
2432 | } | 2421 | } |
2433 | 2422 | ||
2434 | module_init(apm_init); | 2423 | module_init(apm_init); |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f771ab6b49e9..3ecece0217ef 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -611,6 +611,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
611 | } | 611 | } |
612 | } | 612 | } |
613 | #endif | 613 | #endif |
614 | |||
615 | /* As a rule, processors have the APIC timer running in deep C states */ | ||
616 | if (c->x86 >= 0xf && !cpu_has_amd_erratum(amd_erratum_400)) | ||
617 | set_cpu_cap(c, X86_FEATURE_ARAT); | ||
614 | } | 618 | } |
615 | 619 | ||
616 | #ifdef CONFIG_X86_32 | 620 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index 03162dac6271..cf48cdd6907d 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c | |||
@@ -444,7 +444,7 @@ static int __cpuinit longhaul_get_ranges(void) | |||
444 | return -EINVAL; | 444 | return -EINVAL; |
445 | } | 445 | } |
446 | /* Get max multiplier - as we always did. | 446 | /* Get max multiplier - as we always did. |
447 | * Longhaul MSR is usefull only when voltage scaling is enabled. | 447 | * Longhaul MSR is useful only when voltage scaling is enabled. |
448 | * C3 is booting at max anyway. */ | 448 | * C3 is booting at max anyway. */ |
449 | maxmult = mult; | 449 | maxmult = mult; |
450 | /* Get min multiplier */ | 450 | /* Get min multiplier */ |
@@ -1011,7 +1011,7 @@ static void __exit longhaul_exit(void) | |||
1011 | * trigger frequency transition in some cases. */ | 1011 | * trigger frequency transition in some cases. */ |
1012 | module_param(disable_acpi_c3, int, 0644); | 1012 | module_param(disable_acpi_c3, int, 0644); |
1013 | MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support"); | 1013 | MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support"); |
1014 | /* Change CPU voltage with frequency. Very usefull to save | 1014 | /* Change CPU voltage with frequency. Very useful to save |
1015 | * power, but most VIA C3 processors aren't supporting it. */ | 1015 | * power, but most VIA C3 processors aren't supporting it. */ |
1016 | module_param(scale_voltage, int, 0644); | 1016 | module_param(scale_voltage, int, 0644); |
1017 | MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); | 1017 | MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); |
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c index 4a5a42b842ad..755a31e0f5b0 100644 --- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c | |||
@@ -315,8 +315,6 @@ static int __init pcc_cpufreq_do_osc(acpi_handle *handle) | |||
315 | 315 | ||
316 | input.count = 4; | 316 | input.count = 4; |
317 | input.pointer = in_params; | 317 | input.pointer = in_params; |
318 | input.count = 4; | ||
319 | input.pointer = in_params; | ||
320 | in_params[0].type = ACPI_TYPE_BUFFER; | 318 | in_params[0].type = ACPI_TYPE_BUFFER; |
321 | in_params[0].buffer.length = 16; | 319 | in_params[0].buffer.length = 16; |
322 | in_params[0].buffer.pointer = OSC_UUID; | 320 | in_params[0].buffer.pointer = OSC_UUID; |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index c567dec854f6..2368e38327b3 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
@@ -630,8 +630,7 @@ static void print_basics(struct powernow_k8_data *data) | |||
630 | data->powernow_table[j].frequency/1000); | 630 | data->powernow_table[j].frequency/1000); |
631 | } else { | 631 | } else { |
632 | printk(KERN_INFO PFX | 632 | printk(KERN_INFO PFX |
633 | " %d : fid 0x%x (%d MHz), vid 0x%x\n", | 633 | "fid 0x%x (%d MHz), vid 0x%x\n", |
634 | j, | ||
635 | data->powernow_table[j].index & 0xff, | 634 | data->powernow_table[j].index & 0xff, |
636 | data->powernow_table[j].frequency/1000, | 635 | data->powernow_table[j].frequency/1000, |
637 | data->powernow_table[j].index >> 8); | 636 | data->powernow_table[j].index >> 8); |
@@ -1276,7 +1275,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1276 | 1275 | ||
1277 | if (powernow_k8_cpu_init_acpi(data)) { | 1276 | if (powernow_k8_cpu_init_acpi(data)) { |
1278 | /* | 1277 | /* |
1279 | * Use the PSB BIOS structure. This is only availabe on | 1278 | * Use the PSB BIOS structure. This is only available on |
1280 | * an UP version, and is deprecated by AMD. | 1279 | * an UP version, and is deprecated by AMD. |
1281 | */ | 1280 | */ |
1282 | if (num_online_cpus() != 1) { | 1281 | if (num_online_cpus() != 1) { |
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c index 8abd869baabf..91bc25b67bc1 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c | |||
@@ -292,7 +292,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) | |||
292 | 292 | ||
293 | result = speedstep_smi_ownership(); | 293 | result = speedstep_smi_ownership(); |
294 | if (result) { | 294 | if (result) { |
295 | dprintk("fails in aquiring ownership of a SMI interface.\n"); | 295 | dprintk("fails in acquiring ownership of a SMI interface.\n"); |
296 | return -EINVAL; | 296 | return -EINVAL; |
297 | } | 297 | } |
298 | 298 | ||
@@ -360,7 +360,7 @@ static int speedstep_resume(struct cpufreq_policy *policy) | |||
360 | int result = speedstep_smi_ownership(); | 360 | int result = speedstep_smi_ownership(); |
361 | 361 | ||
362 | if (result) | 362 | if (result) |
363 | dprintk("fails in re-aquiring ownership of a SMI interface.\n"); | 363 | dprintk("fails in re-acquiring ownership of a SMI interface.\n"); |
364 | 364 | ||
365 | return result; | 365 | return result; |
366 | } | 366 | } |
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c index 8209472b27a5..83930deec3c6 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-apei.c +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c | |||
@@ -106,24 +106,34 @@ int apei_write_mce(struct mce *m) | |||
106 | ssize_t apei_read_mce(struct mce *m, u64 *record_id) | 106 | ssize_t apei_read_mce(struct mce *m, u64 *record_id) |
107 | { | 107 | { |
108 | struct cper_mce_record rcd; | 108 | struct cper_mce_record rcd; |
109 | ssize_t len; | 109 | int rc, pos; |
110 | 110 | ||
111 | len = erst_read_next(&rcd.hdr, sizeof(rcd)); | 111 | rc = erst_get_record_id_begin(&pos); |
112 | if (len <= 0) | 112 | if (rc) |
113 | return len; | 113 | return rc; |
114 | /* Can not skip other records in storage via ERST unless clear them */ | 114 | retry: |
115 | else if (len != sizeof(rcd) || | 115 | rc = erst_get_record_id_next(&pos, record_id); |
116 | uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) { | 116 | if (rc) |
117 | if (printk_ratelimit()) | 117 | goto out; |
118 | pr_warning( | 118 | /* no more record */ |
119 | "MCE-APEI: Can not skip the unknown record in ERST"); | 119 | if (*record_id == APEI_ERST_INVALID_RECORD_ID) |
120 | return -EIO; | 120 | goto out; |
121 | } | 121 | rc = erst_read(*record_id, &rcd.hdr, sizeof(rcd)); |
122 | 122 | /* someone else has cleared the record, try next one */ | |
123 | if (rc == -ENOENT) | ||
124 | goto retry; | ||
125 | else if (rc < 0) | ||
126 | goto out; | ||
127 | /* try to skip other type records in storage */ | ||
128 | else if (rc != sizeof(rcd) || | ||
129 | uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) | ||
130 | goto retry; | ||
123 | memcpy(m, &rcd.mce, sizeof(*m)); | 131 | memcpy(m, &rcd.mce, sizeof(*m)); |
124 | *record_id = rcd.hdr.record_id; | 132 | rc = sizeof(*m); |
133 | out: | ||
134 | erst_get_record_id_end(); | ||
125 | 135 | ||
126 | return sizeof(*m); | 136 | return rc; |
127 | } | 137 | } |
128 | 138 | ||
129 | /* Check whether there is record in ERST */ | 139 | /* Check whether there is record in ERST */ |
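Note: apei_read_mce() now follows ERST's begin/next/end iteration protocol: retry when a record vanished underneath us (-ENOENT, someone else cleared it) or belongs to another creator, and stop on real errors or when the id iterator reports no more records. A stripped-down sketch using only the erst_* calls and constants visible in the hunk; want_record() restates the creator-id and size checks above:

	static bool want_record(struct cper_record_header *hdr, ssize_t len)
	{
		return len == sizeof(struct cper_mce_record) &&
		       !uuid_le_cmp(hdr->creator_id, CPER_CREATOR_MCE);
	}

	static ssize_t read_one_record(struct cper_record_header *hdr,
				       size_t size, u64 *record_id)
	{
		int pos;
		ssize_t rc;

		rc = erst_get_record_id_begin(&pos);
		if (rc)
			return rc;
		do {
			rc = erst_get_record_id_next(&pos, record_id);
			if (rc || *record_id == APEI_ERST_INVALID_RECORD_ID)
				break;		/* error, or no more records */
			rc = erst_read(*record_id, hdr, size);
		} while (rc == -ENOENT ||	/* raced with a clear: retry */
			 (rc >= 0 && !want_record(hdr, rc)));
		erst_get_record_id_end();
		return rc;
	}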
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index a77971979564..0ed633c5048b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c | |||
@@ -32,7 +32,7 @@ static void inject_mce(struct mce *m) | |||
32 | { | 32 | { |
33 | struct mce *i = &per_cpu(injectm, m->extcpu); | 33 | struct mce *i = &per_cpu(injectm, m->extcpu); |
34 | 34 | ||
35 | /* Make sure noone reads partially written injectm */ | 35 | /* Make sure no one reads partially written injectm */ |
36 | i->finished = 0; | 36 | i->finished = 0; |
37 | mb(); | 37 | mb(); |
38 | m->finished = 0; | 38 | m->finished = 0; |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index d916183b7f9c..ab1122998dba 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -881,7 +881,7 @@ reset: | |||
881 | * Check if the address reported by the CPU is in a format we can parse. | 881 | * Check if the address reported by the CPU is in a format we can parse. |
882 | * It would be possible to add code for most other cases, but all would | 882 | * It would be possible to add code for most other cases, but all would |
883 | * be somewhat complicated (e.g. segment offset would require an instruction | 883 | * be somewhat complicated (e.g. segment offset would require an instruction |
884 | * parser). So only support physical addresses upto page granularity for now. | 884 | * parser). So only support physical addresses up to page granularity for now. |
885 | */ | 885 | */ |
886 | static int mce_usable_address(struct mce *m) | 886 | static int mce_usable_address(struct mce *m) |
887 | { | 887 | { |
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 9f27228ceffd..a71efcdbb092 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * This only handles 32bit MTRR on 32bit hosts. This is strictly wrong | 2 | * This only handles 32bit MTRR on 32bit hosts. This is strictly wrong |
3 | * because MTRRs can span upto 40 bits (36 bits on most modern x86) | 3 | * because MTRRs can span up to 40 bits (36 bits on most modern x86) |
4 | */ | 4 | */ |
5 | #define DEBUG | 5 | #define DEBUG |
6 | 6 | ||
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index eb00677ee2ae..eed3673a8656 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -1114,7 +1114,7 @@ static int x86_pmu_add(struct perf_event *event, int flags) | |||
1114 | 1114 | ||
1115 | /* | 1115 | /* |
1116 | * If group events scheduling transaction was started, | 1116 | * If group events scheduling transaction was started, |
1117 | * skip the schedulability test here, it will be peformed | 1117 | * skip the schedulability test here, it will be performed |
1118 | * at commit time (->commit_txn) as a whole | 1118 | * at commit time (->commit_txn) as a whole |
1119 | */ | 1119 | */ |
1120 | if (cpuc->group_flag & PERF_EVENT_TXN) | 1120 | if (cpuc->group_flag & PERF_EVENT_TXN) |
@@ -1795,7 +1795,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) | |||
1795 | 1795 | ||
1796 | perf_callchain_store(entry, regs->ip); | 1796 | perf_callchain_store(entry, regs->ip); |
1797 | 1797 | ||
1798 | dump_trace(NULL, regs, NULL, &backtrace_ops, entry); | 1798 | dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); |
1799 | } | 1799 | } |
1800 | 1800 | ||
1801 | #ifdef CONFIG_COMPAT | 1801 | #ifdef CONFIG_COMPAT |
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index d3d7b59841e5..c2520e178d32 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Netburst Perfomance Events (P4, old Xeon) | 2 | * Netburst Performance Events (P4, old Xeon) |
3 | * | 3 | * |
4 | * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org> | 4 | * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org> |
5 | * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com> | 5 | * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com> |
@@ -679,7 +679,7 @@ static int p4_validate_raw_event(struct perf_event *event) | |||
679 | */ | 679 | */ |
680 | 680 | ||
681 | /* | 681 | /* |
682 | * if an event is shared accross the logical threads | 682 | * if an event is shared across the logical threads |
683 | * the user needs special permissions to be able to use it | 683 | * the user needs special permissions to be able to use it |
684 | */ | 684 | */ |
685 | if (p4_ht_active() && p4_event_bind_map[v].shared) { | 685 | if (p4_ht_active() && p4_event_bind_map[v].shared) { |
@@ -791,13 +791,13 @@ static void p4_pmu_disable_pebs(void) | |||
791 | * | 791 | * |
792 | * It's still allowed that two threads setup same cache | 792 | * It's still allowed that two threads setup same cache |
793 | * events so we can't simply clear metrics until we knew | 793 | * events so we can't simply clear metrics until we knew |
794 | * noone is depending on us, so we need kind of counter | 794 | * no one is depending on us, so we need kind of counter |
795 | * for "ReplayEvent" users. | 795 | * for "ReplayEvent" users. |
796 | * | 796 | * |
797 | * What is more complex -- RAW events, if user (for some | 797 | * What is more complex -- RAW events, if user (for some |
798 | * reason) will pass some cache event metric with improper | 798 | * reason) will pass some cache event metric with improper |
799 | * event opcode -- it's fine from hardware point of view | 799 | * event opcode -- it's fine from hardware point of view |
800 | * but completely nonsence from "meaning" of such action. | 800 | * but completely nonsense from "meaning" of such action. |
801 | * | 801 | * |
802 | * So at moment let leave metrics turned on forever -- it's | 802 | * So at moment let leave metrics turned on forever -- it's |
803 | * ok for now but need to be revisited! | 803 | * ok for now but need to be revisited! |
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 227b0448960d..d22d0c4edcfd 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c | |||
@@ -86,7 +86,7 @@ static void __init vmware_platform_setup(void) | |||
86 | } | 86 | } |
87 | 87 | ||
88 | /* | 88 | /* |
89 | * While checking the dmi string infomation, just checking the product | 89 | * While checking the dmi string information, just checking the product |
90 | * serial key should be enough, as this will always have a VMware | 90 | * serial key should be enough, as this will always have a VMware |
91 | * specific string when running under VMware hypervisor. | 91 | * specific string when running under VMware hypervisor. |
92 | */ | 92 | */ |
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c index d5cd13945d5a..642f75a68cd5 100644 --- a/arch/x86/kernel/crash_dump_32.c +++ b/arch/x86/kernel/crash_dump_32.c | |||
@@ -14,9 +14,6 @@ | |||
14 | 14 | ||
15 | static void *kdump_buf_page; | 15 | static void *kdump_buf_page; |
16 | 16 | ||
17 | /* Stores the physical address of elf header of crash image. */ | ||
18 | unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; | ||
19 | |||
20 | static inline bool is_crashed_pfn_valid(unsigned long pfn) | 17 | static inline bool is_crashed_pfn_valid(unsigned long pfn) |
21 | { | 18 | { |
22 | #ifndef CONFIG_X86_PAE | 19 | #ifndef CONFIG_X86_PAE |
diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c index 994828899e09..afa64adb75ee 100644 --- a/arch/x86/kernel/crash_dump_64.c +++ b/arch/x86/kernel/crash_dump_64.c | |||
@@ -10,9 +10,6 @@ | |||
10 | #include <linux/uaccess.h> | 10 | #include <linux/uaccess.h> |
11 | #include <linux/io.h> | 11 | #include <linux/io.h> |
12 | 12 | ||
13 | /* Stores the physical address of elf header of crash image. */ | ||
14 | unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; | ||
15 | |||
16 | /** | 13 | /** |
17 | * copy_oldmem_page - copy one page from "oldmem" | 14 | * copy_oldmem_page - copy one page from "oldmem" |
18 | * @pfn: page frame number to be copied | 15 | * @pfn: page frame number to be copied |
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 7a8cebc9ff29..706a9fb46a58 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c | |||
@@ -65,12 +65,10 @@ unsigned int irq_create_of_mapping(struct device_node *controller, | |||
65 | return 0; | 65 | return 0; |
66 | ret = ih->xlate(ih, intspec, intsize, &virq, &type); | 66 | ret = ih->xlate(ih, intspec, intsize, &virq, &type); |
67 | if (ret) | 67 | if (ret) |
68 | return ret; | 68 | return 0; |
69 | if (type == IRQ_TYPE_NONE) | 69 | if (type == IRQ_TYPE_NONE) |
70 | return virq; | 70 | return virq; |
71 | /* set the mask if it is different from current */ | 71 | irq_set_irq_type(virq, type); |
72 | if (type == (irq_to_desc(virq)->status & IRQF_TRIGGER_MASK)) | ||
73 | set_irq_type(virq, type); | ||
74 | return virq; | 72 | return virq; |
75 | } | 73 | } |
76 | EXPORT_SYMBOL_GPL(irq_create_of_mapping); | 74 | EXPORT_SYMBOL_GPL(irq_create_of_mapping); |
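Note: the switch from "return ret" to "return 0" on xlate failure matters because irq_create_of_mapping() returns an unsigned virq, and 0 is the only failure value callers test for; a negative errno squeezed through an unsigned return would read as a huge, apparently valid interrupt number. A hedged caller sketch (node, spec and my_handler are illustrative stand-ins):

	unsigned int virq = irq_create_of_mapping(node, spec, 2);

	if (!virq) {			/* zero is the only failure value */
		pr_err("OF: failed to map interrupt\n");
		return -ENODEV;
	}
	return request_irq(virq, my_handler, 0, "mydev", NULL);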
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 220a1c11cfde..e2a3f0606da4 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -27,7 +27,7 @@ static int die_counter; | |||
27 | 27 | ||
28 | void printk_address(unsigned long address, int reliable) | 28 | void printk_address(unsigned long address, int reliable) |
29 | { | 29 | { |
30 | printk(" [<%p>] %s%pS\n", (void *) address, | 30 | printk(" [<%p>] %s%pB\n", (void *) address, |
31 | reliable ? "" : "? ", (void *) address); | 31 | reliable ? "" : "? ", (void *) address); |
32 | } | 32 | } |
33 | 33 | ||
@@ -175,21 +175,21 @@ static const struct stacktrace_ops print_trace_ops = { | |||
175 | 175 | ||
176 | void | 176 | void |
177 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | 177 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
178 | unsigned long *stack, char *log_lvl) | 178 | unsigned long *stack, unsigned long bp, char *log_lvl) |
179 | { | 179 | { |
180 | printk("%sCall Trace:\n", log_lvl); | 180 | printk("%sCall Trace:\n", log_lvl); |
181 | dump_trace(task, regs, stack, &print_trace_ops, log_lvl); | 181 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); |
182 | } | 182 | } |
183 | 183 | ||
184 | void show_trace(struct task_struct *task, struct pt_regs *regs, | 184 | void show_trace(struct task_struct *task, struct pt_regs *regs, |
185 | unsigned long *stack) | 185 | unsigned long *stack, unsigned long bp) |
186 | { | 186 | { |
187 | show_trace_log_lvl(task, regs, stack, ""); | 187 | show_trace_log_lvl(task, regs, stack, bp, ""); |
188 | } | 188 | } |
189 | 189 | ||
190 | void show_stack(struct task_struct *task, unsigned long *sp) | 190 | void show_stack(struct task_struct *task, unsigned long *sp) |
191 | { | 191 | { |
192 | show_stack_log_lvl(task, NULL, sp, ""); | 192 | show_stack_log_lvl(task, NULL, sp, 0, ""); |
193 | } | 193 | } |
194 | 194 | ||
195 | /* | 195 | /* |
@@ -197,14 +197,16 @@ void show_stack(struct task_struct *task, unsigned long *sp) | |||
197 | */ | 197 | */ |
198 | void dump_stack(void) | 198 | void dump_stack(void) |
199 | { | 199 | { |
200 | unsigned long bp; | ||
200 | unsigned long stack; | 201 | unsigned long stack; |
201 | 202 | ||
203 | bp = stack_frame(current, NULL); | ||
202 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | 204 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", |
203 | current->pid, current->comm, print_tainted(), | 205 | current->pid, current->comm, print_tainted(), |
204 | init_utsname()->release, | 206 | init_utsname()->release, |
205 | (int)strcspn(init_utsname()->version, " "), | 207 | (int)strcspn(init_utsname()->version, " "), |
206 | init_utsname()->version); | 208 | init_utsname()->version); |
207 | show_trace(NULL, NULL, &stack); | 209 | show_trace(NULL, NULL, &stack, bp); |
208 | } | 210 | } |
209 | EXPORT_SYMBOL(dump_stack); | 211 | EXPORT_SYMBOL(dump_stack); |
210 | 212 | ||
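Note: every dump_trace()/show_trace() caller now passes an explicit frame pointer, with 0 meaning "derive it via stack_frame()", as the fallbacks added to dumpstack_32.c and dumpstack_64.c below show. A hedged sketch of the two calling styles (my_ops and my_data are illustrative stand-ins):

	unsigned long bp = stack_frame(current, NULL);

	/* caller already knows the frame pointer */
	dump_trace(current, NULL, NULL, bp, &my_ops, my_data);

	/* bp == 0: dump_trace() falls back to stack_frame() itself */
	dump_trace(current, NULL, NULL, 0, &my_ops, my_data);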
@@ -320,16 +322,6 @@ void die(const char *str, struct pt_regs *regs, long err) | |||
320 | oops_end(flags, regs, sig); | 322 | oops_end(flags, regs, sig); |
321 | } | 323 | } |
322 | 324 | ||
323 | static int __init oops_setup(char *s) | ||
324 | { | ||
325 | if (!s) | ||
326 | return -EINVAL; | ||
327 | if (!strcmp(s, "panic")) | ||
328 | panic_on_oops = 1; | ||
329 | return 0; | ||
330 | } | ||
331 | early_param("oops", oops_setup); | ||
332 | |||
333 | static int __init kstack_setup(char *s) | 325 | static int __init kstack_setup(char *s) |
334 | { | 326 | { |
335 | if (!s) | 327 | if (!s) |
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 74cc1eda384b..3b97a80ce329 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c | |||
@@ -17,12 +17,11 @@ | |||
17 | #include <asm/stacktrace.h> | 17 | #include <asm/stacktrace.h> |
18 | 18 | ||
19 | 19 | ||
20 | void dump_trace(struct task_struct *task, | 20 | void dump_trace(struct task_struct *task, struct pt_regs *regs, |
21 | struct pt_regs *regs, unsigned long *stack, | 21 | unsigned long *stack, unsigned long bp, |
22 | const struct stacktrace_ops *ops, void *data) | 22 | const struct stacktrace_ops *ops, void *data) |
23 | { | 23 | { |
24 | int graph = 0; | 24 | int graph = 0; |
25 | unsigned long bp; | ||
26 | 25 | ||
27 | if (!task) | 26 | if (!task) |
28 | task = current; | 27 | task = current; |
@@ -35,7 +34,9 @@ void dump_trace(struct task_struct *task, | |||
35 | stack = (unsigned long *)task->thread.sp; | 34 | stack = (unsigned long *)task->thread.sp; |
36 | } | 35 | } |
37 | 36 | ||
38 | bp = stack_frame(task, regs); | 37 | if (!bp) |
38 | bp = stack_frame(task, regs); | ||
39 | |||
39 | for (;;) { | 40 | for (;;) { |
40 | struct thread_info *context; | 41 | struct thread_info *context; |
41 | 42 | ||
@@ -55,7 +56,7 @@ EXPORT_SYMBOL(dump_trace); | |||
55 | 56 | ||
56 | void | 57 | void |
57 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 58 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
58 | unsigned long *sp, char *log_lvl) | 59 | unsigned long *sp, unsigned long bp, char *log_lvl) |
59 | { | 60 | { |
60 | unsigned long *stack; | 61 | unsigned long *stack; |
61 | int i; | 62 | int i; |
@@ -77,7 +78,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
77 | touch_nmi_watchdog(); | 78 | touch_nmi_watchdog(); |
78 | } | 79 | } |
79 | printk(KERN_CONT "\n"); | 80 | printk(KERN_CONT "\n"); |
80 | show_trace_log_lvl(task, regs, sp, log_lvl); | 81 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); |
81 | } | 82 | } |
82 | 83 | ||
83 | 84 | ||
@@ -102,7 +103,7 @@ void show_registers(struct pt_regs *regs) | |||
102 | u8 *ip; | 103 | u8 *ip; |
103 | 104 | ||
104 | printk(KERN_EMERG "Stack:\n"); | 105 | printk(KERN_EMERG "Stack:\n"); |
105 | show_stack_log_lvl(NULL, regs, ®s->sp, KERN_EMERG); | 106 | show_stack_log_lvl(NULL, regs, ®s->sp, 0, KERN_EMERG); |
106 | 107 | ||
107 | printk(KERN_EMERG "Code: "); | 108 | printk(KERN_EMERG "Code: "); |
108 | 109 | ||
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index a6b6fcf7f0ae..e71c98d3c0d2 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
@@ -139,8 +139,8 @@ fixup_bp_irq_link(unsigned long bp, unsigned long *stack, | |||
139 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack | 139 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack |
140 | */ | 140 | */ |
141 | 141 | ||
142 | void dump_trace(struct task_struct *task, | 142 | void dump_trace(struct task_struct *task, struct pt_regs *regs, |
143 | struct pt_regs *regs, unsigned long *stack, | 143 | unsigned long *stack, unsigned long bp, |
144 | const struct stacktrace_ops *ops, void *data) | 144 | const struct stacktrace_ops *ops, void *data) |
145 | { | 145 | { |
146 | const unsigned cpu = get_cpu(); | 146 | const unsigned cpu = get_cpu(); |
@@ -150,7 +150,6 @@ void dump_trace(struct task_struct *task, | |||
150 | struct thread_info *tinfo; | 150 | struct thread_info *tinfo; |
151 | int graph = 0; | 151 | int graph = 0; |
152 | unsigned long dummy; | 152 | unsigned long dummy; |
153 | unsigned long bp; | ||
154 | 153 | ||
155 | if (!task) | 154 | if (!task) |
156 | task = current; | 155 | task = current; |
@@ -161,7 +160,8 @@ void dump_trace(struct task_struct *task, | |||
161 | stack = (unsigned long *)task->thread.sp; | 160 | stack = (unsigned long *)task->thread.sp; |
162 | } | 161 | } |
163 | 162 | ||
164 | bp = stack_frame(task, regs); | 163 | if (!bp) |
164 | bp = stack_frame(task, regs); | ||
165 | /* | 165 | /* |
166 | * Print function call entries in all stacks, starting at the | 166 | * Print function call entries in all stacks, starting at the |
167 | * current stack address. If the stacks consist of nested | 167 | * current stack address. If the stacks consist of nested |
@@ -225,7 +225,7 @@ EXPORT_SYMBOL(dump_trace); | |||
225 | 225 | ||
226 | void | 226 | void |
227 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 227 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
228 | unsigned long *sp, char *log_lvl) | 228 | unsigned long *sp, unsigned long bp, char *log_lvl) |
229 | { | 229 | { |
230 | unsigned long *irq_stack_end; | 230 | unsigned long *irq_stack_end; |
231 | unsigned long *irq_stack; | 231 | unsigned long *irq_stack; |
@@ -269,7 +269,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
269 | preempt_enable(); | 269 | preempt_enable(); |
270 | 270 | ||
271 | printk(KERN_CONT "\n"); | 271 | printk(KERN_CONT "\n"); |
272 | show_trace_log_lvl(task, regs, sp, log_lvl); | 272 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); |
273 | } | 273 | } |
274 | 274 | ||
275 | void show_registers(struct pt_regs *regs) | 275 | void show_registers(struct pt_regs *regs) |
@@ -298,7 +298,7 @@ void show_registers(struct pt_regs *regs) | |||
298 | 298 | ||
299 | printk(KERN_EMERG "Stack:\n"); | 299 | printk(KERN_EMERG "Stack:\n"); |
300 | show_stack_log_lvl(NULL, regs, (unsigned long *)sp, | 300 | show_stack_log_lvl(NULL, regs, (unsigned long *)sp, |
301 | KERN_EMERG); | 301 | 0, KERN_EMERG); |
302 | 302 | ||
303 | printk(KERN_EMERG "Code: "); | 303 | printk(KERN_EMERG "Code: "); |
304 | 304 | ||
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index cdf5bfd9d4d5..3e2ef8425316 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
12 | #include <linux/types.h> | 12 | #include <linux/types.h> |
13 | #include <linux/init.h> | 13 | #include <linux/init.h> |
14 | #include <linux/crash_dump.h> | ||
14 | #include <linux/bootmem.h> | 15 | #include <linux/bootmem.h> |
15 | #include <linux/pfn.h> | 16 | #include <linux/pfn.h> |
16 | #include <linux/suspend.h> | 17 | #include <linux/suspend.h> |
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 9efbdcc56425..3755ef494390 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c | |||
@@ -159,7 +159,12 @@ static void __init ati_bugs_contd(int num, int slot, int func) | |||
159 | if (rev >= 0x40) | 159 | if (rev >= 0x40) |
160 | acpi_fix_pin2_polarity = 1; | 160 | acpi_fix_pin2_polarity = 1; |
161 | 161 | ||
162 | if (rev > 0x13) | 162 | /* |
163 | * SB600: revisions 0x11, 0x12, 0x13, 0x14, ... | ||
164 | * SB700: revisions 0x39, 0x3a, ... | ||
165 | * SB800: revisions 0x40, 0x41, ... | ||
166 | */ | ||
167 | if (rev >= 0x39) | ||
163 | return; | 168 | return; |
164 | 169 | ||
165 | if (acpi_use_timer_override) | 170 | if (acpi_use_timer_override) |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index fa41f7298c84..5c1a91974918 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -1414,7 +1414,7 @@ ENTRY(async_page_fault) | |||
1414 | pushl_cfi $do_async_page_fault | 1414 | pushl_cfi $do_async_page_fault |
1415 | jmp error_code | 1415 | jmp error_code |
1416 | CFI_ENDPROC | 1416 | CFI_ENDPROC |
1417 | END(apf_page_fault) | 1417 | END(async_page_fault) |
1418 | #endif | 1418 | #endif |
1419 | 1419 | ||
1420 | /* | 1420 | /* |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b72b4a6466a9..8a445a0c989e 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -18,7 +18,7 @@ | |||
18 | * A note on terminology: | 18 | * A note on terminology: |
19 | * - top of stack: Architecture defined interrupt frame from SS to RIP | 19 | * - top of stack: Architecture defined interrupt frame from SS to RIP |
20 | * at the top of the kernel process stack. | 20 | * at the top of the kernel process stack. |
21 | * - partial stack frame: partially saved registers upto R11. | 21 | * - partial stack frame: partially saved registers up to R11. |
22 | * - full stack frame: Like partial stack frame, but all registers saved. | 22 | * - full stack frame: Like partial stack frame, but all registers saved. |
23 | * | 23 | * |
24 | * Some macro usage: | 24 | * Some macro usage: |
@@ -422,7 +422,7 @@ ENTRY(ret_from_fork) | |||
422 | END(ret_from_fork) | 422 | END(ret_from_fork) |
423 | 423 | ||
424 | /* | 424 | /* |
425 | * System call entry. Upto 6 arguments in registers are supported. | 425 | * System call entry. Up to 6 arguments in registers are supported. |
426 | * | 426 | * |
427 | * SYSCALL does not save anything on the stack and does not change the | 427 | * SYSCALL does not save anything on the stack and does not change the |
428 | * stack pointer. | 428 | * stack pointer. |
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 7f138b3c3c52..d6d6bb361931 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -34,15 +34,6 @@ void __init i386_start_kernel(void) | |||
34 | { | 34 | { |
35 | memblock_init(); | 35 | memblock_init(); |
36 | 36 | ||
37 | #ifdef CONFIG_X86_TRAMPOLINE | ||
38 | /* | ||
39 | * But first pinch a few for the stack/trampoline stuff | ||
40 | * FIXME: Don't need the extra page at 4K, but need to fix | ||
41 | * trampoline before removing it. (see the GDT stuff) | ||
42 | */ | ||
43 | memblock_x86_reserve_range(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE"); | ||
44 | #endif | ||
45 | |||
46 | memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); | 37 | memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); |
47 | 38 | ||
48 | #ifdef CONFIG_BLK_DEV_INITRD | 39 | #ifdef CONFIG_BLK_DEV_INITRD |
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 2d2673c28aff..5655c2272adb 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -77,9 +77,6 @@ void __init x86_64_start_kernel(char * real_mode_data) | |||
77 | /* Make NULL pointers segfault */ | 77 | /* Make NULL pointers segfault */ |
78 | zap_identity_mappings(); | 78 | zap_identity_mappings(); |
79 | 79 | ||
80 | /* Cleanup the over mapped high alias */ | ||
81 | cleanup_highmap(); | ||
82 | |||
83 | max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT; | 80 | max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT; |
84 | 81 | ||
85 | for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) { | 82 | for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) { |
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 239046bd447f..e11e39478a49 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -136,10 +136,9 @@ ident_complete: | |||
136 | /* Fixup phys_base */ | 136 | /* Fixup phys_base */ |
137 | addq %rbp, phys_base(%rip) | 137 | addq %rbp, phys_base(%rip) |
138 | 138 | ||
139 | #ifdef CONFIG_X86_TRAMPOLINE | 139 | /* Fixup trampoline */ |
140 | addq %rbp, trampoline_level4_pgt + 0(%rip) | 140 | addq %rbp, trampoline_level4_pgt + 0(%rip) |
141 | addq %rbp, trampoline_level4_pgt + (511*8)(%rip) | 141 | addq %rbp, trampoline_level4_pgt + (511*8)(%rip) |
142 | #endif | ||
143 | 142 | ||
144 | /* Due to ENTRY(), sometimes the empty space gets filled with | 143 | /* Due to ENTRY(), sometimes the empty space gets filled with |
145 | * zeros. Better take a jmp than relying on empty space being | 144 | * zeros. Better take a jmp than relying on empty space being |
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index e60c38cc0eed..12aff2537682 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -145,7 +145,7 @@ EXPORT_SYMBOL_GPL(fpu_finit); | |||
145 | * The _current_ task is using the FPU for the first time | 145 | * The _current_ task is using the FPU for the first time |
146 | * so initialize it and set the mxcsr to its default | 146 | * so initialize it and set the mxcsr to its default |
147 | * value at reset if we support XMM instructions and then | 147 | * value at reset if we support XMM instructions and then |
148 | * remeber the current task has used the FPU. | 148 | * remember the current task has used the FPU. |
149 | */ | 149 | */ |
150 | int init_fpu(struct task_struct *tsk) | 150 | int init_fpu(struct task_struct *tsk) |
151 | { | 151 | { |
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 9974d21048fd..72090705a656 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
@@ -172,7 +172,7 @@ asmlinkage void do_softirq(void) | |||
172 | 172 | ||
173 | call_on_stack(__do_softirq, isp); | 173 | call_on_stack(__do_softirq, isp); |
174 | /* | 174 | /* |
175 | * Shouldnt happen, we returned above if in_interrupt(): | 175 | * Shouldn't happen, we returned above if in_interrupt(): |
176 | */ | 176 | */ |
177 | WARN_ON_ONCE(softirq_count()); | 177 | WARN_ON_ONCE(softirq_count()); |
178 | } | 178 | } |
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 7c64c420a9f6..dba0b36941a5 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
@@ -278,7 +278,7 @@ static int hw_break_release_slot(int breakno) | |||
278 | pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); | 278 | pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); |
279 | if (dbg_release_bp_slot(*pevent)) | 279 | if (dbg_release_bp_slot(*pevent)) |
280 | /* | 280 | /* |
281 | * The debugger is responisble for handling the retry on | 281 | * The debugger is responsible for handling the retry on |
282 | * remove failure. | 282 | * remove failure. |
283 | */ | 283 | */ |
284 | return -1; | 284 | return -1; |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 8dc44662394b..33c07b0b122e 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -493,7 +493,7 @@ static void __init kvm_smp_prepare_boot_cpu(void) | |||
493 | native_smp_prepare_boot_cpu(); | 493 | native_smp_prepare_boot_cpu(); |
494 | } | 494 | } |
495 | 495 | ||
496 | static void kvm_guest_cpu_online(void *dummy) | 496 | static void __cpuinit kvm_guest_cpu_online(void *dummy) |
497 | { | 497 | { |
498 | kvm_guest_cpu_init(); | 498 | kvm_guest_cpu_init(); |
499 | } | 499 | } |
diff --git a/arch/x86/kernel/mca_32.c b/arch/x86/kernel/mca_32.c index 63eaf6596233..177183cbb6ae 100644 --- a/arch/x86/kernel/mca_32.c +++ b/arch/x86/kernel/mca_32.c | |||
@@ -259,7 +259,7 @@ static int __init mca_init(void) | |||
259 | /* | 259 | /* |
260 | * WARNING: Be careful when making changes here. Putting an adapter | 260 | * WARNING: Be careful when making changes here. Putting an adapter |
261 | * and the motherboard simultaneously into setup mode may result in | 261 | * and the motherboard simultaneously into setup mode may result in |
262 | * damage to chips (according to The Indispensible PC Hardware Book | 262 | * damage to chips (according to The Indispensable PC Hardware Book |
263 | * by Hans-Peter Messmer). Also, we disable system interrupts (so | 263 | * by Hans-Peter Messmer). Also, we disable system interrupts (so |
264 | * that we are not disturbed in the middle of this). | 264 | * that we are not disturbed in the middle of this). |
265 | */ | 265 | */ |
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 01b0f6d06451..5a532ce646bf 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -714,10 +714,6 @@ static void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) | |||
714 | *nr_m_spare += 1; | 714 | *nr_m_spare += 1; |
715 | } | 715 | } |
716 | } | 716 | } |
717 | #else /* CONFIG_X86_IO_APIC */ | ||
718 | static | ||
719 | inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {} | ||
720 | #endif /* CONFIG_X86_IO_APIC */ | ||
721 | 717 | ||
722 | static int | 718 | static int |
723 | check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count) | 719 | check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count) |
@@ -731,6 +727,10 @@ check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count) | |||
731 | 727 | ||
732 | return ret; | 728 | return ret; |
733 | } | 729 | } |
730 | #else /* CONFIG_X86_IO_APIC */ | ||
731 | static | ||
732 | inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {} | ||
733 | #endif /* CONFIG_X86_IO_APIC */ | ||
734 | 734 | ||
735 | static int __init replace_intsrc_all(struct mpc_table *mpc, | 735 | static int __init replace_intsrc_all(struct mpc_table *mpc, |
736 | unsigned long mpc_new_phys, | 736 | unsigned long mpc_new_phys, |
@@ -883,7 +883,7 @@ static int __init update_mp_table(void) | |||
883 | 883 | ||
884 | if (!mpc_new_phys) { | 884 | if (!mpc_new_phys) { |
885 | unsigned char old, new; | 885 | unsigned char old, new; |
886 | /* check if we can change the postion */ | 886 | /* check if we can change the position */ |
887 | mpc->checksum = 0; | 887 | mpc->checksum = 0; |
888 | old = mpf_checksum((unsigned char *)mpc, mpc->length); | 888 | old = mpf_checksum((unsigned char *)mpc, mpc->length); |
889 | mpc->checksum = 0xff; | 889 | mpc->checksum = 0xff; |
@@ -892,7 +892,7 @@ static int __init update_mp_table(void) | |||
892 | printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n"); | 892 | printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n"); |
893 | return 0; | 893 | return 0; |
894 | } | 894 | } |
895 | printk(KERN_INFO "use in-positon replacing\n"); | 895 | printk(KERN_INFO "use in-position replacing\n"); |
896 | } else { | 896 | } else { |
897 | mpf->physptr = mpc_new_phys; | 897 | mpf->physptr = mpc_new_phys; |
898 | mpc_new = phys_to_virt(mpc_new_phys); | 898 | mpc_new = phys_to_virt(mpc_new_phys); |
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index f56a117cef68..e8c33a302006 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c | |||
@@ -1279,7 +1279,7 @@ static int __init calgary_bus_has_devices(int bus, unsigned short pci_dev) | |||
1279 | 1279 | ||
1280 | if (pci_dev == PCI_DEVICE_ID_IBM_CALIOC2) { | 1280 | if (pci_dev == PCI_DEVICE_ID_IBM_CALIOC2) { |
1281 | /* | 1281 | /* |
1282 | * FIXME: properly scan for devices accross the | 1282 | * FIXME: properly scan for devices across the |
1283 | * PCI-to-PCI bridge on every CalIOC2 port. | 1283 | * PCI-to-PCI bridge on every CalIOC2 port. |
1284 | */ | 1284 | */ |
1285 | return 1; | 1285 | return 1; |
@@ -1295,7 +1295,7 @@ static int __init calgary_bus_has_devices(int bus, unsigned short pci_dev) | |||
1295 | 1295 | ||
1296 | /* | 1296 | /* |
1297 | * calgary_init_bitmap_from_tce_table(): | 1297 | * calgary_init_bitmap_from_tce_table(): |
1298 | * Funtion for kdump case. In the second/kdump kernel initialize | 1298 | * Function for kdump case. In the second/kdump kernel initialize |
1299 | * the bitmap based on the tce table entries obtained from first kernel | 1299 | * the bitmap based on the tce table entries obtained from first kernel |
1300 | */ | 1300 | */ |
1301 | static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl) | 1301 | static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl) |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 99fa3adf0141..d46cbe46b7ab 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -87,7 +87,7 @@ void exit_thread(void) | |||
87 | void show_regs(struct pt_regs *regs) | 87 | void show_regs(struct pt_regs *regs) |
88 | { | 88 | { |
89 | show_registers(regs); | 89 | show_registers(regs); |
90 | show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs)); | 90 | show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), 0); |
91 | } | 91 | } |
92 | 92 | ||
93 | void show_regs_common(void) | 93 | void show_regs_common(void) |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index bd387e8f73b4..6c9dd922ac0d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -501,6 +501,10 @@ void set_personality_64bit(void) | |||
501 | /* Make sure to be in 64bit mode */ | 501 | /* Make sure to be in 64bit mode */ |
502 | clear_thread_flag(TIF_IA32); | 502 | clear_thread_flag(TIF_IA32); |
503 | 503 | ||
504 | /* Ensure the corresponding mm is not marked. */ | ||
505 | if (current->mm) | ||
506 | current->mm->context.ia32_compat = 0; | ||
507 | |||
504 | /* TBD: overwrites user setup. Should have two bits. | 508 | /* TBD: overwrites user setup. Should have two bits. |
505 | But 64bit processes have always behaved this way, | 509 | But 64bit processes have always behaved this way, |
506 | so it's not too bad. The main problem is just that | 510 | so it's not too bad. The main problem is just that |
@@ -516,6 +520,10 @@ void set_personality_ia32(void) | |||
516 | set_thread_flag(TIF_IA32); | 520 | set_thread_flag(TIF_IA32); |
517 | current->personality |= force_personality32; | 521 | current->personality |= force_personality32; |
518 | 522 | ||
523 | /* Mark the associated mm as containing 32-bit tasks. */ | ||
524 | if (current->mm) | ||
525 | current->mm->context.ia32_compat = 1; | ||
526 | |||
519 | /* Prepare the first "return" to user space */ | 527 | /* Prepare the first "return" to user space */ |
520 | current_thread_info()->status |= TS_COMPAT; | 528 | current_thread_info()->status |= TS_COMPAT; |
521 | } | 529 | } |
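The two personality hooks now mirror the task's ABI into its mm. For reference, a minimal sketch of a consumer of the new flag, assuming the ia32_compat field added to mm_context_t (<asm/mmu.h>) elsewhere in this series; the helper name is hypothetical:

	#include <linux/mm_types.h>

	/* Hypothetical helper: ia32_compat is assumed to be the flag set by
	 * set_personality_ia32() and cleared by set_personality_64bit(). */
	static inline bool mm_has_ia32_tasks(struct mm_struct *mm)
	{
		return mm && mm->context.ia32_compat;
	}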
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 715037caeb43..d3ce37edb54d 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -303,68 +303,16 @@ static int __init reboot_init(void) | |||
303 | } | 303 | } |
304 | core_initcall(reboot_init); | 304 | core_initcall(reboot_init); |
305 | 305 | ||
306 | /* The following code and data reboots the machine by switching to real | 306 | extern const unsigned char machine_real_restart_asm[]; |
307 | mode and jumping to the BIOS reset entry point, as if the CPU has | 307 | extern const u64 machine_real_restart_gdt[3]; |
308 | really been reset. The previous version asked the keyboard | ||
309 | controller to pulse the CPU reset line, which is more thorough, but | ||
310 | doesn't work with at least one type of 486 motherboard. It is easy | ||
311 | to stop this code working; hence the copious comments. */ | ||
312 | static const unsigned long long | ||
313 | real_mode_gdt_entries [3] = | ||
314 | { | ||
315 | 0x0000000000000000ULL, /* Null descriptor */ | ||
316 | 0x00009b000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ | ||
317 | 0x000093000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ | ||
318 | }; | ||
319 | 308 | ||
320 | static const struct desc_ptr | 309 | void machine_real_restart(unsigned int type) |
321 | real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries }, | ||
322 | real_mode_idt = { 0x3ff, 0 }; | ||
323 | |||
324 | /* This is 16-bit protected mode code to disable paging and the cache, | ||
325 | switch to real mode and jump to the BIOS reset code. | ||
326 | |||
327 | The instruction that switches to real mode by writing to CR0 must be | ||
328 | followed immediately by a far jump instruction, which set CS to a | ||
329 | valid value for real mode, and flushes the prefetch queue to avoid | ||
330 | running instructions that have already been decoded in protected | ||
331 | mode. | ||
332 | |||
333 | Clears all the flags except ET, especially PG (paging), PE | ||
334 | (protected-mode enable) and TS (task switch for coprocessor state | ||
335 | save). Flushes the TLB after paging has been disabled. Sets CD and | ||
336 | NW, to disable the cache on a 486, and invalidates the cache. This | ||
337 | is more like the state of a 486 after reset. I don't know if | ||
338 | something else should be done for other chips. | ||
339 | |||
340 | More could be done here to set up the registers as if a CPU reset had | ||
341 | occurred; hopefully real BIOSs don't assume much. */ | ||
342 | static const unsigned char real_mode_switch [] = | ||
343 | { | ||
344 | 0x66, 0x0f, 0x20, 0xc0, /* movl %cr0,%eax */ | ||
345 | 0x66, 0x83, 0xe0, 0x11, /* andl $0x00000011,%eax */ | ||
346 | 0x66, 0x0d, 0x00, 0x00, 0x00, 0x60, /* orl $0x60000000,%eax */ | ||
347 | 0x66, 0x0f, 0x22, 0xc0, /* movl %eax,%cr0 */ | ||
348 | 0x66, 0x0f, 0x22, 0xd8, /* movl %eax,%cr3 */ | ||
349 | 0x66, 0x0f, 0x20, 0xc3, /* movl %cr0,%ebx */ | ||
350 | 0x66, 0x81, 0xe3, 0x00, 0x00, 0x00, 0x60, /* andl $0x60000000,%ebx */ | ||
351 | 0x74, 0x02, /* jz f */ | ||
352 | 0x0f, 0x09, /* wbinvd */ | ||
353 | 0x24, 0x10, /* f: andb $0x10,al */ | ||
354 | 0x66, 0x0f, 0x22, 0xc0 /* movl %eax,%cr0 */ | ||
355 | }; | ||
356 | static const unsigned char jump_to_bios [] = | ||
357 | { | 310 | { |
358 | 0xea, 0x00, 0x00, 0xff, 0xff /* ljmp $0xffff,$0x0000 */ | 311 | void *restart_va; |
359 | }; | 312 | unsigned long restart_pa; |
313 | void (*restart_lowmem)(unsigned int); | ||
314 | u64 *lowmem_gdt; | ||
360 | 315 | ||
361 | /* | ||
362 | * Switch to real mode and then execute the code | ||
363 | * specified by the code and length parameters. | ||
364 | * We assume that length will aways be less that 100! | ||
365 | */ | ||
366 | void machine_real_restart(const unsigned char *code, int length) | ||
367 | { | ||
368 | local_irq_disable(); | 316 | local_irq_disable(); |
369 | 317 | ||
370 | /* Write zero to CMOS register number 0x0f, which the BIOS POST | 318 | /* Write zero to CMOS register number 0x0f, which the BIOS POST |
@@ -392,41 +340,23 @@ void machine_real_restart(const unsigned char *code, int length) | |||
392 | too. */ | 340 | too. */ |
393 | *((unsigned short *)0x472) = reboot_mode; | 341 | *((unsigned short *)0x472) = reboot_mode; |
394 | 342 | ||
395 | /* For the switch to real mode, copy some code to low memory. It has | 343 | /* Patch the GDT in the low memory trampoline */ |
396 | to be in the first 64k because it is running in 16-bit mode, and it | 344 | lowmem_gdt = TRAMPOLINE_SYM(machine_real_restart_gdt); |
397 | has to have the same physical and virtual address, because it turns | 345 | |
398 | off paging. Copy it near the end of the first page, out of the way | 346 | restart_va = TRAMPOLINE_SYM(machine_real_restart_asm); |
399 | of BIOS variables. */ | 347 | restart_pa = virt_to_phys(restart_va); |
400 | memcpy((void *)(0x1000 - sizeof(real_mode_switch) - 100), | 348 | restart_lowmem = (void (*)(unsigned int))restart_pa; |
401 | real_mode_switch, sizeof (real_mode_switch)); | 349 | |
402 | memcpy((void *)(0x1000 - 100), code, length); | 350 | /* GDT[0]: GDT self-pointer */ |
403 | 351 | lowmem_gdt[0] = | |
404 | /* Set up the IDT for real mode. */ | 352 | (u64)(sizeof(machine_real_restart_gdt) - 1) + |
405 | load_idt(&real_mode_idt); | 353 | ((u64)virt_to_phys(lowmem_gdt) << 16); |
406 | 354 | /* GDT[1]: 64K real mode code segment */ | |
407 | /* Set up a GDT from which we can load segment descriptors for real | 355 | lowmem_gdt[1] = |
408 | mode. The GDT is not used in real mode; it is just needed here to | 356 | GDT_ENTRY(0x009b, restart_pa, 0xffff); |
409 | prepare the descriptors. */ | 357 | |
410 | load_gdt(&real_mode_gdt); | 358 | /* Jump to the identity-mapped low memory code */ |
411 | 359 | restart_lowmem(type); | |
412 | /* Load the data segment registers, and thus the descriptors ready for | ||
413 | real mode. The base address of each segment is 0x100, 16 times the | ||
414 | selector value being loaded here. This is so that the segment | ||
415 | registers don't have to be reloaded after switching to real mode: | ||
416 | the values are consistent for real mode operation already. */ | ||
417 | __asm__ __volatile__ ("movl $0x0010,%%eax\n" | ||
418 | "\tmovl %%eax,%%ds\n" | ||
419 | "\tmovl %%eax,%%es\n" | ||
420 | "\tmovl %%eax,%%fs\n" | ||
421 | "\tmovl %%eax,%%gs\n" | ||
422 | "\tmovl %%eax,%%ss" : : : "eax"); | ||
423 | |||
424 | /* Jump to the 16-bit code that we copied earlier. It disables paging | ||
425 | and the cache, switches to real mode, and jumps to the BIOS reset | ||
426 | entry point. */ | ||
427 | __asm__ __volatile__ ("ljmp $0x0008,%0" | ||
428 | : | ||
429 | : "i" ((void *)(0x1000 - sizeof (real_mode_switch) - 100))); | ||
430 | } | 360 | } |
431 | #ifdef CONFIG_APM_MODULE | 361 | #ifdef CONFIG_APM_MODULE |
432 | EXPORT_SYMBOL(machine_real_restart); | 362 | EXPORT_SYMBOL(machine_real_restart); |
@@ -581,7 +511,7 @@ static void native_machine_emergency_restart(void) | |||
581 | 511 | ||
582 | #ifdef CONFIG_X86_32 | 512 | #ifdef CONFIG_X86_32 |
583 | case BOOT_BIOS: | 513 | case BOOT_BIOS: |
584 | machine_real_restart(jump_to_bios, sizeof(jump_to_bios)); | 514 | machine_real_restart(MRR_BIOS); |
585 | 515 | ||
586 | reboot_type = BOOT_KBD; | 516 | reboot_type = BOOT_KBD; |
587 | break; | 517 | break; |
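For reference, GDT_ENTRY() packs (flags, base, limit) into one 8-byte descriptor; flags 0x009b selects a present, DPL-0, readable 16-bit code segment, and limit 0xffff gives a 64K segment. A standalone sketch of the packing assumed from <asm/segment.h>:

	/* Assumed layout: base[31:24] -> bits 63:56, flags[15:12] -> bits 55:52,
	 * limit[19:16] -> bits 51:48, flags[7:0] -> bits 47:40,
	 * base[23:0] -> bits 39:16, limit[15:0] -> bits 15:0. */
	static inline u64 gdt_entry(u32 flags, u32 base, u32 limit)
	{
		return ((u64)(base  & 0xff000000) << 32) |
		       ((u64)(flags & 0x0000f0ff) << 40) |
		       ((u64)(limit & 0x000f0000) << 32) |
		       ((u64)(base  & 0x00ffffff) << 16) |
		        (u64)(limit & 0x0000ffff);
	}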
diff --git a/arch/x86/kernel/reboot_32.S b/arch/x86/kernel/reboot_32.S new file mode 100644 index 000000000000..29092b38d816 --- /dev/null +++ b/arch/x86/kernel/reboot_32.S | |||
@@ -0,0 +1,135 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | #include <linux/init.h> | ||
3 | #include <asm/segment.h> | ||
4 | #include <asm/page_types.h> | ||
5 | |||
6 | /* | ||
7 | * The following code and data reboots the machine by switching to real | ||
8 | * mode and jumping to the BIOS reset entry point, as if the CPU has | ||
9 | * really been reset. The previous version asked the keyboard | ||
10 | * controller to pulse the CPU reset line, which is more thorough, but | ||
11 | * doesn't work with at least one type of 486 motherboard. It is easy | ||
12 | * to stop this code working; hence the copious comments. | ||
13 | * | ||
14 | * This code is called with the restart type (0 = BIOS, 1 = APM) in %eax. | ||
15 | */ | ||
16 | .section ".x86_trampoline","a" | ||
17 | .balign 16 | ||
18 | .code32 | ||
19 | ENTRY(machine_real_restart_asm) | ||
20 | r_base = . | ||
21 | /* Get our own relocated address */ | ||
22 | call 1f | ||
23 | 1: popl %ebx | ||
24 | subl $1b, %ebx | ||
25 | |||
26 | /* Compute the equivalent real-mode segment */ | ||
27 | movl %ebx, %ecx | ||
28 | shrl $4, %ecx | ||
29 | |||
30 | /* Patch post-real-mode segment jump */ | ||
31 | movw dispatch_table(%ebx,%eax,2),%ax | ||
32 | movw %ax, 101f(%ebx) | ||
33 | movw %cx, 102f(%ebx) | ||
34 | |||
35 | /* Set up the IDT for real mode. */ | ||
36 | lidtl machine_real_restart_idt(%ebx) | ||
37 | |||
38 | /* | ||
39 | * Set up a GDT from which we can load segment descriptors for real | ||
40 | * mode. The GDT is not used in real mode; it is just needed here to | ||
41 | * prepare the descriptors. | ||
42 | */ | ||
43 | lgdtl machine_real_restart_gdt(%ebx) | ||
44 | |||
45 | /* | ||
46 | * Load the data segment registers with 16-bit compatible values | ||
47 | */ | ||
48 | movl $16, %ecx | ||
49 | movl %ecx, %ds | ||
50 | movl %ecx, %es | ||
51 | movl %ecx, %fs | ||
52 | movl %ecx, %gs | ||
53 | movl %ecx, %ss | ||
54 | ljmpl $8, $1f - r_base | ||
55 | |||
56 | /* | ||
57 | * This is 16-bit protected mode code to disable paging and the cache, | ||
58 | * switch to real mode and jump to the BIOS reset code. | ||
59 | * | ||
60 | * The instruction that switches to real mode by writing to CR0 must be | ||
61 | * followed immediately by a far jump instruction, which sets CS to a | ||
62 | * valid value for real mode, and flushes the prefetch queue to avoid | ||
63 | * running instructions that have already been decoded in protected | ||
64 | * mode. | ||
65 | * | ||
66 | * Clears all the flags except ET, especially PG (paging), PE | ||
67 | * (protected-mode enable) and TS (task switch for coprocessor state | ||
68 | * save). Flushes the TLB after paging has been disabled. Sets CD and | ||
69 | * NW, to disable the cache on a 486, and invalidates the cache. This | ||
70 | * is more like the state of a 486 after reset. I don't know if | ||
71 | * something else should be done for other chips. | ||
72 | * | ||
73 | * More could be done here to set up the registers as if a CPU reset had | ||
74 | * occurred; hopefully real BIOSs don't assume much. This is not the | ||
75 | * actual BIOS entry point, anyway (that is at 0xfffffff0). | ||
76 | * | ||
77 | * Most of this work is probably excessive, but it is what is tested. | ||
78 | */ | ||
79 | .code16 | ||
80 | 1: | ||
81 | xorl %ecx, %ecx | ||
82 | movl %cr0, %eax | ||
83 | andl $0x00000011, %eax | ||
84 | orl $0x60000000, %eax | ||
85 | movl %eax, %cr0 | ||
86 | movl %ecx, %cr3 | ||
87 | movl %cr0, %edx | ||
88 | andl $0x60000000, %edx /* If no cache bits -> no wbinvd */ | ||
89 | jz 2f | ||
90 | wbinvd | ||
91 | 2: | ||
92 | andb $0x10, %al | ||
93 | movl %eax, %cr0 | ||
94 | .byte 0xea /* ljmpw */ | ||
95 | 101: .word 0 /* Offset */ | ||
96 | 102: .word 0 /* Segment */ | ||
97 | |||
98 | bios: | ||
99 | ljmpw $0xf000, $0xfff0 | ||
100 | |||
101 | apm: | ||
102 | movw $0x1000, %ax | ||
103 | movw %ax, %ss | ||
104 | movw $0xf000, %sp | ||
105 | movw $0x5307, %ax | ||
106 | movw $0x0001, %bx | ||
107 | movw $0x0003, %cx | ||
108 | int $0x15 | ||
109 | |||
110 | END(machine_real_restart_asm) | ||
111 | |||
112 | .balign 16 | ||
113 | /* These must match <asm/reboot.h> */ | ||
114 | dispatch_table: | ||
115 | .word bios - r_base | ||
116 | .word apm - r_base | ||
117 | END(dispatch_table) | ||
118 | |||
119 | .balign 16 | ||
120 | machine_real_restart_idt: | ||
121 | .word 0xffff /* Length - real mode default value */ | ||
122 | .long 0 /* Base - real mode default value */ | ||
123 | END(machine_real_restart_idt) | ||
124 | |||
125 | .balign 16 | ||
126 | ENTRY(machine_real_restart_gdt) | ||
127 | .quad 0 /* Self-pointer, filled in by PM code */ | ||
128 | .quad 0 /* 16-bit code segment, filled in by PM code */ | ||
129 | /* | ||
130 | * 16-bit data segment with the selector value 16 = 0x10 and | ||
131 | * base value 0x100; since this is consistent with real mode | ||
132 | * semantics we don't have to reload the segments once CR0.PE = 0. | ||
133 | */ | ||
134 | .quad GDT_ENTRY(0x0093, 0x100, 0xffff) | ||
135 | END(machine_real_restart_gdt) | ||
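The dispatch table is indexed by the restart type handed over in %eax, so its entries must stay in step with the restart-type constants; presumably, in <asm/reboot.h> (not part of this diff):

	#define MRR_BIOS	0	/* dispatch_table[0]: ljmpw to the BIOS reset vector */
	#define MRR_APM		1	/* dispatch_table[1]: APM "power off" via int $0x15 */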
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b176f2b1f45d..5a0484a95ad6 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -294,30 +294,11 @@ static void __init init_gbpages(void) | |||
294 | else | 294 | else |
295 | direct_gbpages = 0; | 295 | direct_gbpages = 0; |
296 | } | 296 | } |
297 | |||
298 | static void __init cleanup_highmap_brk_end(void) | ||
299 | { | ||
300 | pud_t *pud; | ||
301 | pmd_t *pmd; | ||
302 | |||
303 | mmu_cr4_features = read_cr4(); | ||
304 | |||
305 | /* | ||
306 | * _brk_end cannot change anymore, but it and _end may be | ||
307 | * located on different 2M pages. cleanup_highmap(), however, | ||
308 | * can only consider _end when it runs, so destroy any | ||
309 | * mappings beyond _brk_end here. | ||
310 | */ | ||
311 | pud = pud_offset(pgd_offset_k(_brk_end), _brk_end); | ||
312 | pmd = pmd_offset(pud, _brk_end - 1); | ||
313 | while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1)) | ||
314 | pmd_clear(pmd); | ||
315 | } | ||
316 | #else | 297 | #else |
317 | static inline void init_gbpages(void) | 298 | static inline void init_gbpages(void) |
318 | { | 299 | { |
319 | } | 300 | } |
320 | static inline void cleanup_highmap_brk_end(void) | 301 | static void __init cleanup_highmap(void) |
321 | { | 302 | { |
322 | } | 303 | } |
323 | #endif | 304 | #endif |
@@ -330,8 +311,6 @@ static void __init reserve_brk(void) | |||
330 | /* Mark brk area as locked down and no longer taking any | 311 | /* Mark brk area as locked down and no longer taking any |
331 | new allocations */ | 312 | new allocations */ |
332 | _brk_start = 0; | 313 | _brk_start = 0; |
333 | |||
334 | cleanup_highmap_brk_end(); | ||
335 | } | 314 | } |
336 | 315 | ||
337 | #ifdef CONFIG_BLK_DEV_INITRD | 316 | #ifdef CONFIG_BLK_DEV_INITRD |
@@ -640,28 +619,6 @@ void __init reserve_standard_io_resources(void) | |||
640 | 619 | ||
641 | } | 620 | } |
642 | 621 | ||
643 | /* | ||
644 | * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by | ||
645 | * is_kdump_kernel() to determine if we are booting after a panic. Hence | ||
646 | * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE. | ||
647 | */ | ||
648 | |||
649 | #ifdef CONFIG_CRASH_DUMP | ||
650 | /* elfcorehdr= specifies the location of elf core header | ||
651 | * stored by the crashed kernel. This option will be passed | ||
652 | * by kexec loader to the capture kernel. | ||
653 | */ | ||
654 | static int __init setup_elfcorehdr(char *arg) | ||
655 | { | ||
656 | char *end; | ||
657 | if (!arg) | ||
658 | return -EINVAL; | ||
659 | elfcorehdr_addr = memparse(arg, &end); | ||
660 | return end > arg ? 0 : -EINVAL; | ||
661 | } | ||
662 | early_param("elfcorehdr", setup_elfcorehdr); | ||
663 | #endif | ||
664 | |||
665 | static __init void reserve_ibft_region(void) | 622 | static __init void reserve_ibft_region(void) |
666 | { | 623 | { |
667 | unsigned long addr, size = 0; | 624 | unsigned long addr, size = 0; |
@@ -950,6 +907,8 @@ void __init setup_arch(char **cmdline_p) | |||
950 | */ | 907 | */ |
951 | reserve_brk(); | 908 | reserve_brk(); |
952 | 909 | ||
910 | cleanup_highmap(); | ||
911 | |||
953 | memblock.current_limit = get_max_mapped(); | 912 | memblock.current_limit = get_max_mapped(); |
954 | memblock_x86_fill(); | 913 | memblock_x86_fill(); |
955 | 914 | ||
@@ -963,15 +922,8 @@ void __init setup_arch(char **cmdline_p) | |||
963 | printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n", | 922 | printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n", |
964 | max_pfn_mapped<<PAGE_SHIFT); | 923 | max_pfn_mapped<<PAGE_SHIFT); |
965 | 924 | ||
966 | reserve_trampoline_memory(); | 925 | setup_trampolines(); |
967 | 926 | ||
968 | #ifdef CONFIG_ACPI_SLEEP | ||
969 | /* | ||
970 | * Reserve low memory region for sleep support. | ||
971 | * even before init_memory_mapping | ||
972 | */ | ||
973 | acpi_reserve_wakeup_memory(); | ||
974 | #endif | ||
975 | init_gbpages(); | 927 | init_gbpages(); |
976 | 928 | ||
977 | /* max_pfn_mapped is updated here */ | 929 | /* max_pfn_mapped is updated here */ |
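With cleanup_highmap_brk_end() folded away, setup_arch() calls cleanup_highmap() exactly once, after _brk_end is final. The 64-bit body lives in arch/x86/mm/init_64.c (changed elsewhere in this series); a sketch of what it is assumed to do, dropping kernel-image PMD mappings outside [_text, _brk_end):

	void __init cleanup_highmap(void)
	{
		unsigned long vaddr = __START_KERNEL_map;
		unsigned long vaddr_end = __START_KERNEL_map +
					  (max_pfn_mapped << PAGE_SHIFT);
		unsigned long end = roundup((unsigned long)_brk_end,
					    PMD_SIZE) - 1;
		pmd_t *pmd = level2_kernel_pgt;

		for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) {
			if (pmd_none(*pmd))
				continue;
			if (vaddr < (unsigned long)_text || vaddr > end)
				set_pmd(pmd, __pmd(0));	/* unmap stray 2M page */
		}
	}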
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index e9efdfd51c8d..c2871d3c71b6 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -711,7 +711,7 @@ do_rest: | |||
711 | stack_start = c_idle.idle->thread.sp; | 711 | stack_start = c_idle.idle->thread.sp; |
712 | 712 | ||
713 | /* start_ip had better be page-aligned! */ | 713 | /* start_ip had better be page-aligned! */ |
714 | start_ip = setup_trampoline(); | 714 | start_ip = trampoline_address(); |
715 | 715 | ||
716 | /* So we see what's up */ | 716 | /* So we see what's up */ |
717 | announce_cpu(cpu, apicid); | 717 | announce_cpu(cpu, apicid); |
@@ -721,6 +721,8 @@ do_rest: | |||
721 | * the targeted processor. | 721 | * the targeted processor. |
722 | */ | 722 | */ |
723 | 723 | ||
724 | printk(KERN_DEBUG "smpboot cpu %d: start_ip = %lx\n", cpu, start_ip); | ||
725 | |||
724 | atomic_set(&init_deasserted, 0); | 726 | atomic_set(&init_deasserted, 0); |
725 | 727 | ||
726 | if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { | 728 | if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { |
@@ -774,8 +776,8 @@ do_rest: | |||
774 | pr_debug("CPU%d: has booted.\n", cpu); | 776 | pr_debug("CPU%d: has booted.\n", cpu); |
775 | else { | 777 | else { |
776 | boot_error = 1; | 778 | boot_error = 1; |
777 | if (*((volatile unsigned char *)trampoline_base) | 779 | if (*(volatile u32 *)TRAMPOLINE_SYM(trampoline_status) |
778 | == 0xA5) | 780 | == 0xA5A5A5A5) |
779 | /* trampoline started but...? */ | 781 | /* trampoline started but...? */ |
780 | pr_err("CPU%d: Stuck ??\n", cpu); | 782 | pr_err("CPU%d: Stuck ??\n", cpu); |
781 | else | 783 | else |
@@ -801,7 +803,7 @@ do_rest: | |||
801 | } | 803 | } |
802 | 804 | ||
803 | /* mark "stuck" area as not stuck */ | 805 | /* mark "stuck" area as not stuck */ |
804 | *((volatile unsigned long *)trampoline_base) = 0; | 806 | *(volatile u32 *)TRAMPOLINE_SYM(trampoline_status) = 0; |
805 | 807 | ||
806 | if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { | 808 | if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { |
807 | /* | 809 | /* |
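The boot handshake now goes through a dedicated 32-bit trampoline_status word instead of scribbling over the first bytes of trampoline_data. A sketch of both sides, assuming the symbols from the trampoline sources below:

	/* AP side, in trampoline_{32,64}.S (real mode):
	 *	movl $0xA5A5A5A5, trampoline_status - r_base
	 *
	 * BSP side: read the marker through the relocated low-memory copy. */
	volatile u32 *tr_status = TRAMPOLINE_SYM(trampoline_status);

	if (*tr_status == 0xA5A5A5A5)
		pr_err("CPU%d: Stuck ??\n", cpu);	/* trampoline ran, then wedged */
	*tr_status = 0;					/* re-arm for the next CPU */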
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 938c8e10a19a..6515733a289d 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c | |||
@@ -73,7 +73,7 @@ static const struct stacktrace_ops save_stack_ops_nosched = { | |||
73 | */ | 73 | */ |
74 | void save_stack_trace(struct stack_trace *trace) | 74 | void save_stack_trace(struct stack_trace *trace) |
75 | { | 75 | { |
76 | dump_trace(current, NULL, NULL, &save_stack_ops, trace); | 76 | dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace); |
77 | if (trace->nr_entries < trace->max_entries) | 77 | if (trace->nr_entries < trace->max_entries) |
78 | trace->entries[trace->nr_entries++] = ULONG_MAX; | 78 | trace->entries[trace->nr_entries++] = ULONG_MAX; |
79 | } | 79 | } |
@@ -81,14 +81,14 @@ EXPORT_SYMBOL_GPL(save_stack_trace); | |||
81 | 81 | ||
82 | void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs) | 82 | void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs) |
83 | { | 83 | { |
84 | dump_trace(current, regs, NULL, &save_stack_ops, trace); | 84 | dump_trace(current, regs, NULL, 0, &save_stack_ops, trace); |
85 | if (trace->nr_entries < trace->max_entries) | 85 | if (trace->nr_entries < trace->max_entries) |
86 | trace->entries[trace->nr_entries++] = ULONG_MAX; | 86 | trace->entries[trace->nr_entries++] = ULONG_MAX; |
87 | } | 87 | } |
88 | 88 | ||
89 | void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) | 89 | void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) |
90 | { | 90 | { |
91 | dump_trace(tsk, NULL, NULL, &save_stack_ops_nosched, trace); | 91 | dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); |
92 | if (trace->nr_entries < trace->max_entries) | 92 | if (trace->nr_entries < trace->max_entries) |
93 | trace->entries[trace->nr_entries++] = ULONG_MAX; | 93 | trace->entries[trace->nr_entries++] = ULONG_MAX; |
94 | } | 94 | } |
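All three call sites pass 0 for the new fourth argument; the widened prototype (from <asm/stacktrace.h>, changed elsewhere in this series) is assumed to read:

	extern void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
			       unsigned long *stack, unsigned long bp,
			       const struct stacktrace_ops *ops, void *data);
	/* bp == 0 asks the unwinder to discover the frame pointer itself. */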
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 58de45ee08b6..7977f0cfe339 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c | |||
@@ -166,7 +166,7 @@ static void enable_step(struct task_struct *child, bool block) | |||
166 | * Make sure block stepping (BTF) is not enabled unless it should be. | 166 | * Make sure block stepping (BTF) is not enabled unless it should be. |
167 | * Note that we don't try to worry about any is_setting_trap_flag() | 167 | * Note that we don't try to worry about any is_setting_trap_flag() |
168 | * instructions after the first when using block stepping. | 168 | * instructions after the first when using block stepping. |
169 | * So noone should try to use debugger block stepping in a program | 169 | * So no one should try to use debugger block stepping in a program |
170 | * that uses user-mode single stepping itself. | 170 | * that uses user-mode single stepping itself. |
171 | */ | 171 | */ |
172 | if (enable_single_step(child) && block) { | 172 | if (enable_single_step(child) && block) { |
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 5f181742e8f9..abce34d5c79d 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
@@ -343,3 +343,4 @@ ENTRY(sys_call_table) | |||
343 | .long sys_name_to_handle_at | 343 | .long sys_name_to_handle_at |
344 | .long sys_open_by_handle_at | 344 | .long sys_open_by_handle_at |
345 | .long sys_clock_adjtime | 345 | .long sys_clock_adjtime |
346 | .long sys_syncfs | ||
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index 7e4515957a1c..8927486a4649 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c | |||
@@ -39,7 +39,7 @@ int __ref arch_register_cpu(int num) | |||
39 | /* | 39 | /* |
40 | * CPU0 cannot be offlined due to several | 40 | * CPU0 cannot be offlined due to several |
41 | * restrictions and assumptions in kernel. This basically | 41 | * restrictions and assumptions in kernel. This basically |
42 | * doesnt add a control file, one cannot attempt to offline | 42 | * doesn't add a control file, one cannot attempt to offline |
43 | * BSP. | 43 | * BSP. |
44 | * | 44 | * |
45 | * Also certain PCI quirks require not to enable hotplug control | 45 | * Also certain PCI quirks require not to enable hotplug control |
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index a375616d77f7..a91ae7709b49 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c | |||
@@ -2,39 +2,41 @@ | |||
2 | #include <linux/memblock.h> | 2 | #include <linux/memblock.h> |
3 | 3 | ||
4 | #include <asm/trampoline.h> | 4 | #include <asm/trampoline.h> |
5 | #include <asm/cacheflush.h> | ||
5 | #include <asm/pgtable.h> | 6 | #include <asm/pgtable.h> |
6 | 7 | ||
7 | #if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP) | 8 | unsigned char *x86_trampoline_base; |
8 | #define __trampinit | ||
9 | #define __trampinitdata | ||
10 | #else | ||
11 | #define __trampinit __cpuinit | ||
12 | #define __trampinitdata __cpuinitdata | ||
13 | #endif | ||
14 | 9 | ||
15 | /* ready for x86_64 and x86 */ | 10 | void __init setup_trampolines(void) |
16 | unsigned char *__trampinitdata trampoline_base; | ||
17 | |||
18 | void __init reserve_trampoline_memory(void) | ||
19 | { | 11 | { |
20 | phys_addr_t mem; | 12 | phys_addr_t mem; |
13 | size_t size = PAGE_ALIGN(x86_trampoline_end - x86_trampoline_start); | ||
21 | 14 | ||
22 | /* Has to be in very low memory so we can execute real-mode AP code. */ | 15 | /* Has to be in very low memory so we can execute real-mode AP code. */ |
23 | mem = memblock_find_in_range(0, 1<<20, TRAMPOLINE_SIZE, PAGE_SIZE); | 16 | mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); |
24 | if (mem == MEMBLOCK_ERROR) | 17 | if (mem == MEMBLOCK_ERROR) |
25 | panic("Cannot allocate trampoline\n"); | 18 | panic("Cannot allocate trampoline\n"); |
26 | 19 | ||
27 | trampoline_base = __va(mem); | 20 | x86_trampoline_base = __va(mem); |
28 | memblock_x86_reserve_range(mem, mem + TRAMPOLINE_SIZE, "TRAMPOLINE"); | 21 | memblock_x86_reserve_range(mem, mem + size, "TRAMPOLINE"); |
22 | |||
23 | printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", | ||
24 | x86_trampoline_base, (unsigned long long)mem, size); | ||
25 | |||
26 | memcpy(x86_trampoline_base, x86_trampoline_start, size); | ||
29 | } | 27 | } |
30 | 28 | ||
31 | /* | 29 | /* |
32 | * Currently trivial. Write the real->protected mode | 30 | * setup_trampolines() gets called very early, to guarantee the |
33 | * bootstrap into the page concerned. The caller | 31 | * availability of low memory. This is before the proper kernel page |
34 | * has made sure it's suitably aligned. | 32 | * tables are set up, so we cannot set page permissions in that |
33 | * function. Thus, we use an arch_initcall instead. | ||
35 | */ | 34 | */ |
36 | unsigned long __trampinit setup_trampoline(void) | 35 | static int __init configure_trampolines(void) |
37 | { | 36 | { |
38 | memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE); | 37 | size_t size = PAGE_ALIGN(x86_trampoline_end - x86_trampoline_start); |
39 | return virt_to_phys(trampoline_base); | 38 | |
39 | set_memory_x((unsigned long)x86_trampoline_base, size >> PAGE_SHIFT); | ||
40 | return 0; | ||
40 | } | 41 | } |
42 | arch_initcall(configure_trampolines); | ||
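setup_trampolines() copies the whole .x86_trampoline section down to x86_trampoline_base, so callers reach symbols inside the copy through TRAMPOLINE_SYM(). Its assumed shape, from <asm/trampoline.h> (not shown in this diff):

	extern unsigned char x86_trampoline_start[];	/* linker symbols from */
	extern unsigned char x86_trampoline_end[];	/* vmlinux.lds.S below  */
	extern unsigned char *x86_trampoline_base;

	/* Translate a linked trampoline symbol into its address within the
	 * relocated low-memory copy. */
	#define TRAMPOLINE_SYM(x)					\
		((void *)(x86_trampoline_base +				\
			  ((const char *)(x) - x86_trampoline_start)))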
diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S index 8508237e8e43..451c0a7ef7fd 100644 --- a/arch/x86/kernel/trampoline_32.S +++ b/arch/x86/kernel/trampoline_32.S | |||
@@ -32,9 +32,11 @@ | |||
32 | #include <asm/segment.h> | 32 | #include <asm/segment.h> |
33 | #include <asm/page_types.h> | 33 | #include <asm/page_types.h> |
34 | 34 | ||
35 | /* We can free up trampoline after bootup if cpu hotplug is not supported. */ | 35 | #ifdef CONFIG_SMP |
36 | __CPUINITRODATA | 36 | |
37 | .code16 | 37 | .section ".x86_trampoline","a" |
38 | .balign PAGE_SIZE | ||
39 | .code16 | ||
38 | 40 | ||
39 | ENTRY(trampoline_data) | 41 | ENTRY(trampoline_data) |
40 | r_base = . | 42 | r_base = . |
@@ -44,7 +46,7 @@ r_base = . | |||
44 | 46 | ||
45 | cli # We should be safe anyway | 47 | cli # We should be safe anyway |
46 | 48 | ||
47 | movl $0xA5A5A5A5, trampoline_data - r_base | 49 | movl $0xA5A5A5A5, trampoline_status - r_base |
48 | # write marker for master knows we're running | 50 | # write marker for master knows we're running |
49 | 51 | ||
50 | /* GDT tables in non default location kernel can be beyond 16MB and | 52 | /* GDT tables in non default location kernel can be beyond 16MB and |
@@ -72,5 +74,10 @@ boot_idt_descr: | |||
72 | .word 0 # idt limit = 0 | 74 | .word 0 # idt limit = 0 |
73 | .long 0 # idt base = 0L | 75 | .long 0 # idt base = 0L |
74 | 76 | ||
77 | ENTRY(trampoline_status) | ||
78 | .long 0 | ||
79 | |||
75 | .globl trampoline_end | 80 | .globl trampoline_end |
76 | trampoline_end: | 81 | trampoline_end: |
82 | |||
83 | #endif /* CONFIG_SMP */ | ||
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S index 075d130efcf9..09ff51799e96 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/kernel/trampoline_64.S | |||
@@ -32,13 +32,9 @@ | |||
32 | #include <asm/segment.h> | 32 | #include <asm/segment.h> |
33 | #include <asm/processor-flags.h> | 33 | #include <asm/processor-flags.h> |
34 | 34 | ||
35 | #ifdef CONFIG_ACPI_SLEEP | 35 | .section ".x86_trampoline","a" |
36 | .section .rodata, "a", @progbits | 36 | .balign PAGE_SIZE |
37 | #else | 37 | .code16 |
38 | /* We can free up the trampoline after bootup if cpu hotplug is not supported. */ | ||
39 | __CPUINITRODATA | ||
40 | #endif | ||
41 | .code16 | ||
42 | 38 | ||
43 | ENTRY(trampoline_data) | 39 | ENTRY(trampoline_data) |
44 | r_base = . | 40 | r_base = . |
@@ -50,7 +46,7 @@ r_base = . | |||
50 | mov %ax, %ss | 46 | mov %ax, %ss |
51 | 47 | ||
52 | 48 | ||
53 | movl $0xA5A5A5A5, trampoline_data - r_base | 49 | movl $0xA5A5A5A5, trampoline_status - r_base |
54 | # write marker for master knows we're running | 50 | # write marker for master knows we're running |
55 | 51 | ||
56 | # Setup stack | 52 | # Setup stack |
@@ -64,10 +60,13 @@ r_base = . | |||
64 | movzx %ax, %esi # Find the 32bit trampoline location | 60 | movzx %ax, %esi # Find the 32bit trampoline location |
65 | shll $4, %esi | 61 | shll $4, %esi |
66 | 62 | ||
67 | # Fixup the vectors | 63 | # Fixup the absolute vectors |
68 | addl %esi, startup_32_vector - r_base | 64 | leal (startup_32 - r_base)(%esi), %eax |
69 | addl %esi, startup_64_vector - r_base | 65 | movl %eax, startup_32_vector - r_base |
70 | addl %esi, tgdt + 2 - r_base # Fixup the gdt pointer | 66 | leal (startup_64 - r_base)(%esi), %eax |
67 | movl %eax, startup_64_vector - r_base | ||
68 | leal (tgdt - r_base)(%esi), %eax | ||
69 | movl %eax, (tgdt + 2 - r_base) | ||
71 | 70 | ||
72 | /* | 71 | /* |
73 | * GDT tables in non default location kernel can be beyond 16MB and | 72 | * GDT tables in non default location kernel can be beyond 16MB and |
@@ -129,6 +128,7 @@ no_longmode: | |||
129 | jmp no_longmode | 128 | jmp no_longmode |
130 | #include "verify_cpu.S" | 129 | #include "verify_cpu.S" |
131 | 130 | ||
131 | .balign 4 | ||
132 | # Careful these need to be in the same 64K segment as the above; | 132 | # Careful these need to be in the same 64K segment as the above; |
133 | tidt: | 133 | tidt: |
134 | .word 0 # idt limit = 0 | 134 | .word 0 # idt limit = 0 |
@@ -156,6 +156,10 @@ startup_64_vector: | |||
156 | .long startup_64 - r_base | 156 | .long startup_64 - r_base |
157 | .word __KERNEL_CS, 0 | 157 | .word __KERNEL_CS, 0 |
158 | 158 | ||
159 | .balign 4 | ||
160 | ENTRY(trampoline_status) | ||
161 | .long 0 | ||
162 | |||
159 | trampoline_stack: | 163 | trampoline_stack: |
160 | .org 0x1000 | 164 | .org 0x1000 |
161 | trampoline_stack_end: | 165 | trampoline_stack_end: |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index ffe5755caa8b..9335bf7dd2e7 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -427,7 +427,7 @@ unsigned long native_calibrate_tsc(void) | |||
427 | * the delta to the previous read. We keep track of the min | 427 | * the delta to the previous read. We keep track of the min |
428 | * and max values of that delta. The delta is mostly defined | 428 | * and max values of that delta. The delta is mostly defined |
429 | * by the IO time of the PIT access, so we can detect when a | 429 | * by the IO time of the PIT access, so we can detect when a |
430 | * SMI/SMM disturbance happend between the two reads. If the | 430 | * SMI/SMM disturbance happened between the two reads. If the |
431 | * maximum time is significantly larger than the minimum time, | 431 | * maximum time is significantly larger than the minimum time, |
432 | * then we discard the result and have another try. | 432 | * then we discard the result and have another try. |
433 | * | 433 | * |
@@ -900,7 +900,7 @@ static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work); | |||
900 | * timer based, instead of loop based, we don't block the boot | 900 | * timer based, instead of loop based, we don't block the boot |
901 | * process while this longer calibration is done. | 901 | * process while this longer calibration is done. |
902 | * | 902 | * |
903 | * If there are any calibration anomolies (too many SMIs, etc), | 903 | * If there are any calibration anomalies (too many SMIs, etc), |
904 | * or the refined calibration is off by 1% of the fast early | 904 | * or the refined calibration is off by 1% of the fast early |
905 | * calibration, we throw out the new calibration and use the | 905 | * calibration, we throw out the new calibration and use the |
906 | * early calibration. | 906 | * early calibration. |
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S index 0edefc19a113..b9242bacbe59 100644 --- a/arch/x86/kernel/verify_cpu.S +++ b/arch/x86/kernel/verify_cpu.S | |||
@@ -18,7 +18,7 @@ | |||
18 | * This file is expected to run in 32bit code. Currently: | 18 | * This file is expected to run in 32bit code. Currently: |
19 | * | 19 | * |
20 | * arch/x86/boot/compressed/head_64.S: Boot cpu verification | 20 | * arch/x86/boot/compressed/head_64.S: Boot cpu verification |
21 | * arch/x86/kernel/trampoline_64.S: secondary processor verfication | 21 | * arch/x86/kernel/trampoline_64.S: secondary processor verification |
22 | * arch/x86/kernel/head_32.S: processor startup | 22 | * arch/x86/kernel/head_32.S: processor startup |
23 | * | 23 | * |
24 | * verify_cpu, returns the status of longmode and SSE in register %eax. | 24 | * verify_cpu, returns the status of longmode and SSE in register %eax. |
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 0381e1f3baed..624a2016198e 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -231,7 +231,7 @@ SECTIONS | |||
231 | * output PHDR, so the next output section - .init.text - should | 231 | * output PHDR, so the next output section - .init.text - should |
232 | * start another segment - init. | 232 | * start another segment - init. |
233 | */ | 233 | */ |
234 | PERCPU_VADDR(0, :percpu) | 234 | PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu) |
235 | #endif | 235 | #endif |
236 | 236 | ||
237 | INIT_TEXT_SECTION(PAGE_SIZE) | 237 | INIT_TEXT_SECTION(PAGE_SIZE) |
@@ -241,6 +241,18 @@ SECTIONS | |||
241 | 241 | ||
242 | INIT_DATA_SECTION(16) | 242 | INIT_DATA_SECTION(16) |
243 | 243 | ||
244 | /* | ||
245 | * Code and data for a variety of low-level trampolines, to be | ||
246 | * copied into base memory (< 1 MiB) during initialization. | ||
247 | * Since it is copied early, the main copy can be discarded | ||
248 | * afterwards. | ||
249 | */ | ||
250 | .x86_trampoline : AT(ADDR(.x86_trampoline) - LOAD_OFFSET) { | ||
251 | x86_trampoline_start = .; | ||
252 | *(.x86_trampoline) | ||
253 | x86_trampoline_end = .; | ||
254 | } | ||
255 | |||
244 | .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { | 256 | .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { |
245 | __x86_cpu_dev_start = .; | 257 | __x86_cpu_dev_start = .; |
246 | *(.x86_cpu_dev.init) | 258 | *(.x86_cpu_dev.init) |
@@ -292,6 +304,7 @@ SECTIONS | |||
292 | *(.iommu_table) | 304 | *(.iommu_table) |
293 | __iommu_table_end = .; | 305 | __iommu_table_end = .; |
294 | } | 306 | } |
307 | |||
295 | . = ALIGN(8); | 308 | . = ALIGN(8); |
296 | /* | 309 | /* |
297 | * .exit.text is discard at runtime, not link time, to deal with | 310 | * .exit.text is discard at runtime, not link time, to deal with |
@@ -306,7 +319,7 @@ SECTIONS | |||
306 | } | 319 | } |
307 | 320 | ||
308 | #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) | 321 | #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) |
309 | PERCPU(PAGE_SIZE) | 322 | PERCPU(INTERNODE_CACHE_BYTES, PAGE_SIZE) |
310 | #endif | 323 | #endif |
311 | 324 | ||
312 | . = ALIGN(PAGE_SIZE); | 325 | . = ALIGN(PAGE_SIZE); |
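Both percpu section macros gained a leading cacheline argument in this series; their assumed shapes (include/asm-generic/vmlinux.lds.h, not in this diff):

	PERCPU_VADDR(cacheline, vaddr, phdr)	/* here: PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu) */
	PERCPU(cacheline, align)		/* here: PERCPU(INTERNODE_CACHE_BYTES, PAGE_SIZE) */

The cacheline parameter lets the percpu subsections pad to the internode cacheline size where that exceeds a single cacheline, instead of hardcoding the alignment.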
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 547128546cc3..a3911343976b 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c | |||
@@ -53,7 +53,7 @@ void __sanitize_i387_state(struct task_struct *tsk) | |||
53 | 53 | ||
54 | /* | 54 | /* |
55 | * None of the feature bits are in init state. So nothing else | 55 | * None of the feature bits are in init state. So nothing else |
56 | * to do for us, as the memory layout is upto date. | 56 | * to do for us, as the memory layout is up to date. |
57 | */ | 57 | */ |
58 | if ((xstate_bv & pcntxt_mask) == pcntxt_mask) | 58 | if ((xstate_bv & pcntxt_mask) == pcntxt_mask) |
59 | return; | 59 | return; |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index caf966781d25..0ad47b819a8b 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -76,6 +76,7 @@ | |||
76 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ | 76 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ |
77 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ | 77 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ |
78 | /* Misc flags */ | 78 | /* Misc flags */ |
79 | #define VendorSpecific (1<<22) /* Vendor specific instruction */ | ||
79 | #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */ | 80 | #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */ |
80 | #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */ | 81 | #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */ |
81 | #define Undefined (1<<25) /* No Such Instruction */ | 82 | #define Undefined (1<<25) /* No Such Instruction */ |
@@ -877,7 +878,8 @@ static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, | |||
877 | if (selector & 1 << 2) { | 878 | if (selector & 1 << 2) { |
878 | struct desc_struct desc; | 879 | struct desc_struct desc; |
879 | memset (dt, 0, sizeof *dt); | 880 | memset (dt, 0, sizeof *dt); |
880 | if (!ops->get_cached_descriptor(&desc, VCPU_SREG_LDTR, ctxt->vcpu)) | 881 | if (!ops->get_cached_descriptor(&desc, NULL, VCPU_SREG_LDTR, |
882 | ctxt->vcpu)) | ||
881 | return; | 883 | return; |
882 | 884 | ||
883 | dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */ | 885 | dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */ |
@@ -929,6 +931,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
929 | return ret; | 931 | return ret; |
930 | } | 932 | } |
931 | 933 | ||
934 | /* Does not support long mode */ | ||
932 | static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | 935 | static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, |
933 | struct x86_emulate_ops *ops, | 936 | struct x86_emulate_ops *ops, |
934 | u16 selector, int seg) | 937 | u16 selector, int seg) |
@@ -1040,7 +1043,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1040 | } | 1043 | } |
1041 | load: | 1044 | load: |
1042 | ops->set_segment_selector(selector, seg, ctxt->vcpu); | 1045 | ops->set_segment_selector(selector, seg, ctxt->vcpu); |
1043 | ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu); | 1046 | ops->set_cached_descriptor(&seg_desc, 0, seg, ctxt->vcpu); |
1044 | return X86EMUL_CONTINUE; | 1047 | return X86EMUL_CONTINUE; |
1045 | exception: | 1048 | exception: |
1046 | emulate_exception(ctxt, err_vec, err_code, true); | 1049 | emulate_exception(ctxt, err_vec, err_code, true); |
@@ -1560,7 +1563,7 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, | |||
1560 | struct desc_struct *ss) | 1563 | struct desc_struct *ss) |
1561 | { | 1564 | { |
1562 | memset(cs, 0, sizeof(struct desc_struct)); | 1565 | memset(cs, 0, sizeof(struct desc_struct)); |
1563 | ops->get_cached_descriptor(cs, VCPU_SREG_CS, ctxt->vcpu); | 1566 | ops->get_cached_descriptor(cs, NULL, VCPU_SREG_CS, ctxt->vcpu); |
1564 | memset(ss, 0, sizeof(struct desc_struct)); | 1567 | memset(ss, 0, sizeof(struct desc_struct)); |
1565 | 1568 | ||
1566 | cs->l = 0; /* will be adjusted later */ | 1569 | cs->l = 0; /* will be adjusted later */ |
@@ -1607,9 +1610,9 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1607 | cs.d = 0; | 1610 | cs.d = 0; |
1608 | cs.l = 1; | 1611 | cs.l = 1; |
1609 | } | 1612 | } |
1610 | ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu); | 1613 | ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); |
1611 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); | 1614 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); |
1612 | ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu); | 1615 | ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); |
1613 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); | 1616 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); |
1614 | 1617 | ||
1615 | c->regs[VCPU_REGS_RCX] = c->eip; | 1618 | c->regs[VCPU_REGS_RCX] = c->eip; |
@@ -1679,9 +1682,9 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1679 | cs.l = 1; | 1682 | cs.l = 1; |
1680 | } | 1683 | } |
1681 | 1684 | ||
1682 | ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu); | 1685 | ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); |
1683 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); | 1686 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); |
1684 | ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu); | 1687 | ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); |
1685 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); | 1688 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); |
1686 | 1689 | ||
1687 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data); | 1690 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data); |
@@ -1736,9 +1739,9 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1736 | cs_sel |= SELECTOR_RPL_MASK; | 1739 | cs_sel |= SELECTOR_RPL_MASK; |
1737 | ss_sel |= SELECTOR_RPL_MASK; | 1740 | ss_sel |= SELECTOR_RPL_MASK; |
1738 | 1741 | ||
1739 | ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu); | 1742 | ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); |
1740 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); | 1743 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); |
1741 | ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu); | 1744 | ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); |
1742 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); | 1745 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); |
1743 | 1746 | ||
1744 | c->eip = c->regs[VCPU_REGS_RDX]; | 1747 | c->eip = c->regs[VCPU_REGS_RDX]; |
@@ -1764,24 +1767,28 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, | |||
1764 | u16 port, u16 len) | 1767 | u16 port, u16 len) |
1765 | { | 1768 | { |
1766 | struct desc_struct tr_seg; | 1769 | struct desc_struct tr_seg; |
1770 | u32 base3; | ||
1767 | int r; | 1771 | int r; |
1768 | u16 io_bitmap_ptr; | 1772 | u16 io_bitmap_ptr, perm, bit_idx = port & 0x7; |
1769 | u8 perm, bit_idx = port & 0x7; | ||
1770 | unsigned mask = (1 << len) - 1; | 1773 | unsigned mask = (1 << len) - 1; |
1774 | unsigned long base; | ||
1771 | 1775 | ||
1772 | ops->get_cached_descriptor(&tr_seg, VCPU_SREG_TR, ctxt->vcpu); | 1776 | ops->get_cached_descriptor(&tr_seg, &base3, VCPU_SREG_TR, ctxt->vcpu); |
1773 | if (!tr_seg.p) | 1777 | if (!tr_seg.p) |
1774 | return false; | 1778 | return false; |
1775 | if (desc_limit_scaled(&tr_seg) < 103) | 1779 | if (desc_limit_scaled(&tr_seg) < 103) |
1776 | return false; | 1780 | return false; |
1777 | r = ops->read_std(get_desc_base(&tr_seg) + 102, &io_bitmap_ptr, 2, | 1781 | base = get_desc_base(&tr_seg); |
1778 | ctxt->vcpu, NULL); | 1782 | #ifdef CONFIG_X86_64 |
1783 | base |= ((u64)base3) << 32; | ||
1784 | #endif | ||
1785 | r = ops->read_std(base + 102, &io_bitmap_ptr, 2, ctxt->vcpu, NULL); | ||
1779 | if (r != X86EMUL_CONTINUE) | 1786 | if (r != X86EMUL_CONTINUE) |
1780 | return false; | 1787 | return false; |
1781 | if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg)) | 1788 | if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg)) |
1782 | return false; | 1789 | return false; |
1783 | r = ops->read_std(get_desc_base(&tr_seg) + io_bitmap_ptr + port/8, | 1790 | r = ops->read_std(base + io_bitmap_ptr + port/8, &perm, 2, ctxt->vcpu, |
1784 | &perm, 1, ctxt->vcpu, NULL); | 1791 | NULL); |
1785 | if (r != X86EMUL_CONTINUE) | 1792 | if (r != X86EMUL_CONTINUE) |
1786 | return false; | 1793 | return false; |
1787 | if ((perm >> bit_idx) & mask) | 1794 | if ((perm >> bit_idx) & mask) |
@@ -2126,7 +2133,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2126 | } | 2133 | } |
2127 | 2134 | ||
2128 | ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu); | 2135 | ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu); |
2129 | ops->set_cached_descriptor(&next_tss_desc, VCPU_SREG_TR, ctxt->vcpu); | 2136 | ops->set_cached_descriptor(&next_tss_desc, 0, VCPU_SREG_TR, ctxt->vcpu); |
2130 | ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu); | 2137 | ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu); |
2131 | 2138 | ||
2132 | if (has_error_code) { | 2139 | if (has_error_code) { |
@@ -2365,7 +2372,8 @@ static struct group_dual group7 = { { | |||
2365 | D(SrcMem16 | ModRM | Mov | Priv), | 2372 | D(SrcMem16 | ModRM | Mov | Priv), |
2366 | D(SrcMem | ModRM | ByteOp | Priv | NoAccess), | 2373 | D(SrcMem | ModRM | ByteOp | Priv | NoAccess), |
2367 | }, { | 2374 | }, { |
2368 | D(SrcNone | ModRM | Priv), N, N, D(SrcNone | ModRM | Priv), | 2375 | D(SrcNone | ModRM | Priv | VendorSpecific), N, |
2376 | N, D(SrcNone | ModRM | Priv | VendorSpecific), | ||
2369 | D(SrcNone | ModRM | DstMem | Mov), N, | 2377 | D(SrcNone | ModRM | DstMem | Mov), N, |
2370 | D(SrcMem16 | ModRM | Mov | Priv), N, | 2378 | D(SrcMem16 | ModRM | Mov | Priv), N, |
2371 | } }; | 2379 | } }; |
@@ -2489,7 +2497,7 @@ static struct opcode opcode_table[256] = { | |||
2489 | static struct opcode twobyte_table[256] = { | 2497 | static struct opcode twobyte_table[256] = { |
2490 | /* 0x00 - 0x0F */ | 2498 | /* 0x00 - 0x0F */ |
2491 | N, GD(0, &group7), N, N, | 2499 | N, GD(0, &group7), N, N, |
2492 | N, D(ImplicitOps), D(ImplicitOps | Priv), N, | 2500 | N, D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv), N, |
2493 | D(ImplicitOps | Priv), D(ImplicitOps | Priv), N, N, | 2501 | D(ImplicitOps | Priv), D(ImplicitOps | Priv), N, N, |
2494 | N, D(ImplicitOps | ModRM), N, N, | 2502 | N, D(ImplicitOps | ModRM), N, N, |
2495 | /* 0x10 - 0x1F */ | 2503 | /* 0x10 - 0x1F */ |
@@ -2502,7 +2510,8 @@ static struct opcode twobyte_table[256] = { | |||
2502 | /* 0x30 - 0x3F */ | 2510 | /* 0x30 - 0x3F */ |
2503 | D(ImplicitOps | Priv), I(ImplicitOps, em_rdtsc), | 2511 | D(ImplicitOps | Priv), I(ImplicitOps, em_rdtsc), |
2504 | D(ImplicitOps | Priv), N, | 2512 | D(ImplicitOps | Priv), N, |
2505 | D(ImplicitOps), D(ImplicitOps | Priv), N, N, | 2513 | D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv | VendorSpecific), |
2514 | N, N, | ||
2506 | N, N, N, N, N, N, N, N, | 2515 | N, N, N, N, N, N, N, N, |
2507 | /* 0x40 - 0x4F */ | 2516 | /* 0x40 - 0x4F */ |
2508 | X16(D(DstReg | SrcMem | ModRM | Mov)), | 2517 | X16(D(DstReg | SrcMem | ModRM | Mov)), |
@@ -2741,6 +2750,9 @@ done_prefixes: | |||
2741 | if (c->d == 0 || (c->d & Undefined)) | 2750 | if (c->d == 0 || (c->d & Undefined)) |
2742 | return -1; | 2751 | return -1; |
2743 | 2752 | ||
2753 | if (!(c->d & VendorSpecific) && ctxt->only_vendor_specific_insn) | ||
2754 | return -1; | ||
2755 | |||
2744 | if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack)) | 2756 | if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack)) |
2745 | c->op_bytes = 8; | 2757 | c->op_bytes = 8; |
2746 | 2758 | ||
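The VendorSpecific flag pairs with the new only_vendor_specific_insn check above: when emulation is entered through a #UD intercept, only opcodes whose behaviour genuinely differs between CPU vendors (the syscall/sysenter families, for instance) should be emulated. A sketch of the assumed caller side (arch/x86/kvm/x86.c, not in this diff):

	/* Set when the guest actually executed an undefined opcode, so the
	 * decoder rejects everything not flagged VendorSpecific. */
	ctxt->only_vendor_specific_insn =
		!!(emulation_type & EMULTYPE_TRAP_UD);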
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 3cece05e4ac4..19fe855e7953 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -62,9 +62,6 @@ static void pic_unlock(struct kvm_pic *s) | |||
62 | } | 62 | } |
63 | 63 | ||
64 | if (!found) | 64 | if (!found) |
65 | found = s->kvm->bsp_vcpu; | ||
66 | |||
67 | if (!found) | ||
68 | return; | 65 | return; |
69 | 66 | ||
70 | kvm_make_request(KVM_REQ_EVENT, found); | 67 | kvm_make_request(KVM_REQ_EVENT, found); |
@@ -75,7 +72,6 @@ static void pic_unlock(struct kvm_pic *s) | |||
75 | static void pic_clear_isr(struct kvm_kpic_state *s, int irq) | 72 | static void pic_clear_isr(struct kvm_kpic_state *s, int irq) |
76 | { | 73 | { |
77 | s->isr &= ~(1 << irq); | 74 | s->isr &= ~(1 << irq); |
78 | s->isr_ack |= (1 << irq); | ||
79 | if (s != &s->pics_state->pics[0]) | 75 | if (s != &s->pics_state->pics[0]) |
80 | irq += 8; | 76 | irq += 8; |
81 | /* | 77 | /* |
@@ -89,16 +85,6 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq) | |||
89 | pic_lock(s->pics_state); | 85 | pic_lock(s->pics_state); |
90 | } | 86 | } |
91 | 87 | ||
92 | void kvm_pic_clear_isr_ack(struct kvm *kvm) | ||
93 | { | ||
94 | struct kvm_pic *s = pic_irqchip(kvm); | ||
95 | |||
96 | pic_lock(s); | ||
97 | s->pics[0].isr_ack = 0xff; | ||
98 | s->pics[1].isr_ack = 0xff; | ||
99 | pic_unlock(s); | ||
100 | } | ||
101 | |||
102 | /* | 88 | /* |
103 | * set irq level. If an edge is detected, then the IRR is set to 1 | 89 | * set irq level. If an edge is detected, then the IRR is set to 1 |
104 | */ | 90 | */ |
@@ -281,7 +267,6 @@ void kvm_pic_reset(struct kvm_kpic_state *s) | |||
281 | s->irr = 0; | 267 | s->irr = 0; |
282 | s->imr = 0; | 268 | s->imr = 0; |
283 | s->isr = 0; | 269 | s->isr = 0; |
284 | s->isr_ack = 0xff; | ||
285 | s->priority_add = 0; | 270 | s->priority_add = 0; |
286 | s->irq_base = 0; | 271 | s->irq_base = 0; |
287 | s->read_reg_select = 0; | 272 | s->read_reg_select = 0; |
@@ -545,15 +530,11 @@ static int picdev_read(struct kvm_io_device *this, | |||
545 | */ | 530 | */ |
546 | static void pic_irq_request(struct kvm *kvm, int level) | 531 | static void pic_irq_request(struct kvm *kvm, int level) |
547 | { | 532 | { |
548 | struct kvm_vcpu *vcpu = kvm->bsp_vcpu; | ||
549 | struct kvm_pic *s = pic_irqchip(kvm); | 533 | struct kvm_pic *s = pic_irqchip(kvm); |
550 | int irq = pic_get_irq(&s->pics[0]); | ||
551 | 534 | ||
552 | s->output = level; | 535 | if (!s->output) |
553 | if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { | ||
554 | s->pics[0].isr_ack &= ~(1 << irq); | ||
555 | s->wakeup_needed = true; | 536 | s->wakeup_needed = true; |
556 | } | 537 | s->output = level; |
557 | } | 538 | } |
558 | 539 | ||
559 | static const struct kvm_io_device_ops picdev_ops = { | 540 | static const struct kvm_io_device_ops picdev_ops = { |
@@ -575,8 +556,6 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) | |||
575 | s->pics[1].elcr_mask = 0xde; | 556 | s->pics[1].elcr_mask = 0xde; |
576 | s->pics[0].pics_state = s; | 557 | s->pics[0].pics_state = s; |
577 | s->pics[1].pics_state = s; | 558 | s->pics[1].pics_state = s; |
578 | s->pics[0].isr_ack = 0xff; | ||
579 | s->pics[1].isr_ack = 0xff; | ||
580 | 559 | ||
581 | /* | 560 | /* |
582 | * Initialize PIO device | 561 | * Initialize PIO device |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 93cf9d0d3653..2b2255b1f04b 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -417,10 +417,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
417 | case APIC_DM_INIT: | 417 | case APIC_DM_INIT: |
418 | if (level) { | 418 | if (level) { |
419 | result = 1; | 419 | result = 1; |
420 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) | ||
421 | printk(KERN_DEBUG | ||
422 | "INIT on a runnable vcpu %d\n", | ||
423 | vcpu->vcpu_id); | ||
424 | vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; | 420 | vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; |
425 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 421 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
426 | kvm_vcpu_kick(vcpu); | 422 | kvm_vcpu_kick(vcpu); |
@@ -875,8 +871,8 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu) | |||
875 | 871 | ||
876 | hrtimer_cancel(&vcpu->arch.apic->lapic_timer.timer); | 872 | hrtimer_cancel(&vcpu->arch.apic->lapic_timer.timer); |
877 | 873 | ||
878 | if (vcpu->arch.apic->regs_page) | 874 | if (vcpu->arch.apic->regs) |
879 | __free_page(vcpu->arch.apic->regs_page); | 875 | free_page((unsigned long)vcpu->arch.apic->regs); |
880 | 876 | ||
881 | kfree(vcpu->arch.apic); | 877 | kfree(vcpu->arch.apic); |
882 | } | 878 | } |
@@ -1065,13 +1061,12 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) | |||
1065 | 1061 | ||
1066 | vcpu->arch.apic = apic; | 1062 | vcpu->arch.apic = apic; |
1067 | 1063 | ||
1068 | apic->regs_page = alloc_page(GFP_KERNEL|__GFP_ZERO); | 1064 | apic->regs = (void *)get_zeroed_page(GFP_KERNEL); |
1069 | if (apic->regs_page == NULL) { | 1065 | if (!apic->regs) { |
1070 | printk(KERN_ERR "malloc apic regs error for vcpu %x\n", | 1066 | printk(KERN_ERR "malloc apic regs error for vcpu %x\n", |
1071 | vcpu->vcpu_id); | 1067 | vcpu->vcpu_id); |
1072 | goto nomem_free_apic; | 1068 | goto nomem_free_apic; |
1073 | } | 1069 | } |
1074 | apic->regs = page_address(apic->regs_page); | ||
1075 | apic->vcpu = vcpu; | 1070 | apic->vcpu = vcpu; |
1076 | 1071 | ||
1077 | hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, | 1072 | hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, |
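The APIC register page becomes a bare kernel-virtual allocation. A sketch of the two equivalent pairings (both from <linux/gfp.h>), contrasting the old struct-page round-trip with the new direct form:

	#include <linux/gfp.h>

	/* Old pattern: allocate a struct page, then derive the address;
	 * release with __free_page(page). */
	static void *regs_alloc_old(void)
	{
		struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);

		return page ? page_address(page) : NULL;
	}

	/* New pattern: get the zeroed virtual address directly;
	 * release with free_page((unsigned long)regs). */
	static void *regs_alloc_new(void)
	{
		return (void *)get_zeroed_page(GFP_KERNEL);
	}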
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index f5fe32c5edad..52c9e6b9e725 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -13,7 +13,6 @@ struct kvm_lapic { | |||
13 | u32 divide_count; | 13 | u32 divide_count; |
14 | struct kvm_vcpu *vcpu; | 14 | struct kvm_vcpu *vcpu; |
15 | bool irr_pending; | 15 | bool irr_pending; |
16 | struct page *regs_page; | ||
17 | void *regs; | 16 | void *regs; |
18 | gpa_t vapic_addr; | 17 | gpa_t vapic_addr; |
19 | struct page *vapic_page; | 18 | struct page *vapic_page; |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f02b8edc3d44..22fae7593ee7 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -111,9 +111,6 @@ module_param(oos_shadow, bool, 0644); | |||
111 | #define PT64_LEVEL_SHIFT(level) \ | 111 | #define PT64_LEVEL_SHIFT(level) \ |
112 | (PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS) | 112 | (PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS) |
113 | 113 | ||
114 | #define PT64_LEVEL_MASK(level) \ | ||
115 | (((1ULL << PT64_LEVEL_BITS) - 1) << PT64_LEVEL_SHIFT(level)) | ||
116 | |||
117 | #define PT64_INDEX(address, level)\ | 114 | #define PT64_INDEX(address, level)\ |
118 | (((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1)) | 115 | (((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1)) |
119 | 116 | ||
@@ -123,8 +120,6 @@ module_param(oos_shadow, bool, 0644); | |||
123 | #define PT32_LEVEL_SHIFT(level) \ | 120 | #define PT32_LEVEL_SHIFT(level) \ |
124 | (PAGE_SHIFT + (level - 1) * PT32_LEVEL_BITS) | 121 | (PAGE_SHIFT + (level - 1) * PT32_LEVEL_BITS) |
125 | 122 | ||
126 | #define PT32_LEVEL_MASK(level) \ | ||
127 | (((1ULL << PT32_LEVEL_BITS) - 1) << PT32_LEVEL_SHIFT(level)) | ||
128 | #define PT32_LVL_OFFSET_MASK(level) \ | 123 | #define PT32_LVL_OFFSET_MASK(level) \ |
129 | (PT32_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \ | 124 | (PT32_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \ |
130 | * PT32_LEVEL_BITS))) - 1)) | 125 | * PT32_LEVEL_BITS))) - 1)) |
@@ -379,15 +374,15 @@ static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc, | |||
379 | static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, | 374 | static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, |
380 | int min) | 375 | int min) |
381 | { | 376 | { |
382 | struct page *page; | 377 | void *page; |
383 | 378 | ||
384 | if (cache->nobjs >= min) | 379 | if (cache->nobjs >= min) |
385 | return 0; | 380 | return 0; |
386 | while (cache->nobjs < ARRAY_SIZE(cache->objects)) { | 381 | while (cache->nobjs < ARRAY_SIZE(cache->objects)) { |
387 | page = alloc_page(GFP_KERNEL); | 382 | page = (void *)__get_free_page(GFP_KERNEL); |
388 | if (!page) | 383 | if (!page) |
389 | return -ENOMEM; | 384 | return -ENOMEM; |
390 | cache->objects[cache->nobjs++] = page_address(page); | 385 | cache->objects[cache->nobjs++] = page; |
391 | } | 386 | } |
392 | return 0; | 387 | return 0; |
393 | } | 388 | } |
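mmu_topup_memory_cache_page() exists so allocation happens in a sleepable context before mmu_lock is taken; inside the lock, objects are popped from the cache and cannot fail. The hunk above only changes the cached handle from struct page * to a virtual address. A self-contained sketch of the two-phase pattern (structure and names are illustrative):

#include <linux/gfp.h>

#define CACHE_OBJS 4

struct page_cache {
        int nobjs;
        void *objects[CACHE_OBJS];
};

/* Phase 1: fill the cache up to @min entries; may sleep (GFP_KERNEL). */
static int cache_topup(struct page_cache *mc, int min)
{
        void *page;

        if (mc->nobjs >= min)
                return 0;
        while (mc->nobjs < CACHE_OBJS) {
                page = (void *)__get_free_page(GFP_KERNEL);
                if (!page)
                        return -ENOMEM;
                mc->objects[mc->nobjs++] = page;
        }
        return 0;
}

/* Phase 2: pop an object with locks held; guaranteed to succeed. */
static void *cache_alloc(struct page_cache *mc)
{
        return mc->objects[--mc->nobjs];
}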
@@ -554,13 +549,23 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn) | |||
554 | return ret; | 549 | return ret; |
555 | } | 550 | } |
556 | 551 | ||
557 | static bool mapping_level_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t large_gfn) | 552 | static struct kvm_memory_slot * |
553 | gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn, | ||
554 | bool no_dirty_log) | ||
558 | { | 555 | { |
559 | struct kvm_memory_slot *slot; | 556 | struct kvm_memory_slot *slot; |
560 | slot = gfn_to_memslot(vcpu->kvm, large_gfn); | 557 | |
561 | if (slot && slot->dirty_bitmap) | 558 | slot = gfn_to_memslot(vcpu->kvm, gfn); |
562 | return true; | 559 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID || |
563 | return false; | 560 | (no_dirty_log && slot->dirty_bitmap)) |
561 | slot = NULL; | ||
562 | |||
563 | return slot; | ||
564 | } | ||
565 | |||
566 | static bool mapping_level_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t large_gfn) | ||
567 | { | ||
568 | return !gfn_to_memslot_dirty_bitmap(vcpu, large_gfn, true); ||
564 | } | 569 | } |
565 | 570 | ||
566 | static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) | 571 | static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) |
@@ -1032,9 +1037,9 @@ static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
1032 | ASSERT(is_empty_shadow_page(sp->spt)); | 1037 | ASSERT(is_empty_shadow_page(sp->spt)); |
1033 | hlist_del(&sp->hash_link); | 1038 | hlist_del(&sp->hash_link); |
1034 | list_del(&sp->link); | 1039 | list_del(&sp->link); |
1035 | __free_page(virt_to_page(sp->spt)); | 1040 | free_page((unsigned long)sp->spt); |
1036 | if (!sp->role.direct) | 1041 | if (!sp->role.direct) |
1037 | __free_page(virt_to_page(sp->gfns)); | 1042 | free_page((unsigned long)sp->gfns); |
1038 | kmem_cache_free(mmu_page_header_cache, sp); | 1043 | kmem_cache_free(mmu_page_header_cache, sp); |
1039 | kvm_mod_used_mmu_pages(kvm, -1); | 1044 | kvm_mod_used_mmu_pages(kvm, -1); |
1040 | } | 1045 | } |
@@ -1199,6 +1204,13 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva) | |||
1199 | { | 1204 | { |
1200 | } | 1205 | } |
1201 | 1206 | ||
1207 | static void nonpaging_update_pte(struct kvm_vcpu *vcpu, | ||
1208 | struct kvm_mmu_page *sp, u64 *spte, | ||
1209 | const void *pte, unsigned long mmu_seq) | ||
1210 | { | ||
1211 | WARN_ON(1); | ||
1212 | } | ||
1213 | |||
1202 | #define KVM_PAGE_ARRAY_NR 16 | 1214 | #define KVM_PAGE_ARRAY_NR 16 |
1203 | 1215 | ||
1204 | struct kvm_mmu_pages { | 1216 | struct kvm_mmu_pages { |
@@ -2150,26 +2162,13 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) | |||
2150 | { | 2162 | { |
2151 | } | 2163 | } |
2152 | 2164 | ||
2153 | static struct kvm_memory_slot * | ||
2154 | pte_prefetch_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn, bool no_dirty_log) | ||
2155 | { | ||
2156 | struct kvm_memory_slot *slot; | ||
2157 | |||
2158 | slot = gfn_to_memslot(vcpu->kvm, gfn); | ||
2159 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID || | ||
2160 | (no_dirty_log && slot->dirty_bitmap)) | ||
2161 | slot = NULL; | ||
2162 | |||
2163 | return slot; | ||
2164 | } | ||
2165 | |||
2166 | static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, | 2165 | static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, |
2167 | bool no_dirty_log) | 2166 | bool no_dirty_log) |
2168 | { | 2167 | { |
2169 | struct kvm_memory_slot *slot; | 2168 | struct kvm_memory_slot *slot; |
2170 | unsigned long hva; | 2169 | unsigned long hva; |
2171 | 2170 | ||
2172 | slot = pte_prefetch_gfn_to_memslot(vcpu, gfn, no_dirty_log); | 2171 | slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log); |
2173 | if (!slot) { | 2172 | if (!slot) { |
2174 | get_page(bad_page); | 2173 | get_page(bad_page); |
2175 | return page_to_pfn(bad_page); | 2174 | return page_to_pfn(bad_page); |
@@ -2190,7 +2189,7 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, | |||
2190 | gfn_t gfn; | 2189 | gfn_t gfn; |
2191 | 2190 | ||
2192 | gfn = kvm_mmu_page_get_gfn(sp, start - sp->spt); | 2191 | gfn = kvm_mmu_page_get_gfn(sp, start - sp->spt); |
2193 | if (!pte_prefetch_gfn_to_memslot(vcpu, gfn, access & ACC_WRITE_MASK)) | 2192 | if (!gfn_to_memslot_dirty_bitmap(vcpu, gfn, access & ACC_WRITE_MASK)) |
2194 | return -1; | 2193 | return -1; |
2195 | 2194 | ||
2196 | ret = gfn_to_page_many_atomic(vcpu->kvm, gfn, pages, end - start); | 2195 | ret = gfn_to_page_many_atomic(vcpu->kvm, gfn, pages, end - start); |
@@ -2804,6 +2803,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu, | |||
2804 | context->prefetch_page = nonpaging_prefetch_page; | 2803 | context->prefetch_page = nonpaging_prefetch_page; |
2805 | context->sync_page = nonpaging_sync_page; | 2804 | context->sync_page = nonpaging_sync_page; |
2806 | context->invlpg = nonpaging_invlpg; | 2805 | context->invlpg = nonpaging_invlpg; |
2806 | context->update_pte = nonpaging_update_pte; | ||
2807 | context->root_level = 0; | 2807 | context->root_level = 0; |
2808 | context->shadow_root_level = PT32E_ROOT_LEVEL; | 2808 | context->shadow_root_level = PT32E_ROOT_LEVEL; |
2809 | context->root_hpa = INVALID_PAGE; | 2809 | context->root_hpa = INVALID_PAGE; |
@@ -2933,6 +2933,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, | |||
2933 | context->prefetch_page = paging64_prefetch_page; | 2933 | context->prefetch_page = paging64_prefetch_page; |
2934 | context->sync_page = paging64_sync_page; | 2934 | context->sync_page = paging64_sync_page; |
2935 | context->invlpg = paging64_invlpg; | 2935 | context->invlpg = paging64_invlpg; |
2936 | context->update_pte = paging64_update_pte; | ||
2936 | context->free = paging_free; | 2937 | context->free = paging_free; |
2937 | context->root_level = level; | 2938 | context->root_level = level; |
2938 | context->shadow_root_level = level; | 2939 | context->shadow_root_level = level; |
@@ -2961,6 +2962,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu, | |||
2961 | context->prefetch_page = paging32_prefetch_page; | 2962 | context->prefetch_page = paging32_prefetch_page; |
2962 | context->sync_page = paging32_sync_page; | 2963 | context->sync_page = paging32_sync_page; |
2963 | context->invlpg = paging32_invlpg; | 2964 | context->invlpg = paging32_invlpg; |
2965 | context->update_pte = paging32_update_pte; | ||
2964 | context->root_level = PT32_ROOT_LEVEL; | 2966 | context->root_level = PT32_ROOT_LEVEL; |
2965 | context->shadow_root_level = PT32E_ROOT_LEVEL; | 2967 | context->shadow_root_level = PT32E_ROOT_LEVEL; |
2966 | context->root_hpa = INVALID_PAGE; | 2968 | context->root_hpa = INVALID_PAGE; |
@@ -2985,6 +2987,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
2985 | context->prefetch_page = nonpaging_prefetch_page; | 2987 | context->prefetch_page = nonpaging_prefetch_page; |
2986 | context->sync_page = nonpaging_sync_page; | 2988 | context->sync_page = nonpaging_sync_page; |
2987 | context->invlpg = nonpaging_invlpg; | 2989 | context->invlpg = nonpaging_invlpg; |
2990 | context->update_pte = nonpaging_update_pte; | ||
2988 | context->shadow_root_level = kvm_x86_ops->get_tdp_level(); | 2991 | context->shadow_root_level = kvm_x86_ops->get_tdp_level(); |
2989 | context->root_hpa = INVALID_PAGE; | 2992 | context->root_hpa = INVALID_PAGE; |
2990 | context->direct_map = true; | 2993 | context->direct_map = true; |
@@ -3089,8 +3092,6 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu) | |||
3089 | 3092 | ||
3090 | static int init_kvm_mmu(struct kvm_vcpu *vcpu) | 3093 | static int init_kvm_mmu(struct kvm_vcpu *vcpu) |
3091 | { | 3094 | { |
3092 | vcpu->arch.update_pte.pfn = bad_pfn; | ||
3093 | |||
3094 | if (mmu_is_nested(vcpu)) | 3095 | if (mmu_is_nested(vcpu)) |
3095 | return init_kvm_nested_mmu(vcpu); | 3096 | return init_kvm_nested_mmu(vcpu); |
3096 | else if (tdp_enabled) | 3097 | else if (tdp_enabled) |
@@ -3164,7 +3165,7 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, | |||
3164 | static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | 3165 | static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, |
3165 | struct kvm_mmu_page *sp, | 3166 | struct kvm_mmu_page *sp, |
3166 | u64 *spte, | 3167 | u64 *spte, |
3167 | const void *new) | 3168 | const void *new, unsigned long mmu_seq) |
3168 | { | 3169 | { |
3169 | if (sp->role.level != PT_PAGE_TABLE_LEVEL) { | 3170 | if (sp->role.level != PT_PAGE_TABLE_LEVEL) { |
3170 | ++vcpu->kvm->stat.mmu_pde_zapped; | 3171 | ++vcpu->kvm->stat.mmu_pde_zapped; |
@@ -3172,10 +3173,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | |||
3172 | } | 3173 | } |
3173 | 3174 | ||
3174 | ++vcpu->kvm->stat.mmu_pte_updated; | 3175 | ++vcpu->kvm->stat.mmu_pte_updated; |
3175 | if (!sp->role.cr4_pae) | 3176 | vcpu->arch.mmu.update_pte(vcpu, sp, spte, new, mmu_seq); |
3176 | paging32_update_pte(vcpu, sp, spte, new); | ||
3177 | else | ||
3178 | paging64_update_pte(vcpu, sp, spte, new); | ||
3179 | } | 3177 | } |
3180 | 3178 | ||
3181 | static bool need_remote_flush(u64 old, u64 new) | 3179 | static bool need_remote_flush(u64 old, u64 new) |
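The hunk above removes the last mode test on the pte-write path: instead of checking sp->role.cr4_pae on every write, vcpu->arch.mmu.update_pte is installed once per MMU context (the context->update_pte assignments appear in the earlier hunks, with a WARN_ON stub for nonpaging contexts that have no guest page tables to shadow). A minimal self-contained sketch of that ops-table idiom (all names invented):

#include <stdio.h>

typedef unsigned long long u64;

struct mmu_ctx {
        void (*update_pte)(struct mmu_ctx *ctx, u64 *spte, const void *gpte);
};

static void update_pte_32(struct mmu_ctx *c, u64 *s, const void *g)
{
        *s = *(const unsigned int *)g;  /* 32-bit guest PTE format */
}

static void update_pte_64(struct mmu_ctx *c, u64 *s, const void *g)
{
        *s = *(const u64 *)g;           /* 64-bit (PAE/long mode) format */
}

static void mmu_ctx_init(struct mmu_ctx *c, int pae)
{
        c->update_pte = pae ? update_pte_64 : update_pte_32;
}

int main(void)
{
        struct mmu_ctx c;
        u64 spte = 0, gpte = 0x1234;

        mmu_ctx_init(&c, 1);
        c.update_pte(&c, &spte, &gpte); /* no mode test at the call site */
        printf("%llx\n", spte);
        return 0;
}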
@@ -3210,28 +3208,6 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu) | |||
3210 | return !!(spte && (*spte & shadow_accessed_mask)); | 3208 | return !!(spte && (*spte & shadow_accessed_mask)); |
3211 | } | 3209 | } |
3212 | 3210 | ||
3213 | static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | ||
3214 | u64 gpte) | ||
3215 | { | ||
3216 | gfn_t gfn; | ||
3217 | pfn_t pfn; | ||
3218 | |||
3219 | if (!is_present_gpte(gpte)) | ||
3220 | return; | ||
3221 | gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; | ||
3222 | |||
3223 | vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq; | ||
3224 | smp_rmb(); | ||
3225 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | ||
3226 | |||
3227 | if (is_error_pfn(pfn)) { | ||
3228 | kvm_release_pfn_clean(pfn); | ||
3229 | return; | ||
3230 | } | ||
3231 | vcpu->arch.update_pte.gfn = gfn; | ||
3232 | vcpu->arch.update_pte.pfn = pfn; | ||
3233 | } | ||
3234 | |||
3235 | static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn) | 3211 | static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn) |
3236 | { | 3212 | { |
3237 | u64 *spte = vcpu->arch.last_pte_updated; | 3213 | u64 *spte = vcpu->arch.last_pte_updated; |
@@ -3253,21 +3229,14 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3253 | struct kvm_mmu_page *sp; | 3229 | struct kvm_mmu_page *sp; |
3254 | struct hlist_node *node; | 3230 | struct hlist_node *node; |
3255 | LIST_HEAD(invalid_list); | 3231 | LIST_HEAD(invalid_list); |
3256 | u64 entry, gentry; | 3232 | unsigned long mmu_seq; |
3257 | u64 *spte; | 3233 | u64 entry, gentry, *spte; |
3258 | unsigned offset = offset_in_page(gpa); | 3234 | unsigned pte_size, page_offset, misaligned, quadrant, offset; |
3259 | unsigned pte_size; | 3235 | int level, npte, invlpg_counter, r, flooded = 0; |
3260 | unsigned page_offset; | ||
3261 | unsigned misaligned; | ||
3262 | unsigned quadrant; | ||
3263 | int level; | ||
3264 | int flooded = 0; | ||
3265 | int npte; | ||
3266 | int r; | ||
3267 | int invlpg_counter; | ||
3268 | bool remote_flush, local_flush, zap_page; | 3236 | bool remote_flush, local_flush, zap_page; |
3269 | 3237 | ||
3270 | zap_page = remote_flush = local_flush = false; | 3238 | zap_page = remote_flush = local_flush = false; |
3239 | offset = offset_in_page(gpa); | ||
3271 | 3240 | ||
3272 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); | 3241 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); |
3273 | 3242 | ||
@@ -3275,9 +3244,8 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3275 | 3244 | ||
3276 | /* | 3245 | /* |
3277 | * Assume that the pte write on a page table of the same type | 3246 | * Assume that the pte write on a page table of the same type |
3278 | * as the current vcpu paging mode. This is nearly always true | 3247 | * as the current vcpu paging mode since we update the sptes only |
3279 | * (might be false while changing modes). Note it is verified later | 3248 | * when they have the same mode. |
3280 | * by update_pte(). | ||
3281 | */ | 3249 | */ |
3282 | if ((is_pae(vcpu) && bytes == 4) || !new) { | 3250 | if ((is_pae(vcpu) && bytes == 4) || !new) { |
3283 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ | 3251 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ |
@@ -3303,15 +3271,17 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3303 | break; | 3271 | break; |
3304 | } | 3272 | } |
3305 | 3273 | ||
3306 | mmu_guess_page_from_pte_write(vcpu, gpa, gentry); | 3274 | mmu_seq = vcpu->kvm->mmu_notifier_seq; |
3275 | smp_rmb(); | ||
3276 | |||
3307 | spin_lock(&vcpu->kvm->mmu_lock); | 3277 | spin_lock(&vcpu->kvm->mmu_lock); |
3308 | if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) | 3278 | if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) |
3309 | gentry = 0; | 3279 | gentry = 0; |
3310 | kvm_mmu_access_page(vcpu, gfn); | ||
3311 | kvm_mmu_free_some_pages(vcpu); | 3280 | kvm_mmu_free_some_pages(vcpu); |
3312 | ++vcpu->kvm->stat.mmu_pte_write; | 3281 | ++vcpu->kvm->stat.mmu_pte_write; |
3313 | trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); | 3282 | trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); |
3314 | if (guest_initiated) { | 3283 | if (guest_initiated) { |
3284 | kvm_mmu_access_page(vcpu, gfn); | ||
3315 | if (gfn == vcpu->arch.last_pt_write_gfn | 3285 | if (gfn == vcpu->arch.last_pt_write_gfn |
3316 | && !last_updated_pte_accessed(vcpu)) { | 3286 | && !last_updated_pte_accessed(vcpu)) { |
3317 | ++vcpu->arch.last_pt_write_count; | 3287 | ++vcpu->arch.last_pt_write_count; |
@@ -3375,7 +3345,8 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3375 | if (gentry && | 3345 | if (gentry && |
3376 | !((sp->role.word ^ vcpu->arch.mmu.base_role.word) | 3346 | !((sp->role.word ^ vcpu->arch.mmu.base_role.word) |
3377 | & mask.word)) | 3347 | & mask.word)) |
3378 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); | 3348 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry, |
3349 | mmu_seq); | ||
3379 | if (!remote_flush && need_remote_flush(entry, *spte)) | 3350 | if (!remote_flush && need_remote_flush(entry, *spte)) |
3380 | remote_flush = true; | 3351 | remote_flush = true; |
3381 | ++spte; | 3352 | ++spte; |
@@ -3385,10 +3356,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3385 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | 3356 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); |
3386 | trace_kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE); | 3357 | trace_kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE); |
3387 | spin_unlock(&vcpu->kvm->mmu_lock); | 3358 | spin_unlock(&vcpu->kvm->mmu_lock); |
3388 | if (!is_error_pfn(vcpu->arch.update_pte.pfn)) { | ||
3389 | kvm_release_pfn_clean(vcpu->arch.update_pte.pfn); | ||
3390 | vcpu->arch.update_pte.pfn = bad_pfn; | ||
3391 | } | ||
3392 | } | 3359 | } |
3393 | 3360 | ||
3394 | int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) | 3361 | int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) |
@@ -3538,14 +3505,23 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
3538 | if (!test_bit(slot, sp->slot_bitmap)) | 3505 | if (!test_bit(slot, sp->slot_bitmap)) |
3539 | continue; | 3506 | continue; |
3540 | 3507 | ||
3541 | if (sp->role.level != PT_PAGE_TABLE_LEVEL) | ||
3542 | continue; | ||
3543 | |||
3544 | pt = sp->spt; | 3508 | pt = sp->spt; |
3545 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) | 3509 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { |
3510 | if (!is_shadow_present_pte(pt[i]) || | ||
3511 | !is_last_spte(pt[i], sp->role.level)) | ||
3512 | continue; | ||
3513 | |||
3514 | if (is_large_pte(pt[i])) { | ||
3515 | drop_spte(kvm, &pt[i], | ||
3516 | shadow_trap_nonpresent_pte); | ||
3517 | --kvm->stat.lpages; | ||
3518 | continue; | ||
3519 | } | ||
3520 | |||
3546 | /* avoid RMW */ | 3521 | /* avoid RMW */ |
3547 | if (is_writable_pte(pt[i])) | 3522 | if (is_writable_pte(pt[i])) |
3548 | update_spte(&pt[i], pt[i] & ~PT_WRITABLE_MASK); | 3523 | update_spte(&pt[i], pt[i] & ~PT_WRITABLE_MASK); |
3524 | } | ||
3549 | } | 3525 | } |
3550 | kvm_flush_remote_tlbs(kvm); | 3526 | kvm_flush_remote_tlbs(kvm); |
3551 | } | 3527 | } |
@@ -3583,7 +3559,7 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) | |||
3583 | if (nr_to_scan == 0) | 3559 | if (nr_to_scan == 0) |
3584 | goto out; | 3560 | goto out; |
3585 | 3561 | ||
3586 | spin_lock(&kvm_lock); | 3562 | raw_spin_lock(&kvm_lock); |
3587 | 3563 | ||
3588 | list_for_each_entry(kvm, &vm_list, vm_list) { | 3564 | list_for_each_entry(kvm, &vm_list, vm_list) { |
3589 | int idx, freed_pages; | 3565 | int idx, freed_pages; |
@@ -3606,7 +3582,7 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) | |||
3606 | if (kvm_freed) | 3582 | if (kvm_freed) |
3607 | list_move_tail(&kvm_freed->vm_list, &vm_list); | 3583 | list_move_tail(&kvm_freed->vm_list, &vm_list); |
3608 | 3584 | ||
3609 | spin_unlock(&kvm_lock); | 3585 | raw_spin_unlock(&kvm_lock); |
3610 | 3586 | ||
3611 | out: | 3587 | out: |
3612 | return percpu_counter_read_positive(&kvm_total_used_mmu_pages); | 3588 | return percpu_counter_read_positive(&kvm_total_used_mmu_pages); |
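The kvm_lock switch to raw_spin_lock above (tsc_write_lock gets the same treatment in the x86.c hunks below) is about preemptible kernels: under PREEMPT_RT a plain spinlock_t may become a sleeping lock, while raw_spinlock_t always spins, which is what a short, atomic section like this shrinker walk requires. Declaration and use, kernel-style (lock name illustrative):

#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(example_lock);       /* stays a spinning lock on RT */

static void touch_global_list(void)
{
        raw_spin_lock(&example_lock);
        /* short, never-sleeping critical section */
        raw_spin_unlock(&example_lock);
}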
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 6bccc24c4181..c6397795d865 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -31,7 +31,6 @@ | |||
31 | #define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl) | 31 | #define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl) |
32 | #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) | 32 | #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) |
33 | #define PT_INDEX(addr, level) PT64_INDEX(addr, level) | 33 | #define PT_INDEX(addr, level) PT64_INDEX(addr, level) |
34 | #define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level) | ||
35 | #define PT_LEVEL_BITS PT64_LEVEL_BITS | 34 | #define PT_LEVEL_BITS PT64_LEVEL_BITS |
36 | #ifdef CONFIG_X86_64 | 35 | #ifdef CONFIG_X86_64 |
37 | #define PT_MAX_FULL_LEVELS 4 | 36 | #define PT_MAX_FULL_LEVELS 4 |
@@ -48,7 +47,6 @@ | |||
48 | #define PT_LVL_ADDR_MASK(lvl) PT32_LVL_ADDR_MASK(lvl) | 47 | #define PT_LVL_ADDR_MASK(lvl) PT32_LVL_ADDR_MASK(lvl) |
49 | #define PT_LVL_OFFSET_MASK(lvl) PT32_LVL_OFFSET_MASK(lvl) | 48 | #define PT_LVL_OFFSET_MASK(lvl) PT32_LVL_OFFSET_MASK(lvl) |
50 | #define PT_INDEX(addr, level) PT32_INDEX(addr, level) | 49 | #define PT_INDEX(addr, level) PT32_INDEX(addr, level) |
51 | #define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level) | ||
52 | #define PT_LEVEL_BITS PT32_LEVEL_BITS | 50 | #define PT_LEVEL_BITS PT32_LEVEL_BITS |
53 | #define PT_MAX_FULL_LEVELS 2 | 51 | #define PT_MAX_FULL_LEVELS 2 |
54 | #define CMPXCHG cmpxchg | 52 | #define CMPXCHG cmpxchg |
@@ -327,7 +325,7 @@ no_present: | |||
327 | } | 325 | } |
328 | 326 | ||
329 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 327 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
330 | u64 *spte, const void *pte) | 328 | u64 *spte, const void *pte, unsigned long mmu_seq) |
331 | { | 329 | { |
332 | pt_element_t gpte; | 330 | pt_element_t gpte; |
333 | unsigned pte_access; | 331 | unsigned pte_access; |
@@ -339,16 +337,16 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
339 | 337 | ||
340 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); | 338 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); |
341 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); | 339 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); |
342 | if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn) | 340 | pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte)); |
341 | if (is_error_pfn(pfn)) { | ||
342 | kvm_release_pfn_clean(pfn); | ||
343 | return; | 343 | return; |
344 | pfn = vcpu->arch.update_pte.pfn; | 344 | } |
345 | if (is_error_pfn(pfn)) | 345 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
346 | return; | ||
347 | if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq)) | ||
348 | return; | 346 | return; |
349 | kvm_get_pfn(pfn); | 347 | |
350 | /* | 348 | /* |
351 | * we call mmu_set_spte() with host_writable = true beacuse that | 349 | * we call mmu_set_spte() with host_writable = true because that |
352 | * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). | 350 | * the pfn was fetched from get_user_pages(write = 1). |
353 | */ | 351 | */ |
354 | mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, | 352 | mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, |
@@ -829,7 +827,6 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
829 | #undef FNAME | 827 | #undef FNAME |
830 | #undef PT_BASE_ADDR_MASK | 828 | #undef PT_BASE_ADDR_MASK |
831 | #undef PT_INDEX | 829 | #undef PT_INDEX |
832 | #undef PT_LEVEL_MASK | ||
833 | #undef PT_LVL_ADDR_MASK | 830 | #undef PT_LVL_ADDR_MASK |
834 | #undef PT_LVL_OFFSET_MASK | 831 | #undef PT_LVL_OFFSET_MASK |
835 | #undef PT_LEVEL_BITS | 832 | #undef PT_LEVEL_BITS |
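PT_LEVEL_MASK disappears from three places because paging_tmpl.h is a C template: mmu.c includes it once per guest paging flavor, and the FNAME()/PT_* macros paste the 32-bit or 64-bit names, so every macro must be defined before and #undef'd after each inclusion. A distilled sketch of the mechanism (file and macro names simplified):

/* tmpl.h -- no include guard; included once per variant */
#if WIDTH == 64
#define FNAME(name)  paging64_##name
#define pt_element_t unsigned long long
#else
#define FNAME(name)  paging32_##name
#define pt_element_t unsigned int
#endif

static pt_element_t FNAME(read_pte)(const pt_element_t *p)
{
        return *p;      /* one body, two concrete functions */
}

#undef pt_element_t
#undef FNAME
#undef WIDTH

/* consumer.c */
#define WIDTH 64
#include "tmpl.h"       /* emits paging64_read_pte() */
#define WIDTH 32
#include "tmpl.h"       /* emits paging32_read_pte() */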
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 63fec1531e89..6bb15d583e47 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -135,6 +135,8 @@ struct vcpu_svm { | |||
135 | 135 | ||
136 | u32 *msrpm; | 136 | u32 *msrpm; |
137 | 137 | ||
138 | ulong nmi_iret_rip; | ||
139 | |||
138 | struct nested_state nested; | 140 | struct nested_state nested; |
139 | 141 | ||
140 | bool nmi_singlestep; | 142 | bool nmi_singlestep; |
@@ -1153,8 +1155,10 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) | |||
1153 | wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs); | 1155 | wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs); |
1154 | load_gs_index(svm->host.gs); | 1156 | load_gs_index(svm->host.gs); |
1155 | #else | 1157 | #else |
1158 | #ifdef CONFIG_X86_32_LAZY_GS | ||
1156 | loadsegment(gs, svm->host.gs); | 1159 | loadsegment(gs, svm->host.gs); |
1157 | #endif | 1160 | #endif |
1161 | #endif | ||
1158 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) | 1162 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) |
1159 | wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); | 1163 | wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); |
1160 | } | 1164 | } |
@@ -2653,6 +2657,7 @@ static int iret_interception(struct vcpu_svm *svm) | |||
2653 | ++svm->vcpu.stat.nmi_window_exits; | 2657 | ++svm->vcpu.stat.nmi_window_exits; |
2654 | clr_intercept(svm, INTERCEPT_IRET); | 2658 | clr_intercept(svm, INTERCEPT_IRET); |
2655 | svm->vcpu.arch.hflags |= HF_IRET_MASK; | 2659 | svm->vcpu.arch.hflags |= HF_IRET_MASK; |
2660 | svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu); | ||
2656 | return 1; | 2661 | return 1; |
2657 | } | 2662 | } |
2658 | 2663 | ||
@@ -3474,7 +3479,12 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) | |||
3474 | 3479 | ||
3475 | svm->int3_injected = 0; | 3480 | svm->int3_injected = 0; |
3476 | 3481 | ||
3477 | if (svm->vcpu.arch.hflags & HF_IRET_MASK) { | 3482 | /* |
3483 | * If we've made progress since setting HF_IRET_MASK, we've | ||
3484 | * executed an IRET and can allow NMI injection. | ||
3485 | */ | ||
3486 | if ((svm->vcpu.arch.hflags & HF_IRET_MASK) | ||
3487 | && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) { | ||
3478 | svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); | 3488 | svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); |
3479 | kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); | 3489 | kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); |
3480 | } | 3490 | } |
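The nmi_iret_rip logic above encodes a subtlety of SVM: the IRET intercept fires before the instruction executes, so HF_IRET_MASK alone does not prove the guest has left its NMI handler. Recording RIP at the intercept and unmasking only once RIP has moved is the whole heuristic; a compact self-contained model (names invented):

#include <stdbool.h>
#include <stdint.h>

struct nmi_state {
        bool     nmi_masked;
        bool     iret_seen;
        uint64_t iret_rip;      /* RIP when the IRET intercept fired */
};

/* IRET intercepted: remember where we were, keep NMIs masked. */
static void on_iret_intercept(struct nmi_state *s, uint64_t rip)
{
        s->iret_seen = true;
        s->iret_rip = rip;
}

/* On a later exit: only a RIP that moved proves the IRET retired. */
static void complete_interrupts(struct nmi_state *s, uint64_t rip)
{
        if (s->iret_seen && rip != s->iret_rip) {
                s->iret_seen = false;
                s->nmi_masked = false;  /* safe to inject the next NMI */
        }
}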
@@ -3641,19 +3651,30 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
3641 | wrmsrl(MSR_GS_BASE, svm->host.gs_base); | 3651 | wrmsrl(MSR_GS_BASE, svm->host.gs_base); |
3642 | #else | 3652 | #else |
3643 | loadsegment(fs, svm->host.fs); | 3653 | loadsegment(fs, svm->host.fs); |
3654 | #ifndef CONFIG_X86_32_LAZY_GS | ||
3655 | loadsegment(gs, svm->host.gs); | ||
3656 | #endif | ||
3644 | #endif | 3657 | #endif |
3645 | 3658 | ||
3646 | reload_tss(vcpu); | 3659 | reload_tss(vcpu); |
3647 | 3660 | ||
3648 | local_irq_disable(); | 3661 | local_irq_disable(); |
3649 | 3662 | ||
3650 | stgi(); | ||
3651 | |||
3652 | vcpu->arch.cr2 = svm->vmcb->save.cr2; | 3663 | vcpu->arch.cr2 = svm->vmcb->save.cr2; |
3653 | vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; | 3664 | vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; |
3654 | vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; | 3665 | vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; |
3655 | vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; | 3666 | vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; |
3656 | 3667 | ||
3668 | if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) | ||
3669 | kvm_before_handle_nmi(&svm->vcpu); | ||
3670 | |||
3671 | stgi(); | ||
3672 | |||
3673 | /* Any pending NMI will happen here */ | ||
3674 | |||
3675 | if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) | ||
3676 | kvm_after_handle_nmi(&svm->vcpu); | ||
3677 | |||
3657 | sync_cr8_to_lapic(vcpu); | 3678 | sync_cr8_to_lapic(vcpu); |
3658 | 3679 | ||
3659 | svm->next_rip = 0; | 3680 | svm->next_rip = 0; |
diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c index fc7a101c4a35..abd86e865be3 100644 --- a/arch/x86/kvm/timer.c +++ b/arch/x86/kvm/timer.c | |||
@@ -25,7 +25,7 @@ static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer) | |||
25 | 25 | ||
26 | /* | 26 | /* |
27 | * There is a race window between reading and incrementing, but we do | 27 | * There is a race window between reading and incrementing, but we do |
28 | * not care about potentially loosing timer events in the !reinject | 28 | * not care about potentially losing timer events in the !reinject |
29 | * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked | 29 | * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked |
30 | * in vcpu_enter_guest. | 30 | * in vcpu_enter_guest. |
31 | */ | 31 | */ |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bf89ec2cfb82..5b4cdcbd154c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -93,14 +93,14 @@ module_param(yield_on_hlt, bool, S_IRUGO); | |||
93 | * These 2 parameters are used to config the controls for Pause-Loop Exiting: | 93 | * These 2 parameters are used to config the controls for Pause-Loop Exiting: |
94 | * ple_gap: upper bound on the amount of time between two successive | 94 | * ple_gap: upper bound on the amount of time between two successive |
95 | * executions of PAUSE in a loop. Also indicate if ple enabled. | 95 | * executions of PAUSE in a loop. Also indicate if ple enabled. |
96 | * According to test, this time is usually small than 41 cycles. | 96 | * According to test, this time is usually smaller than 128 cycles. |
97 | * ple_window: upper bound on the amount of time a guest is allowed to execute | 97 | * ple_window: upper bound on the amount of time a guest is allowed to execute |
98 | * in a PAUSE loop. Tests indicate that most spinlocks are held for | 98 | * in a PAUSE loop. Tests indicate that most spinlocks are held for |
99 | * less than 2^12 cycles | 99 | * less than 2^12 cycles |
100 | * Time is measured based on a counter that runs at the same rate as the TSC, | 100 | * Time is measured based on a counter that runs at the same rate as the TSC, |
101 | * refer SDM volume 3b section 21.6.13 & 22.1.3. | 101 | * refer SDM volume 3b section 21.6.13 & 22.1.3. |
102 | */ | 102 | */ |
103 | #define KVM_VMX_DEFAULT_PLE_GAP 41 | 103 | #define KVM_VMX_DEFAULT_PLE_GAP 128 |
104 | #define KVM_VMX_DEFAULT_PLE_WINDOW 4096 | 104 | #define KVM_VMX_DEFAULT_PLE_WINDOW 4096 |
105 | static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP; | 105 | static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP; |
106 | module_param(ple_gap, int, S_IRUGO); | 106 | module_param(ple_gap, int, S_IRUGO); |
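The comment above defines the two pause-loop-exiting knobs: PAUSEs closer together than ple_gap are treated as one spin loop, and a loop that has run longer than ple_window triggers a VM exit. A rough self-contained model of that decision, only to make the parameters concrete (the real counters live in hardware and the VMCS):

#include <stdbool.h>
#include <stdint.h>

#define PLE_GAP    128  /* cycles: max gap between PAUSEs in one loop */
#define PLE_WINDOW 4096 /* cycles: max total time spent spinning */

struct ple_state {
        uint64_t first_pause_tsc;
        uint64_t last_pause_tsc;
};

/* Returns true when a PAUSE at time @now should cause a VM exit. */
static bool ple_on_pause(struct ple_state *s, uint64_t now)
{
        if (now - s->last_pause_tsc > PLE_GAP)
                s->first_pause_tsc = now;       /* gap too big: new loop */
        s->last_pause_tsc = now;
        return now - s->first_pause_tsc > PLE_WINDOW;
}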
@@ -176,11 +176,11 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | |||
176 | return container_of(vcpu, struct vcpu_vmx, vcpu); | 176 | return container_of(vcpu, struct vcpu_vmx, vcpu); |
177 | } | 177 | } |
178 | 178 | ||
179 | static int init_rmode(struct kvm *kvm); | ||
180 | static u64 construct_eptp(unsigned long root_hpa); | 179 | static u64 construct_eptp(unsigned long root_hpa); |
181 | static void kvm_cpu_vmxon(u64 addr); | 180 | static void kvm_cpu_vmxon(u64 addr); |
182 | static void kvm_cpu_vmxoff(void); | 181 | static void kvm_cpu_vmxoff(void); |
183 | static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); | 182 | static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); |
183 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); | ||
184 | 184 | ||
185 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); | 185 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); |
186 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | 186 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); |
@@ -1333,19 +1333,25 @@ static __init int vmx_disabled_by_bios(void) | |||
1333 | 1333 | ||
1334 | rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); | 1334 | rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); |
1335 | if (msr & FEATURE_CONTROL_LOCKED) { | 1335 | if (msr & FEATURE_CONTROL_LOCKED) { |
1336 | /* launched w/ TXT and VMX disabled */ | ||
1336 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) | 1337 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) |
1337 | && tboot_enabled()) | 1338 | && tboot_enabled()) |
1338 | return 1; | 1339 | return 1; |
1340 | /* launched w/o TXT and VMX only enabled w/ TXT */ | ||
1339 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) | 1341 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) |
1342 | && (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) | ||
1340 | && !tboot_enabled()) { | 1343 | && !tboot_enabled()) { |
1341 | printk(KERN_WARNING "kvm: disable TXT in the BIOS or " | 1344 | printk(KERN_WARNING "kvm: disable TXT in the BIOS or " |
1342 | " activate TXT before enabling KVM\n"); | 1345 | "activate TXT before enabling KVM\n"); |
1343 | return 1; | 1346 | return 1; |
1344 | } | 1347 | } |
1348 | /* launched w/o TXT and VMX disabled */ | ||
1349 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) | ||
1350 | && !tboot_enabled()) | ||
1351 | return 1; | ||
1345 | } | 1352 | } |
1346 | 1353 | ||
1347 | return 0; | 1354 | return 0; |
1348 | /* locked but not enabled */ | ||
1349 | } | 1355 | } |
1350 | 1356 | ||
1351 | static void kvm_cpu_vmxon(u64 addr) | 1357 | static void kvm_cpu_vmxon(u64 addr) |
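The fixed vmx_disabled_by_bios() reduces to a small truth table over the locked feature-control MSR: in each launch mode (tboot or not), VMX is unusable iff the matching VMXON-enable bit is clear, and an unlocked MSR means KVM may still set the bits itself. A sketch of the decision as a pure function (bit values mirror the kernel's FEATURE_CONTROL_* definitions):

#include <stdbool.h>
#include <stdint.h>

#define FC_LOCKED        (1u << 0)
#define FC_VMXON_IN_SMX  (1u << 1)      /* VMXON allowed inside SMX (TXT) */
#define FC_VMXON_OUT_SMX (1u << 2)      /* VMXON allowed outside SMX */

static bool vmx_disabled_by_bios_sketch(uint32_t msr, bool tboot)
{
        if (!(msr & FC_LOCKED))
                return false;           /* unlocked: KVM can enable VMX */
        if (tboot)
                return !(msr & FC_VMXON_IN_SMX);
        return !(msr & FC_VMXON_OUT_SMX);
}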
@@ -1683,6 +1689,7 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
1683 | vmx->emulation_required = 1; | 1689 | vmx->emulation_required = 1; |
1684 | vmx->rmode.vm86_active = 0; | 1690 | vmx->rmode.vm86_active = 0; |
1685 | 1691 | ||
1692 | vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector); | ||
1686 | vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base); | 1693 | vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base); |
1687 | vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit); | 1694 | vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit); |
1688 | vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar); | 1695 | vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar); |
@@ -1756,6 +1763,19 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
1756 | vmx->emulation_required = 1; | 1763 | vmx->emulation_required = 1; |
1757 | vmx->rmode.vm86_active = 1; | 1764 | vmx->rmode.vm86_active = 1; |
1758 | 1765 | ||
1766 | /* | ||
1767 | * Very old userspace does not call KVM_SET_TSS_ADDR before entering | ||
1768 | * the vcpu. Call it here with a phys address pointing 16M below 4G. | ||
1769 | */ | ||
1770 | if (!vcpu->kvm->arch.tss_addr) { | ||
1771 | printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR needs to be " | ||
1772 | "called before entering vcpu\n"); | ||
1773 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | ||
1774 | vmx_set_tss_addr(vcpu->kvm, 0xfeffd000); | ||
1775 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
1776 | } | ||
1777 | |||
1778 | vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR); | ||
1759 | vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE); | 1779 | vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE); |
1760 | vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); | 1780 | vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); |
1761 | 1781 | ||
@@ -1794,7 +1814,6 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
1794 | 1814 | ||
1795 | continue_rmode: | 1815 | continue_rmode: |
1796 | kvm_mmu_reset_context(vcpu); | 1816 | kvm_mmu_reset_context(vcpu); |
1797 | init_rmode(vcpu->kvm); | ||
1798 | } | 1817 | } |
1799 | 1818 | ||
1800 | static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) | 1819 | static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) |
@@ -2030,23 +2049,40 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
2030 | vmcs_writel(GUEST_CR4, hw_cr4); | 2049 | vmcs_writel(GUEST_CR4, hw_cr4); |
2031 | } | 2050 | } |
2032 | 2051 | ||
2033 | static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) | ||
2034 | { | ||
2035 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | ||
2036 | |||
2037 | return vmcs_readl(sf->base); | ||
2038 | } | ||
2039 | |||
2040 | static void vmx_get_segment(struct kvm_vcpu *vcpu, | 2052 | static void vmx_get_segment(struct kvm_vcpu *vcpu, |
2041 | struct kvm_segment *var, int seg) | 2053 | struct kvm_segment *var, int seg) |
2042 | { | 2054 | { |
2055 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
2043 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | 2056 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; |
2057 | struct kvm_save_segment *save; | ||
2044 | u32 ar; | 2058 | u32 ar; |
2045 | 2059 | ||
2060 | if (vmx->rmode.vm86_active | ||
2061 | && (seg == VCPU_SREG_TR || seg == VCPU_SREG_ES | ||
2062 | || seg == VCPU_SREG_DS || seg == VCPU_SREG_FS | ||
2063 | || seg == VCPU_SREG_GS) | ||
2064 | && !emulate_invalid_guest_state) { | ||
2065 | switch (seg) { | ||
2066 | case VCPU_SREG_TR: save = &vmx->rmode.tr; break; | ||
2067 | case VCPU_SREG_ES: save = &vmx->rmode.es; break; | ||
2068 | case VCPU_SREG_DS: save = &vmx->rmode.ds; break; | ||
2069 | case VCPU_SREG_FS: save = &vmx->rmode.fs; break; | ||
2070 | case VCPU_SREG_GS: save = &vmx->rmode.gs; break; | ||
2071 | default: BUG(); | ||
2072 | } | ||
2073 | var->selector = save->selector; | ||
2074 | var->base = save->base; | ||
2075 | var->limit = save->limit; | ||
2076 | ar = save->ar; | ||
2077 | if (seg == VCPU_SREG_TR | ||
2078 | || var->selector == vmcs_read16(sf->selector)) | ||
2079 | goto use_saved_rmode_seg; | ||
2080 | } | ||
2046 | var->base = vmcs_readl(sf->base); | 2081 | var->base = vmcs_readl(sf->base); |
2047 | var->limit = vmcs_read32(sf->limit); | 2082 | var->limit = vmcs_read32(sf->limit); |
2048 | var->selector = vmcs_read16(sf->selector); | 2083 | var->selector = vmcs_read16(sf->selector); |
2049 | ar = vmcs_read32(sf->ar_bytes); | 2084 | ar = vmcs_read32(sf->ar_bytes); |
2085 | use_saved_rmode_seg: | ||
2050 | if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state) | 2086 | if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state) |
2051 | ar = 0; | 2087 | ar = 0; |
2052 | var->type = ar & 15; | 2088 | var->type = ar & 15; |
@@ -2060,6 +2096,18 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, | |||
2060 | var->unusable = (ar >> 16) & 1; | 2096 | var->unusable = (ar >> 16) & 1; |
2061 | } | 2097 | } |
2062 | 2098 | ||
2099 | static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) | ||
2100 | { | ||
2101 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | ||
2102 | struct kvm_segment s; | ||
2103 | |||
2104 | if (to_vmx(vcpu)->rmode.vm86_active) { | ||
2105 | vmx_get_segment(vcpu, &s, seg); | ||
2106 | return s.base; | ||
2107 | } | ||
2108 | return vmcs_readl(sf->base); | ||
2109 | } | ||
2110 | |||
2063 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) | 2111 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) |
2064 | { | 2112 | { |
2065 | if (!is_protmode(vcpu)) | 2113 | if (!is_protmode(vcpu)) |
@@ -2101,6 +2149,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, | |||
2101 | u32 ar; | 2149 | u32 ar; |
2102 | 2150 | ||
2103 | if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) { | 2151 | if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) { |
2152 | vmcs_write16(sf->selector, var->selector); | ||
2104 | vmx->rmode.tr.selector = var->selector; | 2153 | vmx->rmode.tr.selector = var->selector; |
2105 | vmx->rmode.tr.base = var->base; | 2154 | vmx->rmode.tr.base = var->base; |
2106 | vmx->rmode.tr.limit = var->limit; | 2155 | vmx->rmode.tr.limit = var->limit; |
@@ -2361,11 +2410,12 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu) | |||
2361 | 2410 | ||
2362 | static int init_rmode_tss(struct kvm *kvm) | 2411 | static int init_rmode_tss(struct kvm *kvm) |
2363 | { | 2412 | { |
2364 | gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT; | 2413 | gfn_t fn; |
2365 | u16 data = 0; | 2414 | u16 data = 0; |
2366 | int ret = 0; | 2415 | int r, idx, ret = 0; |
2367 | int r; | ||
2368 | 2416 | ||
2417 | idx = srcu_read_lock(&kvm->srcu); | ||
2418 | fn = rmode_tss_base(kvm) >> PAGE_SHIFT; | ||
2369 | r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); | 2419 | r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); |
2370 | if (r < 0) | 2420 | if (r < 0) |
2371 | goto out; | 2421 | goto out; |
@@ -2389,12 +2439,13 @@ static int init_rmode_tss(struct kvm *kvm) | |||
2389 | 2439 | ||
2390 | ret = 1; | 2440 | ret = 1; |
2391 | out: | 2441 | out: |
2442 | srcu_read_unlock(&kvm->srcu, idx); | ||
2392 | return ret; | 2443 | return ret; |
2393 | } | 2444 | } |
2394 | 2445 | ||
2395 | static int init_rmode_identity_map(struct kvm *kvm) | 2446 | static int init_rmode_identity_map(struct kvm *kvm) |
2396 | { | 2447 | { |
2397 | int i, r, ret; | 2448 | int i, idx, r, ret; |
2398 | pfn_t identity_map_pfn; | 2449 | pfn_t identity_map_pfn; |
2399 | u32 tmp; | 2450 | u32 tmp; |
2400 | 2451 | ||
@@ -2409,6 +2460,7 @@ static int init_rmode_identity_map(struct kvm *kvm) | |||
2409 | return 1; | 2460 | return 1; |
2410 | ret = 0; | 2461 | ret = 0; |
2411 | identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT; | 2462 | identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT; |
2463 | idx = srcu_read_lock(&kvm->srcu); | ||
2412 | r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); | 2464 | r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); |
2413 | if (r < 0) | 2465 | if (r < 0) |
2414 | goto out; | 2466 | goto out; |
@@ -2424,6 +2476,7 @@ static int init_rmode_identity_map(struct kvm *kvm) | |||
2424 | kvm->arch.ept_identity_pagetable_done = true; | 2476 | kvm->arch.ept_identity_pagetable_done = true; |
2425 | ret = 1; | 2477 | ret = 1; |
2426 | out: | 2478 | out: |
2479 | srcu_read_unlock(&kvm->srcu, idx); | ||
2427 | return ret; | 2480 | return ret; |
2428 | } | 2481 | } |
2429 | 2482 | ||
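init_rmode_tss() and init_rmode_identity_map() now bracket their own SRCU read side (the idx/srcu_read_lock pairs above) because, after this series, they can be reached from paths that do not already hold it; the kvm memslot array is SRCU-protected, so every reader must bracket its access. The bare pattern, kernel-style (function name illustrative):

#include <linux/srcu.h>

static void memslot_reader(struct srcu_struct *srcu)
{
        int idx;

        idx = srcu_read_lock(srcu);     /* pin the current memslot array */
        /* ... safe to dereference SRCU-protected pointers here ... */
        srcu_read_unlock(srcu, idx);    /* same idx must be passed back */
}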
@@ -2699,22 +2752,6 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2699 | return 0; | 2752 | return 0; |
2700 | } | 2753 | } |
2701 | 2754 | ||
2702 | static int init_rmode(struct kvm *kvm) | ||
2703 | { | ||
2704 | int idx, ret = 0; | ||
2705 | |||
2706 | idx = srcu_read_lock(&kvm->srcu); | ||
2707 | if (!init_rmode_tss(kvm)) | ||
2708 | goto exit; | ||
2709 | if (!init_rmode_identity_map(kvm)) | ||
2710 | goto exit; | ||
2711 | |||
2712 | ret = 1; | ||
2713 | exit: | ||
2714 | srcu_read_unlock(&kvm->srcu, idx); | ||
2715 | return ret; | ||
2716 | } | ||
2717 | |||
2718 | static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | 2755 | static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) |
2719 | { | 2756 | { |
2720 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2757 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
@@ -2722,10 +2759,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2722 | int ret; | 2759 | int ret; |
2723 | 2760 | ||
2724 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); | 2761 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); |
2725 | if (!init_rmode(vmx->vcpu.kvm)) { | ||
2726 | ret = -ENOMEM; | ||
2727 | goto out; | ||
2728 | } | ||
2729 | 2762 | ||
2730 | vmx->rmode.vm86_active = 0; | 2763 | vmx->rmode.vm86_active = 0; |
2731 | 2764 | ||
@@ -2805,7 +2838,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2805 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); | 2838 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); |
2806 | if (vm_need_tpr_shadow(vmx->vcpu.kvm)) | 2839 | if (vm_need_tpr_shadow(vmx->vcpu.kvm)) |
2807 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, | 2840 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, |
2808 | page_to_phys(vmx->vcpu.arch.apic->regs_page)); | 2841 | __pa(vmx->vcpu.arch.apic->regs)); |
2809 | vmcs_write32(TPR_THRESHOLD, 0); | 2842 | vmcs_write32(TPR_THRESHOLD, 0); |
2810 | } | 2843 | } |
2811 | 2844 | ||
@@ -2971,6 +3004,9 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) | |||
2971 | if (ret) | 3004 | if (ret) |
2972 | return ret; | 3005 | return ret; |
2973 | kvm->arch.tss_addr = addr; | 3006 | kvm->arch.tss_addr = addr; |
3007 | if (!init_rmode_tss(kvm)) | ||
3008 | return -ENOMEM; | ||
3009 | |||
2974 | return 0; | 3010 | return 0; |
2975 | } | 3011 | } |
2976 | 3012 | ||
@@ -3962,7 +3998,7 @@ static void vmx_cancel_injection(struct kvm_vcpu *vcpu) | |||
3962 | #define Q "l" | 3998 | #define Q "l" |
3963 | #endif | 3999 | #endif |
3964 | 4000 | ||
3965 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | 4001 | static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) |
3966 | { | 4002 | { |
3967 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 4003 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3968 | 4004 | ||
@@ -3991,6 +4027,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
3991 | asm( | 4027 | asm( |
3992 | /* Store host registers */ | 4028 | /* Store host registers */ |
3993 | "push %%"R"dx; push %%"R"bp;" | 4029 | "push %%"R"dx; push %%"R"bp;" |
4030 | "push %%"R"cx \n\t" /* placeholder for guest rcx */ | ||
3994 | "push %%"R"cx \n\t" | 4031 | "push %%"R"cx \n\t" |
3995 | "cmp %%"R"sp, %c[host_rsp](%0) \n\t" | 4032 | "cmp %%"R"sp, %c[host_rsp](%0) \n\t" |
3996 | "je 1f \n\t" | 4033 | "je 1f \n\t" |
@@ -4032,10 +4069,11 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
4032 | ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t" | 4069 | ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t" |
4033 | ".Lkvm_vmx_return: " | 4070 | ".Lkvm_vmx_return: " |
4034 | /* Save guest registers, load host registers, keep flags */ | 4071 | /* Save guest registers, load host registers, keep flags */ |
4035 | "xchg %0, (%%"R"sp) \n\t" | 4072 | "mov %0, %c[wordsize](%%"R"sp) \n\t" |
4073 | "pop %0 \n\t" | ||
4036 | "mov %%"R"ax, %c[rax](%0) \n\t" | 4074 | "mov %%"R"ax, %c[rax](%0) \n\t" |
4037 | "mov %%"R"bx, %c[rbx](%0) \n\t" | 4075 | "mov %%"R"bx, %c[rbx](%0) \n\t" |
4038 | "push"Q" (%%"R"sp); pop"Q" %c[rcx](%0) \n\t" | 4076 | "pop"Q" %c[rcx](%0) \n\t" |
4039 | "mov %%"R"dx, %c[rdx](%0) \n\t" | 4077 | "mov %%"R"dx, %c[rdx](%0) \n\t" |
4040 | "mov %%"R"si, %c[rsi](%0) \n\t" | 4078 | "mov %%"R"si, %c[rsi](%0) \n\t" |
4041 | "mov %%"R"di, %c[rdi](%0) \n\t" | 4079 | "mov %%"R"di, %c[rdi](%0) \n\t" |
@@ -4053,7 +4091,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
4053 | "mov %%cr2, %%"R"ax \n\t" | 4091 | "mov %%cr2, %%"R"ax \n\t" |
4054 | "mov %%"R"ax, %c[cr2](%0) \n\t" | 4092 | "mov %%"R"ax, %c[cr2](%0) \n\t" |
4055 | 4093 | ||
4056 | "pop %%"R"bp; pop %%"R"bp; pop %%"R"dx \n\t" | 4094 | "pop %%"R"bp; pop %%"R"dx \n\t" |
4057 | "setbe %c[fail](%0) \n\t" | 4095 | "setbe %c[fail](%0) \n\t" |
4058 | : : "c"(vmx), "d"((unsigned long)HOST_RSP), | 4096 | : : "c"(vmx), "d"((unsigned long)HOST_RSP), |
4059 | [launched]"i"(offsetof(struct vcpu_vmx, launched)), | 4097 | [launched]"i"(offsetof(struct vcpu_vmx, launched)), |
@@ -4076,7 +4114,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
4076 | [r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])), | 4114 | [r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])), |
4077 | [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])), | 4115 | [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])), |
4078 | #endif | 4116 | #endif |
4079 | [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)) | 4117 | [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)), |
4118 | [wordsize]"i"(sizeof(ulong)) | ||
4080 | : "cc", "memory" | 4119 | : "cc", "memory" |
4081 | , R"ax", R"bx", R"di", R"si" | 4120 | , R"ax", R"bx", R"di", R"si" |
4082 | #ifdef CONFIG_X86_64 | 4121 | #ifdef CONFIG_X86_64 |
@@ -4183,8 +4222,11 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
4183 | if (!kvm->arch.ept_identity_map_addr) | 4222 | if (!kvm->arch.ept_identity_map_addr) |
4184 | kvm->arch.ept_identity_map_addr = | 4223 | kvm->arch.ept_identity_map_addr = |
4185 | VMX_EPT_IDENTITY_PAGETABLE_ADDR; | 4224 | VMX_EPT_IDENTITY_PAGETABLE_ADDR; |
4225 | err = -ENOMEM; | ||
4186 | if (alloc_identity_pagetable(kvm) != 0) | 4226 | if (alloc_identity_pagetable(kvm) != 0) |
4187 | goto free_vmcs; | 4227 | goto free_vmcs; |
4228 | if (!init_rmode_identity_map(kvm)) | ||
4229 | goto free_vmcs; | ||
4188 | } | 4230 | } |
4189 | 4231 | ||
4190 | return &vmx->vcpu; | 4232 | return &vmx->vcpu; |
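The `err = -ENOMEM;` added above alloc_identity_pagetable() is load-bearing: both new failure branches jump to the shared free_vmcs label, which returns err, so the code must be primed before the first goto. A self-contained illustration of the goto-cleanup shape (userspace stand-in, names invented):

#include <errno.h>
#include <stdlib.h>

struct vcpu_res { void *vmcs; void *idmap; };

static int vcpu_create(struct vcpu_res *v)
{
        int err = -ENOMEM;      /* primed once for every allocation below */

        v->vmcs = calloc(1, 64);
        if (!v->vmcs)
                goto out;

        v->idmap = calloc(1, 64);
        if (!v->idmap)
                goto free_vmcs; /* no per-site err assignment needed */

        return 0;

free_vmcs:
        free(v->vmcs);
out:
        return err;
}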
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bcc0efce85bf..58f517b59645 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -81,9 +81,10 @@ | |||
81 | * - enable LME and LMA per default on 64 bit KVM | 81 | * - enable LME and LMA per default on 64 bit KVM |
82 | */ | 82 | */ |
83 | #ifdef CONFIG_X86_64 | 83 | #ifdef CONFIG_X86_64 |
84 | static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffafeULL; | 84 | static |
85 | u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA)); | ||
85 | #else | 86 | #else |
86 | static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL; | 87 | static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); |
87 | #endif | 88 | #endif |
88 | 89 | ||
89 | #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM | 90 | #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM |
@@ -360,8 +361,8 @@ void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) | |||
360 | 361 | ||
361 | void kvm_inject_nmi(struct kvm_vcpu *vcpu) | 362 | void kvm_inject_nmi(struct kvm_vcpu *vcpu) |
362 | { | 363 | { |
364 | kvm_make_request(KVM_REQ_NMI, vcpu); | ||
363 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 365 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
364 | vcpu->arch.nmi_pending = 1; | ||
365 | } | 366 | } |
366 | EXPORT_SYMBOL_GPL(kvm_inject_nmi); | 367 | EXPORT_SYMBOL_GPL(kvm_inject_nmi); |
367 | 368 | ||
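kvm_inject_nmi() now raises KVM_REQ_NMI rather than storing to nmi_pending directly, folding NMI delivery into the same request-bit mechanism as KVM_REQ_EVENT; an atomic set on the producer side and test-and-clear on the vcpu loop side is what makes the request safe from any context. A self-contained C11 sketch of that pattern (bit names invented):

#include <stdatomic.h>
#include <stdbool.h>

enum { REQ_NMI, REQ_EVENT };

struct vcpu_req {
        atomic_ulong requests;
};

/* Producer side: may run on any thread. */
static void make_request(struct vcpu_req *v, int bit)
{
        atomic_fetch_or(&v->requests, 1UL << bit);
}

/* Consumer side: the vcpu loop tests and clears in one atomic step. */
static bool check_request(struct vcpu_req *v, int bit)
{
        unsigned long prev = atomic_fetch_and(&v->requests, ~(1UL << bit));

        return prev & (1UL << bit);
}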
@@ -525,8 +526,10 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
525 | 526 | ||
526 | kvm_x86_ops->set_cr0(vcpu, cr0); | 527 | kvm_x86_ops->set_cr0(vcpu, cr0); |
527 | 528 | ||
528 | if ((cr0 ^ old_cr0) & X86_CR0_PG) | 529 | if ((cr0 ^ old_cr0) & X86_CR0_PG) { |
529 | kvm_clear_async_pf_completion_queue(vcpu); | 530 | kvm_clear_async_pf_completion_queue(vcpu); |
531 | kvm_async_pf_hash_reset(vcpu); | ||
532 | } | ||
530 | 533 | ||
531 | if ((cr0 ^ old_cr0) & update_bits) | 534 | if ((cr0 ^ old_cr0) & update_bits) |
532 | kvm_mmu_reset_context(vcpu); | 535 | kvm_mmu_reset_context(vcpu); |
@@ -1017,7 +1020,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) | |||
1017 | unsigned long flags; | 1020 | unsigned long flags; |
1018 | s64 sdiff; | 1021 | s64 sdiff; |
1019 | 1022 | ||
1020 | spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); | 1023 | raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); |
1021 | offset = data - native_read_tsc(); | 1024 | offset = data - native_read_tsc(); |
1022 | ns = get_kernel_ns(); | 1025 | ns = get_kernel_ns(); |
1023 | elapsed = ns - kvm->arch.last_tsc_nsec; | 1026 | elapsed = ns - kvm->arch.last_tsc_nsec; |
@@ -1028,7 +1031,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) | |||
1028 | /* | 1031 | /* |
1029 | * Special case: close write to TSC within 5 seconds of | 1032 | * Special case: close write to TSC within 5 seconds of |
1030 | * another CPU is interpreted as an attempt to synchronize | 1033 | * another CPU is interpreted as an attempt to synchronize |
1031 | * The 5 seconds is to accomodate host load / swapping as | 1034 | * The 5 seconds is to accommodate host load / swapping as |
1032 | * well as any reset of TSC during the boot process. | 1035 | * well as any reset of TSC during the boot process. |
1033 | * | 1036 | * |
1034 | * In that case, for a reliable TSC, we can match TSC offsets, | 1037 | * In that case, for a reliable TSC, we can match TSC offsets, |
@@ -1050,7 +1053,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) | |||
1050 | kvm->arch.last_tsc_write = data; | 1053 | kvm->arch.last_tsc_write = data; |
1051 | kvm->arch.last_tsc_offset = offset; | 1054 | kvm->arch.last_tsc_offset = offset; |
1052 | kvm_x86_ops->write_tsc_offset(vcpu, offset); | 1055 | kvm_x86_ops->write_tsc_offset(vcpu, offset); |
1053 | spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); | 1056 | raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); |
1054 | 1057 | ||
1055 | /* Reset of TSC must disable overshoot protection below */ | 1058 | /* Reset of TSC must disable overshoot protection below */ |
1056 | vcpu->arch.hv_clock.tsc_timestamp = 0; | 1059 | vcpu->arch.hv_clock.tsc_timestamp = 0; |
@@ -1453,6 +1456,14 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) | |||
1453 | return 0; | 1456 | return 0; |
1454 | } | 1457 | } |
1455 | 1458 | ||
1459 | static void kvmclock_reset(struct kvm_vcpu *vcpu) | ||
1460 | { | ||
1461 | if (vcpu->arch.time_page) { | ||
1462 | kvm_release_page_dirty(vcpu->arch.time_page); | ||
1463 | vcpu->arch.time_page = NULL; | ||
1464 | } | ||
1465 | } | ||
1466 | |||
1456 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | 1467 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) |
1457 | { | 1468 | { |
1458 | switch (msr) { | 1469 | switch (msr) { |
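kvmclock_reset() factors the release-page-and-clear-pointer pair out of the MSR_KVM_SYSTEM_TIME path so that later callers (vcpu teardown, for instance) drop the time page identically; clearing the pointer is what makes the helper safe to call more than once. The idiom in miniature (userspace stand-in):

#include <stdlib.h>

struct clock_state {
        void *time_page;        /* NULL when no guest clock page is mapped */
};

/* Idempotent: callers need not check whether a page was mapped. */
static void clock_reset(struct clock_state *c)
{
        free(c->time_page);     /* free(NULL) is a defined no-op */
        c->time_page = NULL;    /* guards against double release */
}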
@@ -1510,10 +1521,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1510 | break; | 1521 | break; |
1511 | case MSR_KVM_SYSTEM_TIME_NEW: | 1522 | case MSR_KVM_SYSTEM_TIME_NEW: |
1512 | case MSR_KVM_SYSTEM_TIME: { | 1523 | case MSR_KVM_SYSTEM_TIME: { |
1513 | if (vcpu->arch.time_page) { | 1524 | kvmclock_reset(vcpu); |
1514 | kvm_release_page_dirty(vcpu->arch.time_page); | ||
1515 | vcpu->arch.time_page = NULL; | ||
1516 | } | ||
1517 | 1525 | ||
1518 | vcpu->arch.time = data; | 1526 | vcpu->arch.time = data; |
1519 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); | 1527 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); |
@@ -1592,6 +1600,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1592 | } else | 1600 | } else |
1593 | return set_msr_hyperv(vcpu, msr, data); | 1601 | return set_msr_hyperv(vcpu, msr, data); |
1594 | break; | 1602 | break; |
1603 | case MSR_IA32_BBL_CR_CTL3: | ||
1604 | /* Drop writes to this legacy MSR -- see rdmsr | ||
1605 | * counterpart for further detail. | ||
1606 | */ | ||
1607 | pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data); | ||
1608 | break; | ||
1595 | default: | 1609 | default: |
1596 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) | 1610 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) |
1597 | return xen_hvm_config(vcpu, data); | 1611 | return xen_hvm_config(vcpu, data); |
@@ -1846,6 +1860,19 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1846 | } else | 1860 | } else |
1847 | return get_msr_hyperv(vcpu, msr, pdata); | 1861 | return get_msr_hyperv(vcpu, msr, pdata); |
1848 | break; | 1862 | break; |
1863 | case MSR_IA32_BBL_CR_CTL3: | ||
1864 | /* This legacy MSR exists but isn't fully documented in current | ||
1865 | * silicon. It is however accessed by winxp in very narrow | ||
1866 | * scenarios where it sets bit #19, itself documented as | ||
1867 | * a "reserved" bit. Best effort attempt to source coherent | ||
1868 | * read data here should the balance of the register be | ||
1869 | * interpreted by the guest: | ||
1870 | * | ||
1871 | * L2 cache control register 3: 64GB range, 256KB size, | ||
1872 | * enabled, latency 0x1, configured | ||
1873 | */ | ||
1874 | data = 0xbe702111; | ||
1875 | break; | ||
1849 | default: | 1876 | default: |
1850 | if (!ignore_msrs) { | 1877 | if (!ignore_msrs) { |
1851 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); | 1878 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); |
@@ -2100,8 +2127,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
2100 | if (check_tsc_unstable()) { | 2127 | if (check_tsc_unstable()) { |
2101 | kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta); | 2128 | kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta); |
2102 | vcpu->arch.tsc_catchup = 1; | 2129 | vcpu->arch.tsc_catchup = 1; |
2103 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); | ||
2104 | } | 2130 | } |
2131 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); | ||
2105 | if (vcpu->cpu != cpu) | 2132 | if (vcpu->cpu != cpu) |
2106 | kvm_migrate_timers(vcpu); | 2133 | kvm_migrate_timers(vcpu); |
2107 | vcpu->cpu = cpu; | 2134 | vcpu->cpu = cpu; |
@@ -2575,9 +2602,6 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, | |||
2575 | if (mce->status & MCI_STATUS_UC) { | 2602 | if (mce->status & MCI_STATUS_UC) { |
2576 | if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) || | 2603 | if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) || |
2577 | !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) { | 2604 | !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) { |
2578 | printk(KERN_DEBUG "kvm: set_mce: " | ||
2579 | "injects mce exception while " | ||
2580 | "previous one is in progress!\n"); | ||
2581 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); | 2605 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); |
2582 | return 0; | 2606 | return 0; |
2583 | } | 2607 | } |
@@ -2648,8 +2672,6 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2648 | vcpu->arch.interrupt.pending = events->interrupt.injected; | 2672 | vcpu->arch.interrupt.pending = events->interrupt.injected; |
2649 | vcpu->arch.interrupt.nr = events->interrupt.nr; | 2673 | vcpu->arch.interrupt.nr = events->interrupt.nr; |
2650 | vcpu->arch.interrupt.soft = events->interrupt.soft; | 2674 | vcpu->arch.interrupt.soft = events->interrupt.soft; |
2651 | if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) | ||
2652 | kvm_pic_clear_isr_ack(vcpu->kvm); | ||
2653 | if (events->flags & KVM_VCPUEVENT_VALID_SHADOW) | 2675 | if (events->flags & KVM_VCPUEVENT_VALID_SHADOW) |
2654 | kvm_x86_ops->set_interrupt_shadow(vcpu, | 2676 | kvm_x86_ops->set_interrupt_shadow(vcpu, |
2655 | events->interrupt.shadow); | 2677 | events->interrupt.shadow); |
@@ -4140,8 +4162,8 @@ static unsigned long emulator_get_cached_segment_base(int seg, | |||
4140 | return get_segment_base(vcpu, seg); | 4162 | return get_segment_base(vcpu, seg); |
4141 | } | 4163 | } |
4142 | 4164 | ||
4143 | static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, | 4165 | static bool emulator_get_cached_descriptor(struct desc_struct *desc, u32 *base3, |
4144 | struct kvm_vcpu *vcpu) | 4166 | int seg, struct kvm_vcpu *vcpu) |
4145 | { | 4167 | { |
4146 | struct kvm_segment var; | 4168 | struct kvm_segment var; |
4147 | 4169 | ||
@@ -4154,6 +4176,10 @@ static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, | |||
4154 | var.limit >>= 12; | 4176 | var.limit >>= 12; |
4155 | set_desc_limit(desc, var.limit); | 4177 | set_desc_limit(desc, var.limit); |
4156 | set_desc_base(desc, (unsigned long)var.base); | 4178 | set_desc_base(desc, (unsigned long)var.base); |
4179 | #ifdef CONFIG_X86_64 | ||
4180 | if (base3) | ||
4181 | *base3 = var.base >> 32; | ||
4182 | #endif | ||
4157 | desc->type = var.type; | 4183 | desc->type = var.type; |
4158 | desc->s = var.s; | 4184 | desc->s = var.s; |
4159 | desc->dpl = var.dpl; | 4185 | desc->dpl = var.dpl; |
@@ -4166,8 +4192,8 @@ static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, | |||
4166 | return true; | 4192 | return true; |
4167 | } | 4193 | } |
4168 | 4194 | ||
4169 | static void emulator_set_cached_descriptor(struct desc_struct *desc, int seg, | 4195 | static void emulator_set_cached_descriptor(struct desc_struct *desc, u32 base3, |
4170 | struct kvm_vcpu *vcpu) | 4196 | int seg, struct kvm_vcpu *vcpu) |
4171 | { | 4197 | { |
4172 | struct kvm_segment var; | 4198 | struct kvm_segment var; |
4173 | 4199 | ||
@@ -4175,6 +4201,9 @@ static void emulator_set_cached_descriptor(struct desc_struct *desc, int seg, | |||
4175 | kvm_get_segment(vcpu, &var, seg); | 4201 | kvm_get_segment(vcpu, &var, seg); |
4176 | 4202 | ||
4177 | var.base = get_desc_base(desc); | 4203 | var.base = get_desc_base(desc); |
4204 | #ifdef CONFIG_X86_64 | ||
4205 | var.base |= ((u64)base3) << 32; | ||
4206 | #endif | ||
4178 | var.limit = get_desc_limit(desc); | 4207 | var.limit = get_desc_limit(desc); |
4179 | if (desc->g) | 4208 | if (desc->g) |
4180 | var.limit = (var.limit << 12) | 0xfff; | 4209 | var.limit = (var.limit << 12) | 0xfff; |
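
The two hunks above widen the emulator's cached-descriptor hooks with a base3 word, because in 64-bit mode the TSS/LDT descriptors carry a 64-bit base while struct desc_struct only holds the low 32 bits. A minimal user-space sketch of the split/recombine arithmetic:

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t base = 0x0000123456789abcULL;	/* example 64-bit segment base */

	/* split, as emulator_get_cached_descriptor() now does on x86-64 */
	uint32_t desc_base = (uint32_t)base;		/* low 32 bits stay in the descriptor */
	uint32_t base3     = (uint32_t)(base >> 32);	/* high 32 bits ride in *base3 */

	/* recombine, as emulator_set_cached_descriptor() does */
	uint64_t rebuilt = (uint64_t)desc_base | ((uint64_t)base3 << 32);

	assert(rebuilt == base);
	printf("base=%#" PRIx64 " rebuilt=%#" PRIx64 "\n", base, rebuilt);
	return 0;
}
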
@@ -4390,41 +4419,16 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
4390 | vcpu->arch.emulate_ctxt.have_exception = false; | 4419 | vcpu->arch.emulate_ctxt.have_exception = false; |
4391 | vcpu->arch.emulate_ctxt.perm_ok = false; | 4420 | vcpu->arch.emulate_ctxt.perm_ok = false; |
4392 | 4421 | ||
4422 | vcpu->arch.emulate_ctxt.only_vendor_specific_insn | ||
4423 | = emulation_type & EMULTYPE_TRAP_UD; | ||
4424 | |||
4393 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, insn, insn_len); | 4425 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, insn, insn_len); |
4394 | if (r == X86EMUL_PROPAGATE_FAULT) | ||
4395 | goto done; | ||
4396 | 4426 | ||
4397 | trace_kvm_emulate_insn_start(vcpu); | 4427 | trace_kvm_emulate_insn_start(vcpu); |
4398 | |||
4399 | /* Only allow emulation of specific instructions on #UD | ||
4400 | * (namely VMMCALL, sysenter, sysexit, syscall)*/ | ||
4401 | if (emulation_type & EMULTYPE_TRAP_UD) { | ||
4402 | if (!c->twobyte) | ||
4403 | return EMULATE_FAIL; | ||
4404 | switch (c->b) { | ||
4405 | case 0x01: /* VMMCALL */ | ||
4406 | if (c->modrm_mod != 3 || c->modrm_rm != 1) | ||
4407 | return EMULATE_FAIL; | ||
4408 | break; | ||
4409 | case 0x34: /* sysenter */ | ||
4410 | case 0x35: /* sysexit */ | ||
4411 | if (c->modrm_mod != 0 || c->modrm_rm != 0) | ||
4412 | return EMULATE_FAIL; | ||
4413 | break; | ||
4414 | case 0x05: /* syscall */ | ||
4415 | if (c->modrm_mod != 0 || c->modrm_rm != 0) | ||
4416 | return EMULATE_FAIL; | ||
4417 | break; | ||
4418 | default: | ||
4419 | return EMULATE_FAIL; | ||
4420 | } | ||
4421 | |||
4422 | if (!(c->modrm_reg == 0 || c->modrm_reg == 3)) | ||
4423 | return EMULATE_FAIL; | ||
4424 | } | ||
4425 | |||
4426 | ++vcpu->stat.insn_emulation; | 4428 | ++vcpu->stat.insn_emulation; |
4427 | if (r) { | 4429 | if (r) { |
4430 | if (emulation_type & EMULTYPE_TRAP_UD) | ||
4431 | return EMULATE_FAIL; | ||
4428 | if (reexecute_instruction(vcpu, cr2)) | 4432 | if (reexecute_instruction(vcpu, cr2)) |
4429 | return EMULATE_DONE; | 4433 | return EMULATE_DONE; |
4430 | if (emulation_type & EMULTYPE_SKIP) | 4434 | if (emulation_type & EMULTYPE_SKIP) |
@@ -4452,7 +4456,6 @@ restart: | |||
4452 | return handle_emulation_failure(vcpu); | 4456 | return handle_emulation_failure(vcpu); |
4453 | } | 4457 | } |
4454 | 4458 | ||
4455 | done: | ||
4456 | if (vcpu->arch.emulate_ctxt.have_exception) { | 4459 | if (vcpu->arch.emulate_ctxt.have_exception) { |
4457 | inject_emulated_exception(vcpu); | 4460 | inject_emulated_exception(vcpu); |
4458 | r = EMULATE_DONE; | 4461 | r = EMULATE_DONE; |
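
The hunk above deletes the open-coded #UD whitelist (VMMCALL, SYSENTER/SYSEXIT, SYSCALL) from x86_emulate_instruction() and instead feeds an only_vendor_specific_insn flag to the decoder; a decode failure under EMULTYPE_TRAP_UD now simply returns EMULATE_FAIL. A toy model of that control flow, with a hypothetical opcode table standing in for the real decoder:

#include <stdbool.h>
#include <stdio.h>

/* hypothetical whitelist; the real check lives in x86_decode_insn() */
static bool is_vendor_specific(unsigned int opcode)
{
	switch (opcode) {
	case 0x0f01:	/* VMMCALL family */
	case 0x0f34:	/* SYSENTER */
	case 0x0f35:	/* SYSEXIT */
	case 0x0f05:	/* SYSCALL */
		return true;
	default:
		return false;
	}
}

static int decode_insn(unsigned int opcode, bool only_vendor_specific)
{
	if (only_vendor_specific && !is_vendor_specific(opcode))
		return -1;	/* decode failure */
	return 0;
}

int main(void)
{
	/* caller: on #UD, set the flag and fail emulation if decode fails */
	bool trap_ud = true;

	if (decode_insn(0x0f05, trap_ud) == 0)
		puts("SYSCALL: emulate");
	if (decode_insn(0x90 /* NOP */, trap_ud) != 0)
		puts("NOP under #UD: EMULATE_FAIL");
	return 0;
}
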
@@ -4562,7 +4565,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va | |||
4562 | 4565 | ||
4563 | smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); | 4566 | smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); |
4564 | 4567 | ||
4565 | spin_lock(&kvm_lock); | 4568 | raw_spin_lock(&kvm_lock); |
4566 | list_for_each_entry(kvm, &vm_list, vm_list) { | 4569 | list_for_each_entry(kvm, &vm_list, vm_list) { |
4567 | kvm_for_each_vcpu(i, vcpu, kvm) { | 4570 | kvm_for_each_vcpu(i, vcpu, kvm) { |
4568 | if (vcpu->cpu != freq->cpu) | 4571 | if (vcpu->cpu != freq->cpu) |
@@ -4572,7 +4575,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va | |||
4572 | send_ipi = 1; | 4575 | send_ipi = 1; |
4573 | } | 4576 | } |
4574 | } | 4577 | } |
4575 | spin_unlock(&kvm_lock); | 4578 | raw_spin_unlock(&kvm_lock); |
4576 | 4579 | ||
4577 | if (freq->old < freq->new && send_ipi) { | 4580 | if (freq->old < freq->new && send_ipi) { |
4578 | /* | 4581 | /* |
@@ -5185,6 +5188,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5185 | r = 1; | 5188 | r = 1; |
5186 | goto out; | 5189 | goto out; |
5187 | } | 5190 | } |
5191 | if (kvm_check_request(KVM_REQ_NMI, vcpu)) | ||
5192 | vcpu->arch.nmi_pending = true; | ||
5188 | } | 5193 | } |
5189 | 5194 | ||
5190 | r = kvm_mmu_reload(vcpu); | 5195 | r = kvm_mmu_reload(vcpu); |
@@ -5213,14 +5218,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5213 | kvm_load_guest_fpu(vcpu); | 5218 | kvm_load_guest_fpu(vcpu); |
5214 | kvm_load_guest_xcr0(vcpu); | 5219 | kvm_load_guest_xcr0(vcpu); |
5215 | 5220 | ||
5216 | atomic_set(&vcpu->guest_mode, 1); | 5221 | vcpu->mode = IN_GUEST_MODE; |
5217 | smp_wmb(); | 5222 | |
5223 | /* We should set ->mode before checking ->requests, | ||
5224 | * see the comment in make_all_cpus_request. | ||
5225 | */ | ||
5226 | smp_mb(); | ||
5218 | 5227 | ||
5219 | local_irq_disable(); | 5228 | local_irq_disable(); |
5220 | 5229 | ||
5221 | if (!atomic_read(&vcpu->guest_mode) || vcpu->requests | 5230 | if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests |
5222 | || need_resched() || signal_pending(current)) { | 5231 | || need_resched() || signal_pending(current)) { |
5223 | atomic_set(&vcpu->guest_mode, 0); | 5232 | vcpu->mode = OUTSIDE_GUEST_MODE; |
5224 | smp_wmb(); | 5233 | smp_wmb(); |
5225 | local_irq_enable(); | 5234 | local_irq_enable(); |
5226 | preempt_enable(); | 5235 | preempt_enable(); |
@@ -5256,7 +5265,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5256 | 5265 | ||
5257 | kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc); | 5266 | kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc); |
5258 | 5267 | ||
5259 | atomic_set(&vcpu->guest_mode, 0); | 5268 | vcpu->mode = OUTSIDE_GUEST_MODE; |
5260 | smp_wmb(); | 5269 | smp_wmb(); |
5261 | local_irq_enable(); | 5270 | local_irq_enable(); |
5262 | 5271 | ||
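
The conversion above replaces the atomic guest_mode flag with a vcpu->mode enum and upgrades the barrier after the IN_GUEST_MODE store from smp_wmb() to smp_mb(): the store to ->mode must be ordered against the later load of ->requests, and a write barrier does not order stores against loads. A compressed sketch of the pairing, with C11 fences standing in for the kernel barriers (sketch only; the real code runs these paths on different CPUs):

#include <stdatomic.h>
#include <stdio.h>

enum vcpu_mode { OUTSIDE_GUEST_MODE, IN_GUEST_MODE, EXITING_GUEST_MODE };

static atomic_int mode = OUTSIDE_GUEST_MODE;
static atomic_ulong requests;

/* vcpu entry path: publish IN_GUEST_MODE, then look for pending work */
static int enter_guest(void)
{
	atomic_store_explicit(&mode, IN_GUEST_MODE, memory_order_relaxed);
	/* full barrier, like the smp_mb() above: the ->mode store must be
	 * ordered before the ->requests load; a store-store barrier alone
	 * is insufficient here */
	atomic_thread_fence(memory_order_seq_cst);
	return atomic_load_explicit(&requests, memory_order_relaxed) == 0;
}

/* requester path, mirroring make_all_cpus_request(): set the bit, then
 * check whether the target is in guest mode and needs an IPI */
static int request_needs_ipi(void)
{
	atomic_fetch_or_explicit(&requests, 1UL, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);
	return atomic_load_explicit(&mode, memory_order_relaxed) == IN_GUEST_MODE;
}

int main(void)
{
	/* run sequentially just to exercise both paths */
	printf("enter ok: %d\n", enter_guest());
	printf("needs ipi: %d\n", request_needs_ipi());
	return 0;
}
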
@@ -5574,7 +5583,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
5574 | struct kvm_sregs *sregs) | 5583 | struct kvm_sregs *sregs) |
5575 | { | 5584 | { |
5576 | int mmu_reset_needed = 0; | 5585 | int mmu_reset_needed = 0; |
5577 | int pending_vec, max_bits; | 5586 | int pending_vec, max_bits, idx; |
5578 | struct desc_ptr dt; | 5587 | struct desc_ptr dt; |
5579 | 5588 | ||
5580 | dt.size = sregs->idt.limit; | 5589 | dt.size = sregs->idt.limit; |
@@ -5603,10 +5612,13 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
5603 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); | 5612 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); |
5604 | if (sregs->cr4 & X86_CR4_OSXSAVE) | 5613 | if (sregs->cr4 & X86_CR4_OSXSAVE) |
5605 | update_cpuid(vcpu); | 5614 | update_cpuid(vcpu); |
5615 | |||
5616 | idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
5606 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { | 5617 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { |
5607 | load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); | 5618 | load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); |
5608 | mmu_reset_needed = 1; | 5619 | mmu_reset_needed = 1; |
5609 | } | 5620 | } |
5621 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | ||
5610 | 5622 | ||
5611 | if (mmu_reset_needed) | 5623 | if (mmu_reset_needed) |
5612 | kvm_mmu_reset_context(vcpu); | 5624 | kvm_mmu_reset_context(vcpu); |
@@ -5617,8 +5629,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
5617 | if (pending_vec < max_bits) { | 5629 | if (pending_vec < max_bits) { |
5618 | kvm_queue_interrupt(vcpu, pending_vec, false); | 5630 | kvm_queue_interrupt(vcpu, pending_vec, false); |
5619 | pr_debug("Set back pending irq %d\n", pending_vec); | 5631 | pr_debug("Set back pending irq %d\n", pending_vec); |
5620 | if (irqchip_in_kernel(vcpu->kvm)) | ||
5621 | kvm_pic_clear_isr_ack(vcpu->kvm); | ||
5622 | } | 5632 | } |
5623 | 5633 | ||
5624 | kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); | 5634 | kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); |
@@ -5814,10 +5824,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) | |||
5814 | 5824 | ||
5815 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) | 5825 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) |
5816 | { | 5826 | { |
5817 | if (vcpu->arch.time_page) { | 5827 | kvmclock_reset(vcpu); |
5818 | kvm_release_page_dirty(vcpu->arch.time_page); | ||
5819 | vcpu->arch.time_page = NULL; | ||
5820 | } | ||
5821 | 5828 | ||
5822 | free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); | 5829 | free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); |
5823 | fx_free(vcpu); | 5830 | fx_free(vcpu); |
@@ -5878,6 +5885,8 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) | |||
5878 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 5885 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
5879 | vcpu->arch.apf.msr_val = 0; | 5886 | vcpu->arch.apf.msr_val = 0; |
5880 | 5887 | ||
5888 | kvmclock_reset(vcpu); | ||
5889 | |||
5881 | kvm_clear_async_pf_completion_queue(vcpu); | 5890 | kvm_clear_async_pf_completion_queue(vcpu); |
5882 | kvm_async_pf_hash_reset(vcpu); | 5891 | kvm_async_pf_hash_reset(vcpu); |
5883 | vcpu->arch.apf.halted = false; | 5892 | vcpu->arch.apf.halted = false; |
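
Both this hunk and the vcpu-free hunk above now call kvmclock_reset() instead of open-coding the time-page release. A user-space model of that refactor, with stand-in types for the kernel's page handling; the helper body is inferred from the lines removed from kvm_arch_vcpu_free():

#include <stdio.h>

/* stand-in types; the kernel operates on struct kvm_vcpu and real pages */
struct page { int dirty; };
struct vcpu { struct page *time_page; };

static void release_page_dirty(struct page *p)
{
	p->dirty = 1;	/* models kvm_release_page_dirty() */
}

/* helper body inferred from the lines removed above */
static void kvmclock_reset(struct vcpu *vcpu)
{
	if (vcpu->time_page) {
		release_page_dirty(vcpu->time_page);
		vcpu->time_page = NULL;
	}
}

int main(void)
{
	struct page pg = { 0 };
	struct vcpu v = { .time_page = &pg };

	kvmclock_reset(&v);	/* shared by the free and reset paths */
	kvmclock_reset(&v);	/* idempotent: second call is a no-op */
	printf("time_page=%p dirty=%d\n", (void *)v.time_page, pg.dirty);
	return 0;
}
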
@@ -6005,7 +6014,7 @@ int kvm_arch_init_vm(struct kvm *kvm) | |||
6005 | /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ | 6014 | /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ |
6006 | set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); | 6015 | set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); |
6007 | 6016 | ||
6008 | spin_lock_init(&kvm->arch.tsc_write_lock); | 6017 | raw_spin_lock_init(&kvm->arch.tsc_write_lock); |
6009 | 6018 | ||
6010 | return 0; | 6019 | return 0; |
6011 | } | 6020 | } |
@@ -6103,7 +6112,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
6103 | int user_alloc) | 6112 | int user_alloc) |
6104 | { | 6113 | { |
6105 | 6114 | ||
6106 | int npages = mem->memory_size >> PAGE_SHIFT; | 6115 | int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; |
6107 | 6116 | ||
6108 | if (!user_alloc && !old.user_alloc && old.rmap && !npages) { | 6117 | if (!user_alloc && !old.user_alloc && old.rmap && !npages) { |
6109 | int ret; | 6118 | int ret; |
@@ -6118,12 +6127,12 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
6118 | "failed to munmap memory\n"); | 6127 | "failed to munmap memory\n"); |
6119 | } | 6128 | } |
6120 | 6129 | ||
6130 | if (!kvm->arch.n_requested_mmu_pages) | ||
6131 | nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); | ||
6132 | |||
6121 | spin_lock(&kvm->mmu_lock); | 6133 | spin_lock(&kvm->mmu_lock); |
6122 | if (!kvm->arch.n_requested_mmu_pages) { | 6134 | if (nr_mmu_pages) |
6123 | unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); | ||
6124 | kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); | 6135 | kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); |
6125 | } | ||
6126 | |||
6127 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); | 6136 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); |
6128 | spin_unlock(&kvm->mmu_lock); | 6137 | spin_unlock(&kvm->mmu_lock); |
6129 | } | 6138 | } |
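
The kvm_arch_commit_memory_region() hunk above hoists kvm_mmu_calculate_mmu_pages() out of the mmu_lock critical section, leaving only the cheap publish under the lock. A generic sketch of that pattern, using a pthread mutex as a stand-in for the kernel spinlock:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t mmu_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int mmu_page_limit;

/* stand-in for kvm_mmu_calculate_mmu_pages(): potentially expensive,
 * so it should not run with the lock held */
static unsigned int calculate_mmu_pages(void)
{
	return 4096;
}

int main(void)
{
	unsigned int nr_mmu_pages = 0;
	int n_requested_mmu_pages = 0;	/* models kvm->arch.n_requested_mmu_pages */

	if (!n_requested_mmu_pages)
		nr_mmu_pages = calculate_mmu_pages();	/* outside the lock */

	pthread_mutex_lock(&mmu_lock);
	if (nr_mmu_pages)
		mmu_page_limit = nr_mmu_pages;		/* cheap publish */
	pthread_mutex_unlock(&mmu_lock);

	printf("limit=%u\n", mmu_page_limit);
	return 0;
}
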
@@ -6157,7 +6166,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) | |||
6157 | 6166 | ||
6158 | me = get_cpu(); | 6167 | me = get_cpu(); |
6159 | if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) | 6168 | if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) |
6160 | if (atomic_xchg(&vcpu->guest_mode, 0)) | 6169 | if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE) |
6161 | smp_send_reschedule(cpu); | 6170 | smp_send_reschedule(cpu); |
6162 | put_cpu(); | 6171 | put_cpu(); |
6163 | } | 6172 | } |
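
kvm_vcpu_kick() now tests kvm_vcpu_exiting_guest_mode(); judging by the name and the atomic_xchg() it replaces, the helper flips IN_GUEST_MODE to EXITING_GUEST_MODE and returns the previous mode, so only the first kicker of a vcpu that is really in guest mode sends the IPI. A user-space sketch of that assumption with a C11 compare-exchange:

#include <stdatomic.h>
#include <stdio.h>

enum vcpu_mode { OUTSIDE_GUEST_MODE, IN_GUEST_MODE, EXITING_GUEST_MODE };

static atomic_int mode = IN_GUEST_MODE;

/* assumed shape of kvm_vcpu_exiting_guest_mode(): flip IN_GUEST_MODE to
 * EXITING_GUEST_MODE and hand back the mode seen beforehand */
static int exiting_guest_mode(void)
{
	int expected = IN_GUEST_MODE;

	if (atomic_compare_exchange_strong(&mode, &expected,
					   EXITING_GUEST_MODE))
		return IN_GUEST_MODE;
	return expected;	/* mode unchanged; caller skips the IPI */
}

int main(void)
{
	if (exiting_guest_mode() == IN_GUEST_MODE)
		puts("send reschedule IPI");	 /* first kicker wins */
	if (exiting_guest_mode() != IN_GUEST_MODE)
		puts("already exiting, no IPI"); /* later kickers do not */
	return 0;
}
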
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index b9ec1c74943c..1cd608973ce5 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -397,7 +397,7 @@ static void lguest_load_tr_desc(void) | |||
397 | * instead we just use the real "cpuid" instruction. Then I pretty much turned | 397 | * instead we just use the real "cpuid" instruction. Then I pretty much turned |
398 | * off feature bits until the Guest booted. (Don't say that: you'll damage | 398 | * off feature bits until the Guest booted. (Don't say that: you'll damage |
399 | * lguest sales!) Shut up, inner voice! (Hey, just pointing out that this is | 399 | * lguest sales!) Shut up, inner voice! (Hey, just pointing out that this is |
400 | * hardly future proof.) Noone's listening! They don't like you anyway, | 400 | * hardly future proof.) No one's listening! They don't like you anyway, |
401 | * parenthetic weirdo! | 401 | * parenthetic weirdo! |
402 | * | 402 | * |
403 | * Replacing the cpuid so we can turn features off is great for the kernel, but | 403 | * Replacing the cpuid so we can turn features off is great for the kernel, but |
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index e10cf070ede0..f2479f19ddde 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
@@ -42,4 +42,5 @@ else | |||
42 | lib-y += memmove_64.o memset_64.o | 42 | lib-y += memmove_64.o memset_64.o |
43 | lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o | 43 | lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o |
44 | lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o | 44 | lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o |
45 | lib-y += cmpxchg16b_emu.o | ||
45 | endif | 46 | endif |
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S new file mode 100644 index 000000000000..3e8b08a6de2b --- /dev/null +++ b/arch/x86/lib/cmpxchg16b_emu.S | |||
@@ -0,0 +1,59 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or | ||
3 | * modify it under the terms of the GNU General Public License | ||
4 | * as published by the Free Software Foundation; version 2 | ||
5 | * of the License. | ||
6 | * | ||
7 | */ | ||
8 | #include <linux/linkage.h> | ||
9 | #include <asm/alternative-asm.h> | ||
10 | #include <asm/frame.h> | ||
11 | #include <asm/dwarf2.h> | ||
12 | |||
13 | .text | ||
14 | |||
15 | /* | ||
16 | * Inputs: | ||
17 | * %rsi : memory location to compare | ||
18 | * %rax : low 64 bits of old value | ||
19 | * %rdx : high 64 bits of old value | ||
20 | * %rbx : low 64 bits of new value | ||
21 | * %rcx : high 64 bits of new value | ||
22 | * %al : Operation successful | ||
23 | */ | ||
24 | ENTRY(this_cpu_cmpxchg16b_emu) | ||
25 | CFI_STARTPROC | ||
26 | |||
27 | # | ||
28 | # Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not | ||
29 | # via the ZF. Caller will access %al to get result. | ||
30 | # | ||
31 | # Note that this is only useful for a cpuops operation, meaning that we | ||
32 | # do *not* have a fully atomic operation but just an operation that is | ||
33 | # *atomic* on a single cpu (as provided by the this_cpu_xx class of | ||
34 | # macros). | ||
35 | # | ||
36 | this_cpu_cmpxchg16b_emu: | ||
37 | pushf | ||
38 | cli | ||
39 | |||
40 | cmpq %gs:(%rsi), %rax | ||
41 | jne not_same | ||
42 | cmpq %gs:8(%rsi), %rdx | ||
43 | jne not_same | ||
44 | |||
45 | movq %rbx, %gs:(%rsi) | ||
46 | movq %rcx, %gs:8(%rsi) | ||
47 | |||
48 | popf | ||
49 | mov $1, %al | ||
50 | ret | ||
51 | |||
52 | not_same: | ||
53 | popf | ||
54 | xor %al,%al | ||
55 | ret | ||
56 | |||
57 | CFI_ENDPROC | ||
58 | |||
59 | ENDPROC(this_cpu_cmpxchg16b_emu) | ||
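
For reference, a user-space C model of the fallback's compare-and-store logic. The real routine is atomic only per CPU: pushf/cli masks local interrupts around the window, and the this_cpu_* caller is assumed to keep the task on one CPU; none of that is modeled here.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* halves correspond to %rax/%rdx (old) and %rbx/%rcx (new) above */
struct u128 { uint64_t lo, hi; };

/* mirrors the assembly: compare both 8-byte halves against *mem, store
 * newv on a full match, and report success in the return value (the
 * asm hands it back in %al instead of via ZF) */
static bool cmpxchg16b_emu(struct u128 *mem, struct u128 old, struct u128 newv)
{
	if (mem->lo != old.lo || mem->hi != old.hi)
		return false;			/* the "not_same" path */
	*mem = newv;
	return true;
}

int main(void)
{
	struct u128 slot = { 1, 2 };

	printf("swap ok: %d\n", cmpxchg16b_emu(&slot, (struct u128){ 1, 2 },
					       (struct u128){ 3, 4 }));
	printf("slot now: { %llu, %llu }\n",
	       (unsigned long long)slot.lo, (unsigned long long)slot.hi);
	return 0;
}
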
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index a460158b5ac5..99e482615195 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S | |||
@@ -117,7 +117,7 @@ ENDPROC(bad_from_user) | |||
117 | * rdx count | 117 | * rdx count |
118 | * | 118 | * |
119 | * Output: | 119 | * Output: |
120 | * eax uncopied bytes or 0 if successfull. | 120 | * eax uncopied bytes or 0 if successful. |
121 | */ | 121 | */ |
122 | ENTRY(copy_user_generic_unrolled) | 122 | ENTRY(copy_user_generic_unrolled) |
123 | CFI_STARTPROC | 123 | CFI_STARTPROC |
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S index f0dba36578ea..fb903b758da8 100644 --- a/arch/x86/lib/csum-copy_64.S +++ b/arch/x86/lib/csum-copy_64.S | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright 2002,2003 Andi Kleen, SuSE Labs. | 2 | * Copyright 2002, 2003 Andi Kleen, SuSE Labs. |
3 | * | 3 | * |
4 | * This file is subject to the terms and conditions of the GNU General Public | 4 | * This file is subject to the terms and conditions of the GNU General Public |
5 | * License. See the file COPYING in the main directory of this archive | 5 | * License. See the file COPYING in the main directory of this archive |
6 | * for more details. No warranty for anything given at all. | 6 | * for more details. No warranty for anything given at all. |
@@ -11,82 +11,82 @@ | |||
11 | 11 | ||
12 | /* | 12 | /* |
13 | * Checksum copy with exception handling. | 13 | * Checksum copy with exception handling. |
14 | * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the | 14 | * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the |
15 | * destination is zeroed. | 15 | * destination is zeroed. |
16 | * | 16 | * |
17 | * Input | 17 | * Input |
18 | * rdi source | 18 | * rdi source |
19 | * rsi destination | 19 | * rsi destination |
20 | * edx len (32bit) | 20 | * edx len (32bit) |
21 | * ecx sum (32bit) | 21 | * ecx sum (32bit) |
22 | * r8 src_err_ptr (int) | 22 | * r8 src_err_ptr (int) |
23 | * r9 dst_err_ptr (int) | 23 | * r9 dst_err_ptr (int) |
24 | * | 24 | * |
25 | * Output | 25 | * Output |
26 | * eax 64bit sum. undefined in case of exception. | 26 | * eax 64bit sum. undefined in case of exception. |
27 | * | 27 | * |
28 | * Wrappers need to take care of valid exception sum and zeroing. | 28 | * Wrappers need to take care of valid exception sum and zeroing. |
29 | * They also should align source or destination to 8 bytes. | 29 | * They also should align source or destination to 8 bytes. |
30 | */ | 30 | */ |
31 | 31 | ||
32 | .macro source | 32 | .macro source |
33 | 10: | 33 | 10: |
34 | .section __ex_table,"a" | 34 | .section __ex_table, "a" |
35 | .align 8 | 35 | .align 8 |
36 | .quad 10b,.Lbad_source | 36 | .quad 10b, .Lbad_source |
37 | .previous | 37 | .previous |
38 | .endm | 38 | .endm |
39 | 39 | ||
40 | .macro dest | 40 | .macro dest |
41 | 20: | 41 | 20: |
42 | .section __ex_table,"a" | 42 | .section __ex_table, "a" |
43 | .align 8 | 43 | .align 8 |
44 | .quad 20b,.Lbad_dest | 44 | .quad 20b, .Lbad_dest |
45 | .previous | 45 | .previous |
46 | .endm | 46 | .endm |
47 | 47 | ||
48 | .macro ignore L=.Lignore | 48 | .macro ignore L=.Lignore |
49 | 30: | 49 | 30: |
50 | .section __ex_table,"a" | 50 | .section __ex_table, "a" |
51 | .align 8 | 51 | .align 8 |
52 | .quad 30b,\L | 52 | .quad 30b, \L |
53 | .previous | 53 | .previous |
54 | .endm | 54 | .endm |
55 | 55 | ||
56 | 56 | ||
57 | ENTRY(csum_partial_copy_generic) | 57 | ENTRY(csum_partial_copy_generic) |
58 | CFI_STARTPROC | 58 | CFI_STARTPROC |
59 | cmpl $3*64,%edx | 59 | cmpl $3*64, %edx |
60 | jle .Lignore | 60 | jle .Lignore |
61 | 61 | ||
62 | .Lignore: | 62 | .Lignore: |
63 | subq $7*8,%rsp | 63 | subq $7*8, %rsp |
64 | CFI_ADJUST_CFA_OFFSET 7*8 | 64 | CFI_ADJUST_CFA_OFFSET 7*8 |
65 | movq %rbx,2*8(%rsp) | 65 | movq %rbx, 2*8(%rsp) |
66 | CFI_REL_OFFSET rbx, 2*8 | 66 | CFI_REL_OFFSET rbx, 2*8 |
67 | movq %r12,3*8(%rsp) | 67 | movq %r12, 3*8(%rsp) |
68 | CFI_REL_OFFSET r12, 3*8 | 68 | CFI_REL_OFFSET r12, 3*8 |
69 | movq %r14,4*8(%rsp) | 69 | movq %r14, 4*8(%rsp) |
70 | CFI_REL_OFFSET r14, 4*8 | 70 | CFI_REL_OFFSET r14, 4*8 |
71 | movq %r13,5*8(%rsp) | 71 | movq %r13, 5*8(%rsp) |
72 | CFI_REL_OFFSET r13, 5*8 | 72 | CFI_REL_OFFSET r13, 5*8 |
73 | movq %rbp,6*8(%rsp) | 73 | movq %rbp, 6*8(%rsp) |
74 | CFI_REL_OFFSET rbp, 6*8 | 74 | CFI_REL_OFFSET rbp, 6*8 |
75 | 75 | ||
76 | movq %r8,(%rsp) | 76 | movq %r8, (%rsp) |
77 | movq %r9,1*8(%rsp) | 77 | movq %r9, 1*8(%rsp) |
78 | |||
79 | movl %ecx,%eax | ||
80 | movl %edx,%ecx | ||
81 | 78 | ||
82 | xorl %r9d,%r9d | 79 | movl %ecx, %eax |
83 | movq %rcx,%r12 | 80 | movl %edx, %ecx |
84 | 81 | ||
85 | shrq $6,%r12 | 82 | xorl %r9d, %r9d |
86 | jz .Lhandle_tail /* < 64 */ | 83 | movq %rcx, %r12 |
84 | |||
85 | shrq $6, %r12 | ||
86 | jz .Lhandle_tail /* < 64 */ | ||
87 | 87 | ||
88 | clc | 88 | clc |
89 | 89 | ||
90 | /* main loop. clear in 64 byte blocks */ | 90 | /* main loop. clear in 64 byte blocks */ |
91 | /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */ | 91 | /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */ |
92 | /* r11: temp3, rdx: temp4, r12 loopcnt */ | 92 | /* r11: temp3, rdx: temp4, r12 loopcnt */ |
@@ -94,156 +94,156 @@ ENTRY(csum_partial_copy_generic) | |||
94 | .p2align 4 | 94 | .p2align 4 |
95 | .Lloop: | 95 | .Lloop: |
96 | source | 96 | source |
97 | movq (%rdi),%rbx | 97 | movq (%rdi), %rbx |
98 | source | 98 | source |
99 | movq 8(%rdi),%r8 | 99 | movq 8(%rdi), %r8 |
100 | source | 100 | source |
101 | movq 16(%rdi),%r11 | 101 | movq 16(%rdi), %r11 |
102 | source | 102 | source |
103 | movq 24(%rdi),%rdx | 103 | movq 24(%rdi), %rdx |
104 | 104 | ||
105 | source | 105 | source |
106 | movq 32(%rdi),%r10 | 106 | movq 32(%rdi), %r10 |
107 | source | 107 | source |
108 | movq 40(%rdi),%rbp | 108 | movq 40(%rdi), %rbp |
109 | source | 109 | source |
110 | movq 48(%rdi),%r14 | 110 | movq 48(%rdi), %r14 |
111 | source | 111 | source |
112 | movq 56(%rdi),%r13 | 112 | movq 56(%rdi), %r13 |
113 | 113 | ||
114 | ignore 2f | 114 | ignore 2f |
115 | prefetcht0 5*64(%rdi) | 115 | prefetcht0 5*64(%rdi) |
116 | 2: | 116 | 2: |
117 | adcq %rbx,%rax | 117 | adcq %rbx, %rax |
118 | adcq %r8,%rax | 118 | adcq %r8, %rax |
119 | adcq %r11,%rax | 119 | adcq %r11, %rax |
120 | adcq %rdx,%rax | 120 | adcq %rdx, %rax |
121 | adcq %r10,%rax | 121 | adcq %r10, %rax |
122 | adcq %rbp,%rax | 122 | adcq %rbp, %rax |
123 | adcq %r14,%rax | 123 | adcq %r14, %rax |
124 | adcq %r13,%rax | 124 | adcq %r13, %rax |
125 | 125 | ||
126 | decl %r12d | 126 | decl %r12d |
127 | 127 | ||
128 | dest | 128 | dest |
129 | movq %rbx,(%rsi) | 129 | movq %rbx, (%rsi) |
130 | dest | 130 | dest |
131 | movq %r8,8(%rsi) | 131 | movq %r8, 8(%rsi) |
132 | dest | 132 | dest |
133 | movq %r11,16(%rsi) | 133 | movq %r11, 16(%rsi) |
134 | dest | 134 | dest |
135 | movq %rdx,24(%rsi) | 135 | movq %rdx, 24(%rsi) |
136 | 136 | ||
137 | dest | 137 | dest |
138 | movq %r10,32(%rsi) | 138 | movq %r10, 32(%rsi) |
139 | dest | 139 | dest |
140 | movq %rbp,40(%rsi) | 140 | movq %rbp, 40(%rsi) |
141 | dest | 141 | dest |
142 | movq %r14,48(%rsi) | 142 | movq %r14, 48(%rsi) |
143 | dest | 143 | dest |
144 | movq %r13,56(%rsi) | 144 | movq %r13, 56(%rsi) |
145 | 145 | ||
146 | 3: | 146 | 3: |
147 | |||
148 | leaq 64(%rdi),%rdi | ||
149 | leaq 64(%rsi),%rsi | ||
150 | 147 | ||
151 | jnz .Lloop | 148 | leaq 64(%rdi), %rdi |
149 | leaq 64(%rsi), %rsi | ||
152 | 150 | ||
153 | adcq %r9,%rax | 151 | jnz .Lloop |
154 | 152 | ||
155 | /* do last upto 56 bytes */ | 153 | adcq %r9, %rax |
154 | |||
155 | /* do last up to 56 bytes */ | ||
156 | .Lhandle_tail: | 156 | .Lhandle_tail: |
157 | /* ecx: count */ | 157 | /* ecx: count */ |
158 | movl %ecx,%r10d | 158 | movl %ecx, %r10d |
159 | andl $63,%ecx | 159 | andl $63, %ecx |
160 | shrl $3,%ecx | 160 | shrl $3, %ecx |
161 | jz .Lfold | 161 | jz .Lfold |
162 | clc | 162 | clc |
163 | .p2align 4 | 163 | .p2align 4 |
164 | .Lloop_8: | 164 | .Lloop_8: |
165 | source | 165 | source |
166 | movq (%rdi),%rbx | 166 | movq (%rdi), %rbx |
167 | adcq %rbx,%rax | 167 | adcq %rbx, %rax |
168 | decl %ecx | 168 | decl %ecx |
169 | dest | 169 | dest |
170 | movq %rbx,(%rsi) | 170 | movq %rbx, (%rsi) |
171 | leaq 8(%rsi),%rsi /* preserve carry */ | 171 | leaq 8(%rsi), %rsi /* preserve carry */ |
172 | leaq 8(%rdi),%rdi | 172 | leaq 8(%rdi), %rdi |
173 | jnz .Lloop_8 | 173 | jnz .Lloop_8 |
174 | adcq %r9,%rax /* add in carry */ | 174 | adcq %r9, %rax /* add in carry */ |
175 | 175 | ||
176 | .Lfold: | 176 | .Lfold: |
177 | /* reduce checksum to 32bits */ | 177 | /* reduce checksum to 32bits */ |
178 | movl %eax,%ebx | 178 | movl %eax, %ebx |
179 | shrq $32,%rax | 179 | shrq $32, %rax |
180 | addl %ebx,%eax | 180 | addl %ebx, %eax |
181 | adcl %r9d,%eax | 181 | adcl %r9d, %eax |
182 | 182 | ||
183 | /* do last upto 6 bytes */ | 183 | /* do last up to 6 bytes */ |
184 | .Lhandle_7: | 184 | .Lhandle_7: |
185 | movl %r10d,%ecx | 185 | movl %r10d, %ecx |
186 | andl $7,%ecx | 186 | andl $7, %ecx |
187 | shrl $1,%ecx | 187 | shrl $1, %ecx |
188 | jz .Lhandle_1 | 188 | jz .Lhandle_1 |
189 | movl $2,%edx | 189 | movl $2, %edx |
190 | xorl %ebx,%ebx | 190 | xorl %ebx, %ebx |
191 | clc | 191 | clc |
192 | .p2align 4 | 192 | .p2align 4 |
193 | .Lloop_1: | 193 | .Lloop_1: |
194 | source | 194 | source |
195 | movw (%rdi),%bx | 195 | movw (%rdi), %bx |
196 | adcl %ebx,%eax | 196 | adcl %ebx, %eax |
197 | decl %ecx | 197 | decl %ecx |
198 | dest | 198 | dest |
199 | movw %bx,(%rsi) | 199 | movw %bx, (%rsi) |
200 | leaq 2(%rdi),%rdi | 200 | leaq 2(%rdi), %rdi |
201 | leaq 2(%rsi),%rsi | 201 | leaq 2(%rsi), %rsi |
202 | jnz .Lloop_1 | 202 | jnz .Lloop_1 |
203 | adcl %r9d,%eax /* add in carry */ | 203 | adcl %r9d, %eax /* add in carry */ |
204 | 204 | ||
205 | /* handle last odd byte */ | 205 | /* handle last odd byte */ |
206 | .Lhandle_1: | 206 | .Lhandle_1: |
207 | testl $1,%r10d | 207 | testl $1, %r10d |
208 | jz .Lende | 208 | jz .Lende |
209 | xorl %ebx,%ebx | 209 | xorl %ebx, %ebx |
210 | source | 210 | source |
211 | movb (%rdi),%bl | 211 | movb (%rdi), %bl |
212 | dest | 212 | dest |
213 | movb %bl,(%rsi) | 213 | movb %bl, (%rsi) |
214 | addl %ebx,%eax | 214 | addl %ebx, %eax |
215 | adcl %r9d,%eax /* carry */ | 215 | adcl %r9d, %eax /* carry */ |
216 | 216 | ||
217 | CFI_REMEMBER_STATE | 217 | CFI_REMEMBER_STATE |
218 | .Lende: | 218 | .Lende: |
219 | movq 2*8(%rsp),%rbx | 219 | movq 2*8(%rsp), %rbx |
220 | CFI_RESTORE rbx | 220 | CFI_RESTORE rbx |
221 | movq 3*8(%rsp),%r12 | 221 | movq 3*8(%rsp), %r12 |
222 | CFI_RESTORE r12 | 222 | CFI_RESTORE r12 |
223 | movq 4*8(%rsp),%r14 | 223 | movq 4*8(%rsp), %r14 |
224 | CFI_RESTORE r14 | 224 | CFI_RESTORE r14 |
225 | movq 5*8(%rsp),%r13 | 225 | movq 5*8(%rsp), %r13 |
226 | CFI_RESTORE r13 | 226 | CFI_RESTORE r13 |
227 | movq 6*8(%rsp),%rbp | 227 | movq 6*8(%rsp), %rbp |
228 | CFI_RESTORE rbp | 228 | CFI_RESTORE rbp |
229 | addq $7*8,%rsp | 229 | addq $7*8, %rsp |
230 | CFI_ADJUST_CFA_OFFSET -7*8 | 230 | CFI_ADJUST_CFA_OFFSET -7*8 |
231 | ret | 231 | ret |
232 | CFI_RESTORE_STATE | 232 | CFI_RESTORE_STATE |
233 | 233 | ||
234 | /* Exception handlers. Very simple, zeroing is done in the wrappers */ | 234 | /* Exception handlers. Very simple, zeroing is done in the wrappers */ |
235 | .Lbad_source: | 235 | .Lbad_source: |
236 | movq (%rsp),%rax | 236 | movq (%rsp), %rax |
237 | testq %rax,%rax | 237 | testq %rax, %rax |
238 | jz .Lende | 238 | jz .Lende |
239 | movl $-EFAULT,(%rax) | 239 | movl $-EFAULT, (%rax) |
240 | jmp .Lende | 240 | jmp .Lende |
241 | 241 | ||
242 | .Lbad_dest: | 242 | .Lbad_dest: |
243 | movq 8(%rsp),%rax | 243 | movq 8(%rsp), %rax |
244 | testq %rax,%rax | 244 | testq %rax, %rax |
245 | jz .Lende | 245 | jz .Lende |
246 | movl $-EFAULT,(%rax) | 246 | movl $-EFAULT, (%rax) |
247 | jmp .Lende | 247 | jmp .Lende |
248 | CFI_ENDPROC | 248 | CFI_ENDPROC |
249 | ENDPROC(csum_partial_copy_generic) | 249 | ENDPROC(csum_partial_copy_generic) |
diff --git a/arch/x86/lib/csum-partial_64.c b/arch/x86/lib/csum-partial_64.c index bf51144d97e1..9845371c5c36 100644 --- a/arch/x86/lib/csum-partial_64.c +++ b/arch/x86/lib/csum-partial_64.c | |||
@@ -84,7 +84,7 @@ static unsigned do_csum(const unsigned char *buff, unsigned len) | |||
84 | count64--; | 84 | count64--; |
85 | } | 85 | } |
86 | 86 | ||
87 | /* last upto 7 8byte blocks */ | 87 | /* last up to 7 8byte blocks */ |
88 | count %= 8; | 88 | count %= 8; |
89 | while (count) { | 89 | while (count) { |
90 | asm("addq %1,%0\n\t" | 90 | asm("addq %1,%0\n\t" |
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 069ce7c37c01..d4203988504a 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c | |||
@@ -326,7 +326,7 @@ try_again: | |||
326 | if (mm->free_area_cache < len) | 326 | if (mm->free_area_cache < len) |
327 | goto fail; | 327 | goto fail; |
328 | 328 | ||
329 | /* either no address requested or cant fit in requested address hole */ | 329 | /* either no address requested or can't fit in requested address hole */ |
330 | addr = (mm->free_area_cache - len) & huge_page_mask(h); | 330 | addr = (mm->free_area_cache - len) & huge_page_mask(h); |
331 | do { | 331 | do { |
332 | /* | 332 | /* |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 73ad7ebd6e9c..80088f994193 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -917,7 +917,7 @@ static void mark_nxdata_nx(void) | |||
917 | { | 917 | { |
918 | /* | 918 | /* |
919 | * When this is called, init has already been executed and released, | 919 | * When this is called, init has already been executed and released, |
920 | * so everything past _etext sould be NX. | 920 | * so everything past _etext should be NX. |
921 | */ | 921 | */ |
922 | unsigned long start = PFN_ALIGN(_etext); | 922 | unsigned long start = PFN_ALIGN(_etext); |
923 | /* | 923 | /* |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index a08a62cb136e..794233587287 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -51,6 +51,8 @@ | |||
51 | #include <asm/numa.h> | 51 | #include <asm/numa.h> |
52 | #include <asm/cacheflush.h> | 52 | #include <asm/cacheflush.h> |
53 | #include <asm/init.h> | 53 | #include <asm/init.h> |
54 | #include <asm/uv/uv.h> | ||
55 | #include <asm/setup.h> | ||
54 | 56 | ||
55 | static int __init parse_direct_gbpages_off(char *arg) | 57 | static int __init parse_direct_gbpages_off(char *arg) |
56 | { | 58 | { |
@@ -293,18 +295,18 @@ void __init init_extra_mapping_uc(unsigned long phys, unsigned long size) | |||
293 | * to the compile time generated pmds. This results in invalid pmds up | 295 | * to the compile time generated pmds. This results in invalid pmds up |
294 | * to the point where we hit the physaddr 0 mapping. | 296 | * to the point where we hit the physaddr 0 mapping. |
295 | * | 297 | * |
296 | * We limit the mappings to the region from _text to _end. _end is | 298 | * We limit the mappings to the region from _text to _brk_end. _brk_end |
297 | * rounded up to the 2MB boundary. This catches the invalid pmds as | 299 | * is rounded up to the 2MB boundary. This catches the invalid pmds as |
298 | * well, as they are located before _text: | 300 | * well, as they are located before _text: |
299 | */ | 301 | */ |
300 | void __init cleanup_highmap(void) | 302 | void __init cleanup_highmap(void) |
301 | { | 303 | { |
302 | unsigned long vaddr = __START_KERNEL_map; | 304 | unsigned long vaddr = __START_KERNEL_map; |
303 | unsigned long end = roundup((unsigned long)_end, PMD_SIZE) - 1; | 305 | unsigned long vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT); |
306 | unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1; | ||
304 | pmd_t *pmd = level2_kernel_pgt; | 307 | pmd_t *pmd = level2_kernel_pgt; |
305 | pmd_t *last_pmd = pmd + PTRS_PER_PMD; | ||
306 | 308 | ||
307 | for (; pmd < last_pmd; pmd++, vaddr += PMD_SIZE) { | 309 | for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) { |
308 | if (pmd_none(*pmd)) | 310 | if (pmd_none(*pmd)) |
309 | continue; | 311 | continue; |
310 | if (vaddr < (unsigned long) _text || vaddr > end) | 312 | if (vaddr < (unsigned long) _text || vaddr > end) |
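
The rewrite above bounds cleanup_highmap() by _brk_end rounded up to a 2 MiB pmd and by max_pfn_mapped, instead of walking a fixed PTRS_PER_PMD worth of entries. The rounding arithmetic as a standalone check (the _brk_end value is made up):

#include <inttypes.h>
#include <stdio.h>

#define PMD_SIZE (2ULL << 20)		/* one pmd maps 2 MiB */

/* round x up to the next multiple of a power-of-two alignment */
static uint64_t roundup_pow2(uint64_t x, uint64_t align)
{
	return (x + align - 1) & ~(align - 1);
}

int main(void)
{
	uint64_t brk_end = 0xffffffff81e42abcULL;	/* made-up _brk_end */
	uint64_t end = roundup_pow2(brk_end, PMD_SIZE) - 1;

	/* pmds mapping addresses at or below 'end' are kept; the loop above
	 * also stops once vaddr reaches the max_pfn_mapped bound */
	printf("end = %#" PRIx64 "\n", end);
	return 0;
}
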
@@ -860,18 +862,18 @@ static struct vm_area_struct gate_vma = { | |||
860 | .vm_flags = VM_READ | VM_EXEC | 862 | .vm_flags = VM_READ | VM_EXEC |
861 | }; | 863 | }; |
862 | 864 | ||
863 | struct vm_area_struct *get_gate_vma(struct task_struct *tsk) | 865 | struct vm_area_struct *get_gate_vma(struct mm_struct *mm) |
864 | { | 866 | { |
865 | #ifdef CONFIG_IA32_EMULATION | 867 | #ifdef CONFIG_IA32_EMULATION |
866 | if (test_tsk_thread_flag(tsk, TIF_IA32)) | 868 | if (!mm || mm->context.ia32_compat) |
867 | return NULL; | 869 | return NULL; |
868 | #endif | 870 | #endif |
869 | return &gate_vma; | 871 | return &gate_vma; |
870 | } | 872 | } |
871 | 873 | ||
872 | int in_gate_area(struct task_struct *task, unsigned long addr) | 874 | int in_gate_area(struct mm_struct *mm, unsigned long addr) |
873 | { | 875 | { |
874 | struct vm_area_struct *vma = get_gate_vma(task); | 876 | struct vm_area_struct *vma = get_gate_vma(mm); |
875 | 877 | ||
876 | if (!vma) | 878 | if (!vma) |
877 | return 0; | 879 | return 0; |
@@ -880,11 +882,11 @@ int in_gate_area(struct task_struct *task, unsigned long addr) | |||
880 | } | 882 | } |
881 | 883 | ||
882 | /* | 884 | /* |
883 | * Use this when you have no reliable task/vma, typically from interrupt | 885 | * Use this when you have no reliable mm, typically from interrupt |
884 | * context. It is less reliable than using the task's vma and may give | 886 | * context. It is less reliable than using a task's mm and may give |
885 | * false positives: | 887 | * false positives. |
886 | */ | 888 | */ |
887 | int in_gate_area_no_task(unsigned long addr) | 889 | int in_gate_area_no_mm(unsigned long addr) |
888 | { | 890 | { |
889 | return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END); | 891 | return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END); |
890 | } | 892 | } |
@@ -898,6 +900,19 @@ const char *arch_vma_name(struct vm_area_struct *vma) | |||
898 | return NULL; | 900 | return NULL; |
899 | } | 901 | } |
900 | 902 | ||
903 | #ifdef CONFIG_X86_UV | ||
904 | #define MIN_MEMORY_BLOCK_SIZE (1 << SECTION_SIZE_BITS) | ||
905 | |||
906 | unsigned long memory_block_size_bytes(void) | ||
907 | { | ||
908 | if (is_uv_system()) { | ||
909 | printk(KERN_INFO "UV: memory block size 2GB\n"); | ||
910 | return 2UL * 1024 * 1024 * 1024; | ||
911 | } | ||
912 | return MIN_MEMORY_BLOCK_SIZE; | ||
913 | } | ||
914 | #endif | ||
915 | |||
901 | #ifdef CONFIG_SPARSEMEM_VMEMMAP | 916 | #ifdef CONFIG_SPARSEMEM_VMEMMAP |
902 | /* | 917 | /* |
903 | * Initialise the sparsemem vmemmap using huge-pages at the PMD level. | 918 | * Initialise the sparsemem vmemmap using huge-pages at the PMD level. |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 9ec0f209a6a4..e8c00cc72033 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -446,7 +446,7 @@ static int __init numa_alloc_distance(void) | |||
446 | * @distance: NUMA distance | 446 | * @distance: NUMA distance |
447 | * | 447 | * |
448 | * Set the distance from node @from to @to to @distance. If distance table | 448 | * Set the distance from node @from to @to to @distance. If distance table |
449 | * doesn't exist, one which is large enough to accomodate all the currently | 449 | * doesn't exist, one which is large enough to accommodate all the currently |
450 | * known nodes will be created. | 450 | * known nodes will be created. |
451 | * | 451 | * |
452 | * If such table cannot be allocated, a warning is printed and further | 452 | * If such table cannot be allocated, a warning is printed and further |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 90825f2eb0f4..f9e526742fa1 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -310,7 +310,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, | |||
310 | * these shared mappings are made of small page mappings. | 310 | * these shared mappings are made of small page mappings. |
311 | * Thus this doesn't enforce !RW mapping for small page kernel | 311 | * Thus this doesn't enforce !RW mapping for small page kernel |
312 | * text; the mapping logic will help Linux Xen paravirt guest boot | 312 | * text; the mapping logic will help Linux Xen paravirt guest boot |
313 | * aswell. | 313 | * as well. |
314 | */ | 314 | */ |
315 | if (lookup_address(address, &level) && (level != PG_LEVEL_4K)) | 315 | if (lookup_address(address, &level) && (level != PG_LEVEL_4K)) |
316 | pgprot_val(forbidden) |= _PAGE_RW; | 316 | pgprot_val(forbidden) |= _PAGE_RW; |
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 0113d19c8aa6..8573b83a63d0 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
@@ -168,8 +168,7 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) | |||
168 | * section 8.1: in PAE mode we explicitly have to flush the | 168 | * section 8.1: in PAE mode we explicitly have to flush the |
169 | * TLB via cr3 if the top-level pgd is changed... | 169 | * TLB via cr3 if the top-level pgd is changed... |
170 | */ | 170 | */ |
171 | if (mm == current->active_mm) | 171 | flush_tlb_mm(mm); |
172 | write_cr3(read_cr3()); | ||
173 | } | 172 | } |
174 | #else /* !CONFIG_X86_PAE */ | 173 | #else /* !CONFIG_X86_PAE */ |
175 | 174 | ||
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index 72cbec14d783..2d49d4e19a36 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c | |||
@@ -126,7 +126,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth) | |||
126 | if (!user_mode_vm(regs)) { | 126 | if (!user_mode_vm(regs)) { |
127 | unsigned long stack = kernel_stack_pointer(regs); | 127 | unsigned long stack = kernel_stack_pointer(regs); |
128 | if (depth) | 128 | if (depth) |
129 | dump_trace(NULL, regs, (unsigned long *)stack, | 129 | dump_trace(NULL, regs, (unsigned long *)stack, 0, |
130 | &backtrace_ops, &depth); | 130 | &backtrace_ops, &depth); |
131 | return; | 131 | return; |
132 | } | 132 | } |
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 9fadec074142..98ab13058f89 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c | |||
@@ -50,7 +50,7 @@ static inline void setup_num_counters(void) | |||
50 | #endif | 50 | #endif |
51 | } | 51 | } |
52 | 52 | ||
53 | static int inline addr_increment(void) | 53 | static inline int addr_increment(void) |
54 | { | 54 | { |
55 | #ifdef CONFIG_SMP | 55 | #ifdef CONFIG_SMP |
56 | return smp_num_siblings == 2 ? 2 : 1; | 56 | return smp_num_siblings == 2 ? 2 : 1; |
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index b1805b78842f..494f2e7ea2b4 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c | |||
@@ -241,7 +241,7 @@ void __init pcibios_resource_survey(void) | |||
241 | e820_reserve_resources_late(); | 241 | e820_reserve_resources_late(); |
242 | /* | 242 | /* |
243 | * Insert the IO APIC resources after PCI initialization has | 243 | * Insert the IO APIC resources after PCI initialization has |
244 | * occured to handle IO APICS that are mapped in on a BAR in | 244 | * occurred to handle IO APICS that are mapped in on a BAR in |
245 | * PCI space, but before trying to assign unassigned pci res. | 245 | * PCI space, but before trying to assign unassigned pci res. |
246 | */ | 246 | */ |
247 | ioapic_insert_resources(); | 247 | ioapic_insert_resources(); |
@@ -304,7 +304,7 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, | |||
304 | /* | 304 | /* |
305 | * ioremap() and ioremap_nocache() defaults to UC MINUS for now. | 305 | * ioremap() and ioremap_nocache() defaults to UC MINUS for now. |
306 | * To avoid attribute conflicts, request UC MINUS here | 306 | * To avoid attribute conflicts, request UC MINUS here |
307 | * aswell. | 307 | * as well. |
308 | */ | 308 | */ |
309 | prot |= _PAGE_CACHE_UC_MINUS; | 309 | prot |= _PAGE_CACHE_UC_MINUS; |
310 | 310 | ||
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 87e6c8323117..8201165bae28 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c | |||
@@ -597,21 +597,18 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route | |||
597 | return 1; | 597 | return 1; |
598 | } | 598 | } |
599 | 599 | ||
600 | if ((device >= PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MIN) && | 600 | if ((device >= PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MIN && |
601 | (device <= PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MAX)) { | 601 | device <= PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MAX) |
602 | || (device >= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN && | ||
603 | device <= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX) | ||
604 | || (device >= PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MIN && | ||
605 | device <= PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MAX)) { | ||
602 | r->name = "PIIX/ICH"; | 606 | r->name = "PIIX/ICH"; |
603 | r->get = pirq_piix_get; | 607 | r->get = pirq_piix_get; |
604 | r->set = pirq_piix_set; | 608 | r->set = pirq_piix_set; |
605 | return 1; | 609 | return 1; |
606 | } | 610 | } |
607 | 611 | ||
608 | if ((device >= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN) && | ||
609 | (device <= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX)) { | ||
610 | r->name = "PIIX/ICH"; | ||
611 | r->get = pirq_piix_get; | ||
612 | r->set = pirq_piix_set; | ||
613 | return 1; | ||
614 | } | ||
615 | return 0; | 612 | return 0; |
616 | } | 613 | } |
617 | 614 | ||
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 8c4085a95ef1..e37b407a0ee8 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c | |||
@@ -50,7 +50,7 @@ static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, | |||
50 | name = "ioapic-level"; | 50 | name = "ioapic-level"; |
51 | } | 51 | } |
52 | 52 | ||
53 | irq = xen_map_pirq_gsi(map_irq.pirq, gsi, shareable, name); | 53 | irq = xen_bind_pirq_gsi_to_irq(gsi, map_irq.pirq, shareable, name); |
54 | 54 | ||
55 | printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq); | 55 | printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq); |
56 | 56 | ||
@@ -237,6 +237,7 @@ static int xen_pcifront_enable_irq(struct pci_dev *dev) | |||
237 | { | 237 | { |
238 | int rc; | 238 | int rc; |
239 | int share = 1; | 239 | int share = 1; |
240 | int pirq; | ||
240 | u8 gsi; | 241 | u8 gsi; |
241 | 242 | ||
242 | rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi); | 243 | rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi); |
@@ -246,13 +247,21 @@ static int xen_pcifront_enable_irq(struct pci_dev *dev) | |||
246 | return rc; | 247 | return rc; |
247 | } | 248 | } |
248 | 249 | ||
250 | rc = xen_allocate_pirq_gsi(gsi); | ||
251 | if (rc < 0) { | ||
252 | dev_warn(&dev->dev, "Xen PCI: failed to allocate a PIRQ for GSI%d: %d\n", | ||
253 | gsi, rc); | ||
254 | return rc; | ||
255 | } | ||
256 | pirq = rc; | ||
257 | |||
249 | if (gsi < NR_IRQS_LEGACY) | 258 | if (gsi < NR_IRQS_LEGACY) |
250 | share = 0; | 259 | share = 0; |
251 | 260 | ||
252 | rc = xen_allocate_pirq(gsi, share, "pcifront"); | 261 | rc = xen_bind_pirq_gsi_to_irq(gsi, pirq, share, "pcifront"); |
253 | if (rc < 0) { | 262 | if (rc < 0) { |
254 | dev_warn(&dev->dev, "Xen PCI: failed to register GSI%d: %d\n", | 263 | dev_warn(&dev->dev, "Xen PCI: failed to bind GSI%d (PIRQ%d) to IRQ: %d\n", |
255 | gsi, rc); | 264 | gsi, pirq, rc); |
256 | return rc; | 265 | return rc; |
257 | } | 266 | } |
258 | 267 | ||
@@ -309,7 +318,7 @@ int __init pci_xen_hvm_init(void) | |||
309 | #ifdef CONFIG_XEN_DOM0 | 318 | #ifdef CONFIG_XEN_DOM0 |
310 | static int xen_register_pirq(u32 gsi, int triggering) | 319 | static int xen_register_pirq(u32 gsi, int triggering) |
311 | { | 320 | { |
312 | int rc, irq; | 321 | int rc, pirq, irq = -1; |
313 | struct physdev_map_pirq map_irq; | 322 | struct physdev_map_pirq map_irq; |
314 | int shareable = 0; | 323 | int shareable = 0; |
315 | char *name; | 324 | char *name; |
@@ -325,17 +334,20 @@ static int xen_register_pirq(u32 gsi, int triggering) | |||
325 | name = "ioapic-level"; | 334 | name = "ioapic-level"; |
326 | } | 335 | } |
327 | 336 | ||
328 | irq = xen_allocate_pirq(gsi, shareable, name); | 337 | pirq = xen_allocate_pirq_gsi(gsi); |
329 | 338 | if (pirq < 0) | |
330 | printk(KERN_DEBUG "xen: --> irq=%d\n", irq); | 339 | goto out; |
331 | 340 | ||
341 | irq = xen_bind_pirq_gsi_to_irq(gsi, pirq, shareable, name); | ||
332 | if (irq < 0) | 342 | if (irq < 0) |
333 | goto out; | 343 | goto out; |
334 | 344 | ||
345 | printk(KERN_DEBUG "xen: --> pirq=%d -> irq=%d\n", pirq, irq); | ||
346 | |||
335 | map_irq.domid = DOMID_SELF; | 347 | map_irq.domid = DOMID_SELF; |
336 | map_irq.type = MAP_PIRQ_TYPE_GSI; | 348 | map_irq.type = MAP_PIRQ_TYPE_GSI; |
337 | map_irq.index = gsi; | 349 | map_irq.index = gsi; |
338 | map_irq.pirq = irq; | 350 | map_irq.pirq = pirq; |
339 | 351 | ||
340 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); | 352 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); |
341 | if (rc) { | 353 | if (rc) { |
@@ -422,13 +434,18 @@ static int __init pci_xen_initial_domain(void) | |||
422 | 434 | ||
423 | void __init xen_setup_pirqs(void) | 435 | void __init xen_setup_pirqs(void) |
424 | { | 436 | { |
425 | int irq; | 437 | int pirq, irq; |
426 | 438 | ||
427 | pci_xen_initial_domain(); | 439 | pci_xen_initial_domain(); |
428 | 440 | ||
429 | if (0 == nr_ioapics) { | 441 | if (0 == nr_ioapics) { |
430 | for (irq = 0; irq < NR_IRQS_LEGACY; irq++) | 442 | for (irq = 0; irq < NR_IRQS_LEGACY; irq++) { |
431 | xen_allocate_pirq(irq, 0, "xt-pic"); | 443 | pirq = xen_allocate_pirq_gsi(irq); |
444 | if (WARN(pirq < 0, | ||
445 | "Could not allocate PIRQ for legacy interrupt\n")) | ||
446 | break; | ||
447 | irq = xen_bind_pirq_gsi_to_irq(irq, pirq, 0, "xt-pic"); | ||
448 | } | ||
432 | return; | 449 | return; |
433 | } | 450 | } |
434 | 451 | ||
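
Throughout this file, the old one-shot xen_allocate_pirq(gsi, share, name) is split into xen_allocate_pirq_gsi() followed by xen_bind_pirq_gsi_to_irq(), with each step checked separately. A shape-only sketch of the resulting flow; the two helpers below are dummy stubs, not the Xen implementations from drivers/xen/events.c:

#include <stdio.h>

/* dummy stubs; the real helpers live in drivers/xen/events.c */
static int xen_allocate_pirq_gsi(unsigned int gsi)
{
	return (int)gsi;		/* pretend PIRQ == GSI */
}

static int xen_bind_pirq_gsi_to_irq(unsigned int gsi, int pirq,
				    int shareable, const char *name)
{
	(void)gsi; (void)shareable; (void)name;
	return pirq + 16;		/* pretend IRQ number */
}

static int enable_gsi(unsigned int gsi)
{
	int pirq, irq;

	pirq = xen_allocate_pirq_gsi(gsi);		/* step 1: reserve */
	if (pirq < 0)
		return pirq;				/* allocation failed */

	irq = xen_bind_pirq_gsi_to_irq(gsi, pirq,	/* step 2: bind */
				       gsi >= 16 /* legacy GSIs not shared */,
				       "pcifront");
	if (irq < 0)
		return irq;				/* bind failed */

	printf("gsi %u -> pirq %d -> irq %d\n", gsi, pirq, irq);
	return irq;
}

int main(void)
{
	return enable_gsi(9) < 0;
}
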
diff --git a/arch/x86/platform/olpc/olpc-xo1.c b/arch/x86/platform/olpc/olpc-xo1.c index 127775696d6c..99513642a0e6 100644 --- a/arch/x86/platform/olpc/olpc-xo1.c +++ b/arch/x86/platform/olpc/olpc-xo1.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/platform_device.h> | 16 | #include <linux/platform_device.h> |
17 | #include <linux/pm.h> | 17 | #include <linux/pm.h> |
18 | #include <linux/mfd/core.h> | ||
18 | 19 | ||
19 | #include <asm/io.h> | 20 | #include <asm/io.h> |
20 | #include <asm/olpc.h> | 21 | #include <asm/olpc.h> |
@@ -56,25 +57,24 @@ static void xo1_power_off(void) | |||
56 | static int __devinit olpc_xo1_probe(struct platform_device *pdev) | 57 | static int __devinit olpc_xo1_probe(struct platform_device *pdev) |
57 | { | 58 | { |
58 | struct resource *res; | 59 | struct resource *res; |
60 | int err; | ||
59 | 61 | ||
60 | /* don't run on non-XOs */ | 62 | /* don't run on non-XOs */ |
61 | if (!machine_is_olpc()) | 63 | if (!machine_is_olpc()) |
62 | return -ENODEV; | 64 | return -ENODEV; |
63 | 65 | ||
66 | err = mfd_cell_enable(pdev); | ||
67 | if (err) | ||
68 | return err; | ||
69 | |||
64 | res = platform_get_resource(pdev, IORESOURCE_IO, 0); | 70 | res = platform_get_resource(pdev, IORESOURCE_IO, 0); |
65 | if (!res) { | 71 | if (!res) { |
66 | dev_err(&pdev->dev, "can't fetch device resource info\n"); | 72 | dev_err(&pdev->dev, "can't fetch device resource info\n"); |
67 | return -EIO; | 73 | return -EIO; |
68 | } | 74 | } |
69 | 75 | if (strcmp(pdev->name, "olpc-xo1-pms") == 0) | |
70 | if (!request_region(res->start, resource_size(res), DRV_NAME)) { | ||
71 | dev_err(&pdev->dev, "can't request region\n"); | ||
72 | return -EIO; | ||
73 | } | ||
74 | |||
75 | if (strcmp(pdev->name, "cs5535-pms") == 0) | ||
76 | pms_base = res->start; | 76 | pms_base = res->start; |
77 | else if (strcmp(pdev->name, "cs5535-acpi") == 0) | 77 | else if (strcmp(pdev->name, "olpc-xo1-ac-acpi") == 0) |
78 | acpi_base = res->start; | 78 | acpi_base = res->start; |
79 | 79 | ||
80 | /* If we have both addresses, we can override the poweroff hook */ | 80 | /* If we have both addresses, we can override the poweroff hook */ |
@@ -88,14 +88,11 @@ static int __devinit olpc_xo1_probe(struct platform_device *pdev) | |||
88 | 88 | ||
89 | static int __devexit olpc_xo1_remove(struct platform_device *pdev) | 89 | static int __devexit olpc_xo1_remove(struct platform_device *pdev) |
90 | { | 90 | { |
91 | struct resource *r; | 91 | mfd_cell_disable(pdev); |
92 | |||
93 | r = platform_get_resource(pdev, IORESOURCE_IO, 0); | ||
94 | release_region(r->start, resource_size(r)); | ||
95 | 92 | ||
96 | if (strcmp(pdev->name, "cs5535-pms") == 0) | 93 | if (strcmp(pdev->name, "olpc-xo1-pms") == 0) |
97 | pms_base = 0; | 94 | pms_base = 0; |
98 | else if (strcmp(pdev->name, "cs5535-acpi") == 0) | 95 | else if (strcmp(pdev->name, "olpc-xo1-acpi") == 0) |
99 | acpi_base = 0; | 96 | acpi_base = 0; |
100 | 97 | ||
101 | pm_power_off = NULL; | 98 | pm_power_off = NULL; |
@@ -104,7 +101,7 @@ static int __devexit olpc_xo1_remove(struct platform_device *pdev) | |||
104 | 101 | ||
105 | static struct platform_driver cs5535_pms_drv = { | 102 | static struct platform_driver cs5535_pms_drv = { |
106 | .driver = { | 103 | .driver = { |
107 | .name = "cs5535-pms", | 104 | .name = "olpc-xo1-pms", |
108 | .owner = THIS_MODULE, | 105 | .owner = THIS_MODULE, |
109 | }, | 106 | }, |
110 | .probe = olpc_xo1_probe, | 107 | .probe = olpc_xo1_probe, |
@@ -113,7 +110,7 @@ static struct platform_driver cs5535_pms_drv = { | |||
113 | 110 | ||
114 | static struct platform_driver cs5535_acpi_drv = { | 111 | static struct platform_driver cs5535_acpi_drv = { |
115 | .driver = { | 112 | .driver = { |
116 | .name = "cs5535-acpi", | 113 | .name = "olpc-xo1-acpi", |
117 | .owner = THIS_MODULE, | 114 | .owner = THIS_MODULE, |
118 | }, | 115 | }, |
119 | .probe = olpc_xo1_probe, | 116 | .probe = olpc_xo1_probe, |
@@ -124,26 +121,27 @@ static int __init olpc_xo1_init(void) | |||
124 | { | 121 | { |
125 | int r; | 122 | int r; |
126 | 123 | ||
127 | r = platform_driver_register(&cs5535_pms_drv); | 124 | r = mfd_shared_platform_driver_register(&cs5535_pms_drv, "cs5535-pms"); |
128 | if (r) | 125 | if (r) |
129 | return r; | 126 | return r; |
130 | 127 | ||
131 | r = platform_driver_register(&cs5535_acpi_drv); | 128 | r = mfd_shared_platform_driver_register(&cs5535_acpi_drv, |
129 | "cs5535-acpi"); | ||
132 | if (r) | 130 | if (r) |
133 | platform_driver_unregister(&cs5535_pms_drv); | 131 | mfd_shared_platform_driver_unregister(&cs5535_pms_drv); |
134 | 132 | ||
135 | return r; | 133 | return r; |
136 | } | 134 | } |
137 | 135 | ||
138 | static void __exit olpc_xo1_exit(void) | 136 | static void __exit olpc_xo1_exit(void) |
139 | { | 137 | { |
140 | platform_driver_unregister(&cs5535_acpi_drv); | 138 | mfd_shared_platform_driver_unregister(&cs5535_acpi_drv); |
141 | platform_driver_unregister(&cs5535_pms_drv); | 139 | mfd_shared_platform_driver_unregister(&cs5535_pms_drv); |
142 | } | 140 | } |
143 | 141 | ||
144 | MODULE_AUTHOR("Daniel Drake <dsd@laptop.org>"); | 142 | MODULE_AUTHOR("Daniel Drake <dsd@laptop.org>"); |
145 | MODULE_LICENSE("GPL"); | 143 | MODULE_LICENSE("GPL"); |
146 | MODULE_ALIAS("platform:olpc-xo1"); | 144 | MODULE_ALIAS("platform:cs5535-pms"); |
147 | 145 | ||
148 | module_init(olpc_xo1_init); | 146 | module_init(olpc_xo1_init); |
149 | module_exit(olpc_xo1_exit); | 147 | module_exit(olpc_xo1_exit); |
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 36df991985b2..468d591dde31 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c | |||
@@ -417,24 +417,25 @@ const char *arch_vma_name(struct vm_area_struct *vma) | |||
417 | return NULL; | 417 | return NULL; |
418 | } | 418 | } |
419 | 419 | ||
420 | struct vm_area_struct *get_gate_vma(struct task_struct *tsk) | 420 | struct vm_area_struct *get_gate_vma(struct mm_struct *mm) |
421 | { | 421 | { |
422 | struct mm_struct *mm = tsk->mm; | 422 | /* |
423 | 423 | * Check to see if the corresponding task was created in compat vdso | |
424 | /* Check to see if this task was created in compat vdso mode */ | 424 | * mode. |
425 | */ | ||
425 | if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE) | 426 | if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE) |
426 | return &gate_vma; | 427 | return &gate_vma; |
427 | return NULL; | 428 | return NULL; |
428 | } | 429 | } |
429 | 430 | ||
430 | int in_gate_area(struct task_struct *task, unsigned long addr) | 431 | int in_gate_area(struct mm_struct *mm, unsigned long addr) |
431 | { | 432 | { |
432 | const struct vm_area_struct *vma = get_gate_vma(task); | 433 | const struct vm_area_struct *vma = get_gate_vma(mm); |
433 | 434 | ||
434 | return vma && addr >= vma->vm_start && addr < vma->vm_end; | 435 | return vma && addr >= vma->vm_start && addr < vma->vm_end; |
435 | } | 436 | } |
436 | 437 | ||
437 | int in_gate_area_no_task(unsigned long addr) | 438 | int in_gate_area_no_mm(unsigned long addr) |
438 | { | 439 | { |
439 | return 0; | 440 | return 0; |
440 | } | 441 | } |
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index e4343fe488ed..1c7121ba18ff 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig | |||
@@ -38,7 +38,7 @@ config XEN_MAX_DOMAIN_MEMORY | |||
38 | 38 | ||
39 | config XEN_SAVE_RESTORE | 39 | config XEN_SAVE_RESTORE |
40 | bool | 40 | bool |
41 | depends on XEN && PM | 41 | depends on XEN |
42 | default y | 42 | default y |
43 | 43 | ||
44 | config XEN_DEBUG_FS | 44 | config XEN_DEBUG_FS |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 3f6f3347aa17..c82df6c9c0f0 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -79,8 +79,7 @@ | |||
79 | 79 | ||
80 | /* | 80 | /* |
81 | * Protects atomic reservation decrease/increase against concurrent increases. | 81 | * Protects atomic reservation decrease/increase against concurrent increases. |
82 | * Also protects non-atomic updates of current_pages and driver_pages, and | 82 | * Also protects non-atomic updates of current_pages and balloon lists. |
83 | * balloon lists. | ||
84 | */ | 83 | */ |
85 | DEFINE_SPINLOCK(xen_reservation_lock); | 84 | DEFINE_SPINLOCK(xen_reservation_lock); |
86 | 85 | ||
@@ -1488,10 +1487,12 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) | |||
1488 | /* | 1487 | /* |
1489 | * If the new pfn is within the range of the newly allocated | 1488 | * If the new pfn is within the range of the newly allocated |
1490 | * kernel pagetable, and it isn't being mapped into an | 1489 | * kernel pagetable, and it isn't being mapped into an |
1491 | * early_ioremap fixmap slot, make sure it is RO. | 1490 | * early_ioremap fixmap slot as a freshly allocated page, make sure |
1491 | * it is RO. | ||
1492 | */ | 1492 | */ |
1493 | if (!is_early_ioremap_ptep(ptep) && | 1493 | if (((!is_early_ioremap_ptep(ptep) && |
1494 | pfn >= pgt_buf_start && pfn < pgt_buf_end) | 1494 | pfn >= pgt_buf_start && pfn < pgt_buf_end)) || |
1495 | (is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1))) | ||
1495 | pte = pte_wrprotect(pte); | 1496 | pte = pte_wrprotect(pte); |
1496 | 1497 | ||
1497 | return pte; | 1498 | return pte; |
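
The mask_rw_pte() change above widens the write-protect condition: pagetable-buffer pages outside the early_ioremap path stay read-only as before, and every early_ioremap pte is now forced read-only too, except for the page just allocated (pgt_buf_end - 1), which must stay writable. The predicate, restated as a small truth-table check:

#include <stdbool.h>
#include <stdio.h>

/* restates the widened condition from the hunk above */
static bool make_ro(bool early_ioremap, unsigned long pfn,
		    unsigned long pgt_buf_start, unsigned long pgt_buf_end)
{
	return (!early_ioremap &&
		pfn >= pgt_buf_start && pfn < pgt_buf_end) ||
	       (early_ioremap && pfn != pgt_buf_end - 1);
}

int main(void)
{
	/* pagetable page: RO; stale ioremap pte: RO; fresh page: stays RW */
	printf("%d %d %d\n",
	       make_ro(false, 100, 64, 128),
	       make_ro(true, 100, 64, 128),
	       make_ro(true, 127, 64, 128));
	return 0;
}
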
@@ -1701,9 +1702,6 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) | |||
1701 | for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { | 1702 | for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { |
1702 | pte_t pte; | 1703 | pte_t pte; |
1703 | 1704 | ||
1704 | if (pfn > max_pfn_mapped) | ||
1705 | max_pfn_mapped = pfn; | ||
1706 | |||
1707 | if (!pte_none(pte_page[pteidx])) | 1705 | if (!pte_none(pte_page[pteidx])) |
1708 | continue; | 1706 | continue; |
1709 | 1707 | ||
@@ -1745,7 +1743,7 @@ static void convert_pfn_mfn(void *v) | |||
1745 | } | 1743 | } |
1746 | 1744 | ||
1747 | /* | 1745 | /* |
1748 | * Set up the inital kernel pagetable. | 1746 | * Set up the initial kernel pagetable. |
1749 | * | 1747 | * |
1750 | * We can construct this by grafting the Xen provided pagetable into | 1748 | * We can construct this by grafting the Xen provided pagetable into |
1751 | * head_64.S's preconstructed pagetables. We copy the Xen L2's into | 1749 | * head_64.S's preconstructed pagetables. We copy the Xen L2's into |
@@ -1761,6 +1759,12 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, | |||
1761 | pud_t *l3; | 1759 | pud_t *l3; |
1762 | pmd_t *l2; | 1760 | pmd_t *l2; |
1763 | 1761 | ||
1762 | /* max_pfn_mapped is the last pfn mapped in the initial memory | ||
1763 | * mappings. Considering that on Xen, after the kernel mappings, we | ||
1764 | * also have mappings of some pages that don't exist in pfn space, we | ||
1765 | * set max_pfn_mapped to the last real pfn mapped. */ | ||
1766 | max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); | ||
1767 | |||
1764 | /* Zap identity mapping */ | 1768 | /* Zap identity mapping */ |
1765 | init_level4_pgt[0] = __pgd(0); | 1769 | init_level4_pgt[0] = __pgd(0); |
1766 | 1770 | ||
@@ -1865,9 +1869,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, | |||
1865 | initial_kernel_pmd = | 1869 | initial_kernel_pmd = |
1866 | extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); | 1870 | extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); |
1867 | 1871 | ||
1868 | max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + | 1872 | max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); |
1869 | xen_start_info->nr_pt_frames * PAGE_SIZE + | ||
1870 | 512*1024); | ||
1871 | 1873 | ||
1872 | kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); | 1874 | kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); |
1873 | memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); | 1875 | memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); |