Diffstat (limited to 'arch/x86')
51 files changed, 1073 insertions, 522 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f1304d38aa21..7d5feb5908dd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -261,6 +261,9 @@ config ARCH_HWEIGHT_CFLAGS
 config ARCH_SUPPORTS_UPROBES
 	def_bool y
 
+config FIX_EARLYCON_MEM
+	def_bool y
+
 source "init/Kconfig"
 source "kernel/Kconfig.freezer"
 
@@ -415,7 +418,6 @@ config X86_UV
 
 config X86_GOLDFISH
 	bool "Goldfish (Virtual Platform)"
-	depends on X86_32
 	depends on X86_EXTENDED_PLATFORM
 	---help---
 	  Enable support for the Goldfish virtual platform used primarily
diff --git a/arch/x86/boot/compressed/string.c b/arch/x86/boot/compressed/string.c
index f3c57e341402..00e788be1db9 100644
--- a/arch/x86/boot/compressed/string.c
+++ b/arch/x86/boot/compressed/string.c
@@ -1,9 +1,5 @@
-#include "misc.h"
 #include "../string.c"
 
-/* misc.h might pull in string_32.h which has a macro for memcpy. undef that */
-#undef memcpy
-
 #ifdef CONFIG_X86_32
 void *memcpy(void *dest, const void *src, size_t n)
 {
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index 5339040ef86e..493f3fd9f139 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -12,14 +12,9 @@
  * Very basic string functions
  */
 
-#include "boot.h"
+#include <linux/types.h>
+#include "ctype.h"
 
-/*
- * This file gets included in compressed/string.c which might pull in
- * string_32.h and which in turn maps memcmp to __builtin_memcmp(). Undo
- * that first.
- */
-#undef memcmp
 int memcmp(const void *s1, const void *s2, size_t len)
 {
 	u8 diff;
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 619e7f7426c6..32d2e7056c87 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -244,7 +244,6 @@ CONFIG_HID_TOPSEED=y
 CONFIG_HID_PID=y
 CONFIG_USB_HIDDEV=y
 CONFIG_USB=y
-CONFIG_USB_DEBUG=y
 CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
 CONFIG_USB_MON=y
 CONFIG_USB_EHCI_HCD=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 6181c69b786b..a481dd4755d5 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -239,7 +239,6 @@ CONFIG_HID_TOPSEED=y
 CONFIG_HID_PID=y
 CONFIG_USB_HIDDEV=y
 CONFIG_USB=y
-CONFIG_USB_DEBUG=y
 CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
 CONFIG_USB_MON=y
 CONFIG_USB_EHCI_HCD=y
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index b17f4f48ecd7..6dd1c7dd0473 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -7,6 +7,7 @@
 #include <asm/alternative.h>
 #include <asm/cmpxchg.h>
 #include <asm/rmwcc.h>
+#include <asm/barrier.h>
 
 /*
  * Atomic operations that C can't guarantee us.  Useful for
@@ -243,12 +244,6 @@ static inline void atomic_or_long(unsigned long *v1, unsigned long v2)
 	: : "r" ((unsigned)(mask)), "m" (*(addr))	\
 	: "memory")
 
-/* Atomic operations are already serializing on x86 */
-#define smp_mb__before_atomic_dec()	barrier()
-#define smp_mb__after_atomic_dec()	barrier()
-#define smp_mb__before_atomic_inc()	barrier()
-#define smp_mb__after_atomic_inc()	barrier()
-
 #ifdef CONFIG_X86_32
 # include <asm/atomic64_32.h>
 #else
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 69bbb4845020..5c7198cca5ed 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -137,6 +137,10 @@ do { \
 
 #endif
 
+/* Atomic operations are already serializing on x86 */
+#define smp_mb__before_atomic()	barrier()
+#define smp_mb__after_atomic()	barrier()
+
 /*
  * Stop RDTSC speculation. This is needed when you need to use RDTSC
  * (or get_cycles or vread that possibly accesses the TSC) in a defined
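
Note (editor's sketch, not part of the commit): the renamed barriers pair with a non-barrier atomic such as clear_bit(), exactly as the bitops.h comment below spells out; the hw_nmi.c hunk further down shows a real in-tree use. A minimal illustration, assuming kernel context:

	#include <linux/bitops.h>

	static unsigned long flags;

	/*
	 * Release a flag bit that another CPU polls on: the clear_bit() RMW
	 * is atomic but carries no barrier, so order our prior stores first.
	 */
	static void release_flag(void)
	{
		smp_mb__before_atomic();
		clear_bit(0, &flags);
	}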
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 9fc1af74dc83..afcd35d331de 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -15,6 +15,7 @@
 #include <linux/compiler.h>
 #include <asm/alternative.h>
 #include <asm/rmwcc.h>
+#include <asm/barrier.h>
 
 #if BITS_PER_LONG == 32
 # define _BITOPS_LONG_SHIFT 5
@@ -102,7 +103,7 @@ static inline void __set_bit(long nr, volatile unsigned long *addr)
  *
  * clear_bit() is atomic and may not be reordered.  However, it does
  * not contain a memory barrier, so if it is used for locking purposes,
- * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
+ * you should call smp_mb__before_atomic() and/or smp_mb__after_atomic()
  * in order to ensure changes are visible on other processors.
  */
 static __always_inline void
@@ -156,9 +157,6 @@ static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
 	__clear_bit(nr, addr);
 }
 
-#define smp_mb__before_clear_bit()	barrier()
-#define smp_mb__after_clear_bit()	barrier()
-
 /**
  * __change_bit - Toggle a bit in memory
  * @nr: the bit to change
diff --git a/arch/x86/include/asm/cmdline.h b/arch/x86/include/asm/cmdline.h
new file mode 100644
index 000000000000..e01f7f7ccb0c
--- /dev/null
+++ b/arch/x86/include/asm/cmdline.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_X86_CMDLINE_H
+#define _ASM_X86_CMDLINE_H
+
+int cmdline_find_option_bool(const char *cmdline_ptr, const char *option);
+
+#endif /* _ASM_X86_CMDLINE_H */
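
Note (editor's sketch): the user of this new helper in this series is the early microcode loader (see arch/x86/kernel/cpu/microcode/core_early.c below). Assumed semantics: a nonzero return when the named boolean option appears on the command line. A hypothetical caller:

	#include <asm/cmdline.h>
	#include <asm/setup.h>		/* boot_command_line */

	static bool ucode_loader_disabled(void)
	{
		/* true when "dis_ucode_ldr" was passed at boot */
		return cmdline_find_option_bool(boot_command_line, "dis_ucode_ldr");
	}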
diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
index a8091216963b..68c05398bba9 100644
--- a/arch/x86/include/asm/hugetlb.h
+++ b/arch/x86/include/asm/hugetlb.h
@@ -52,6 +52,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 					 unsigned long addr, pte_t *ptep)
 {
+	ptep_clear_flush(vma, addr, ptep);
 }
 
 static inline int huge_pte_none(pte_t pte)
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index a307b7530e54..4615906d83df 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -190,8 +190,8 @@ extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
 #define trace_interrupt interrupt
 #endif
 
-#define VECTOR_UNDEFINED	-1
-#define VECTOR_RETRIGGERED	-2
+#define VECTOR_UNDEFINED	(-1)
+#define VECTOR_RETRIGGERED	(-2)
 
 typedef int vector_irq_t[NR_VECTORS];
 DECLARE_PER_CPU(vector_irq_t, vector_irq);
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index b59827e76529..64dc362506b7 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -25,6 +25,7 @@ struct cpu_signature {
 struct device;
 
 enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND };
+extern bool dis_ucode_ldr;
 
 struct microcode_ops {
 	enum ucode_state (*request_microcode_user) (int cpu,
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 8de6d9cf3b95..678205195ae1 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -1,7 +1,7 @@
 #ifndef _ASM_X86_PAGE_64_DEFS_H
 #define _ASM_X86_PAGE_64_DEFS_H
 
-#define THREAD_SIZE_ORDER	1
+#define THREAD_SIZE_ORDER	2
 #define THREAD_SIZE  (PAGE_SIZE << THREAD_SIZE_ORDER)
 #define CURRENT_MASK (~(THREAD_SIZE - 1))
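
Note (editor's arithmetic, assuming the usual 4 KiB PAGE_SIZE on x86-64): this order bump doubles the 64-bit kernel stack:

	/* old: THREAD_SIZE = 4096 << 1 =  8192 bytes ( 8 KiB, order-1 pages) */
	/* new: THREAD_SIZE = 4096 << 2 = 16384 bytes (16 KiB, order-2 pages) */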
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 96ae4f4040bb..0892ea0e683f 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -68,7 +68,6 @@ void pcibios_config_init(void);
 void pcibios_scan_root(int bus);
 
 void pcibios_set_master(struct pci_dev *dev);
-void pcibios_penalize_isa_irq(int irq, int active);
 struct irq_routing_table *pcibios_get_irq_routing_table(void);
 int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
 
diff --git a/arch/x86/include/asm/sync_bitops.h b/arch/x86/include/asm/sync_bitops.h
index 05af3b31d522..f28a24b51dc7 100644
--- a/arch/x86/include/asm/sync_bitops.h
+++ b/arch/x86/include/asm/sync_bitops.h
@@ -41,7 +41,7 @@ static inline void sync_set_bit(long nr, volatile unsigned long *addr)
  *
  * sync_clear_bit() is atomic and may not be reordered.  However, it does
  * not contain a memory barrier, so if it is used for locking purposes,
- * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
+ * you should call smp_mb__before_atomic() and/or smp_mb__after_atomic()
  * in order to ensure changes are visible on other processors.
  */
 static inline void sync_clear_bit(long nr, volatile unsigned long *addr)
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 47e5de25ba79..854053889d4d 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -83,6 +83,7 @@ struct thread_info {
 #define TIF_FORK		18	/* ret_from_fork */
 #define TIF_NOHZ		19	/* in adaptive nohz mode */
 #define TIF_MEMDIE		20	/* is terminating due to OOM killer */
+#define TIF_POLLING_NRFLAG	21	/* idle is polling for TIF_NEED_RESCHED */
 #define TIF_IO_BITMAP		22	/* uses I/O bitmap */
 #define TIF_FORCED_TF		24	/* true if TF in eflags artificially */
 #define TIF_BLOCKSTEP		25	/* set when we want DEBUGCTLMSR_BTF */
@@ -106,6 +107,7 @@ struct thread_info {
 #define _TIF_IA32		(1 << TIF_IA32)
 #define _TIF_FORK		(1 << TIF_FORK)
 #define _TIF_NOHZ		(1 << TIF_NOHZ)
+#define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
 #define _TIF_FORCED_TF		(1 << TIF_FORCED_TF)
 #define _TIF_BLOCKSTEP		(1 << TIF_BLOCKSTEP)
@@ -191,8 +193,6 @@ static inline struct thread_info *current_thread_info(void)
  * have to worry about atomic accesses.
  */
 #define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/
-#define TS_POLLING		0x0004	/* idle task polling need_resched,
-					   skip sending interrupt */
 #define TS_RESTORE_SIGMASK	0x0008	/* restore signal mask in do_signal() */
 
 #ifndef __ASSEMBLY__
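
Note (editor's sketch, simplified and not the kernel's actual helpers): moving the polling state from thread_info->status (TS_POLLING, plain stores) to a TIF_ flag lets a remote waker test it with the same atomic bitops that set TIF_NEED_RESCHED, roughly:

	/*
	 * Waker side: if the idle CPU advertises polling, it will notice the
	 * freshly set need-resched bit on its own and no IPI is required.
	 */
	static bool resched_needs_ipi(struct thread_info *ti)
	{
		set_bit(TIF_NEED_RESCHED, (unsigned long *)&ti->flags);
		smp_mb__after_atomic();		/* pairs with the idle loop's check */
		return !test_bit(TIF_POLLING_NRFLAG, (unsigned long *)&ti->flags);
	}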
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index 3087ea9c5f2e..93bee7b93854 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -33,15 +33,27 @@ typedef u8 uprobe_opcode_t;
 #define UPROBE_SWBP_INSN		0xcc
 #define UPROBE_SWBP_INSN_SIZE		   1
 
+struct uprobe_xol_ops;
+
 struct arch_uprobe {
-	u16				fixups;
 	union {
 		u8			insn[MAX_UINSN_BYTES];
 		u8			ixol[MAX_UINSN_BYTES];
 	};
+
+	u16				fixups;
+	const struct uprobe_xol_ops	*ops;
+
+	union {
 #ifdef CONFIG_X86_64
 	unsigned long rip_rela_target_address;
 #endif
+		struct {
+			s32	offs;
+			u8	ilen;
+			u8	opc1;
+		}			branch;
+	};
 };
 
 struct arch_uprobe_task {
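
Note (editor's sketch): the new branch fields cache what an emulated relative branch needs, the opcode byte (opc1), the instruction length (ilen) and the signed displacement (offs), so a probed call/jmp need not be single-stepped out of line. A hypothetical, simplified emulate op (the real one lives in arch/x86/kernel/uprobes.c):

	static bool emulate_rel_branch(struct arch_uprobe *auprobe, struct pt_regs *regs)
	{
		unsigned long next_ip = regs->ip + auprobe->branch.ilen;

		/*
		 * A call (opc1 == 0xe8, cf. branch_is_call() below) would also
		 * push next_ip onto the user stack before redirecting.
		 */
		regs->ip = next_ip + auprobe->branch.offs;
		return true;
	}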
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index e709884d0ef9..ca08a27b90b3 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -343,7 +343,7 @@ HYPERVISOR_memory_op(unsigned int cmd, void *arg)
 }
 
 static inline int
-HYPERVISOR_multicall(void *call_list, int nr_calls)
+HYPERVISOR_multicall(void *call_list, uint32_t nr_calls)
 {
 	return _hypercall2(int, multicall, call_list, nr_calls);
 }
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index fd9cb7695b5f..3400dbaec3c3 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -54,6 +54,9 @@ typedef unsigned long xen_pfn_t;
 #define PRI_xen_pfn "lx"
 typedef unsigned long xen_ulong_t;
 #define PRI_xen_ulong "lx"
+typedef long xen_long_t;
+#define PRI_xen_long "lx"
+
 /* Guest handles for primitive C types. */
 __DEFINE_GUEST_HANDLE(uchar, unsigned char);
 __DEFINE_GUEST_HANDLE(uint, unsigned int);
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 9fa8aa051f54..76164e173a24 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -10,6 +10,8 @@
  *
  * Copyright 2002 Andi Kleen, SuSE Labs.
  */
+#define pr_fmt(fmt) "AGP: " fmt
+
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/init.h>
@@ -75,14 +77,13 @@ static u32 __init allocate_aperture(void)
 	addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR,
 				      aper_size, aper_size);
 	if (!addr) {
-		printk(KERN_ERR
-			"Cannot allocate aperture memory hole (%lx,%uK)\n",
-				addr, aper_size>>10);
+		pr_err("Cannot allocate aperture memory hole [mem %#010lx-%#010lx] (%uKB)\n",
+		       addr, addr + aper_size - 1, aper_size >> 10);
 		return 0;
 	}
 	memblock_reserve(addr, aper_size);
-	printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n",
-			aper_size >> 10, addr);
+	pr_info("Mapping aperture over RAM [mem %#010lx-%#010lx] (%uKB)\n",
+		addr, addr + aper_size - 1, aper_size >> 10);
 	register_nosave_region(addr >> PAGE_SHIFT,
 				(addr+aper_size) >> PAGE_SHIFT);
 
@@ -126,10 +127,11 @@ static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order)
 	u64 aper;
 	u32 old_order;
 
-	printk(KERN_INFO "AGP bridge at %02x:%02x:%02x\n", bus, slot, func);
+	pr_info("pci 0000:%02x:%02x:%02x: AGP bridge\n", bus, slot, func);
 	apsizereg = read_pci_config_16(bus, slot, func, cap + 0x14);
 	if (apsizereg == 0xffffffff) {
-		printk(KERN_ERR "APSIZE in AGP bridge unreadable\n");
+		pr_err("pci 0000:%02x:%02x.%d: APSIZE unreadable\n",
+		       bus, slot, func);
 		return 0;
 	}
 
@@ -153,16 +155,18 @@ static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order)
 	 * On some sick chips, APSIZE is 0. It means it wants 4G
 	 * so let double check that order, and lets trust AMD NB settings:
 	 */
-	printk(KERN_INFO "Aperture from AGP @ %Lx old size %u MB\n",
-			aper, 32 << old_order);
+	pr_info("pci 0000:%02x:%02x.%d: AGP aperture [bus addr %#010Lx-%#010Lx] (old size %uMB)\n",
+		bus, slot, func, aper, aper + (32ULL << (old_order + 20)) - 1,
+		32 << old_order);
 	if (aper + (32ULL<<(20 + *order)) > 0x100000000ULL) {
-		printk(KERN_INFO "Aperture size %u MB (APSIZE %x) is not right, using settings from NB\n",
-				32 << *order, apsizereg);
+		pr_info("pci 0000:%02x:%02x.%d: AGP aperture size %uMB (APSIZE %#x) is not right, using settings from NB\n",
+			bus, slot, func, 32 << *order, apsizereg);
 		*order = old_order;
 	}
 
-	printk(KERN_INFO "Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n",
-			aper, 32 << *order, apsizereg);
+	pr_info("pci 0000:%02x:%02x.%d: AGP aperture [bus addr %#010Lx-%#010Lx] (%uMB, APSIZE %#x)\n",
+		bus, slot, func, aper, aper + (32ULL << (*order + 20)) - 1,
+		32 << *order, apsizereg);
 
 	if (!aperture_valid(aper, (32*1024*1024) << *order, 32<<20))
 		return 0;
@@ -218,7 +222,7 @@ static u32 __init search_agp_bridge(u32 *order, int *valid_agp)
 			}
 		}
 	}
-	printk(KERN_INFO "No AGP bridge found\n");
+	pr_info("No AGP bridge found\n");
 
 	return 0;
 }
@@ -310,7 +314,8 @@ void __init early_gart_iommu_check(void)
 		if (e820_any_mapped(aper_base, aper_base + aper_size,
 				    E820_RAM)) {
 			/* reserve it, so we can reuse it in second kernel */
-			printk(KERN_INFO "update e820 for GART\n");
+			pr_info("e820: reserve [mem %#010Lx-%#010Lx] for GART\n",
+				aper_base, aper_base + aper_size - 1);
 			e820_add_region(aper_base, aper_size, E820_RESERVED);
 			update_e820();
 		}
@@ -354,7 +359,7 @@ int __init gart_iommu_hole_init(void)
 	    !early_pci_allowed())
 		return -ENODEV;
 
-	printk(KERN_INFO "Checking aperture...\n");
+	pr_info("Checking aperture...\n");
 
 	if (!fallback_aper_force)
 		agp_aper_base = search_agp_bridge(&agp_aper_order, &valid_agp);
@@ -395,8 +400,9 @@ int __init gart_iommu_hole_init(void)
 		aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff;
 		aper_base <<= 25;
 
-		printk(KERN_INFO "Node %d: aperture @ %Lx size %u MB\n",
-				node, aper_base, aper_size >> 20);
+		pr_info("Node %d: aperture [bus addr %#010Lx-%#010Lx] (%uMB)\n",
+			node, aper_base, aper_base + aper_size - 1,
+			aper_size >> 20);
 		node++;
 
 		if (!aperture_valid(aper_base, aper_size, 64<<20)) {
@@ -407,9 +413,9 @@ int __init gart_iommu_hole_init(void)
 			if (!no_iommu &&
 			    max_pfn > MAX_DMA32_PFN &&
 			    !printed_gart_size_msg) {
-				printk(KERN_ERR "you are using iommu with agp, but GART size is less than 64M\n");
-				printk(KERN_ERR "please increase GART size in your BIOS setup\n");
-				printk(KERN_ERR "if BIOS doesn't have that option, contact your HW vendor!\n");
+				pr_err("you are using iommu with agp, but GART size is less than 64MB\n");
+				pr_err("please increase GART size in your BIOS setup\n");
+				pr_err("if BIOS doesn't have that option, contact your HW vendor!\n");
 				printed_gart_size_msg = 1;
 			}
 		} else {
@@ -446,13 +452,10 @@ out:
 	    force_iommu ||
 	    valid_agp ||
 	    fallback_aper_force) {
-		printk(KERN_INFO
-			"Your BIOS doesn't leave a aperture memory hole\n");
-		printk(KERN_INFO
-			"Please enable the IOMMU option in the BIOS setup\n");
-		printk(KERN_INFO
-			"This costs you %d MB of RAM\n",
-			32 << fallback_aper_order);
+		pr_info("Your BIOS doesn't leave a aperture memory hole\n");
+		pr_info("Please enable the IOMMU option in the BIOS setup\n");
+		pr_info("This costs you %dMB of RAM\n",
+			32 << fallback_aper_order);
 
 		aper_order = fallback_aper_order;
 		aper_alloc = allocate_aperture();
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index a698d7165c96..eab67047dec3 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -57,7 +57,7 @@ void arch_trigger_all_cpu_backtrace(void)
 	}
 
 	clear_bit(0, &backtrace_flag);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 }
 
 static int __kprobes
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 3ab03430211d..f3a1f04ed4cb 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -844,21 +844,10 @@ static int apm_do_idle(void)
 	int polling;
 	int err = 0;
 
-	polling = !!(current_thread_info()->status & TS_POLLING);
-	if (polling) {
-		current_thread_info()->status &= ~TS_POLLING;
-		/*
-		 * TS_POLLING-cleared state must be visible before we
-		 * test NEED_RESCHED:
-		 */
-		smp_mb();
-	}
 	if (!need_resched()) {
 		idled = 1;
 		ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax, &err);
 	}
-	if (polling)
-		current_thread_info()->status |= TS_POLLING;
 
 	if (!idled)
 		return 0;
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 15c987698b0f..dd9d6190b08d 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -97,6 +97,9 @@ MODULE_LICENSE("GPL");
 
 static struct microcode_ops	*microcode_ops;
 
+bool dis_ucode_ldr;
+module_param(dis_ucode_ldr, bool, 0);
+
 /*
  * Synchronization.
  *
@@ -546,6 +549,9 @@ static int __init microcode_init(void)
 	struct cpuinfo_x86 *c = &cpu_data(0);
 	int error;
 
+	if (dis_ucode_ldr)
+		return 0;
+
 	if (c->x86_vendor == X86_VENDOR_INTEL)
 		microcode_ops = init_intel_microcode();
 	else if (c->x86_vendor == X86_VENDOR_AMD)
diff --git a/arch/x86/kernel/cpu/microcode/core_early.c b/arch/x86/kernel/cpu/microcode/core_early.c
index be7f8514f577..5f28a64e71ea 100644
--- a/arch/x86/kernel/cpu/microcode/core_early.c
+++ b/arch/x86/kernel/cpu/microcode/core_early.c
@@ -17,9 +17,11 @@
  *	2 of the License, or (at your option) any later version.
  */
 #include <linux/module.h>
+#include <asm/microcode.h>
 #include <asm/microcode_intel.h>
 #include <asm/microcode_amd.h>
 #include <asm/processor.h>
+#include <asm/cmdline.h>
 
 #define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
 #define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u')
@@ -72,10 +74,33 @@ static int x86_family(void)
 	return x86;
 }
 
+static bool __init check_loader_disabled_bsp(void)
+{
+#ifdef CONFIG_X86_32
+	const char *cmdline = (const char *)__pa_nodebug(boot_command_line);
+	const char *opt	    = "dis_ucode_ldr";
+	const char *option  = (const char *)__pa_nodebug(opt);
+	bool *res = (bool *)__pa_nodebug(&dis_ucode_ldr);
+
+#else /* CONFIG_X86_64 */
+	const char *cmdline = boot_command_line;
+	const char *option  = "dis_ucode_ldr";
+	bool *res = &dis_ucode_ldr;
+#endif
+
+	if (cmdline_find_option_bool(cmdline, option))
+		*res = true;
+
+	return *res;
+}
+
 void __init load_ucode_bsp(void)
 {
 	int vendor, x86;
 
+	if (check_loader_disabled_bsp())
+		return;
+
 	if (!have_cpuid_p())
 		return;
 
@@ -96,10 +121,22 @@ void __init load_ucode_bsp(void)
 	}
 }
 
+static bool check_loader_disabled_ap(void)
+{
+#ifdef CONFIG_X86_32
+	return __pa_nodebug(dis_ucode_ldr);
+#else
+	return dis_ucode_ldr;
+#endif
+}
+
 void load_ucode_ap(void)
 {
 	int vendor, x86;
 
+	if (check_loader_disabled_ap())
+		return;
+
 	if (!have_cpuid_p())
 		return;
 
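
Note (editor's): booting with dis_ucode_ldr on the kernel command line therefore disables the early loader on both the BSP and the APs. The __pa_nodebug() gymnastics exist because on 32-bit this code runs before paging is enabled, so globals must be reached through their physical addresses; schematically, as done in check_loader_disabled_bsp() above:

	/*
	 * 32-bit early boot: write through the physical alias, not the
	 * virtual symbol (whose mapping is not live yet).
	 */
	bool *res = (bool *)__pa_nodebug(&dis_ucode_ldr);
	*res = true;		/* rather than: dis_ucode_ldr = true; */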
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ae407f7226c8..89f3b7c1af20 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -721,6 +721,7 @@ int perf_assign_events(struct perf_event **events, int n,
 
 	return sched.state.unassigned;
 }
+EXPORT_SYMBOL_GPL(perf_assign_events);
 
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 {
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index aa333d966886..adb02aa62af5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -169,7 +169,6 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-	FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */
 	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
 	EVENT_CONSTRAINT_END
 };
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index ae96cfa5eddd..980970cb744d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -108,15 +108,31 @@ static u64 precise_store_data(u64 status)
 	return val;
 }
 
-static u64 precise_store_data_hsw(u64 status)
+static u64 precise_store_data_hsw(struct perf_event *event, u64 status)
 {
 	union perf_mem_data_src dse;
+	u64 cfg = event->hw.config & INTEL_ARCH_EVENT_MASK;
 
 	dse.val = 0;
 	dse.mem_op = PERF_MEM_OP_STORE;
 	dse.mem_lvl = PERF_MEM_LVL_NA;
+
+	/*
+	 * L1 info only valid for following events:
+	 *
+	 * MEM_UOPS_RETIRED.STLB_MISS_STORES
+	 * MEM_UOPS_RETIRED.LOCK_STORES
+	 * MEM_UOPS_RETIRED.SPLIT_STORES
+	 * MEM_UOPS_RETIRED.ALL_STORES
+	 */
+	if (cfg != 0x12d0 && cfg != 0x22d0 && cfg != 0x42d0 && cfg != 0x82d0)
+		return dse.mem_lvl;
+
 	if (status & 1)
-		dse.mem_lvl = PERF_MEM_LVL_L1;
+		dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+	else
+		dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
+
 	/* Nothing else supported. Sorry. */
 	return dse.val;
 }
@@ -887,7 +903,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 			data.data_src.val = load_latency_data(pebs->dse);
 		else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
 			data.data_src.val =
-				precise_store_data_hsw(pebs->dse);
+				precise_store_data_hsw(event, pebs->dse);
 		else
 			data.data_src.val = precise_store_data(pebs->dse);
 	}
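
Note (editor's): the magic cfg constants decode as event 0xd0 plus one umask each (event select in bits 0-7 of the config, umask in bits 8-15), matching the four events listed in the comment above:

	/* ((umask) << 8) | (event)                                     */
	/* 0x12d0 = umask 0x12, event 0xd0: MEM_UOPS_RETIRED.STLB_MISS_STORES */
	/* 0x22d0 = umask 0x22, event 0xd0: MEM_UOPS_RETIRED.LOCK_STORES      */
	/* 0x42d0 = umask 0x42, event 0xd0: MEM_UOPS_RETIRED.SPLIT_STORES     */
	/* 0x82d0 = umask 0x82, event 0xd0: MEM_UOPS_RETIRED.ALL_STORES       */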
diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c
index 384df5105fbc..136ac74dee82 100644
--- a/arch/x86/kernel/cpu/rdrand.c
+++ b/arch/x86/kernel/cpu/rdrand.c
@@ -27,6 +27,7 @@
 static int __init x86_rdrand_setup(char *s)
 {
 	setup_clear_cpu_cap(X86_FEATURE_RDRAND);
+	setup_clear_cpu_cap(X86_FEATURE_RDSEED);
 	return 1;
 }
 __setup("nordrand", x86_rdrand_setup);
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1e96c3628bf2..be846d2468f7 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -36,7 +36,7 @@
  * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
  *   frame that is otherwise undefined after a SYSCALL
  * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
- * - errorentry/paranoidentry/zeroentry - Define exception entry points.
+ * - idtentry - Define exception entry points.
  */
 
 #include <linux/linkage.h>
@@ -1203,125 +1203,100 @@ apicinterrupt IRQ_WORK_VECTOR \
 /*
  * Exception entry points.
  */
-.macro zeroentry sym do_sym
-ENTRY(\sym)
-	INTR_FRAME
-	ASM_CLAC
-	PARAVIRT_ADJUST_EXCEPTION_FRAME
-	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
-	subq $ORIG_RAX-R15, %rsp
-	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
-	call error_entry
-	DEFAULT_FRAME 0
-	movq %rsp,%rdi		/* pt_regs pointer */
-	xorl %esi,%esi		/* no error code */
-	call \do_sym
-	jmp error_exit		/* %ebx: no swapgs flag */
-	CFI_ENDPROC
-END(\sym)
-.endm
+#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
 
-.macro paranoidzeroentry sym do_sym
+.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
 ENTRY(\sym)
-	INTR_FRAME
-	ASM_CLAC
-	PARAVIRT_ADJUST_EXCEPTION_FRAME
-	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
-	subq $ORIG_RAX-R15, %rsp
-	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
-	call save_paranoid
-	TRACE_IRQS_OFF
-	movq %rsp,%rdi		/* pt_regs pointer */
-	xorl %esi,%esi		/* no error code */
-	call \do_sym
-	jmp paranoid_exit	/* %ebx: no swapgs flag */
-	CFI_ENDPROC
-END(\sym)
-.endm
+	/* Sanity check */
+	.if \shift_ist != -1 && \paranoid == 0
+	.error "using shift_ist requires paranoid=1"
+	.endif
 
-#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
-.macro paranoidzeroentry_ist sym do_sym ist
-ENTRY(\sym)
+	.if \has_error_code
+	XCPT_FRAME
+	.else
 	INTR_FRAME
-	ASM_CLAC
-	PARAVIRT_ADJUST_EXCEPTION_FRAME
-	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
-	subq $ORIG_RAX-R15, %rsp
-	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
-	call save_paranoid
-	TRACE_IRQS_OFF_DEBUG
-	movq %rsp,%rdi		/* pt_regs pointer */
-	xorl %esi,%esi		/* no error code */
-	subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
-	call \do_sym
-	addq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
-	jmp paranoid_exit	/* %ebx: no swapgs flag */
-	CFI_ENDPROC
-END(\sym)
-.endm
+	.endif
 
-.macro errorentry sym do_sym
-ENTRY(\sym)
-	XCPT_FRAME
 	ASM_CLAC
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
+
+	.ifeq \has_error_code
+	pushq_cfi $-1			/* ORIG_RAX: no syscall to restart */
+	.endif
+
 	subq $ORIG_RAX-R15, %rsp
 	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
+
+	.if \paranoid
+	call save_paranoid
+	.else
 	call error_entry
+	.endif
+
 	DEFAULT_FRAME 0
+
+	.if \paranoid
+	.if \shift_ist != -1
+	TRACE_IRQS_OFF_DEBUG		/* reload IDT in case of recursion */
+	.else
+	TRACE_IRQS_OFF
+	.endif
+	.endif
+
 	movq %rsp,%rdi			/* pt_regs pointer */
+
+	.if \has_error_code
 	movq ORIG_RAX(%rsp),%rsi	/* get error code */
 	movq $-1,ORIG_RAX(%rsp)		/* no syscall to restart */
+	.else
+	xorl %esi,%esi			/* no error code */
+	.endif
+
+	.if \shift_ist != -1
+	subq $EXCEPTION_STKSZ, INIT_TSS_IST(\shift_ist)
+	.endif
+
 	call \do_sym
+
+	.if \shift_ist != -1
+	addq $EXCEPTION_STKSZ, INIT_TSS_IST(\shift_ist)
+	.endif
+
+	.if \paranoid
+	jmp paranoid_exit		/* %ebx: no swapgs flag */
+	.else
 	jmp error_exit			/* %ebx: no swapgs flag */
+	.endif
+
 	CFI_ENDPROC
 END(\sym)
 .endm
 
 #ifdef CONFIG_TRACING
-.macro trace_errorentry sym do_sym
-errorentry trace(\sym) trace(\do_sym)
-errorentry \sym \do_sym
+.macro trace_idtentry sym do_sym has_error_code:req
+idtentry trace(\sym) trace(\do_sym) has_error_code=\has_error_code
+idtentry \sym \do_sym has_error_code=\has_error_code
 .endm
 #else
-.macro trace_errorentry sym do_sym
-errorentry \sym \do_sym
+.macro trace_idtentry sym do_sym has_error_code:req
+idtentry \sym \do_sym has_error_code=\has_error_code
 .endm
 #endif
 
-/* error code is on the stack already */
-.macro paranoiderrorentry sym do_sym
-ENTRY(\sym)
-	XCPT_FRAME
-	ASM_CLAC
-	PARAVIRT_ADJUST_EXCEPTION_FRAME
-	subq $ORIG_RAX-R15, %rsp
-	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
-	call save_paranoid
-	DEFAULT_FRAME 0
-	TRACE_IRQS_OFF
-	movq %rsp,%rdi			/* pt_regs pointer */
-	movq ORIG_RAX(%rsp),%rsi	/* get error code */
-	movq $-1,ORIG_RAX(%rsp)		/* no syscall to restart */
-	call \do_sym
-	jmp paranoid_exit		/* %ebx: no swapgs flag */
-	CFI_ENDPROC
-END(\sym)
-.endm
-
-zeroentry divide_error do_divide_error
-zeroentry overflow do_overflow
-zeroentry bounds do_bounds
-zeroentry invalid_op do_invalid_op
-zeroentry device_not_available do_device_not_available
-paranoiderrorentry double_fault do_double_fault
-zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
-errorentry invalid_TSS do_invalid_TSS
-errorentry segment_not_present do_segment_not_present
-zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
-zeroentry coprocessor_error do_coprocessor_error
-errorentry alignment_check do_alignment_check
-zeroentry simd_coprocessor_error do_simd_coprocessor_error
+idtentry divide_error do_divide_error has_error_code=0
+idtentry overflow do_overflow has_error_code=0
+idtentry bounds do_bounds has_error_code=0
+idtentry invalid_op do_invalid_op has_error_code=0
+idtentry device_not_available do_device_not_available has_error_code=0
+idtentry double_fault do_double_fault has_error_code=1 paranoid=1
+idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0
+idtentry invalid_TSS do_invalid_TSS has_error_code=1
+idtentry segment_not_present do_segment_not_present has_error_code=1
+idtentry spurious_interrupt_bug do_spurious_interrupt_bug has_error_code=0
+idtentry coprocessor_error do_coprocessor_error has_error_code=0
+idtentry alignment_check do_alignment_check has_error_code=1
+idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0
 
 
 /* Reload gs selector with exception handling */
@@ -1371,7 +1346,7 @@ ENTRY(do_softirq_own_stack)
 END(do_softirq_own_stack)
 
 #ifdef CONFIG_XEN
-zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
+idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
 
 /*
  * A note on the "critical region" in our callback handler.
@@ -1482,21 +1457,21 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
  */
 	.pushsection .kprobes.text, "ax"
 
-paranoidzeroentry_ist debug do_debug DEBUG_STACK
-paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
-paranoiderrorentry stack_segment do_stack_segment
+idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
+idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
+idtentry stack_segment do_stack_segment has_error_code=1 paranoid=1
 #ifdef CONFIG_XEN
-zeroentry xen_debug do_debug
-zeroentry xen_int3 do_int3
-errorentry xen_stack_segment do_stack_segment
+idtentry xen_debug do_debug has_error_code=0
+idtentry xen_int3 do_int3 has_error_code=0
+idtentry xen_stack_segment do_stack_segment has_error_code=1
 #endif
-errorentry general_protection do_general_protection
-trace_errorentry page_fault do_page_fault
+idtentry general_protection do_general_protection has_error_code=1
+trace_idtentry page_fault do_page_fault has_error_code=1
 #ifdef CONFIG_KVM_GUEST
-errorentry async_page_fault do_async_page_fault
+idtentry async_page_fault do_async_page_fault has_error_code=1
 #endif
 #ifdef CONFIG_X86_MCE
-paranoidzeroentry machine_check *machine_check_vector(%rip)
+idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip)
 #endif
 
 /*
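
Note (editor's): the old entry macros map onto idtentry parameters as follows, derivable from the conversions above:

	zeroentry sym do_sym                 ->  idtentry sym do_sym has_error_code=0
	errorentry sym do_sym                ->  idtentry sym do_sym has_error_code=1
	paranoidzeroentry sym do_sym         ->  idtentry sym do_sym has_error_code=0 paranoid=1
	paranoidzeroentry_ist sym do_sym ist ->  idtentry sym do_sym has_error_code=0 paranoid=1 shift_ist=ist
	paranoiderrorentry sym do_sym        ->  idtentry sym do_sym has_error_code=1 paranoid=1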
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index af1d14a9ebda..dcbbaa165bde 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -20,6 +20,8 @@
 #include <asm/mmu_context.h>
 #include <asm/syscalls.h>
 
+int sysctl_ldt16 = 0;
+
 #ifdef CONFIG_SMP
 static void flush_ldt(void *current_mm)
 {
@@ -234,7 +236,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 	 * IRET leaking the high bits of the kernel stack address.
 	 */
 #ifdef CONFIG_X86_64
-	if (!ldt_info.seg_32bit) {
+	if (!ldt_info.seg_32bit && !sysctl_ldt16) {
 		error = -EINVAL;
 		goto out_unlock;
 	}
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 2ed845928b5f..ace22916ade3 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c | |||
@@ -53,7 +53,7 @@ | |||
53 | #define OPCODE1(insn) ((insn)->opcode.bytes[0]) | 53 | #define OPCODE1(insn) ((insn)->opcode.bytes[0]) |
54 | #define OPCODE2(insn) ((insn)->opcode.bytes[1]) | 54 | #define OPCODE2(insn) ((insn)->opcode.bytes[1]) |
55 | #define OPCODE3(insn) ((insn)->opcode.bytes[2]) | 55 | #define OPCODE3(insn) ((insn)->opcode.bytes[2]) |
56 | #define MODRM_REG(insn) X86_MODRM_REG(insn->modrm.value) | 56 | #define MODRM_REG(insn) X86_MODRM_REG((insn)->modrm.value) |
57 | 57 | ||
58 | #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ | 58 | #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ |
59 | (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ | 59 | (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ |
@@ -229,63 +229,6 @@ static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn) | |||
229 | return -ENOTSUPP; | 229 | return -ENOTSUPP; |
230 | } | 230 | } |
231 | 231 | ||
232 | /* | ||
233 | * Figure out which fixups arch_uprobe_post_xol() will need to perform, and | ||
234 | * annotate arch_uprobe->fixups accordingly. To start with, | ||
235 | * arch_uprobe->fixups is either zero or it reflects rip-related fixups. | ||
236 | */ | ||
237 | static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn) | ||
238 | { | ||
239 | bool fix_ip = true, fix_call = false; /* defaults */ | ||
240 | int reg; | ||
241 | |||
242 | insn_get_opcode(insn); /* should be a nop */ | ||
243 | |||
244 | switch (OPCODE1(insn)) { | ||
245 | case 0x9d: | ||
246 | /* popf */ | ||
247 | auprobe->fixups |= UPROBE_FIX_SETF; | ||
248 | break; | ||
249 | case 0xc3: /* ret/lret */ | ||
250 | case 0xcb: | ||
251 | case 0xc2: | ||
252 | case 0xca: | ||
253 | /* ip is correct */ | ||
254 | fix_ip = false; | ||
255 | break; | ||
256 | case 0xe8: /* call relative - Fix return addr */ | ||
257 | fix_call = true; | ||
258 | break; | ||
259 | case 0x9a: /* call absolute - Fix return addr, not ip */ | ||
260 | fix_call = true; | ||
261 | fix_ip = false; | ||
262 | break; | ||
263 | case 0xff: | ||
264 | insn_get_modrm(insn); | ||
265 | reg = MODRM_REG(insn); | ||
266 | if (reg == 2 || reg == 3) { | ||
267 | /* call or lcall, indirect */ | ||
268 | /* Fix return addr; ip is correct. */ | ||
269 | fix_call = true; | ||
270 | fix_ip = false; | ||
271 | } else if (reg == 4 || reg == 5) { | ||
272 | /* jmp or ljmp, indirect */ | ||
273 | /* ip is correct. */ | ||
274 | fix_ip = false; | ||
275 | } | ||
276 | break; | ||
277 | case 0xea: /* jmp absolute -- ip is correct */ | ||
278 | fix_ip = false; | ||
279 | break; | ||
280 | default: | ||
281 | break; | ||
282 | } | ||
283 | if (fix_ip) | ||
284 | auprobe->fixups |= UPROBE_FIX_IP; | ||
285 | if (fix_call) | ||
286 | auprobe->fixups |= UPROBE_FIX_CALL; | ||
287 | } | ||
288 | |||
289 | #ifdef CONFIG_X86_64 | 232 | #ifdef CONFIG_X86_64 |
290 | /* | 233 | /* |
291 | * If arch_uprobe->insn doesn't use rip-relative addressing, return | 234 | * If arch_uprobe->insn doesn't use rip-relative addressing, return |
@@ -310,15 +253,11 @@ static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn) | |||
310 | * - The displacement is always 4 bytes. | 253 | * - The displacement is always 4 bytes. |
311 | */ | 254 | */ |
312 | static void | 255 | static void |
313 | handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) | 256 | handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn) |
314 | { | 257 | { |
315 | u8 *cursor; | 258 | u8 *cursor; |
316 | u8 reg; | 259 | u8 reg; |
317 | 260 | ||
318 | if (mm->context.ia32_compat) | ||
319 | return; | ||
320 | |||
321 | auprobe->rip_rela_target_address = 0x0; | ||
322 | if (!insn_rip_relative(insn)) | 261 | if (!insn_rip_relative(insn)) |
323 | return; | 262 | return; |
324 | 263 | ||
@@ -372,7 +311,48 @@ handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct ins | |||
372 | cursor++; | 311 | cursor++; |
373 | memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes); | 312 | memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes); |
374 | } | 313 | } |
375 | return; | 314 | } |
315 | |||
316 | /* | ||
317 | * If we're emulating a rip-relative instruction, save the contents | ||
318 | * of the scratch register and store the target address in that register. | ||
319 | */ | ||
320 | static void | ||
321 | pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, | ||
322 | struct arch_uprobe_task *autask) | ||
323 | { | ||
324 | if (auprobe->fixups & UPROBE_FIX_RIP_AX) { | ||
325 | autask->saved_scratch_register = regs->ax; | ||
326 | regs->ax = current->utask->vaddr; | ||
327 | regs->ax += auprobe->rip_rela_target_address; | ||
328 | } else if (auprobe->fixups & UPROBE_FIX_RIP_CX) { | ||
329 | autask->saved_scratch_register = regs->cx; | ||
330 | regs->cx = current->utask->vaddr; | ||
331 | regs->cx += auprobe->rip_rela_target_address; | ||
332 | } | ||
333 | } | ||
334 | |||
335 | static void | ||
336 | handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction) | ||
337 | { | ||
338 | if (auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) { | ||
339 | struct arch_uprobe_task *autask; | ||
340 | |||
341 | autask = ¤t->utask->autask; | ||
342 | if (auprobe->fixups & UPROBE_FIX_RIP_AX) | ||
343 | regs->ax = autask->saved_scratch_register; | ||
344 | else | ||
345 | regs->cx = autask->saved_scratch_register; | ||
346 | |||
347 | /* | ||
348 | * The original instruction includes a displacement, and so | ||
349 | * is 4 bytes longer than what we've just single-stepped. | ||
350 | * Caller may need to apply other fixups to handle stuff | ||
351 | * like "jmpq *...(%rip)" and "callq *...(%rip)". | ||
352 | */ | ||
353 | if (correction) | ||
354 | *correction += 4; | ||
355 | } | ||
376 | } | 356 | } |
377 | 357 | ||
378 | static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn) | 358 | static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn) |
@@ -401,9 +381,19 @@ static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, | |||
401 | return validate_insn_64bits(auprobe, insn); | 381 | return validate_insn_64bits(auprobe, insn); |
402 | } | 382 | } |
403 | #else /* 32-bit: */ | 383 | #else /* 32-bit: */ |
404 | static void handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) | 384 | /* |
385 | * No RIP-relative addressing on 32-bit | ||
386 | */ | ||
387 | static void handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn) | ||
388 | { | ||
389 | } | ||
390 | static void pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, | ||
391 | struct arch_uprobe_task *autask) | ||
392 | { | ||
393 | } | ||
394 | static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, | ||
395 | long *correction) | ||
405 | { | 396 | { |
406 | /* No RIP-relative addressing on 32-bit */ | ||
407 | } | 397 | } |
408 | 398 | ||
409 | static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) | 399 | static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) |
@@ -412,141 +402,311 @@ static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, | |||
412 | } | 402 | } |
413 | #endif /* CONFIG_X86_64 */ | 403 | #endif /* CONFIG_X86_64 */ |
414 | 404 | ||
415 | /** | 405 | struct uprobe_xol_ops { |
416 | * arch_uprobe_analyze_insn - instruction analysis including validity and fixups. | 406 | bool (*emulate)(struct arch_uprobe *, struct pt_regs *); |
417 | * @mm: the probed address space. | 407 | int (*pre_xol)(struct arch_uprobe *, struct pt_regs *); |
418 | * @arch_uprobe: the probepoint information. | 408 | int (*post_xol)(struct arch_uprobe *, struct pt_regs *); |
419 | * @addr: virtual address at which to install the probepoint | 409 | }; |
420 | * Return 0 on success or a -ve number on error. | 410 | |
411 | static inline int sizeof_long(void) | ||
412 | { | ||
413 | return is_ia32_task() ? 4 : 8; | ||
414 | } | ||
415 | |||
416 | static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) | ||
417 | { | ||
418 | pre_xol_rip_insn(auprobe, regs, ¤t->utask->autask); | ||
419 | return 0; | ||
420 | } | ||
421 | |||
422 | /* | ||
423 | * Adjust the return address pushed by a call insn executed out of line. | ||
421 | */ | 424 | */ |
422 | int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr) | 425 | static int adjust_ret_addr(unsigned long sp, long correction) |
423 | { | 426 | { |
424 | int ret; | 427 | int rasize = sizeof_long(); |
425 | struct insn insn; | 428 | long ra; |
426 | 429 | ||
427 | auprobe->fixups = 0; | 430 | if (copy_from_user(&ra, (void __user *)sp, rasize)) |
428 | ret = validate_insn_bits(auprobe, mm, &insn); | 431 | return -EFAULT; |
429 | if (ret != 0) | ||
430 | return ret; | ||
431 | 432 | ||
432 | handle_riprel_insn(auprobe, mm, &insn); | 433 | ra += correction; |
433 | prepare_fixups(auprobe, &insn); | 434 | if (copy_to_user((void __user *)sp, &ra, rasize)) |
435 | return -EFAULT; | ||
434 | 436 | ||
435 | return 0; | 437 | return 0; |
436 | } | 438 | } |
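The correction argument here is the same delta used for the ->ip fixup; a brief worked illustration (hypothetical values) of why it also repairs the return address:

	/*
	 * A "call" probed at vaddr but single-stepped in the XOL slot
	 * pushes xol_vaddr + insn_len as its return address. Adding
	 *	correction = vaddr - xol_vaddr
	 * rewrites that to vaddr + insn_len, the address execution
	 * must really resume at once the callee returns.
	 */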
437 | 439 | ||
438 | #ifdef CONFIG_X86_64 | 440 | static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) |
439 | /* | ||
440 | * If we're emulating a rip-relative instruction, save the contents | ||
441 | * of the scratch register and store the target address in that register. | ||
442 | */ | ||
443 | static void | ||
444 | pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, | ||
445 | struct arch_uprobe_task *autask) | ||
446 | { | 441 | { |
447 | if (auprobe->fixups & UPROBE_FIX_RIP_AX) { | 442 | struct uprobe_task *utask = current->utask; |
448 | autask->saved_scratch_register = regs->ax; | 443 | long correction = (long)(utask->vaddr - utask->xol_vaddr); |
449 | regs->ax = current->utask->vaddr; | 444 | |
450 | regs->ax += auprobe->rip_rela_target_address; | 445 | handle_riprel_post_xol(auprobe, regs, &correction); |
451 | } else if (auprobe->fixups & UPROBE_FIX_RIP_CX) { | 446 | if (auprobe->fixups & UPROBE_FIX_IP) |
452 | autask->saved_scratch_register = regs->cx; | 447 | regs->ip += correction; |
453 | regs->cx = current->utask->vaddr; | 448 | |
454 | regs->cx += auprobe->rip_rela_target_address; | 449 | if (auprobe->fixups & UPROBE_FIX_CALL) { |
450 | if (adjust_ret_addr(regs->sp, correction)) { | ||
451 | regs->sp += sizeof_long(); | ||
452 | return -ERESTART; | ||
453 | } | ||
455 | } | 454 | } |
455 | |||
456 | return 0; | ||
456 | } | 457 | } |
457 | #else | 458 | |
458 | static void | 459 | static struct uprobe_xol_ops default_xol_ops = { |
459 | pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, | 460 | .pre_xol = default_pre_xol_op, |
460 | struct arch_uprobe_task *autask) | 461 | .post_xol = default_post_xol_op, |
462 | }; | ||
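The ops table makes the arch entry points thin dispatchers; a sketch of the pattern, mirroring the calls arch_uprobe_pre_xol() and arch_uprobe_post_xol() perform further down:

	/* Dispatch sketch for an analyzed probe: */
	if (auprobe->ops->pre_xol)
		return auprobe->ops->pre_xol(auprobe, regs);
	return 0;

	/* default_xol_ops supplies ->pre_xol/->post_xol for insns that
	 * must be single-stepped; branch_xol_ops (below) adds ->emulate
	 * so simple jmp/jcc/call insns never need the XOL slot at all. */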
463 | |||
464 | static bool branch_is_call(struct arch_uprobe *auprobe) | ||
461 | { | 465 | { |
462 | /* No RIP-relative addressing on 32-bit */ | 466 | return auprobe->branch.opc1 == 0xe8; |
463 | } | 467 | } |
464 | #endif | ||
465 | 468 | ||
466 | /* | 469 | #define CASE_COND \ |
467 | * arch_uprobe_pre_xol - prepare to execute out of line. | 470 | COND(70, 71, XF(OF)) \ |
468 | * @auprobe: the probepoint information. | 471 | COND(72, 73, XF(CF)) \ |
469 | * @regs: reflects the saved user state of current task. | 472 | COND(74, 75, XF(ZF)) \ |
470 | */ | 473 | COND(78, 79, XF(SF)) \ |
471 | int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) | 474 | COND(7a, 7b, XF(PF)) \ |
472 | { | 475 | COND(76, 77, XF(CF) || XF(ZF)) \ |
473 | struct arch_uprobe_task *autask; | 476 | COND(7c, 7d, XF(SF) != XF(OF)) \ |
477 | COND(7e, 7f, XF(ZF) || XF(SF) != XF(OF)) | ||
474 | 478 | ||
475 | autask = ¤t->utask->autask; | 479 | #define COND(op_y, op_n, expr) \ |
476 | autask->saved_trap_nr = current->thread.trap_nr; | 480 | case 0x ## op_y: DO((expr) != 0) \ |
477 | current->thread.trap_nr = UPROBE_TRAP_NR; | 481 | case 0x ## op_n: DO((expr) == 0) |
478 | regs->ip = current->utask->xol_vaddr; | ||
479 | pre_xol_rip_insn(auprobe, regs, autask); | ||
480 | 482 | ||
481 | autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF); | 483 | #define XF(xf) (!!(flags & X86_EFLAGS_ ## xf)) |
482 | regs->flags |= X86_EFLAGS_TF; | ||
483 | if (test_tsk_thread_flag(current, TIF_BLOCKSTEP)) | ||
484 | set_task_blockstep(current, false); | ||
485 | 484 | ||
486 | return 0; | 485 | static bool is_cond_jmp_opcode(u8 opcode) |
486 | { | ||
487 | switch (opcode) { | ||
488 | #define DO(expr) \ | ||
489 | return true; | ||
490 | CASE_COND | ||
491 | #undef DO | ||
492 | |||
493 | default: | ||
494 | return false; | ||
495 | } | ||
487 | } | 496 | } |
488 | 497 | ||
489 | /* | 498 | static bool check_jmp_cond(struct arch_uprobe *auprobe, struct pt_regs *regs) |
490 | * This function is called by arch_uprobe_post_xol() to adjust the return | ||
491 | * address pushed by a call instruction executed out of line. | ||
492 | */ | ||
493 | static int adjust_ret_addr(unsigned long sp, long correction) | ||
494 | { | 499 | { |
495 | int rasize, ncopied; | 500 | unsigned long flags = regs->flags; |
496 | long ra = 0; | ||
497 | 501 | ||
498 | if (is_ia32_task()) | 502 | switch (auprobe->branch.opc1) { |
499 | rasize = 4; | 503 | #define DO(expr) \ |
500 | else | 504 | return expr; |
501 | rasize = 8; | 505 | CASE_COND |
506 | #undef DO | ||
502 | 507 | ||
503 | ncopied = copy_from_user(&ra, (void __user *)sp, rasize); | 508 | default: /* not a conditional jmp */ |
504 | if (unlikely(ncopied)) | 509 | return true; |
505 | return -EFAULT; | 510 | } |
511 | } | ||
506 | 512 | ||
507 | ra += correction; | 513 | #undef XF |
508 | ncopied = copy_to_user((void __user *)sp, &ra, rasize); | 514 | #undef COND |
509 | if (unlikely(ncopied)) | 515 | #undef CASE_COND |
510 | return -EFAULT; | ||
511 | 516 | ||
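Hand-expanding one table row may help; inside check_jmp_cond() the COND(70, 71, XF(OF)) entry becomes (expanded by hand here, worth re-checking against the preprocessor output):

	case 0x70: return ((!!(flags & X86_EFLAGS_OF)) != 0);	/* jo  */
	case 0x71: return ((!!(flags & X86_EFLAGS_OF)) == 0);	/* jno */

In is_cond_jmp_opcode() the same rows expand to plain "return true;" arms, since that DO() ignores the expression.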
512 | return 0; | 517 | static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs) |
518 | { | ||
519 | unsigned long new_ip = regs->ip += auprobe->branch.ilen; | ||
520 | unsigned long offs = (long)auprobe->branch.offs; | ||
521 | |||
522 | if (branch_is_call(auprobe)) { | ||
523 | unsigned long new_sp = regs->sp - sizeof_long(); | ||
524 | /* | ||
525 | * If it fails we execute this (mangled, see the comment in | ||
526 | * branch_clear_offset) insn out-of-line. In the likely case | ||
527 | * this should trigger the trap, and the probed application | ||
528 | * should die or restart the same insn after it handles the | ||
529 | * signal; arch_uprobe_post_xol() won't even be called. | ||
530 | * | ||
531 | * But there is a corner case; see the comment in ->post_xol(). | ||
532 | */ | ||
533 | if (copy_to_user((void __user *)new_sp, &new_ip, sizeof_long())) | ||
534 | return false; | ||
535 | regs->sp = new_sp; | ||
536 | } else if (!check_jmp_cond(auprobe, regs)) { | ||
537 | offs = 0; | ||
538 | } | ||
539 | |||
540 | regs->ip = new_ip + offs; | ||
541 | return true; | ||
513 | } | 542 | } |
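A worked example of the arithmetic above, with hypothetical numbers, for "je +0x10" (encoded 74 10) probed at 0x400000:

	unsigned long ip = 0x400000;     /* probed insn address       */
	unsigned long new_ip = ip + 2;   /* branch.ilen of "74 10"    */
	long offs = 0x10;                /* branch.offs               */

	/* ZF set:   regs->ip = new_ip + offs = 0x400012 (taken)      */
	/* ZF clear: offs forced to 0; regs->ip = 0x400002 (fall through) */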
514 | 543 | ||
515 | #ifdef CONFIG_X86_64 | 544 | static int branch_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) |
516 | static bool is_riprel_insn(struct arch_uprobe *auprobe) | ||
517 | { | 545 | { |
518 | return ((auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) != 0); | 546 | BUG_ON(!branch_is_call(auprobe)); |
547 | /* | ||
548 | * We can only get here if branch_emulate_op() failed to push the ret | ||
549 | * address _and_ another thread expanded our stack before the (mangled) | ||
550 | * "call" insn was executed out-of-line. Just restore ->sp and restart. | ||
551 | * We could also restore ->ip and try to call branch_emulate_op() again. | ||
552 | */ | ||
553 | regs->sp += sizeof_long(); | ||
554 | return -ERESTART; | ||
519 | } | 555 | } |
520 | 556 | ||
521 | static void | 557 | static void branch_clear_offset(struct arch_uprobe *auprobe, struct insn *insn) |
522 | handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction) | ||
523 | { | 558 | { |
524 | if (is_riprel_insn(auprobe)) { | 559 | /* |
525 | struct arch_uprobe_task *autask; | 560 | * Turn this insn into "call 1f; 1:", this is what we will execute |
561 | * out-of-line if ->emulate() fails. We only need this to generate | ||
562 | * a trap, so that the probed task receives the correct signal with | ||
563 | * the properly filled siginfo. | ||
564 | * | ||
565 | * But see the comment in ->post_xol(); in the unlikely case it can | ||
566 | * succeed. So we need to ensure that the new ->ip cannot fall into | ||
567 | * the non-canonical area and trigger #GP. | ||
568 | * | ||
569 | * We could turn it into (say) "pushf", but then we would need to | ||
570 | * divorce ->insn[] and ->ixol[]. We need to preserve the 1st byte | ||
571 | * of ->insn[] for set_orig_insn(). | ||
572 | */ | ||
573 | memset(auprobe->insn + insn_offset_immediate(insn), | ||
574 | 0, insn->immediate.nbytes); | ||
575 | } | ||
526 | 576 | ||
527 | autask = ¤t->utask->autask; | 577 | static struct uprobe_xol_ops branch_xol_ops = { |
528 | if (auprobe->fixups & UPROBE_FIX_RIP_AX) | 578 | .emulate = branch_emulate_op, |
529 | regs->ax = autask->saved_scratch_register; | 579 | .post_xol = branch_post_xol_op, |
530 | else | 580 | }; |
531 | regs->cx = autask->saved_scratch_register; | 581 | |
582 | /* Returns -ENOSYS if branch_xol_ops doesn't handle this insn */ | ||
583 | static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) | ||
584 | { | ||
585 | u8 opc1 = OPCODE1(insn); | ||
586 | |||
587 | /* has the side-effect of processing the entire instruction */ | ||
588 | insn_get_length(insn); | ||
589 | if (WARN_ON_ONCE(!insn_complete(insn))) | ||
590 | return -ENOEXEC; | ||
591 | |||
592 | switch (opc1) { | ||
593 | case 0xeb: /* jmp 8 */ | ||
594 | case 0xe9: /* jmp 32 */ | ||
595 | case 0x90: /* prefix* + nop; same as jmp with .offs = 0 */ | ||
596 | break; | ||
597 | |||
598 | case 0xe8: /* call relative */ | ||
599 | branch_clear_offset(auprobe, insn); | ||
600 | break; | ||
532 | 601 | ||
602 | case 0x0f: | ||
603 | if (insn->opcode.nbytes != 2) | ||
604 | return -ENOSYS; | ||
533 | /* | 605 | /* |
534 | * The original instruction includes a displacement, and so | 606 | * If it is a "near" conditional jmp, OPCODE2() - 0x10 matches |
535 | * is 4 bytes longer than what we've just single-stepped. | 607 | * OPCODE1() of the "short" jmp which checks the same condition. |
536 | * Fall through to handle stuff like "jmpq *...(%rip)" and | ||
537 | * "callq *...(%rip)". | ||
538 | */ | 608 | */ |
539 | if (correction) | 609 | opc1 = OPCODE2(insn) - 0x10; |
540 | *correction += 4; | 610 | default: |
611 | if (!is_cond_jmp_opcode(opc1)) | ||
612 | return -ENOSYS; | ||
541 | } | 613 | } |
614 | |||
615 | auprobe->branch.opc1 = opc1; | ||
616 | auprobe->branch.ilen = insn->length; | ||
617 | auprobe->branch.offs = insn->immediate.value; | ||
618 | |||
619 | auprobe->ops = &branch_xol_ops; | ||
620 | return 0; | ||
542 | } | 621 | } |
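The OPCODE2() - 0x10 folding works because each near jcc (0f 8x rel32) uses a second opcode byte exactly 0x10 above its short (7x rel8) form; one example, hand-decoded:

	/* je rel32 is "0f 84 xx xx xx xx", so OPCODE2() == 0x84 and
	 * 0x84 - 0x10 == 0x74, the short "je rel8" opcode; both forms
	 * therefore hit the same COND(74, 75, XF(ZF)) row above. */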
543 | #else | 622 | |
544 | static void | 623 | /** |
545 | handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction) | 624 | * arch_uprobe_analyze_insn - instruction analysis including validity and fixups. |
625 | * @mm: the probed address space. | ||
626 | * @auprobe: the probepoint information. | ||
627 | * @addr: virtual address at which to install the probepoint | ||
628 | * Return 0 on success or a -ve number on error. | ||
629 | */ | ||
630 | int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr) | ||
631 | { | ||
632 | struct insn insn; | ||
633 | bool fix_ip = true, fix_call = false; | ||
634 | int ret; | ||
635 | |||
636 | ret = validate_insn_bits(auprobe, mm, &insn); | ||
637 | if (ret) | ||
638 | return ret; | ||
639 | |||
640 | ret = branch_setup_xol_ops(auprobe, &insn); | ||
641 | if (ret != -ENOSYS) | ||
642 | return ret; | ||
643 | |||
644 | /* | ||
645 | * Figure out which fixups arch_uprobe_post_xol() will need to perform, | ||
646 | * and annotate arch_uprobe->fixups accordingly. To start with, ->fixups | ||
647 | * is either zero or it reflects rip-related fixups. | ||
648 | */ | ||
649 | switch (OPCODE1(&insn)) { | ||
650 | case 0x9d: /* popf */ | ||
651 | auprobe->fixups |= UPROBE_FIX_SETF; | ||
652 | break; | ||
653 | case 0xc3: /* ret or lret -- ip is correct */ | ||
654 | case 0xcb: | ||
655 | case 0xc2: | ||
656 | case 0xca: | ||
657 | fix_ip = false; | ||
658 | break; | ||
659 | case 0x9a: /* call absolute - Fix return addr, not ip */ | ||
660 | fix_call = true; | ||
661 | fix_ip = false; | ||
662 | break; | ||
663 | case 0xea: /* jmp absolute -- ip is correct */ | ||
664 | fix_ip = false; | ||
665 | break; | ||
666 | case 0xff: | ||
667 | insn_get_modrm(&insn); | ||
668 | switch (MODRM_REG(&insn)) { | ||
669 | case 2: case 3: /* call or lcall, indirect */ | ||
670 | fix_call = true; | ||
671 | case 4: case 5: /* jmp or ljmp, indirect */ | ||
672 | fix_ip = false; | ||
673 | } | ||
674 | /* fall through */ | ||
675 | default: | ||
676 | handle_riprel_insn(auprobe, &insn); | ||
677 | } | ||
678 | |||
679 | if (fix_ip) | ||
680 | auprobe->fixups |= UPROBE_FIX_IP; | ||
681 | if (fix_call) | ||
682 | auprobe->fixups |= UPROBE_FIX_CALL; | ||
683 | |||
684 | auprobe->ops = &default_xol_ops; | ||
685 | return 0; | ||
686 | } | ||
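A hand-decoded example of the switch above: probing "callq *%rax" (encoded ff d0), where MODRM_REG() is 2:

	/* ff d0 -> case 0xff, MODRM_REG() == 2: fix_call = true, then
	 * the fall-through to cases 4/5 clears fix_ip, so
	 *	auprobe->fixups == UPROBE_FIX_CALL
	 * ->ip is already correct after the XOL step (the call set it
	 * to *%rax); only the pushed return address needs adjusting. */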
687 | |||
688 | /* | ||
689 | * arch_uprobe_pre_xol - prepare to execute out of line. | ||
690 | * @auprobe: the probepoint information. | ||
691 | * @regs: reflects the saved user state of current task. | ||
692 | */ | ||
693 | int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) | ||
546 | { | 694 | { |
547 | /* No RIP-relative addressing on 32-bit */ | 695 | struct uprobe_task *utask = current->utask; |
696 | |||
697 | regs->ip = utask->xol_vaddr; | ||
698 | utask->autask.saved_trap_nr = current->thread.trap_nr; | ||
699 | current->thread.trap_nr = UPROBE_TRAP_NR; | ||
700 | |||
701 | utask->autask.saved_tf = !!(regs->flags & X86_EFLAGS_TF); | ||
702 | regs->flags |= X86_EFLAGS_TF; | ||
703 | if (test_tsk_thread_flag(current, TIF_BLOCKSTEP)) | ||
704 | set_task_blockstep(current, false); | ||
705 | |||
706 | if (auprobe->ops->pre_xol) | ||
707 | return auprobe->ops->pre_xol(auprobe, regs); | ||
708 | return 0; | ||
548 | } | 709 | } |
549 | #endif | ||
550 | 710 | ||
551 | /* | 711 | /* |
552 | * If xol insn itself traps and generates a signal(Say, | 712 | * If xol insn itself traps and generates a signal(Say, |
@@ -592,22 +752,25 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t) | |||
592 | */ | 752 | */ |
593 | int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) | 753 | int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) |
594 | { | 754 | { |
595 | struct uprobe_task *utask; | 755 | struct uprobe_task *utask = current->utask; |
596 | long correction; | ||
597 | int result = 0; | ||
598 | 756 | ||
599 | WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR); | 757 | WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR); |
600 | 758 | ||
601 | utask = current->utask; | 759 | if (auprobe->ops->post_xol) { |
602 | current->thread.trap_nr = utask->autask.saved_trap_nr; | 760 | int err = auprobe->ops->post_xol(auprobe, regs); |
603 | correction = (long)(utask->vaddr - utask->xol_vaddr); | 761 | if (err) { |
604 | handle_riprel_post_xol(auprobe, regs, &correction); | 762 | arch_uprobe_abort_xol(auprobe, regs); |
605 | if (auprobe->fixups & UPROBE_FIX_IP) | 763 | /* |
606 | regs->ip += correction; | 764 | * Restart the probed insn. ->post_xol() must ensure |
607 | 765 | * this is really possible if it returns -ERESTART. | |
608 | if (auprobe->fixups & UPROBE_FIX_CALL) | 766 | */ |
609 | result = adjust_ret_addr(regs->sp, correction); | 767 | if (err == -ERESTART) |
768 | return 0; | ||
769 | return err; | ||
770 | } | ||
771 | } | ||
610 | 772 | ||
773 | current->thread.trap_nr = utask->autask.saved_trap_nr; | ||
611 | /* | 774 | /* |
612 | * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP | 775 | * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP |
613 | * so we can get an extra SIGTRAP if we do not clear TF. We need | 776 | * so we can get an extra SIGTRAP if we do not clear TF. We need |
@@ -618,7 +781,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) | |||
618 | else if (!(auprobe->fixups & UPROBE_FIX_SETF)) | 781 | else if (!(auprobe->fixups & UPROBE_FIX_SETF)) |
619 | regs->flags &= ~X86_EFLAGS_TF; | 782 | regs->flags &= ~X86_EFLAGS_TF; |
620 | 783 | ||
621 | return result; | 784 | return 0; |
622 | } | 785 | } |
623 | 786 | ||
624 | /* callback routine for handling exceptions. */ | 787 | /* callback routine for handling exceptions. */ |
@@ -652,8 +815,9 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, | |||
652 | 815 | ||
653 | /* | 816 | /* |
654 | * This function gets called when the XOL instruction either gets trapped or | 817 | * This function gets called when the XOL instruction either gets trapped or
655 | * the thread has a fatal signal, so reset the instruction pointer to its | 818 | * the thread has a fatal signal, or if arch_uprobe_post_xol() failed. |
656 | * probed address. | 819 | * Reset the instruction pointer to its probed address for the potential |
820 | * restart or for post mortem analysis. | ||
657 | */ | 821 | */ |
658 | void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) | 822 | void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) |
659 | { | 823 | { |
@@ -668,25 +832,10 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) | |||
668 | regs->flags &= ~X86_EFLAGS_TF; | 832 | regs->flags &= ~X86_EFLAGS_TF; |
669 | } | 833 | } |
670 | 834 | ||
671 | /* | ||
672 | * Skip these instructions as per the currently known x86 ISA. | ||
673 | * rep=0x66*; nop=0x90 | ||
674 | */ | ||
675 | static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) | 835 | static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) |
676 | { | 836 | { |
677 | int i; | 837 | if (auprobe->ops->emulate) |
678 | 838 | return auprobe->ops->emulate(auprobe, regs); | |
679 | for (i = 0; i < MAX_UINSN_BYTES; i++) { | ||
680 | if (auprobe->insn[i] == 0x66) | ||
681 | continue; | ||
682 | |||
683 | if (auprobe->insn[i] == 0x90) { | ||
684 | regs->ip += i + 1; | ||
685 | return true; | ||
686 | } | ||
687 | |||
688 | break; | ||
689 | } | ||
690 | return false; | 839 | return false; |
691 | } | 840 | } |
692 | 841 | ||
@@ -701,23 +850,21 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) | |||
701 | unsigned long | 850 | unsigned long |
702 | arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs) | 851 | arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs) |
703 | { | 852 | { |
704 | int rasize, ncopied; | 853 | int rasize = sizeof_long(), nleft; |
705 | unsigned long orig_ret_vaddr = 0; /* clear high bits for 32-bit apps */ | 854 | unsigned long orig_ret_vaddr = 0; /* clear high bits for 32-bit apps */ |
706 | 855 | ||
707 | rasize = is_ia32_task() ? 4 : 8; | 856 | if (copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize)) |
708 | ncopied = copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize); | ||
709 | if (unlikely(ncopied)) | ||
710 | return -1; | 857 | return -1; |
711 | 858 | ||
712 | /* check whether address has been already hijacked */ | 859 | /* check whether address has been already hijacked */ |
713 | if (orig_ret_vaddr == trampoline_vaddr) | 860 | if (orig_ret_vaddr == trampoline_vaddr) |
714 | return orig_ret_vaddr; | 861 | return orig_ret_vaddr; |
715 | 862 | ||
716 | ncopied = copy_to_user((void __user *)regs->sp, &trampoline_vaddr, rasize); | 863 | nleft = copy_to_user((void __user *)regs->sp, &trampoline_vaddr, rasize); |
717 | if (likely(!ncopied)) | 864 | if (likely(!nleft)) |
718 | return orig_ret_vaddr; | 865 | return orig_ret_vaddr; |
719 | 866 | ||
720 | if (ncopied != rasize) { | 867 | if (nleft != rasize) { |
721 | pr_err("uprobe: return address clobbered: pid=%d, %%sp=%#lx, " | 868 | pr_err("uprobe: return address clobbered: pid=%d, %%sp=%#lx, " |
722 | "%%ip=%#lx\n", current->pid, regs->sp, regs->ip); | 869 | "%%ip=%#lx\n", current->pid, regs->sp, regs->ip); |
723 | 870 | ||
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 33e8c028842f..138ceffc6377 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -7778,7 +7778,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7778 | 7778 | ||
7779 | exec_control = vmcs12->pin_based_vm_exec_control; | 7779 | exec_control = vmcs12->pin_based_vm_exec_control; |
7780 | exec_control |= vmcs_config.pin_based_exec_ctrl; | 7780 | exec_control |= vmcs_config.pin_based_exec_ctrl; |
7781 | exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER; | 7781 | exec_control &= ~(PIN_BASED_VMX_PREEMPTION_TIMER | |
7782 | PIN_BASED_POSTED_INTR); | ||
7782 | vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control); | 7783 | vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control); |
7783 | 7784 | ||
7784 | vmx->nested.preemption_timer_expired = false; | 7785 | vmx->nested.preemption_timer_expired = false; |
@@ -7815,7 +7816,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7815 | if (!vmx->rdtscp_enabled) | 7816 | if (!vmx->rdtscp_enabled) |
7816 | exec_control &= ~SECONDARY_EXEC_RDTSCP; | 7817 | exec_control &= ~SECONDARY_EXEC_RDTSCP; |
7817 | /* Take the following fields only from vmcs12 */ | 7818 | /* Take the following fields only from vmcs12 */ |
7818 | exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | 7819 | exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
7820 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | | ||
7821 | SECONDARY_EXEC_APIC_REGISTER_VIRT); | ||
7819 | if (nested_cpu_has(vmcs12, | 7822 | if (nested_cpu_has(vmcs12, |
7820 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) | 7823 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) |
7821 | exec_control |= vmcs12->secondary_vm_exec_control; | 7824 | exec_control |= vmcs12->secondary_vm_exec_control; |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b6c0bacca9bd..20316c67b824 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -106,6 +106,8 @@ EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); | |||
106 | static u32 tsc_tolerance_ppm = 250; | 106 | static u32 tsc_tolerance_ppm = 250; |
107 | module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); | 107 | module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); |
108 | 108 | ||
109 | static bool backwards_tsc_observed = false; | ||
110 | |||
109 | #define KVM_NR_SHARED_MSRS 16 | 111 | #define KVM_NR_SHARED_MSRS 16 |
110 | 112 | ||
111 | struct kvm_shared_msrs_global { | 113 | struct kvm_shared_msrs_global { |
@@ -1486,7 +1488,8 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm) | |||
1486 | &ka->master_kernel_ns, | 1488 | &ka->master_kernel_ns, |
1487 | &ka->master_cycle_now); | 1489 | &ka->master_cycle_now); |
1488 | 1490 | ||
1489 | ka->use_master_clock = host_tsc_clocksource & vcpus_matched; | 1491 | ka->use_master_clock = host_tsc_clocksource && vcpus_matched |
1492 | && !backwards_tsc_observed; | ||
1490 | 1493 | ||
1491 | if (ka->use_master_clock) | 1494 | if (ka->use_master_clock) |
1492 | atomic_set(&kvm_guest_has_master_clock, 1); | 1495 | atomic_set(&kvm_guest_has_master_clock, 1); |
@@ -6945,6 +6948,7 @@ int kvm_arch_hardware_enable(void *garbage) | |||
6945 | */ | 6948 | */ |
6946 | if (backwards_tsc) { | 6949 | if (backwards_tsc) { |
6947 | u64 delta_cyc = max_tsc - local_tsc; | 6950 | u64 delta_cyc = max_tsc - local_tsc; |
6951 | backwards_tsc_observed = true; | ||
6948 | list_for_each_entry(kvm, &vm_list, vm_list) { | 6952 | list_for_each_entry(kvm, &vm_list, vm_list) { |
6949 | kvm_for_each_vcpu(i, vcpu, kvm) { | 6953 | kvm_for_each_vcpu(i, vcpu, kvm) { |
6950 | vcpu->arch.tsc_offset_adjustment += delta_cyc; | 6954 | vcpu->arch.tsc_offset_adjustment += delta_cyc; |
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index eabcb6e6a900..4d4f96a27638 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
@@ -16,7 +16,7 @@ clean-files := inat-tables.c | |||
16 | 16 | ||
17 | obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o | 17 | obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o |
18 | 18 | ||
19 | lib-y := delay.o misc.o | 19 | lib-y := delay.o misc.o cmdline.o |
20 | lib-y += thunk_$(BITS).o | 20 | lib-y += thunk_$(BITS).o |
21 | lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o | 21 | lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o |
22 | lib-y += memcpy_$(BITS).o | 22 | lib-y += memcpy_$(BITS).o |
diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c new file mode 100644 index 000000000000..422db000d727 --- /dev/null +++ b/arch/x86/lib/cmdline.c | |||
@@ -0,0 +1,84 @@ | |||
1 | /* | ||
2 | * This file is part of the Linux kernel, and is made available under | ||
3 | * the terms of the GNU General Public License version 2. | ||
4 | * | ||
5 | * Misc librarized functions for cmdline poking. | ||
6 | */ | ||
7 | #include <linux/kernel.h> | ||
8 | #include <linux/string.h> | ||
9 | #include <linux/ctype.h> | ||
10 | #include <asm/setup.h> | ||
11 | |||
12 | static inline int myisspace(u8 c) | ||
13 | { | ||
14 | return c <= ' '; /* Close enough approximation */ | ||
15 | } | ||
16 | |||
17 | /** | ||
18 | * Find a boolean option (like quiet, noapic, nosmp, ...) | ||
19 | * | ||
20 | * @cmdline: the cmdline string | ||
21 | * @option: option string to look for | ||
22 | * | ||
23 | * Returns the position of that @option (counting from 1), 0 if the | ||
24 | * option is not found, or -1 if there is no command line at all. | ||
25 | */ | ||
26 | int cmdline_find_option_bool(const char *cmdline, const char *option) | ||
27 | { | ||
28 | char c; | ||
29 | int len, pos = 0, wstart = 0; | ||
30 | const char *opptr = NULL; | ||
31 | enum { | ||
32 | st_wordstart = 0, /* Start of word/after whitespace */ | ||
33 | st_wordcmp, /* Comparing this word */ | ||
34 | st_wordskip, /* Miscompare, skip */ | ||
35 | } state = st_wordstart; | ||
36 | |||
37 | if (!cmdline) | ||
38 | return -1; /* No command line */ | ||
39 | |||
40 | len = min_t(int, strlen(cmdline), COMMAND_LINE_SIZE); | ||
41 | if (!len) | ||
42 | return 0; | ||
43 | |||
44 | while (len--) { | ||
45 | c = *(char *)cmdline++; | ||
46 | pos++; | ||
47 | |||
48 | switch (state) { | ||
49 | case st_wordstart: | ||
50 | if (!c) | ||
51 | return 0; | ||
52 | else if (myisspace(c)) | ||
53 | break; | ||
54 | |||
55 | state = st_wordcmp; | ||
56 | opptr = option; | ||
57 | wstart = pos; | ||
58 | /* fall through */ | ||
59 | |||
60 | case st_wordcmp: | ||
61 | if (!*opptr) | ||
62 | if (!c || myisspace(c)) | ||
63 | return wstart; | ||
64 | else | ||
65 | state = st_wordskip; | ||
66 | else if (!c) | ||
67 | return 0; | ||
68 | else if (c != *opptr++) | ||
69 | state = st_wordskip; | ||
70 | else if (!len) /* last word and is matching */ | ||
71 | return wstart; | ||
72 | break; | ||
73 | |||
74 | case st_wordskip: | ||
75 | if (!c) | ||
76 | return 0; | ||
77 | else if (myisspace(c)) | ||
78 | state = st_wordstart; | ||
79 | break; | ||
80 | } | ||
81 | } | ||
82 | |||
83 | return 0; /* Buffer overrun */ | ||
84 | } | ||
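A usage sketch of the word-matching semantics (hypothetical calls, not part of the patch); note that comparison starts only at word boundaries, so an option embedded in a longer word does not match:

	cmdline_find_option_bool("ro quiet noapic", "quiet"); /* -> 4  */
	cmdline_find_option_bool("ro quiet noapic", "apic");  /* -> 0  */
	cmdline_find_option_bool(NULL, "quiet");              /* -> -1 */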
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 597ac155c91c..bc7527e109c8 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -50,6 +50,21 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size, | |||
50 | return err; | 50 | return err; |
51 | } | 51 | } |
52 | 52 | ||
53 | static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages, | ||
54 | void *arg) | ||
55 | { | ||
56 | unsigned long i; | ||
57 | |||
58 | for (i = 0; i < nr_pages; ++i) | ||
59 | if (pfn_valid(start_pfn + i) && | ||
60 | !PageReserved(pfn_to_page(start_pfn + i))) | ||
61 | return 1; | ||
62 | |||
63 | WARN_ONCE(1, "ioremap on RAM pfn 0x%lx\n", start_pfn); | ||
64 | |||
65 | return 0; | ||
66 | } | ||
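On the walker's contract, as I read walk_system_ram_range(): the callback runs once per contiguous block of "System RAM" pfns and the walk stops at the first nonzero return, which is what lets the caller below test for == 1:

	/* every page in the block reserved: return 0, keep walking;
	 * any valid, non-reserved page: return 1, the walk stops and
	 * the ioremap below is refused. */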
67 | |||
53 | /* | 68 | /* |
54 | * Remap an arbitrary physical address space into the kernel virtual | 69 | * Remap an arbitrary physical address space into the kernel virtual |
55 | * address space. Needed when the kernel wants to access high addresses | 70 | * address space. Needed when the kernel wants to access high addresses |
@@ -93,14 +108,11 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, | |||
93 | /* | 108 | /* |
94 | * Don't allow anybody to remap normal RAM that we're using.. | 109 | * Don't allow anybody to remap normal RAM that we're using.. |
95 | */ | 110 | */ |
111 | pfn = phys_addr >> PAGE_SHIFT; | ||
96 | last_pfn = last_addr >> PAGE_SHIFT; | 112 | last_pfn = last_addr >> PAGE_SHIFT; |
97 | for (pfn = phys_addr >> PAGE_SHIFT; pfn <= last_pfn; pfn++) { | 113 | if (walk_system_ram_range(pfn, last_pfn - pfn + 1, NULL, |
98 | int is_ram = page_is_ram(pfn); | 114 | __ioremap_check_ram) == 1) |
99 | 115 | return NULL; | |
100 | if (is_ram && pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn))) | ||
101 | return NULL; | ||
102 | WARN_ON_ONCE(is_ram); | ||
103 | } | ||
104 | 116 | ||
105 | /* | 117 | /* |
106 | * Mappings have to be page-aligned | 118 | * Mappings have to be page-aligned |
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index c96314abd144..0004ac72dbdd 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
@@ -399,13 +399,20 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma, | |||
399 | int ptep_clear_flush_young(struct vm_area_struct *vma, | 399 | int ptep_clear_flush_young(struct vm_area_struct *vma, |
400 | unsigned long address, pte_t *ptep) | 400 | unsigned long address, pte_t *ptep) |
401 | { | 401 | { |
402 | int young; | 402 | /* |
403 | 403 | * On x86 CPUs, clearing the accessed bit without a TLB flush | |
404 | young = ptep_test_and_clear_young(vma, address, ptep); | 404 | * doesn't cause data corruption. [ It could cause incorrect |
405 | if (young) | 405 | * page aging and the (mistaken) reclaim of hot pages, but the |
406 | flush_tlb_page(vma, address); | 406 | * chance of that should be relatively low. ] |
407 | 407 | * | |
408 | return young; | 408 | * So as a performance optimization don't flush the TLB when |
409 | * clearing the accessed bit, it will eventually be flushed by | ||
410 | * a context switch or a VM operation anyway. [ In the rare | ||
411 | * event of it not getting flushed for a long time the delay | ||
412 | * shouldn't really matter because there's no real memory | ||
413 | * pressure for swapout to react to. ] | ||
414 | */ | ||
415 | return ptep_test_and_clear_young(vma, address, ptep); | ||
409 | } | 416 | } |
410 | 417 | ||
411 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 418 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index dc017735bb91..6d5663a599a7 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c | |||
@@ -171,7 +171,7 @@ static struct bpf_binary_header *bpf_alloc_binary(unsigned int proglen, | |||
171 | memset(header, 0xcc, sz); /* fill whole space with int3 instructions */ | 171 | memset(header, 0xcc, sz); /* fill whole space with int3 instructions */ |
172 | 172 | ||
173 | header->pages = sz / PAGE_SIZE; | 173 | header->pages = sz / PAGE_SIZE; |
174 | hole = sz - (proglen + sizeof(*header)); | 174 | hole = min(sz - (proglen + sizeof(*header)), PAGE_SIZE - sizeof(*header)); |
175 | 175 | ||
176 | /* insert a random number of int3 instructions before BPF code */ | 176 | /* insert a random number of int3 instructions before BPF code */ |
177 | *image_ptr = &header->image[prandom_u32() % hole]; | 177 | *image_ptr = &header->image[prandom_u32() % hole]; |
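A hedged note on why the hole is clamped to one page; my reading of the rationale:

	/* bpf_jit_free() in this era recovers the header via
	 *	(unsigned long)fp->bpf_func & PAGE_MASK
	 * so the randomized image start must stay within the first
	 * page of the allocation; without the min(), a rounded-up sz
	 * with a small proglen would let prandom_u32() % hole push
	 * the image into a later page. */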
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 01edac6c5e18..5075371ab593 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c | |||
@@ -489,8 +489,12 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) | |||
489 | } | 489 | } |
490 | 490 | ||
491 | node = acpi_get_node(device->handle); | 491 | node = acpi_get_node(device->handle); |
492 | if (node == NUMA_NO_NODE) | 492 | if (node == NUMA_NO_NODE) { |
493 | node = x86_pci_root_bus_node(busnum); | 493 | node = x86_pci_root_bus_node(busnum); |
494 | if (node != 0 && node != NUMA_NO_NODE) | ||
495 | dev_info(&device->dev, FW_BUG "no _PXM; falling back to node %d from hardware (may be inconsistent with ACPI node numbers)\n", | ||
496 | node); | ||
497 | } | ||
494 | 498 | ||
495 | if (node != NUMA_NO_NODE && !node_online(node)) | 499 | if (node != NUMA_NO_NODE && !node_online(node)) |
496 | node = NUMA_NO_NODE; | 500 | node = NUMA_NO_NODE; |
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index e88f4c53d7f6..c20d2cc7ef64 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c | |||
@@ -11,27 +11,33 @@ | |||
11 | 11 | ||
12 | #include "bus_numa.h" | 12 | #include "bus_numa.h" |
13 | 13 | ||
14 | /* | 14 | #define AMD_NB_F0_NODE_ID 0x60 |
15 | * This discovers the pcibus <-> node mapping on AMD K8. | 15 | #define AMD_NB_F0_UNIT_ID 0x64 |
16 | * also get peer root bus resource for io,mmio | 16 | #define AMD_NB_F1_CONFIG_MAP_REG 0xe0 |
17 | */ | 17 | |
18 | #define RANGE_NUM 16 | ||
19 | #define AMD_NB_F1_CONFIG_MAP_RANGES 4 | ||
18 | 20 | ||
19 | struct pci_hostbridge_probe { | 21 | struct amd_hostbridge { |
20 | u32 bus; | 22 | u32 bus; |
21 | u32 slot; | 23 | u32 slot; |
22 | u32 vendor; | ||
23 | u32 device; | 24 | u32 device; |
24 | }; | 25 | }; |
25 | 26 | ||
26 | static struct pci_hostbridge_probe pci_probes[] __initdata = { | 27 | /* |
27 | { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1100 }, | 28 | * IMPORTANT NOTE: |
28 | { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 }, | 29 | * hb_probes[] and early_root_info_init() are in maintenance mode. |
29 | { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 }, | 30 | * It only supports K8, Fam10h, Fam11h, and Fam15h_00h-0fh. |
30 | { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1300 }, | 31 | * Future processors will rely on information in ACPI. |
32 | */ | ||
33 | static struct amd_hostbridge hb_probes[] __initdata = { | ||
34 | { 0, 0x18, 0x1100 }, /* K8 */ | ||
35 | { 0, 0x18, 0x1200 }, /* Family10h */ | ||
36 | { 0xff, 0, 0x1200 }, /* Family10h */ | ||
37 | { 0, 0x18, 0x1300 }, /* Family11h */ | ||
38 | { 0, 0x18, 0x1600 }, /* Family15h */ | ||
31 | }; | 39 | }; |
32 | 40 | ||
33 | #define RANGE_NUM 16 | ||
34 | |||
35 | static struct pci_root_info __init *find_pci_root_info(int node, int link) | 41 | static struct pci_root_info __init *find_pci_root_info(int node, int link) |
36 | { | 42 | { |
37 | struct pci_root_info *info; | 43 | struct pci_root_info *info; |
@@ -45,12 +51,12 @@ static struct pci_root_info __init *find_pci_root_info(int node, int link) | |||
45 | } | 51 | } |
46 | 52 | ||
47 | /** | 53 | /** |
48 | * early_fill_mp_bus_to_node() | 54 | * early_root_info_init() |
49 | * called before pcibios_scan_root and pci_scan_bus | 55 | * called before pcibios_scan_root and pci_scan_bus |
50 | * fills the mp_bus_to_cpumask array based according to the LDT Bus Number | 56 | * fills the mp_bus_to_cpumask array based according |
51 | * fills the mp_bus_to_cpumask array based according to the LDT Bus Number | 56 | * fills the mp_bus_to_cpumask array according |
52 | * Registers found in the K8 northbridge | 57 | * to the LDT Bus Number Registers found in the northbridge. |
53 | static int __init early_fill_mp_bus_info(void) | 59 | static int __init early_root_info_init(void) |
54 | { | 60 | { |
55 | int i; | 61 | int i; |
56 | unsigned bus; | 62 | unsigned bus; |
@@ -75,19 +81,21 @@ static int __init early_fill_mp_bus_info(void) | |||
75 | return -1; | 81 | return -1; |
76 | 82 | ||
77 | found = false; | 83 | found = false; |
78 | for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { | 84 | for (i = 0; i < ARRAY_SIZE(hb_probes); i++) { |
79 | u32 id; | 85 | u32 id; |
80 | u16 device; | 86 | u16 device; |
81 | u16 vendor; | 87 | u16 vendor; |
82 | 88 | ||
83 | bus = pci_probes[i].bus; | 89 | bus = hb_probes[i].bus; |
84 | slot = pci_probes[i].slot; | 90 | slot = hb_probes[i].slot; |
85 | id = read_pci_config(bus, slot, 0, PCI_VENDOR_ID); | 91 | id = read_pci_config(bus, slot, 0, PCI_VENDOR_ID); |
86 | |||
87 | vendor = id & 0xffff; | 92 | vendor = id & 0xffff; |
88 | device = (id>>16) & 0xffff; | 93 | device = (id>>16) & 0xffff; |
89 | if (pci_probes[i].vendor == vendor && | 94 | |
90 | pci_probes[i].device == device) { | 95 | if (vendor != PCI_VENDOR_ID_AMD) |
96 | continue; | ||
97 | |||
98 | if (hb_probes[i].device == device) { | ||
91 | found = true; | 99 | found = true; |
92 | break; | 100 | break; |
93 | } | 101 | } |
@@ -96,10 +104,16 @@ static int __init early_fill_mp_bus_info(void) | |||
96 | if (!found) | 104 | if (!found) |
97 | return 0; | 105 | return 0; |
98 | 106 | ||
99 | for (i = 0; i < 4; i++) { | 107 | /* |
108 | * We should learn topology and routing information from _PXM and | ||
109 | * _CRS methods in the ACPI namespace. We extract node numbers | ||
110 | * here to work around BIOSes that don't supply _PXM. | ||
111 | */ | ||
112 | for (i = 0; i < AMD_NB_F1_CONFIG_MAP_RANGES; i++) { | ||
100 | int min_bus; | 113 | int min_bus; |
101 | int max_bus; | 114 | int max_bus; |
102 | reg = read_pci_config(bus, slot, 1, 0xe0 + (i << 2)); | 115 | reg = read_pci_config(bus, slot, 1, |
116 | AMD_NB_F1_CONFIG_MAP_REG + (i << 2)); | ||
103 | 117 | ||
104 | /* Check if that register is enabled for bus range */ | 118 | /* Check if that register is enabled for bus range */ |
105 | if ((reg & 7) != 3) | 119 | if ((reg & 7) != 3) |
@@ -113,10 +127,21 @@ static int __init early_fill_mp_bus_info(void) | |||
113 | info = alloc_pci_root_info(min_bus, max_bus, node, link); | 127 | info = alloc_pci_root_info(min_bus, max_bus, node, link); |
114 | } | 128 | } |
115 | 129 | ||
130 | /* | ||
131 | * The following code extracts routing information for use on old | ||
132 | * systems where Linux doesn't automatically use host bridge _CRS | ||
133 | * methods (or when the user specifies "pci=nocrs"). | ||
134 | * | ||
135 | * We only do this through Fam11h, because _CRS should be enough on | ||
136 | * newer systems. | ||
137 | */ | ||
138 | if (boot_cpu_data.x86 > 0x11) | ||
139 | return 0; | ||
140 | |||
116 | /* get the default node and link for left over res */ | 141 | /* get the default node and link for left over res */ |
117 | reg = read_pci_config(bus, slot, 0, 0x60); | 142 | reg = read_pci_config(bus, slot, 0, AMD_NB_F0_NODE_ID); |
118 | def_node = (reg >> 8) & 0x07; | 143 | def_node = (reg >> 8) & 0x07; |
119 | reg = read_pci_config(bus, slot, 0, 0x64); | 144 | reg = read_pci_config(bus, slot, 0, AMD_NB_F0_UNIT_ID); |
120 | def_link = (reg >> 8) & 0x03; | 145 | def_link = (reg >> 8) & 0x03; |
121 | 146 | ||
122 | memset(range, 0, sizeof(range)); | 147 | memset(range, 0, sizeof(range)); |
@@ -363,7 +388,7 @@ static int __init pci_io_ecs_init(void) | |||
363 | int cpu; | 388 | int cpu; |
364 | 389 | ||
365 | /* assume all cpus from fam10h have IO ECS */ | 390 | /* assume all cpus from fam10h have IO ECS */ |
366 | if (boot_cpu_data.x86 < 0x10) | 391 | if (boot_cpu_data.x86 < 0x10) |
367 | return 0; | 392 | return 0; |
368 | 393 | ||
369 | /* Try the PCI method first. */ | 394 | /* Try the PCI method first. */ |
@@ -387,7 +412,7 @@ static int __init amd_postcore_init(void) | |||
387 | if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) | 412 | if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) |
388 | return 0; | 413 | return 0; |
389 | 414 | ||
390 | early_fill_mp_bus_info(); | 415 | early_root_info_init(); |
391 | pci_io_ecs_init(); | 416 | pci_io_ecs_init(); |
392 | 417 | ||
393 | return 0; | 418 | return 0; |
diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c index 614392ced7d6..bb461cfd01ab 100644 --- a/arch/x86/pci/broadcom_bus.c +++ b/arch/x86/pci/broadcom_bus.c | |||
@@ -60,8 +60,8 @@ static void __init cnb20le_res(u8 bus, u8 slot, u8 func) | |||
60 | word1 = read_pci_config_16(bus, slot, func, 0xc4); | 60 | word1 = read_pci_config_16(bus, slot, func, 0xc4); |
61 | word2 = read_pci_config_16(bus, slot, func, 0xc6); | 61 | word2 = read_pci_config_16(bus, slot, func, 0xc6); |
62 | if (word1 != word2) { | 62 | if (word1 != word2) { |
63 | res.start = (word1 << 16) | 0x0000; | 63 | res.start = ((resource_size_t) word1 << 16) | 0x0000; |
64 | res.end = (word2 << 16) | 0xffff; | 64 | res.end = ((resource_size_t) word2 << 16) | 0xffff; |
65 | res.flags = IORESOURCE_MEM | IORESOURCE_PREFETCH; | 65 | res.flags = IORESOURCE_MEM | IORESOURCE_PREFETCH; |
66 | update_res(info, res.start, res.end, res.flags, 0); | 66 | update_res(info, res.start, res.end, res.flags, 0); |
67 | } | 67 | } |
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 94ae9ae9574f..b5e60268d93f 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <linux/dmi.h> | 6 | #include <linux/dmi.h> |
7 | #include <linux/pci.h> | 7 | #include <linux/pci.h> |
8 | #include <linux/vgaarb.h> | 8 | #include <linux/vgaarb.h> |
9 | #include <asm/hpet.h> | ||
9 | #include <asm/pci_x86.h> | 10 | #include <asm/pci_x86.h> |
10 | 11 | ||
11 | static void pci_fixup_i450nx(struct pci_dev *d) | 12 | static void pci_fixup_i450nx(struct pci_dev *d) |
@@ -337,9 +338,7 @@ static void pci_fixup_video(struct pci_dev *pdev) | |||
337 | * type BRIDGE, or CARDBUS. Host to PCI controllers use | 338 | * type BRIDGE, or CARDBUS. Host to PCI controllers use |
338 | * PCI header type NORMAL. | 339 | * PCI header type NORMAL. |
339 | */ | 340 | */ |
340 | if (bridge | 341 | if (bridge && (pci_is_bridge(bridge))) { |
341 | && ((bridge->hdr_type == PCI_HEADER_TYPE_BRIDGE) | ||
342 | || (bridge->hdr_type == PCI_HEADER_TYPE_CARDBUS))) { | ||
343 | pci_read_config_word(bridge, PCI_BRIDGE_CONTROL, | 342 | pci_read_config_word(bridge, PCI_BRIDGE_CONTROL, |
344 | &config); | 343 | &config); |
345 | if (!(config & PCI_BRIDGE_CTL_VGA)) | 344 | if (!(config & PCI_BRIDGE_CTL_VGA)) |
@@ -526,6 +525,19 @@ static void sb600_disable_hpet_bar(struct pci_dev *dev) | |||
526 | } | 525 | } |
527 | DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, 0x4385, sb600_disable_hpet_bar); | 526 | DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, 0x4385, sb600_disable_hpet_bar); |
528 | 527 | ||
528 | #ifdef CONFIG_HPET_TIMER | ||
529 | static void sb600_hpet_quirk(struct pci_dev *dev) | ||
530 | { | ||
531 | struct resource *r = &dev->resource[1]; | ||
532 | |||
533 | if (r->flags & IORESOURCE_MEM && r->start == hpet_address) { | ||
534 | r->flags |= IORESOURCE_PCI_FIXED; | ||
535 | dev_info(&dev->dev, "reg 0x14 contains HPET; making it immovable\n"); | ||
536 | } | ||
537 | } | ||
538 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, 0x4385, sb600_hpet_quirk); | ||
539 | #endif | ||
540 | |||
529 | /* | 541 | /* |
530 | * Twinhead H12Y needs us to block out a region otherwise we map devices | 542 | * Twinhead H12Y needs us to block out a region otherwise we map devices |
531 | * there and any access kills the box. | 543 | * there and any access kills the box. |
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index db6b1ab43255..a19ed92e74e4 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c | |||
@@ -271,11 +271,16 @@ static void pcibios_allocate_dev_resources(struct pci_dev *dev, int pass) | |||
271 | "BAR %d: reserving %pr (d=%d, p=%d)\n", | 271 | "BAR %d: reserving %pr (d=%d, p=%d)\n", |
272 | idx, r, disabled, pass); | 272 | idx, r, disabled, pass); |
273 | if (pci_claim_resource(dev, idx) < 0) { | 273 | if (pci_claim_resource(dev, idx) < 0) { |
274 | /* We'll assign a new address later */ | 274 | if (r->flags & IORESOURCE_PCI_FIXED) { |
275 | pcibios_save_fw_addr(dev, | 275 | dev_info(&dev->dev, "BAR %d %pR is immovable\n", |
276 | idx, r->start); | 276 | idx, r); |
277 | r->end -= r->start; | 277 | } else { |
278 | r->start = 0; | 278 | /* We'll assign a new address later */ |
279 | pcibios_save_fw_addr(dev, | ||
280 | idx, r->start); | ||
281 | r->end -= r->start; | ||
282 | r->start = 0; | ||
283 | } | ||
279 | } | 284 | } |
280 | } | 285 | } |
281 | } | 286 | } |
@@ -356,6 +361,12 @@ static int __init pcibios_assign_resources(void) | |||
356 | return 0; | 361 | return 0; |
357 | } | 362 | } |
358 | 363 | ||
364 | /** | ||
365 | * called in fs_initcall (one below subsys_initcall), | ||
366 | * to give the motherboard a chance to reserve resources | ||
367 | */ | ||
368 | fs_initcall(pcibios_assign_resources); | ||
369 | |||
359 | void pcibios_resource_survey_bus(struct pci_bus *bus) | 370 | void pcibios_resource_survey_bus(struct pci_bus *bus) |
360 | { | 371 | { |
361 | dev_printk(KERN_DEBUG, &bus->dev, "Allocating resources\n"); | 372 | dev_printk(KERN_DEBUG, &bus->dev, "Allocating resources\n"); |
@@ -392,12 +403,6 @@ void __init pcibios_resource_survey(void) | |||
392 | ioapic_insert_resources(); | 403 | ioapic_insert_resources(); |
393 | } | 404 | } |
394 | 405 | ||
395 | /** | ||
396 | * called in fs_initcall (one below subsys_initcall), | ||
397 | * give a chance for motherboard reserve resources | ||
398 | */ | ||
399 | fs_initcall(pcibios_assign_resources); | ||
400 | |||
401 | static const struct vm_operations_struct pci_mmap_ops = { | 406 | static const struct vm_operations_struct pci_mmap_ops = { |
402 | .access = generic_access_phys, | 407 | .access = generic_access_phys, |
403 | }; | 408 | }; |
diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile index 3497f14e4dea..7c0d7be176a5 100644 --- a/arch/x86/realmode/rm/Makefile +++ b/arch/x86/realmode/rm/Makefile | |||
@@ -52,8 +52,9 @@ $(obj)/realmode.elf: $(obj)/realmode.lds $(REALMODE_OBJS) FORCE | |||
52 | OBJCOPYFLAGS_realmode.bin := -O binary | 52 | OBJCOPYFLAGS_realmode.bin := -O binary |
53 | 53 | ||
54 | targets += realmode.bin | 54 | targets += realmode.bin |
55 | $(obj)/realmode.bin: $(obj)/realmode.elf $(obj)/realmode.relocs | 55 | $(obj)/realmode.bin: $(obj)/realmode.elf $(obj)/realmode.relocs FORCE |
56 | $(call if_changed,objcopy) | 56 | $(call if_changed,objcopy) |
57 | @: | ||
57 | 58 | ||
58 | quiet_cmd_relocs = RELOCS $@ | 59 | quiet_cmd_relocs = RELOCS $@ |
59 | cmd_relocs = arch/x86/tools/relocs --realmode $< > $@ | 60 | cmd_relocs = arch/x86/tools/relocs --realmode $< > $@ |
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 00348980a3a6..e1f220e3ca68 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #ifdef CONFIG_X86_64 | 39 | #ifdef CONFIG_X86_64 |
40 | #define vdso_enabled sysctl_vsyscall32 | 40 | #define vdso_enabled sysctl_vsyscall32 |
41 | #define arch_setup_additional_pages syscall32_setup_pages | 41 | #define arch_setup_additional_pages syscall32_setup_pages |
42 | extern int sysctl_ldt16; | ||
42 | #endif | 43 | #endif |
43 | 44 | ||
44 | /* | 45 | /* |
@@ -249,6 +250,13 @@ static struct ctl_table abi_table2[] = { | |||
249 | .mode = 0644, | 250 | .mode = 0644, |
250 | .proc_handler = proc_dointvec | 251 | .proc_handler = proc_dointvec |
251 | }, | 252 | }, |
253 | { | ||
254 | .procname = "ldt16", | ||
255 | .data = &sysctl_ldt16, | ||
256 | .maxlen = sizeof(int), | ||
257 | .mode = 0644, | ||
258 | .proc_handler = proc_dointvec | ||
259 | }, | ||
252 | {} | 260 | {} |
253 | }; | 261 | }; |
254 | 262 | ||
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c34bfc4bbe7f..f17b29210ac4 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -1339,6 +1339,7 @@ xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) | |||
1339 | 1339 | ||
1340 | static struct notifier_block xen_panic_block = { | 1340 | static struct notifier_block xen_panic_block = { |
1341 | .notifier_call= xen_panic_event, | 1341 | .notifier_call= xen_panic_event, |
1342 | .priority = INT_MIN | ||
1342 | }; | 1343 | }; |
1343 | 1344 | ||
1344 | int xen_panic_handler_init(void) | 1345 | int xen_panic_handler_init(void) |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 86e02eabb640..6f6e15d28466 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -2510,6 +2510,95 @@ void __init xen_hvm_init_mmu_ops(void) | |||
2510 | } | 2510 | } |
2511 | #endif | 2511 | #endif |
2512 | 2512 | ||
2513 | #ifdef CONFIG_XEN_PVH | ||
2514 | /* | ||
2515 | * Map a foreign gfn (fgfn) to a local pfn (lpfn). This is for user | ||
2516 | * space creating a new guest on pvh dom0 and needing to map domU pages. | ||
2517 | */ | ||
2518 | static int xlate_add_to_p2m(unsigned long lpfn, unsigned long fgfn, | ||
2519 | unsigned int domid) | ||
2520 | { | ||
2521 | int rc, err = 0; | ||
2522 | xen_pfn_t gpfn = lpfn; | ||
2523 | xen_ulong_t idx = fgfn; | ||
2524 | |||
2525 | struct xen_add_to_physmap_range xatp = { | ||
2526 | .domid = DOMID_SELF, | ||
2527 | .foreign_domid = domid, | ||
2528 | .size = 1, | ||
2529 | .space = XENMAPSPACE_gmfn_foreign, | ||
2530 | }; | ||
2531 | set_xen_guest_handle(xatp.idxs, &idx); | ||
2532 | set_xen_guest_handle(xatp.gpfns, &gpfn); | ||
2533 | set_xen_guest_handle(xatp.errs, &err); | ||
2534 | |||
2535 | rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp); | ||
2536 | if (rc < 0) | ||
2537 | return rc; | ||
2538 | return err; | ||
2539 | } | ||
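An illustrative call with hypothetical values, mapping foreign domain 5's gfn 0x1234 at local pfn 0x5678:

	int err = xlate_add_to_p2m(0x5678, 0x1234, 5);
	/* err < 0: the XENMEM_add_to_physmap_range hypercall itself
	 * failed; otherwise err is the per-entry status the hypervisor
	 * wrote back through xatp.errs for this single-entry batch. */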
2540 | |||
2541 | static int xlate_remove_from_p2m(unsigned long spfn, int count) | ||
2542 | { | ||
2543 | struct xen_remove_from_physmap xrp; | ||
2544 | int i, rc; | ||
2545 | |||
2546 | for (i = 0; i < count; i++) { | ||
2547 | xrp.domid = DOMID_SELF; | ||
2548 | xrp.gpfn = spfn+i; | ||
2549 | rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp); | ||
2550 | if (rc) | ||
2551 | break; | ||
2552 | } | ||
2553 | return rc; | ||
2554 | } | ||
2555 | |||
2556 | struct xlate_remap_data { | ||
2557 | unsigned long fgfn; /* foreign domain's gfn */ | ||
2558 | pgprot_t prot; | ||
2559 | domid_t domid; | ||
2560 | int index; | ||
2561 | struct page **pages; | ||
2562 | }; | ||
2563 | |||
2564 | static int xlate_map_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr, | ||
2565 | void *data) | ||
2566 | { | ||
2567 | int rc; | ||
2568 | struct xlate_remap_data *remap = data; | ||
2569 | unsigned long pfn = page_to_pfn(remap->pages[remap->index++]); | ||
2570 | pte_t pteval = pte_mkspecial(pfn_pte(pfn, remap->prot)); | ||
2571 | |||
2572 | rc = xlate_add_to_p2m(pfn, remap->fgfn, remap->domid); | ||
2573 | if (rc) | ||
2574 | return rc; | ||
2575 | native_set_pte(ptep, pteval); | ||
2576 | |||
2577 | return 0; | ||
2578 | } | ||
2579 | |||
2580 | static int xlate_remap_gfn_range(struct vm_area_struct *vma, | ||
2581 | unsigned long addr, unsigned long mfn, | ||
2582 | int nr, pgprot_t prot, unsigned domid, | ||
2583 | struct page **pages) | ||
2584 | { | ||
2585 | int err; | ||
2586 | struct xlate_remap_data pvhdata; | ||
2587 | |||
2588 | BUG_ON(!pages); | ||
2589 | |||
2590 | pvhdata.fgfn = mfn; | ||
2591 | pvhdata.prot = prot; | ||
2592 | pvhdata.domid = domid; | ||
2593 | pvhdata.index = 0; | ||
2594 | pvhdata.pages = pages; | ||
2595 | err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT, | ||
2596 | xlate_map_pte_fn, &pvhdata); | ||
2597 | flush_tlb_all(); | ||
2598 | return err; | ||
2599 | } | ||
2600 | #endif | ||
2601 | |||
2513 | #define REMAP_BATCH_SIZE 16 | 2602 | #define REMAP_BATCH_SIZE 16 |
2514 | 2603 | ||
2515 | struct remap_data { | 2604 | struct remap_data { |
@@ -2522,7 +2611,7 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, | |||
2522 | unsigned long addr, void *data) | 2611 | unsigned long addr, void *data) |
2523 | { | 2612 | { |
2524 | struct remap_data *rmd = data; | 2613 | struct remap_data *rmd = data; |
2525 | pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot)); | 2614 | pte_t pte = pte_mkspecial(mfn_pte(rmd->mfn++, rmd->prot)); |
2526 | 2615 | ||
2527 | rmd->mmu_update->ptr = virt_to_machine(ptep).maddr; | 2616 | rmd->mmu_update->ptr = virt_to_machine(ptep).maddr; |
2528 | rmd->mmu_update->val = pte_val_ma(pte); | 2617 | rmd->mmu_update->val = pte_val_ma(pte); |
@@ -2544,13 +2633,18 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, | |||
2544 | unsigned long range; | 2633 | unsigned long range; |
2545 | int err = 0; | 2634 | int err = 0; |
2546 | 2635 | ||
2547 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
2548 | return -EINVAL; | ||
2549 | |||
2550 | prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); | ||
2551 | |||
2552 | BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); | 2636 | BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); |
2553 | 2637 | ||
2638 | if (xen_feature(XENFEAT_auto_translated_physmap)) { | ||
2639 | #ifdef CONFIG_XEN_PVH | ||
2640 | /* We need to update the local page tables and the xen HAP */ | ||
2641 | return xlate_remap_gfn_range(vma, addr, mfn, nr, prot, | ||
2642 | domid, pages); | ||
2643 | #else | ||
2644 | return -EINVAL; | ||
2645 | #endif | ||
2646 | } | ||
2647 | |||
2554 | rmd.mfn = mfn; | 2648 | rmd.mfn = mfn; |
2555 | rmd.prot = prot; | 2649 | rmd.prot = prot; |
2556 | 2650 | ||
@@ -2588,6 +2682,25 @@ int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, | |||
2588 | if (!pages || !xen_feature(XENFEAT_auto_translated_physmap)) | 2682 | if (!pages || !xen_feature(XENFEAT_auto_translated_physmap)) |
2589 | return 0; | 2683 | return 0; |
2590 | 2684 | ||
2685 | #ifdef CONFIG_XEN_PVH | ||
2686 | while (numpgs--) { | ||
2687 | /* | ||
2688 | * The mmu has already cleaned up the process mmu | ||
2689 | * resources at this point (lookup_address will return | ||
2690 | * NULL). | ||
2691 | */ | ||
2692 | unsigned long pfn = page_to_pfn(pages[numpgs]); | ||
2693 | |||
2694 | xlate_remove_from_p2m(pfn, 1); | ||
2695 | } | ||
2696 | /* | ||
2697 | * We don't need to flush tlbs because as part of | ||
2698 | * xlate_remove_from_p2m, the hypervisor will do tlb flushes | ||
2699 | * after removing the p2m entries from the EPT/NPT | ||
2700 | */ | ||
2701 | return 0; | ||
2702 | #else | ||
2591 | return -EINVAL; | 2703 | return -EINVAL; |
2704 | #endif | ||
2592 | } | 2705 | } |
2593 | EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range); | 2706 | EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range); |
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 85e5d78c9874..9bb3d82ffec8 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c | |||
@@ -36,7 +36,7 @@ | |||
36 | * pfn_to_mfn(0xc0000)=0xc0000 | 36 | * pfn_to_mfn(0xc0000)=0xc0000 |
37 | * | 37 | * |
38 | * The benefit of this is, that we can assume for non-RAM regions (think | 38 | * The benefit of this is, that we can assume for non-RAM regions (think |
39 | * PCI BARs, or ACPI spaces), we can create mappings easily b/c we | 39 | * PCI BARs, or ACPI spaces), we can create mappings easily because we |
40 | * get the PFN value to match the MFN. | 40 | * get the PFN value to match the MFN. |
41 | * | 41 | * |
42 | * For this to work efficiently we have one new page p2m_identity and | 42 | * For this to work efficiently we have one new page p2m_identity and |
@@ -60,7 +60,7 @@ | |||
60 | * There is also a diagram of the P2M at the end that can help. | 60 | * There is also a diagram of the P2M at the end that can help.
61 | * Imagine your E820 looking as so: | 61 | * Imagine your E820 looking as so: |
62 | * | 62 | * |
63 | * 1GB 2GB | 63 | * 1GB 2GB 4GB |
64 | * /-------------------+---------\/----\ /----------\ /---+-----\ | 64 | * /-------------------+---------\/----\ /----------\ /---+-----\ |
65 | * | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM | | 65 | * | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM | |
66 | * \-------------------+---------/\----/ \----------/ \---+-----/ | 66 | * \-------------------+---------/\----/ \----------/ \---+-----/ |
@@ -77,9 +77,8 @@ | |||
77 | * of the PFN and the end PFN (263424 and 512256 respectively). The first step | 77 | * of the PFN and the end PFN (263424 and 512256 respectively). The first step |
78 | * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page | 78 | * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page |
79 | * covers 512^2 of page estate (1GB) and in case the start or end PFN is not | 79 | * covers 512^2 of page estate (1GB) and in case the start or end PFN is not |
80 | * aligned on 512^2*PAGE_SIZE (1GB) we loop on aligned 1GB PFNs from start pfn | 80 | * aligned on 512^2*PAGE_SIZE (1GB) we reserve_brk new middle and leaf pages as |
81 | * to end pfn. We reserve_brk top leaf pages if they are missing (means they | 81 | * required to split any existing p2m_mid_missing middle pages. |
82 | * point to p2m_mid_missing). | ||
83 | * | 82 | * |
84 | * With the E820 example above, 263424 is not 1GB aligned so we allocate a | 83 | * With the E820 example above, 263424 is not 1GB aligned so we allocate a |
85 | * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000. | 84 | * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000. |
@@ -88,7 +87,7 @@ | |||
88 | * Next stage is to determine if we need to do a more granular boundary check | 87 | * Next stage is to determine if we need to do a more granular boundary check |
89 | * on the 4MB (or 2MB depending on architecture) of the start and end PFNs. | 88 | * on the 4MB (or 2MB depending on architecture) of the start and end PFNs. |
90 | * We check if the start pfn and end pfn violate that boundary check, and if | 89 | * We check if the start pfn and end pfn violate that boundary check, and if |
91 | * so reserve_brk a middle (p2m[x][y]) leaf page. This way we have a much finer | 90 | * so reserve_brk a (p2m[x][y]) leaf page. This way we have a much finer |
92 | * granularity of setting which PFNs are missing and which ones are identity. | 91 | * granularity of setting which PFNs are missing and which ones are identity. |
93 | * In our example 263424 and 512256 both fail the check so we reserve_brk two | 92 | * In our example 263424 and 512256 both fail the check so we reserve_brk two |
94 | * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing" | 93 | * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing" |
@@ -102,9 +101,10 @@ | |||
102 | * | 101 | * |
103 | * The next step is to walk from the start pfn to the end pfn setting | 102 | * The next step is to walk from the start pfn to the end pfn setting |
104 | * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity. | 103 | * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity. |
105 | * If we find that the middle leaf is pointing to p2m_missing we can swap it | 104 | * If we find that the middle entry is pointing to p2m_missing we can swap it |
106 | * over to p2m_identity - this way covering 4MB (or 2MB) PFN space. At this | 105 | * over to p2m_identity - this way covering 4MB (or 2MB) PFN space (and |
107 | * point we do not need to worry about boundary aligment (so no need to | 106 | * similarly swapping p2m_mid_missing for p2m_mid_identity for larger regions). |
107 | * At this point we do not need to worry about boundary alignment (so no need to | ||
108 | * reserve_brk a middle page, figure out which PFNs are "missing" and which | 108 | * reserve_brk a middle page, figure out which PFNs are "missing" and which |
109 | * ones are identity), as that has been done earlier. If we find that the | 109 | * ones are identity), as that has been done earlier. If we find that the |
110 | * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference | 110 | * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference |
@@ -118,6 +118,9 @@ | |||
118 | * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511] | 118 | * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511] |
119 | * contain the INVALID_P2M_ENTRY value and are considered "missing." | 119 | * contain the INVALID_P2M_ENTRY value and are considered "missing." |
120 | * | 120 | * |
121 | * Finally, the region beyond the end of the E820 (4 GB in this example) | ||
122 | * is set to be identity (in case there are MMIO regions placed here). | ||
123 | * | ||
121 | * This is what the p2m ends up looking like (for the E820 above) with this | 124 | * This is what the p2m ends up looking like (for the E820 above) with this |
122 | * fabulous drawing: | 125 | * fabulous drawing: |
123 | * | 126 | * |
@@ -129,21 +132,27 @@ | |||
129 | * |-----| \ | [p2m_identity]+\\ | .... | | 132 | * |-----| \ | [p2m_identity]+\\ | .... | |
130 | * | 2 |--\ \-------------------->| ... | \\ \----------------/ | 133 | * | 2 |--\ \-------------------->| ... | \\ \----------------/ |
131 | * |-----| \ \---------------/ \\ | 134 | * |-----| \ \---------------/ \\ |
132 | * | 3 |\ \ \\ p2m_identity | 135 | * | 3 |-\ \ \\ p2m_identity [1] |
133 | * |-----| \ \-------------------->/---------------\ /-----------------\ | 136 | * |-----| \ \-------------------->/---------------\ /-----------------\ |
134 | * | .. +->+ | [p2m_identity]+-->| ~0, ~0, ~0, ... | | 137 | * | .. |\ | | [p2m_identity]+-->| ~0, ~0, ~0, ... | |
135 | * \-----/ / | [p2m_identity]+-->| ..., ~0 | | 138 | * \-----/ | | | [p2m_identity]+-->| ..., ~0 | |
136 | * / /---------------\ | .... | \-----------------/ | 139 | * | | | .... | \-----------------/ |
137 | * / | IDENTITY[@0] | /-+-[x], ~0, ~0.. | | 140 | * | | +-[x], ~0, ~0.. +\ |
138 | * / | IDENTITY[@256]|<----/ \---------------/ | 141 | * | | \---------------/ \ |
139 | * / | ~0, ~0, .... | | 142 | * | | \-> /---------------\ |
140 | * | \---------------/ | 143 | * | V p2m_mid_missing p2m_missing | IDENTITY[@0] | |
141 | * | | 144 | * | /-----------------\ /------------\ | IDENTITY[@256]| |
142 | * p2m_mid_missing p2m_missing | 145 | * | | [p2m_missing] +---->| ~0, ~0, ...| | ~0, ~0, .... | |
143 | * /-----------------\ /------------\ | 146 | * | | [p2m_missing] +---->| ..., ~0 | \---------------/ |
144 | * | [p2m_missing] +---->| ~0, ~0, ~0 | | 147 | * | | ... | \------------/ |
145 | * | [p2m_missing] +---->| ..., ~0 | | 148 | * | \-----------------/ |
146 | * \-----------------/ \------------/ | 149 | * | |
150 | * | p2m_mid_identity | ||
151 | * | /-----------------\ | ||
152 | * \-->| [p2m_identity] +---->[1] | ||
153 | * | [p2m_identity] +---->[1] | ||
154 | * | ... | | ||
155 | * \-----------------/ | ||
147 | * | 156 | * |
148 | * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT) | 157 | * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT) |
149 | */ | 158 | */ |
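For orientation, the three-level structure this comment describes reduces to three index helpers and a triple dereference. A minimal sketch using the names defined in p2m.c (the real get_phys_to_machine() layers the missing/identity special cases on top of this):

    /* pfn -> mfn walk, simplified; each level holds 512 entries on
     * 64-bit (P2M_PER_PAGE == P2M_MID_PER_PAGE == 512). */
    static unsigned long p2m_lookup_sketch(unsigned long pfn)
    {
            unsigned topidx = p2m_top_index(pfn); /* pfn / (512 * 512) */
            unsigned mididx = p2m_mid_index(pfn); /* (pfn / 512) % 512 */
            unsigned idx    = p2m_index(pfn);     /* pfn % 512 */

            return p2m_top[topidx][mididx][idx];
    }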
@@ -187,13 +196,15 @@ static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE); | |||
187 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE); | 196 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE); |
188 | 197 | ||
189 | static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE); | 198 | static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE); |
199 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE); | ||
200 | static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_identity_mfn, P2M_MID_PER_PAGE); | ||
190 | 201 | ||
191 | RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); | 202 | RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); |
192 | RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); | 203 | RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); |
193 | 204 | ||
194 | /* We might hit two boundary violations at the start and end, at most each | 205 | /* We might hit two boundary violations at the start and end, at most each |
195 | * boundary violation will require three middle nodes. */ | 206 | * boundary violation will require three middle nodes. */ |
196 | RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3); | 207 | RESERVE_BRK(p2m_mid_extra, PAGE_SIZE * 2 * 3); |
197 | 208 | ||
198 | /* When we populate back during bootup, the number of pages can vary. The | 209 | /* When we populate back during bootup, the number of pages can vary. The |
199 | * max we have seen is 395979, but that does not mean it can't be more. | 210 | * max we have seen is 395979, but that does not mean it can't be more. |
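The PAGE_SIZE * 2 * 3 sizing of p2m_mid_extra follows from the level geometry; a worked note, assuming 64-bit entries and 4 KiB pages:

    /*
     * P2M_PER_PAGE     = PAGE_SIZE / sizeof(unsigned long) = 512
     * P2M_MID_PER_PAGE = 512, so a middle page spans 512 * 512 PFNs (1GB)
     *
     * An identity range has at most two unaligned boundaries (start
     * and end), and each may need up to three fresh pages to split
     * the shared missing/identity pages, hence 2 * 3 pages reserved.
     */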
@@ -242,20 +253,20 @@ static void p2m_top_mfn_p_init(unsigned long **top) | |||
242 | top[i] = p2m_mid_missing_mfn; | 253 | top[i] = p2m_mid_missing_mfn; |
243 | } | 254 | } |
244 | 255 | ||
245 | static void p2m_mid_init(unsigned long **mid) | 256 | static void p2m_mid_init(unsigned long **mid, unsigned long *leaf) |
246 | { | 257 | { |
247 | unsigned i; | 258 | unsigned i; |
248 | 259 | ||
249 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | 260 | for (i = 0; i < P2M_MID_PER_PAGE; i++) |
250 | mid[i] = p2m_missing; | 261 | mid[i] = leaf; |
251 | } | 262 | } |
252 | 263 | ||
253 | static void p2m_mid_mfn_init(unsigned long *mid) | 264 | static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf) |
254 | { | 265 | { |
255 | unsigned i; | 266 | unsigned i; |
256 | 267 | ||
257 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | 268 | for (i = 0; i < P2M_MID_PER_PAGE; i++) |
258 | mid[i] = virt_to_mfn(p2m_missing); | 269 | mid[i] = virt_to_mfn(leaf); |
259 | } | 270 | } |
260 | 271 | ||
261 | static void p2m_init(unsigned long *p2m) | 272 | static void p2m_init(unsigned long *p2m) |
@@ -286,7 +297,9 @@ void __ref xen_build_mfn_list_list(void) | |||
286 | /* Pre-initialize p2m_top_mfn to be completely missing */ | 297 | /* Pre-initialize p2m_top_mfn to be completely missing */ |
287 | if (p2m_top_mfn == NULL) { | 298 | if (p2m_top_mfn == NULL) { |
288 | p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); | 299 | p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); |
289 | p2m_mid_mfn_init(p2m_mid_missing_mfn); | 300 | p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); |
301 | p2m_mid_identity_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
302 | p2m_mid_mfn_init(p2m_mid_identity_mfn, p2m_identity); | ||
290 | 303 | ||
291 | p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | 304 | p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); |
292 | p2m_top_mfn_p_init(p2m_top_mfn_p); | 305 | p2m_top_mfn_p_init(p2m_top_mfn_p); |
@@ -295,7 +308,8 @@ void __ref xen_build_mfn_list_list(void) | |||
295 | p2m_top_mfn_init(p2m_top_mfn); | 308 | p2m_top_mfn_init(p2m_top_mfn); |
296 | } else { | 309 | } else { |
297 | /* Reinitialise, mfn's all change after migration */ | 310 | /* Reinitialise, mfn's all change after migration */ |
298 | p2m_mid_mfn_init(p2m_mid_missing_mfn); | 311 | p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); |
312 | p2m_mid_mfn_init(p2m_mid_identity_mfn, p2m_identity); | ||
299 | } | 313 | } |
300 | 314 | ||
301 | for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { | 315 | for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { |
@@ -327,7 +341,7 @@ void __ref xen_build_mfn_list_list(void) | |||
327 | * it too late. | 341 | * it too late. |
328 | */ | 342 | */ |
329 | mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | 343 | mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); |
330 | p2m_mid_mfn_init(mid_mfn_p); | 344 | p2m_mid_mfn_init(mid_mfn_p, p2m_missing); |
331 | 345 | ||
332 | p2m_top_mfn_p[topidx] = mid_mfn_p; | 346 | p2m_top_mfn_p[topidx] = mid_mfn_p; |
333 | } | 347 | } |
@@ -365,16 +379,17 @@ void __init xen_build_dynamic_phys_to_machine(void) | |||
365 | 379 | ||
366 | p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); | 380 | p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); |
367 | p2m_init(p2m_missing); | 381 | p2m_init(p2m_missing); |
382 | p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
383 | p2m_init(p2m_identity); | ||
368 | 384 | ||
369 | p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); | 385 | p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); |
370 | p2m_mid_init(p2m_mid_missing); | 386 | p2m_mid_init(p2m_mid_missing, p2m_missing); |
387 | p2m_mid_identity = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
388 | p2m_mid_init(p2m_mid_identity, p2m_identity); | ||
371 | 389 | ||
372 | p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE); | 390 | p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE); |
373 | p2m_top_init(p2m_top); | 391 | p2m_top_init(p2m_top); |
374 | 392 | ||
375 | p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
376 | p2m_init(p2m_identity); | ||
377 | |||
378 | /* | 393 | /* |
379 | * The domain builder gives us a pre-constructed p2m array in | 394 | * The domain builder gives us a pre-constructed p2m array in |
380 | * mfn_list for all the pages initially given to us, so we just | 395 | * mfn_list for all the pages initially given to us, so we just |
@@ -386,7 +401,7 @@ void __init xen_build_dynamic_phys_to_machine(void) | |||
386 | 401 | ||
387 | if (p2m_top[topidx] == p2m_mid_missing) { | 402 | if (p2m_top[topidx] == p2m_mid_missing) { |
388 | unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); | 403 | unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); |
389 | p2m_mid_init(mid); | 404 | p2m_mid_init(mid, p2m_missing); |
390 | 405 | ||
391 | p2m_top[topidx] = mid; | 406 | p2m_top[topidx] = mid; |
392 | } | 407 | } |
@@ -492,7 +507,7 @@ unsigned long get_phys_to_machine(unsigned long pfn) | |||
492 | unsigned topidx, mididx, idx; | 507 | unsigned topidx, mididx, idx; |
493 | 508 | ||
494 | if (unlikely(pfn >= MAX_P2M_PFN)) | 509 | if (unlikely(pfn >= MAX_P2M_PFN)) |
495 | return INVALID_P2M_ENTRY; | 510 | return IDENTITY_FRAME(pfn); |
496 | 511 | ||
497 | topidx = p2m_top_index(pfn); | 512 | topidx = p2m_top_index(pfn); |
498 | mididx = p2m_mid_index(pfn); | 513 | mididx = p2m_mid_index(pfn); |
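The new IDENTITY_FRAME() return relies on the tag-bit encoding in arch/x86/include/asm/xen/page.h; callers that need a raw MFN mask the tags back off, roughly as in this paraphrase of pfn_to_mfn() from that header:

    #define FOREIGN_FRAME_BIT   (1UL << (BITS_PER_LONG - 1))
    #define IDENTITY_FRAME_BIT  (1UL << (BITS_PER_LONG - 2))
    #define IDENTITY_FRAME(m)   ((m) | IDENTITY_FRAME_BIT)

    static inline unsigned long pfn_to_mfn_sketch(unsigned long pfn)
    {
            unsigned long mfn = get_phys_to_machine(pfn);

            if (mfn != INVALID_P2M_ENTRY)
                    mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
            return mfn;
    }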
@@ -545,7 +560,7 @@ static bool alloc_p2m(unsigned long pfn) | |||
545 | if (!mid) | 560 | if (!mid) |
546 | return false; | 561 | return false; |
547 | 562 | ||
548 | p2m_mid_init(mid); | 563 | p2m_mid_init(mid, p2m_missing); |
549 | 564 | ||
550 | if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing) | 565 | if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing) |
551 | free_p2m_page(mid); | 566 | free_p2m_page(mid); |
@@ -565,7 +580,7 @@ static bool alloc_p2m(unsigned long pfn) | |||
565 | if (!mid_mfn) | 580 | if (!mid_mfn) |
566 | return false; | 581 | return false; |
567 | 582 | ||
568 | p2m_mid_mfn_init(mid_mfn); | 583 | p2m_mid_mfn_init(mid_mfn, p2m_missing); |
569 | 584 | ||
570 | missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); | 585 | missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); |
571 | mid_mfn_mfn = virt_to_mfn(mid_mfn); | 586 | mid_mfn_mfn = virt_to_mfn(mid_mfn); |
@@ -596,7 +611,7 @@ static bool alloc_p2m(unsigned long pfn) | |||
596 | return true; | 611 | return true; |
597 | } | 612 | } |
598 | 613 | ||
599 | static bool __init early_alloc_p2m_middle(unsigned long pfn, bool check_boundary) | 614 | static bool __init early_alloc_p2m(unsigned long pfn, bool check_boundary) |
600 | { | 615 | { |
601 | unsigned topidx, mididx, idx; | 616 | unsigned topidx, mididx, idx; |
602 | unsigned long *p2m; | 617 | unsigned long *p2m; |
@@ -638,7 +653,7 @@ static bool __init early_alloc_p2m_middle(unsigned long pfn, bool check_boundary | |||
638 | return true; | 653 | return true; |
639 | } | 654 | } |
640 | 655 | ||
641 | static bool __init early_alloc_p2m(unsigned long pfn) | 656 | static bool __init early_alloc_p2m_middle(unsigned long pfn) |
642 | { | 657 | { |
643 | unsigned topidx = p2m_top_index(pfn); | 658 | unsigned topidx = p2m_top_index(pfn); |
644 | unsigned long *mid_mfn_p; | 659 | unsigned long *mid_mfn_p; |
@@ -649,7 +664,7 @@ static bool __init early_alloc_p2m(unsigned long pfn) | |||
649 | if (mid == p2m_mid_missing) { | 664 | if (mid == p2m_mid_missing) { |
650 | mid = extend_brk(PAGE_SIZE, PAGE_SIZE); | 665 | mid = extend_brk(PAGE_SIZE, PAGE_SIZE); |
651 | 666 | ||
652 | p2m_mid_init(mid); | 667 | p2m_mid_init(mid, p2m_missing); |
653 | 668 | ||
654 | p2m_top[topidx] = mid; | 669 | p2m_top[topidx] = mid; |
655 | 670 | ||
@@ -658,12 +673,12 @@ static bool __init early_alloc_p2m(unsigned long pfn) | |||
658 | /* And the save/restore P2M tables.. */ | 673 | /* And the save/restore P2M tables.. */ |
659 | if (mid_mfn_p == p2m_mid_missing_mfn) { | 674 | if (mid_mfn_p == p2m_mid_missing_mfn) { |
660 | mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | 675 | mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); |
661 | p2m_mid_mfn_init(mid_mfn_p); | 676 | p2m_mid_mfn_init(mid_mfn_p, p2m_missing); |
662 | 677 | ||
663 | p2m_top_mfn_p[topidx] = mid_mfn_p; | 678 | p2m_top_mfn_p[topidx] = mid_mfn_p; |
664 | p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); | 679 | p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); |
665 | /* Note: we don't set mid_mfn_p[mididx] here, | 680 | /* Note: we don't set mid_mfn_p[mididx] here, |
666 | * look in early_alloc_p2m_middle */ | 681 | * look in early_alloc_p2m() */ |
667 | } | 682 | } |
668 | return true; | 683 | return true; |
669 | } | 684 | } |
@@ -739,7 +754,7 @@ found: | |||
739 | 754 | ||
740 | /* This shouldn't happen */ | 755 | /* This shouldn't happen */ |
741 | if (WARN_ON(p2m_top[topidx] == p2m_mid_missing)) | 756 | if (WARN_ON(p2m_top[topidx] == p2m_mid_missing)) |
742 | early_alloc_p2m(set_pfn); | 757 | early_alloc_p2m_middle(set_pfn); |
743 | 758 | ||
744 | if (WARN_ON(p2m_top[topidx][mididx] != p2m_missing)) | 759 | if (WARN_ON(p2m_top[topidx][mididx] != p2m_missing)) |
745 | return false; | 760 | return false; |
@@ -754,13 +769,13 @@ found: | |||
754 | bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn) | 769 | bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn) |
755 | { | 770 | { |
756 | if (unlikely(!__set_phys_to_machine(pfn, mfn))) { | 771 | if (unlikely(!__set_phys_to_machine(pfn, mfn))) { |
757 | if (!early_alloc_p2m(pfn)) | 772 | if (!early_alloc_p2m_middle(pfn)) |
758 | return false; | 773 | return false; |
759 | 774 | ||
760 | if (early_can_reuse_p2m_middle(pfn, mfn)) | 775 | if (early_can_reuse_p2m_middle(pfn, mfn)) |
761 | return __set_phys_to_machine(pfn, mfn); | 776 | return __set_phys_to_machine(pfn, mfn); |
762 | 777 | ||
763 | if (!early_alloc_p2m_middle(pfn, false /* boundary crossover OK!*/)) | 778 | if (!early_alloc_p2m(pfn, false /* boundary crossover OK!*/)) |
764 | return false; | 779 | return false; |
765 | 780 | ||
766 | if (!__set_phys_to_machine(pfn, mfn)) | 781 | if (!__set_phys_to_machine(pfn, mfn)) |
@@ -769,12 +784,30 @@ bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn) | |||
769 | 784 | ||
770 | return true; | 785 | return true; |
771 | } | 786 | } |
787 | |||
788 | static void __init early_split_p2m(unsigned long pfn) | ||
789 | { | ||
790 | unsigned long mididx, idx; | ||
791 | |||
792 | mididx = p2m_mid_index(pfn); | ||
793 | idx = p2m_index(pfn); | ||
794 | |||
795 | /* | ||
796 | * Allocate new middle and leaf pages if this pfn lies part | ||
797 | * way through an existing middle or leaf page. | ||
798 | */ | ||
799 | if (mididx || idx) | ||
800 | early_alloc_p2m_middle(pfn); | ||
801 | if (idx) | ||
802 | early_alloc_p2m(pfn, false); | ||
803 | } | ||
804 | |||
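Concretely, for the example start PFN used in the comment block at the top of the file (a worked calculation, assuming 512-entry levels):

    /*
     * pfn = 263424:
     *   p2m_top_index(pfn) = 263424 / (512 * 512) = 1
     *   p2m_mid_index(pfn) = (263424 / 512) % 512 = 2    (nonzero)
     *   p2m_index(pfn)     = 263424 % 512         = 256  (nonzero)
     * so early_split_p2m() allocates both a private middle page and
     * a private leaf page before any entries are marked identity.
     */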
772 | unsigned long __init set_phys_range_identity(unsigned long pfn_s, | 805 | unsigned long __init set_phys_range_identity(unsigned long pfn_s, |
773 | unsigned long pfn_e) | 806 | unsigned long pfn_e) |
774 | { | 807 | { |
775 | unsigned long pfn; | 808 | unsigned long pfn; |
776 | 809 | ||
777 | if (unlikely(pfn_s >= MAX_P2M_PFN || pfn_e >= MAX_P2M_PFN)) | 810 | if (unlikely(pfn_s >= MAX_P2M_PFN)) |
778 | return 0; | 811 | return 0; |
779 | 812 | ||
780 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) | 813 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) |
@@ -783,19 +816,30 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s, | |||
783 | if (pfn_s > pfn_e) | 816 | if (pfn_s > pfn_e) |
784 | return 0; | 817 | return 0; |
785 | 818 | ||
786 | for (pfn = (pfn_s & ~(P2M_MID_PER_PAGE * P2M_PER_PAGE - 1)); | 819 | if (pfn_e > MAX_P2M_PFN) |
787 | pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE)); | 820 | pfn_e = MAX_P2M_PFN; |
788 | pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE) | ||
789 | { | ||
790 | WARN_ON(!early_alloc_p2m(pfn)); | ||
791 | } | ||
792 | 821 | ||
793 | early_alloc_p2m_middle(pfn_s, true); | 822 | early_split_p2m(pfn_s); |
794 | early_alloc_p2m_middle(pfn_e, true); | 823 | early_split_p2m(pfn_e); |
824 | |||
825 | for (pfn = pfn_s; pfn < pfn_e;) { | ||
826 | unsigned topidx = p2m_top_index(pfn); | ||
827 | unsigned mididx = p2m_mid_index(pfn); | ||
795 | 828 | ||
796 | for (pfn = pfn_s; pfn < pfn_e; pfn++) | ||
797 | if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn))) | 829 | if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn))) |
798 | break; | 830 | break; |
831 | pfn++; | ||
832 | |||
833 | /* | ||
834 | * If the PFN was set to a middle or leaf identity | ||
835 | * page the remainder must also be identity, so skip | ||
836 | * ahead to the next middle or leaf entry. | ||
837 | */ | ||
838 | if (p2m_top[topidx] == p2m_mid_identity) | ||
839 | pfn = ALIGN(pfn, P2M_MID_PER_PAGE * P2M_PER_PAGE); | ||
840 | else if (p2m_top[topidx][mididx] == p2m_identity) | ||
841 | pfn = ALIGN(pfn, P2M_PER_PAGE); | ||
842 | } | ||
799 | 843 | ||
800 | if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s), | 844 | if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s), |
801 | "Identity mapping failed. We are %ld short of 1-1 mappings!\n", | 845 | "Identity mapping failed. We are %ld short of 1-1 mappings!\n", |
@@ -825,8 +869,22 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) | |||
825 | 869 | ||
826 | /* For sparse holes where the p2m leaf has a real PFN along with | 870 | /* For sparse holes where the p2m leaf has a real PFN along with |
827 | * PCI holes, stick in the PFN as the MFN value. | 871 | * PCI holes, stick in the PFN as the MFN value. |
872 | * | ||
873 | * set_phys_range_identity() will have allocated new middle | ||
874 | * and leaf pages as required, so an existing p2m_mid_missing | ||
875 | * or p2m_missing means that the whole range will be identity, | ||
876 | * so these can be switched to p2m_mid_identity or p2m_identity. | ||
828 | */ | 877 | */ |
829 | if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) { | 878 | if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) { |
879 | if (p2m_top[topidx] == p2m_mid_identity) | ||
880 | return true; | ||
881 | |||
882 | if (p2m_top[topidx] == p2m_mid_missing) { | ||
883 | WARN_ON(cmpxchg(&p2m_top[topidx], p2m_mid_missing, | ||
884 | p2m_mid_identity) != p2m_mid_missing); | ||
885 | return true; | ||
886 | } | ||
887 | |||
830 | if (p2m_top[topidx][mididx] == p2m_identity) | 888 | if (p2m_top[topidx][mididx] == p2m_identity) |
831 | return true; | 889 | return true; |
832 | 890 | ||
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 0982233b9b84..210426a26cc0 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -89,10 +89,10 @@ static void __init xen_add_extra_mem(u64 start, u64 size) | |||
89 | for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) { | 89 | for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) { |
90 | unsigned long mfn = pfn_to_mfn(pfn); | 90 | unsigned long mfn = pfn_to_mfn(pfn); |
91 | 91 | ||
92 | if (WARN(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn)) | 92 | if (WARN_ONCE(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn)) |
93 | continue; | 93 | continue; |
94 | WARN(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n", | 94 | WARN_ONCE(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n", |
95 | pfn, mfn); | 95 | pfn, mfn); |
96 | 96 | ||
97 | __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); | 97 | __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); |
98 | } | 98 | } |
@@ -469,6 +469,15 @@ char * __init xen_memory_setup(void) | |||
469 | } | 469 | } |
470 | 470 | ||
471 | /* | 471 | /* |
472 | * Set the rest as identity mapped, in case PCI BARs are | ||
473 | * located here. | ||
474 | * | ||
475 | * PFNs above MAX_P2M_PFN are considered identity mapped as | ||
476 | * well. | ||
477 | */ | ||
478 | set_phys_range_identity(map[i-1].addr / PAGE_SIZE, ~0ul); | ||
479 | |||
480 | /* | ||
472 | * In domU, the ISA region is normal, usable memory, but we | 481 | * In domU, the ISA region is normal, usable memory, but we |
473 | * reserve ISA memory anyway because too many things poke | 482 | * reserve ISA memory anyway because too many things poke |
474 | * about in there. | 483 | * about in there. |
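Together with the get_phys_to_machine() change in p2m.c, this makes any PFN past the end of the E820 read back as identity. A sketch of the observable effect (identity_readback_demo is a hypothetical helper):

    #include <asm/xen/page.h>

    static void identity_readback_demo(void)
    {
            unsigned long pfn = MAX_P2M_PFN + 42; /* beyond the p2m tree */
            unsigned long mfn = get_phys_to_machine(pfn);

            WARN_ON(mfn != IDENTITY_FRAME(pfn)); /* tagged as 1:1 */
    }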
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 45329c8c226e..c4df9dbd63b7 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c | |||
@@ -12,8 +12,10 @@ | |||
12 | #include "xen-ops.h" | 12 | #include "xen-ops.h" |
13 | #include "mmu.h" | 13 | #include "mmu.h" |
14 | 14 | ||
15 | void xen_arch_pre_suspend(void) | 15 | static void xen_pv_pre_suspend(void) |
16 | { | 16 | { |
17 | xen_mm_pin_all(); | ||
18 | |||
17 | xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); | 19 | xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); |
18 | xen_start_info->console.domU.mfn = | 20 | xen_start_info->console.domU.mfn = |
19 | mfn_to_pfn(xen_start_info->console.domU.mfn); | 21 | mfn_to_pfn(xen_start_info->console.domU.mfn); |
@@ -26,7 +28,7 @@ void xen_arch_pre_suspend(void) | |||
26 | BUG(); | 28 | BUG(); |
27 | } | 29 | } |
28 | 30 | ||
29 | void xen_arch_hvm_post_suspend(int suspend_cancelled) | 31 | static void xen_hvm_post_suspend(int suspend_cancelled) |
30 | { | 32 | { |
31 | #ifdef CONFIG_XEN_PVHVM | 33 | #ifdef CONFIG_XEN_PVHVM |
32 | int cpu; | 34 | int cpu; |
@@ -41,7 +43,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled) | |||
41 | #endif | 43 | #endif |
42 | } | 44 | } |
43 | 45 | ||
44 | void xen_arch_post_suspend(int suspend_cancelled) | 46 | static void xen_pv_post_suspend(int suspend_cancelled) |
45 | { | 47 | { |
46 | xen_build_mfn_list_list(); | 48 | xen_build_mfn_list_list(); |
47 | 49 | ||
@@ -60,6 +62,21 @@ void xen_arch_post_suspend(int suspend_cancelled) | |||
60 | xen_vcpu_restore(); | 62 | xen_vcpu_restore(); |
61 | } | 63 | } |
62 | 64 | ||
65 | xen_mm_unpin_all(); | ||
66 | } | ||
67 | |||
68 | void xen_arch_pre_suspend(void) | ||
69 | { | ||
70 | if (xen_pv_domain()) | ||
71 | xen_pv_pre_suspend(); | ||
72 | } | ||
73 | |||
74 | void xen_arch_post_suspend(int cancelled) | ||
75 | { | ||
76 | if (xen_pv_domain()) | ||
77 | xen_pv_post_suspend(cancelled); | ||
78 | else | ||
79 | xen_hvm_post_suspend(cancelled); | ||
63 | } | 80 | } |
64 | 81 | ||
65 | static void xen_vcpu_notify_restore(void *data) | 82 | static void xen_vcpu_notify_restore(void *data) |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 1cb6f4c37300..c834d4b231f0 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -31,6 +31,8 @@ void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); | |||
31 | void xen_reserve_top(void); | 31 | void xen_reserve_top(void); |
32 | extern unsigned long xen_max_p2m_pfn; | 32 | extern unsigned long xen_max_p2m_pfn; |
33 | 33 | ||
34 | void xen_mm_pin_all(void); | ||
35 | void xen_mm_unpin_all(void); | ||
34 | void xen_set_pat(u64); | 36 | void xen_set_pat(u64); |
35 | 37 | ||
36 | char * __init xen_memory_setup(void); | 38 | char * __init xen_memory_setup(void); |
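Taken with the suspend.c refactoring above, the two new declarations give the suspend path this shape (a summary sketch, not code from the patch):

    /*
     * PV guest:                        HVM guest:
     *   xen_arch_pre_suspend()           xen_arch_pre_suspend()
     *     xen_pv_pre_suspend()             (nothing to do)
     *       xen_mm_pin_all()
     *       canonicalise start_info
     *   ...suspend hypercall...          ...suspend hypercall...
     *   xen_arch_post_suspend(c)         xen_arch_post_suspend(c)
     *     xen_pv_post_suspend(c)           xen_hvm_post_suspend(c)
     *       rebuild mfn_list_list,           relocate shared info,
     *       xen_mm_unpin_all()               re-init vcpu info
     */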