Diffstat (limited to 'arch/x86')
89 files changed, 2325 insertions, 1015 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index fcf12af07427..d1430ef6b4f9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -28,6 +28,7 @@ config X86
 	select HAVE_KPROBES
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select ARCH_WANT_FRAME_POINTERS
+	select HAVE_DMA_ATTRS
 	select HAVE_KRETPROBES
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_DYNAMIC_FTRACE
@@ -47,6 +48,7 @@ config X86
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
+	select HAVE_ARCH_KMEMCHECK
 
 config OUTPUT_FORMAT
 	string
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index edbd0ca62067..1b68659c41b4 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -81,6 +81,11 @@ ifdef CONFIG_CC_STACKPROTECTOR
         endif
 endif
 
+# Don't unroll struct assignments with kmemcheck enabled
+ifeq ($(CONFIG_KMEMCHECK),y)
+	KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy)
+endif
+
 # Stackpointer is addressed different for 32 bit and 64 bit x86
 sp-$(CONFIG_X86_32) := esp
 sp-$(CONFIG_X86_64) := rsp
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 8d16ada25048..ec749c2bfdd7 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -70,6 +70,7 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
 		   $(call cc-option, -mpreferred-stack-boundary=2)
 KBUILD_CFLAGS += $(call cc-option, -m32)
 KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
+GCOV_PROFILE := n
 
 $(obj)/bzImage: asflags-y := $(SVGA_MODE)
 
diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S
index 507793739ea5..1dfbf64e52a2 100644
--- a/arch/x86/boot/bioscall.S
+++ b/arch/x86/boot/bioscall.S
@@ -13,7 +13,7 @@
  * touching registers they shouldn't be.
  */
 
-	.code16
+	.code16gcc
 	.text
 	.globl	intcall
 	.type	intcall, @function
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 49c8a4c37d7c..e2ff504b4ddc 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -15,6 +15,7 @@ KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
 KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector)
 
 KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
+GCOV_PROFILE := n
 
 LDFLAGS := -m elf_$(UTS_MACHINE)
 LDFLAGS_vmlinux := -T
diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h
index 262e02820049..bdf96f119f06 100644
--- a/arch/x86/include/asm/amd_iommu.h
+++ b/arch/x86/include/asm/amd_iommu.h
@@ -29,9 +29,11 @@ extern void amd_iommu_detect(void);
 extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
 extern void amd_iommu_flush_all_domains(void);
 extern void amd_iommu_flush_all_devices(void);
+extern void amd_iommu_shutdown(void);
 #else
 static inline int amd_iommu_init(void) { return -ENODEV; }
 static inline void amd_iommu_detect(void) { }
+static inline void amd_iommu_shutdown(void) { }
 #endif
 
 #endif /* _ASM_X86_AMD_IOMMU_H */
diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h
index 8cb9c814e120..2503d4e64c2a 100644
--- a/arch/x86/include/asm/atomic_32.h
+++ b/arch/x86/include/asm/atomic_32.h
@@ -257,7 +257,7 @@ typedef struct {
 
 /**
  * atomic64_read - read atomic64 variable
- * @v: pointer of type atomic64_t
+ * @ptr: pointer of type atomic64_t
  *
  * Atomically reads the value of @v.
  * Doesn't imply a read memory barrier.
@@ -294,7 +294,6 @@ atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val,
  * atomic64_xchg - xchg atomic64 variable
  * @ptr: pointer to type atomic64_t
  * @new_val: value to assign
- * @old_val: old value that was there
  *
  * Atomically xchgs the value of @ptr to @new_val and returns
  * the old value.
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index c45f415ce315..c993e9e0fed4 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -1,7 +1,6 @@
 #ifndef _ASM_X86_DESC_H
 #define _ASM_X86_DESC_H
 
-#ifndef __ASSEMBLY__
 #include <asm/desc_defs.h>
 #include <asm/ldt.h>
 #include <asm/mmu.h>
@@ -380,29 +379,4 @@ static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
 	_set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
 }
 
-#else
-/*
- * GET_DESC_BASE reads the descriptor base of the specified segment.
- *
- * Args:
- *    idx - descriptor index
- *    gdt - GDT pointer
- *    base - 32bit register to which the base will be written
- *    lo_w - lo word of the "base" register
- *    lo_b - lo byte of the "base" register
- *    hi_b - hi byte of the low word of the "base" register
- *
- * Example:
- *    GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
- *    Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax.
- */
-#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \
-	movb idx * 8 + 4(gdt), lo_b;			\
-	movb idx * 8 + 7(gdt), hi_b;			\
-	shll $16, base;					\
-	movw idx * 8 + 2(gdt), lo_w;
-
-
-#endif /* __ASSEMBLY__ */
-
 #endif /* _ASM_X86_DESC_H */
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index f82fdc412c64..1c3f9435f1c9 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -6,6 +6,7 @@
  * Documentation/DMA-API.txt for documentation.
  */
 
+#include <linux/kmemcheck.h>
 #include <linux/scatterlist.h>
 #include <linux/dma-debug.h>
 #include <linux/dma-attrs.h>
@@ -32,6 +33,8 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev)
 #endif
 }
 
+#include <asm-generic/dma-mapping-common.h>
+
 /* Make sure we keep the same behaviour */
 static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
@@ -52,171 +55,6 @@ extern int dma_set_mask(struct device *dev, u64 mask);
 extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 					dma_addr_t *dma_addr, gfp_t flag);
 
-static inline dma_addr_t
-dma_map_single(struct device *hwdev, void *ptr, size_t size,
-	       enum dma_data_direction dir)
-{
-	struct dma_map_ops *ops = get_dma_ops(hwdev);
-	dma_addr_t addr;
-
-	BUG_ON(!valid_dma_direction(dir));
-	addr = ops->map_page(hwdev, virt_to_page(ptr),
-			     (unsigned long)ptr & ~PAGE_MASK, size,
-			     dir, NULL);
-	debug_dma_map_page(hwdev, virt_to_page(ptr),
-			   (unsigned long)ptr & ~PAGE_MASK, size,
-			   dir, addr, true);
-	return addr;
-}
-
-static inline void
-dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size,
-		 enum dma_data_direction dir)
-{
-	struct dma_map_ops *ops = get_dma_ops(dev);
-
-	BUG_ON(!valid_dma_direction(dir));
-	if (ops->unmap_page)
-		ops->unmap_page(dev, addr, size, dir, NULL);
-	debug_dma_unmap_page(dev, addr, size, dir, true);
-}
-
-static inline int
-dma_map_sg(struct device *hwdev, struct scatterlist *sg,
-	   int nents, enum dma_data_direction dir)
-{
-	struct dma_map_ops *ops = get_dma_ops(hwdev);
-	int ents;
-
-	BUG_ON(!valid_dma_direction(dir));
-	ents = ops->map_sg(hwdev, sg, nents, dir, NULL);
-	debug_dma_map_sg(hwdev, sg, nents, ents, dir);
-
-	return ents;
-}
-
-static inline void
-dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
-	     enum dma_data_direction dir)
-{
-	struct dma_map_ops *ops = get_dma_ops(hwdev);
-
-	BUG_ON(!valid_dma_direction(dir));
-	debug_dma_unmap_sg(hwdev, sg, nents, dir);
-	if (ops->unmap_sg)
-		ops->unmap_sg(hwdev, sg, nents, dir, NULL);
-}
-
-static inline void
-dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
-			size_t size, enum dma_data_direction dir)
-{
-	struct dma_map_ops *ops = get_dma_ops(hwdev);
-
-	BUG_ON(!valid_dma_direction(dir));
-	if (ops->sync_single_for_cpu)
-		ops->sync_single_for_cpu(hwdev, dma_handle, size, dir);
-	debug_dma_sync_single_for_cpu(hwdev, dma_handle, size, dir);
-	flush_write_buffers();
-}
-
-static inline void
-dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle,
-			   size_t size, enum dma_data_direction dir)
-{
-	struct dma_map_ops *ops = get_dma_ops(hwdev);
-
-	BUG_ON(!valid_dma_direction(dir));
-	if (ops->sync_single_for_device)
-		ops->sync_single_for_device(hwdev, dma_handle, size, dir);
-	debug_dma_sync_single_for_device(hwdev, dma_handle, size, dir);
-	flush_write_buffers();
-}
-
-static inline void
-dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
-			      unsigned long offset, size_t size,
-			      enum dma_data_direction dir)
-{
-	struct dma_map_ops *ops = get_dma_ops(hwdev);
-
-	BUG_ON(!valid_dma_direction(dir));
-	if (ops->sync_single_range_for_cpu)
-		ops->sync_single_range_for_cpu(hwdev, dma_handle, offset,
-					       size, dir);
-	debug_dma_sync_single_range_for_cpu(hwdev, dma_handle,
-					    offset, size, dir);
-	flush_write_buffers();
-}
-
-static inline void
-dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle,
-				 unsigned long offset, size_t size,
-				 enum dma_data_direction dir)
-{
-	struct dma_map_ops *ops = get_dma_ops(hwdev);
-
-	BUG_ON(!valid_dma_direction(dir));
-	if (ops->sync_single_range_for_device)
-		ops->sync_single_range_for_device(hwdev, dma_handle,
-						  offset, size, dir);
-	debug_dma_sync_single_range_for_device(hwdev, dma_handle,
-					       offset, size, dir);
-	flush_write_buffers();
-}
-
-static inline void
-dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
-		    int nelems, enum dma_data_direction dir)
-{
-	struct dma_map_ops *ops = get_dma_ops(hwdev);
-
-	BUG_ON(!valid_dma_direction(dir));
-	if (ops->sync_sg_for_cpu)
-		ops->sync_sg_for_cpu(hwdev, sg, nelems, dir);
-	debug_dma_sync_sg_for_cpu(hwdev, sg, nelems, dir);
-	flush_write_buffers();
-}
-
-static inline void
-dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
-		       int nelems, enum dma_data_direction dir)
-{
-	struct dma_map_ops *ops = get_dma_ops(hwdev);
-
-	BUG_ON(!valid_dma_direction(dir));
-	if (ops->sync_sg_for_device)
-		ops->sync_sg_for_device(hwdev, sg, nelems, dir);
-	debug_dma_sync_sg_for_device(hwdev, sg, nelems, dir);
-
-	flush_write_buffers();
-}
-
-static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
-				      size_t offset, size_t size,
-				      enum dma_data_direction dir)
-{
-	struct dma_map_ops *ops = get_dma_ops(dev);
-	dma_addr_t addr;
-
-	BUG_ON(!valid_dma_direction(dir));
-	addr = ops->map_page(dev, page, offset, size, dir, NULL);
-	debug_dma_map_page(dev, page, offset, size, dir, addr, false);
-
-	return addr;
-}
-
-static inline void dma_unmap_page(struct device *dev, dma_addr_t addr,
-				  size_t size, enum dma_data_direction dir)
-{
-	struct dma_map_ops *ops = get_dma_ops(dev);
-
-	BUG_ON(!valid_dma_direction(dir));
-	if (ops->unmap_page)
-		ops->unmap_page(dev, addr, size, dir, NULL);
-	debug_dma_unmap_page(dev, addr, size, dir, false);
-}
-
 static inline void
 dma_cache_sync(struct device *dev, void *vaddr, size_t size,
 	enum dma_data_direction dir)
diff --git a/arch/x86/include/asm/kmap_types.h b/arch/x86/include/asm/kmap_types.h
index 5759c165a5cf..9e00a731a7fb 100644
--- a/arch/x86/include/asm/kmap_types.h
+++ b/arch/x86/include/asm/kmap_types.h
@@ -2,28 +2,11 @@
 #define _ASM_X86_KMAP_TYPES_H
 
 #if defined(CONFIG_X86_32) && defined(CONFIG_DEBUG_HIGHMEM)
-# define D(n) __KM_FENCE_##n ,
-#else
-# define D(n)
+#define __WITH_KM_FENCE
 #endif
 
-enum km_type {
-D(0)	KM_BOUNCE_READ,
-D(1)	KM_SKB_SUNRPC_DATA,
-D(2)	KM_SKB_DATA_SOFTIRQ,
-D(3)	KM_USER0,
-D(4)	KM_USER1,
-D(5)	KM_BIO_SRC_IRQ,
-D(6)	KM_BIO_DST_IRQ,
-D(7)	KM_PTE0,
-D(8)	KM_PTE1,
-D(9)	KM_IRQ0,
-D(10)	KM_IRQ1,
-D(11)	KM_SOFTIRQ0,
-D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
-};
+#include <asm-generic/kmap_types.h>
 
-#undef D
+#undef __WITH_KM_FENCE
 
 #endif /* _ASM_X86_KMAP_TYPES_H */
diff --git a/arch/x86/include/asm/kmemcheck.h b/arch/x86/include/asm/kmemcheck.h
new file mode 100644
index 000000000000..ed01518f297e
--- /dev/null
+++ b/arch/x86/include/asm/kmemcheck.h
@@ -0,0 +1,42 @@
+#ifndef ASM_X86_KMEMCHECK_H
+#define ASM_X86_KMEMCHECK_H
+
+#include <linux/types.h>
+#include <asm/ptrace.h>
+
+#ifdef CONFIG_KMEMCHECK
+bool kmemcheck_active(struct pt_regs *regs);
+
+void kmemcheck_show(struct pt_regs *regs);
+void kmemcheck_hide(struct pt_regs *regs);
+
+bool kmemcheck_fault(struct pt_regs *regs,
+	unsigned long address, unsigned long error_code);
+bool kmemcheck_trap(struct pt_regs *regs);
+#else
+static inline bool kmemcheck_active(struct pt_regs *regs)
+{
+	return false;
+}
+
+static inline void kmemcheck_show(struct pt_regs *regs)
+{
+}
+
+static inline void kmemcheck_hide(struct pt_regs *regs)
+{
+}
+
+static inline bool kmemcheck_fault(struct pt_regs *regs,
+	unsigned long address, unsigned long error_code)
+{
+	return false;
+}
+
+static inline bool kmemcheck_trap(struct pt_regs *regs)
+{
+	return false;
+}
+#endif /* CONFIG_KMEMCHECK */
+
+#endif
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 540a466e50f5..5cdd8d100ec9 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -102,15 +102,39 @@ struct mce_log {
 
 #ifdef __KERNEL__
 
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <asm/atomic.h>
+
 extern int mce_disabled;
+extern int mce_p5_enabled;
 
-#include <asm/atomic.h>
-#include <linux/percpu.h>
+#ifdef CONFIG_X86_MCE
+void mcheck_init(struct cpuinfo_x86 *c);
+#else
+static inline void mcheck_init(struct cpuinfo_x86 *c) {}
+#endif
+
+#ifdef CONFIG_X86_OLD_MCE
+extern int nr_mce_banks;
+void amd_mcheck_init(struct cpuinfo_x86 *c);
+void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
+void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
+#endif
+
+#ifdef CONFIG_X86_ANCIENT_MCE
+void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
+void winchip_mcheck_init(struct cpuinfo_x86 *c);
+static inline void enable_p5_mce(void) { mce_p5_enabled = 1; }
+#else
+static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
+static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
+static inline void enable_p5_mce(void) {}
+#endif
 
 void mce_setup(struct mce *m);
 void mce_log(struct mce *m);
 DECLARE_PER_CPU(struct sys_device, mce_dev);
-extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
 
 /*
  * To support more than 128 would need to escape the predefined
@@ -145,12 +169,8 @@ int mce_available(struct cpuinfo_x86 *c);
 DECLARE_PER_CPU(unsigned, mce_exception_count);
 DECLARE_PER_CPU(unsigned, mce_poll_count);
 
-void mce_log_therm_throt_event(__u64 status);
-
 extern atomic_t mce_entry;
 
-void do_machine_check(struct pt_regs *, long);
-
 typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
 DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
 
@@ -167,13 +187,32 @@ void mce_notify_process(void);
 DECLARE_PER_CPU(struct mce, injectm);
 extern struct file_operations mce_chrdev_ops;
 
-#ifdef CONFIG_X86_MCE
-void mcheck_init(struct cpuinfo_x86 *c);
-#else
-#define mcheck_init(c) do { } while (0)
-#endif
+/*
+ * Exception handler
+ */
+
+/* Call the installed machine check handler for this CPU setup. */
+extern void (*machine_check_vector)(struct pt_regs *, long error_code);
+void do_machine_check(struct pt_regs *, long);
+
+/*
+ * Threshold handler
+ */
 
 extern void (*mce_threshold_vector)(void);
+extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
+
+/*
+ * Thermal handler
+ */
+
+void intel_init_thermal(struct cpuinfo_x86 *c);
+
+#ifdef CONFIG_X86_NEW_MCE
+void mce_log_therm_throt_event(__u64 status);
+#else
+static inline void mce_log_therm_throt_event(__u64 status) {}
+#endif
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 22603764e7db..48ad9d29484a 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -3,13 +3,10 @@
 
 #include <asm/msr-index.h>
 
-#ifndef __ASSEMBLY__
-# include <linux/types.h>
-#endif
-
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
 
+#include <linux/types.h>
 #include <asm/asm.h>
 #include <asm/errno.h>
 #include <asm/cpumask.h>
@@ -264,6 +261,4 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
 #endif /* CONFIG_SMP */
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
-
-
 #endif /* _ASM_X86_MSR_H */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 18ef7ebf2631..3cc06e3fceb8 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -317,6 +317,11 @@ static inline int pte_present(pte_t a)
 	return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
 }
 
+static inline int pte_hidden(pte_t pte)
+{
+	return pte_flags(pte) & _PAGE_HIDDEN;
+}
+
 static inline int pmd_present(pmd_t pmd)
 {
 	return pmd_flags(pmd) & _PAGE_PRESENT;
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 4d258ad76a0f..54cb697f4900 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -18,7 +18,7 @@
 #define _PAGE_BIT_GLOBAL	8	/* Global TLB entry PPro+ */
 #define _PAGE_BIT_UNUSED1	9	/* available for programmer */
 #define _PAGE_BIT_IOMAP		10	/* flag used to indicate IO mapping */
-#define _PAGE_BIT_UNUSED3	11
+#define _PAGE_BIT_HIDDEN	11	/* hidden by kmemcheck */
 #define _PAGE_BIT_PAT_LARGE	12	/* On 2MB or 1GB pages */
 #define _PAGE_BIT_SPECIAL	_PAGE_BIT_UNUSED1
 #define _PAGE_BIT_CPA_TEST	_PAGE_BIT_UNUSED1
@@ -41,13 +41,18 @@
 #define _PAGE_GLOBAL	(_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
 #define _PAGE_UNUSED1	(_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1)
 #define _PAGE_IOMAP	(_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)
-#define _PAGE_UNUSED3	(_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3)
 #define _PAGE_PAT	(_AT(pteval_t, 1) << _PAGE_BIT_PAT)
 #define _PAGE_PAT_LARGE	(_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
 #define _PAGE_SPECIAL	(_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
 #define _PAGE_CPA_TEST	(_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST)
 #define __HAVE_ARCH_PTE_SPECIAL
 
+#ifdef CONFIG_KMEMCHECK
+#define _PAGE_HIDDEN	(_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN)
+#else
+#define _PAGE_HIDDEN	(_AT(pteval_t, 0))
+#endif
+
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 #define _PAGE_NX	(_AT(pteval_t, 1) << _PAGE_BIT_NX)
 #else
diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h
index 0e0e3ba827f7..c86f452256de 100644
--- a/arch/x86/include/asm/string_32.h
+++ b/arch/x86/include/asm/string_32.h
@@ -177,10 +177,18 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len)
  * No 3D Now!
  */
 
+#ifndef CONFIG_KMEMCHECK
 #define memcpy(t, f, n) \
 	(__builtin_constant_p((n)) \
 	 ? __constant_memcpy((t), (f), (n)) \
 	 : __memcpy((t), (f), (n)))
+#else
+/*
+ * kmemcheck becomes very happy if we use the REP instructions unconditionally,
+ * because it means that we know both memory operands in advance.
+ */
+#define memcpy(t, f, n) __memcpy((t), (f), (n))
+#endif
 
 #endif
 
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 2afe164bf1e6..19e2c468fc2c 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -27,6 +27,7 @@ static __always_inline void *__inline_memcpy(void *to, const void *from, size_t
    function. */
 
 #define __HAVE_ARCH_MEMCPY 1
+#ifndef CONFIG_KMEMCHECK
 #if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4
 extern void *memcpy(void *to, const void *from, size_t len);
 #else
@@ -42,6 +43,13 @@ extern void *__memcpy(void *to, const void *from, size_t len);
 	__ret;							\
 })
 #endif
+#else
+/*
+ * kmemcheck becomes very happy if we use the REP instructions unconditionally,
+ * because it means that we know both memory operands in advance.
+ */
+#define memcpy(dst, src, len) __inline_memcpy((dst), (src), (len))
+#endif
 
 #define __HAVE_ARCH_MEMSET
 void *memset(void *s, int c, size_t n);
diff --git a/arch/x86/include/asm/therm_throt.h b/arch/x86/include/asm/therm_throt.h
deleted file mode 100644
index c62349ee7860..000000000000
--- a/arch/x86/include/asm/therm_throt.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef _ASM_X86_THERM_THROT_H
-#define _ASM_X86_THERM_THROT_H
-
-#include <asm/atomic.h>
-
-extern atomic_t therm_throt_en;
-int therm_throt_process(int curr);
-
-#endif /* _ASM_X86_THERM_THROT_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 602c769fc98c..b0783520988b 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -154,9 +154,9 @@ struct thread_info {
 
 /* thread information allocation */
 #ifdef CONFIG_DEBUG_STACK_USAGE
-#define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO)
+#define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
 #else
-#define THREAD_FLAGS GFP_KERNEL
+#define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK)
 #endif
 
 #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
diff --git a/arch/x86/include/asm/timex.h b/arch/x86/include/asm/timex.h
index b5c9d45c981f..1375cfc93960 100644
--- a/arch/x86/include/asm/timex.h
+++ b/arch/x86/include/asm/timex.h
@@ -4,9 +4,7 @@
 #include <asm/processor.h>
 #include <asm/tsc.h>
 
-/* The PIT ticks at this frequency (in HZ): */
-#define PIT_TICK_RATE 1193182
-
+/* Assume we use the PIT time source for the clock tick */
 #define CLOCK_TICK_RATE	PIT_TICK_RATE
 
 #define ARCH_HAS_READ_CURRENT_TIMER
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h
index 11b3bb86e17b..7fcf6f3dbcc3 100644
--- a/arch/x86/include/asm/xor.h
+++ b/arch/x86/include/asm/xor.h
@@ -1,5 +1,10 @@
+#ifdef CONFIG_KMEMCHECK
+/* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */
+# include <asm-generic/xor.h>
+#else
 #ifdef CONFIG_X86_32
 # include "xor_32.h"
 #else
 # include "xor_64.h"
 #endif
+#endif
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index f3477bb84566..6c327b852e23 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -24,6 +24,8 @@ CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
 CFLAGS_hpet.o		:= $(nostackp)
 CFLAGS_tsc.o		:= $(nostackp)
 CFLAGS_paravirt.o	:= $(nostackp)
+GCOV_PROFILE_vsyscall_64.o	:= n
+GCOV_PROFILE_hpet.o		:= n
 
 obj-y			:= process_$(BITS).o signal.o entry_$(BITS).o
 obj-y			+= traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
diff --git a/arch/x86/kernel/acpi/realmode/Makefile b/arch/x86/kernel/acpi/realmode/Makefile
index 167bc16ce0e5..6a564ac67ef5 100644
--- a/arch/x86/kernel/acpi/realmode/Makefile
+++ b/arch/x86/kernel/acpi/realmode/Makefile
@@ -42,6 +42,7 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D_WAKEUP -D__KERNEL__ \
 		   $(call cc-option, -mpreferred-stack-boundary=2)
 KBUILD_CFLAGS += $(call cc-option, -m32)
 KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
+GCOV_PROFILE := n
 
 WAKEUP_OBJS = $(addprefix $(obj)/,$(wakeup-y))
 
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 1c60554537c3..9372f0406ad4 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -434,6 +434,16 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
 	iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
 }
 
+/* Flush the whole IO/TLB for a given protection domain - including PDE */
+static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid)
+{
+	u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
+
+	INC_STATS_COUNTER(domain_flush_single);
+
+	iommu_queue_inv_iommu_pages(iommu, address, domid, 1, 1);
+}
+
 /*
  * This function is used to flush the IO/TLB for a given protection domain
  * on every IOMMU in the system
@@ -1078,7 +1088,13 @@ static void attach_device(struct amd_iommu *iommu,
 	amd_iommu_pd_table[devid] = domain;
 	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
 
+	/*
+	 * We might boot into a crash-kernel here. The crashed kernel
+	 * left the caches in the IOMMU dirty. So we have to flush
+	 * here to evict all dirty stuff.
+	 */
 	iommu_queue_inv_dev_entry(iommu, devid);
+	iommu_flush_tlb_pde(iommu, domain->id);
 }
 
 /*
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 238989ec077d..10b2accd12ea 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -260,6 +260,14 @@ static void iommu_enable(struct amd_iommu *iommu)
 
 static void iommu_disable(struct amd_iommu *iommu)
 {
+	/* Disable command buffer */
+	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
+
+	/* Disable event logging and event interrupts */
+	iommu_feature_disable(iommu, CONTROL_EVT_INT_EN);
+	iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
+
+	/* Disable IOMMU hardware itself */
 	iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
 }
 
@@ -478,6 +486,10 @@ static void iommu_enable_event_buffer(struct amd_iommu *iommu)
 	memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
 		    &entry, sizeof(entry));
 
+	/* set head and tail to zero manually */
+	writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
+	writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
+
 	iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
 }
 
@@ -1042,6 +1054,7 @@ static void enable_iommus(void)
 	struct amd_iommu *iommu;
 
 	for_each_iommu(iommu) {
+		iommu_disable(iommu);
 		iommu_set_device_table(iommu);
 		iommu_enable_command_buffer(iommu);
 		iommu_enable_event_buffer(iommu);
@@ -1066,12 +1079,6 @@ static void disable_iommus(void)
 
 static int amd_iommu_resume(struct sys_device *dev)
 {
-	/*
-	 * Disable IOMMUs before reprogramming the hardware registers.
-	 * IOMMU is still enabled from the resume kernel.
-	 */
-	disable_iommus();
-
 	/* re-load the hardware */
 	enable_iommus();
 
@@ -1079,8 +1086,8 @@ static int amd_iommu_resume(struct sys_device *dev)
 	 * we have to flush after the IOMMUs are enabled because a
 	 * disabled IOMMU will never execute the commands we send
 	 */
-	amd_iommu_flush_all_domains();
 	amd_iommu_flush_all_devices();
+	amd_iommu_flush_all_domains();
 
 	return 0;
 }
@@ -1273,6 +1280,11 @@ free:
 	goto out;
 }
 
+void amd_iommu_shutdown(void)
+{
+	disable_iommus();
+}
+
 /****************************************************************************
 *
 * Early detect code. This code runs at IOMMU detection time in the DMA
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index ef8d9290c7ea..b7a79207295e 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -462,7 +462,8 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
 static void
 __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
 {
-	union entry_union eu;
+	union entry_union eu = {{0, 0}};
+
 	eu.entry = e;
 	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
 	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
@@ -2003,7 +2004,9 @@ void disable_IO_APIC(void)
 	/*
 	 * Use virtual wire A mode when interrupt remapping is enabled.
 	 */
-	disconnect_bsp_APIC(!intr_remapping_enabled && ioapic_i8259.pin != -1);
+	if (cpu_has_apic)
+		disconnect_bsp_APIC(!intr_remapping_enabled &&
+				ioapic_i8259.pin != -1);
 }
 
 #ifdef CONFIG_X86_32
@@ -3567,7 +3570,7 @@ static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 
 #endif /* CONFIG_SMP */
 
-struct irq_chip dmar_msi_type = {
+static struct irq_chip dmar_msi_type = {
 	.name = "DMAR_MSI",
 	.unmask = dmar_msi_unmask,
 	.mask = dmar_msi_mask,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 440a8bccd91a..0c0182cc947d 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -20,23 +20,12 @@
 #include <asm/apic.h>
 #include <asm/setup.h>
 
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <asm/mpspec.h>
-#include <asm/fixmap.h>
-#include <asm/apicdef.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
 #include <linux/smp.h>
-#include <linux/init.h>
 #include <asm/ipi.h>
 
-#include <linux/smp.h>
-#include <linux/init.h>
 #include <linux/interrupt.h>
 #include <asm/acpi.h>
 #include <asm/e820.h>
-#include <asm/setup.h>
 
 #ifdef CONFIG_HOTPLUG_CPU
 #define DEFAULT_SEND_IPI	(1)
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 344eee4ac0a4..eafdfbd1ea95 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -44,7 +44,6 @@
 #include <asm/ipi.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/init.h>
 #include <linux/gfp.h>
 #include <linux/smp.h>
 
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index ef0ae207a7c8..096d19aea2f7 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -463,7 +463,7 @@ static void uv_heartbeat(unsigned long ignored)
 	uv_set_scir_bits(bits);
 
 	/* enable next timer period */
-	mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL);
+	mod_timer_pinned(timer, jiffies + SCIR_CPU_HB_INTERVAL);
 }
 
 static void __cpuinit uv_heartbeat_enable(int cpu)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 3ffdcfa9abdf..6b26d4deada0 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -108,7 +108,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
 	/* data */
 	[GDT_ENTRY_APMBIOS_BASE+2]	= { { { 0x0000ffff, 0x00409200 } } },
 
-	[GDT_ENTRY_ESPFIX_SS]		= { { { 0x00000000, 0x00c09200 } } },
+	[GDT_ENTRY_ESPFIX_SS]		= { { { 0x0000ffff, 0x00cf9200 } } },
 	[GDT_ENTRY_PERCPU]		= { { { 0x0000ffff, 0x00cf9200 } } },
 	GDT_STACK_CANARY_INIT
 #endif
@@ -487,7 +487,6 @@ out:
 static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
 {
 	char *v = c->x86_vendor_id;
-	static int printed;
 	int i;
 
 	for (i = 0; i < X86_VENDOR_NUM; i++) {
@@ -504,13 +503,9 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
 		}
 	}
 
-	if (!printed) {
-		printed++;
-		printk(KERN_ERR
-		       "CPU: vendor_id '%s' unknown, using generic init.\n", v);
-
-		printk(KERN_ERR "CPU: Your system may be unstable.\n");
-	}
+	printk_once(KERN_ERR
+			"CPU: vendor_id '%s' unknown, using generic init.\n" \
+			"CPU: Your system may be unstable.\n", v);
 
 	c->x86_vendor = X86_VENDOR_UNKNOWN;
 	this_cpu = &default_cpu;
@@ -853,6 +848,9 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
 	numa_add_cpu(smp_processor_id());
 #endif
+
+	/* Cap the iomem address space to what is addressable on all CPUs */
+	iomem_resource.end &= (1ULL << c->x86_phys_bits) - 1;
 }
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index cf52215d9eb1..81cbe64ed6b4 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1,3 +1,4 @@
+
 /*
  * (c) 2003-2006 Advanced Micro Devices, Inc.
  *  Your use of this code is subject to the terms and conditions of the
@@ -117,20 +118,17 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
 	u32 i = 0;
 
 	if (cpu_family == CPU_HW_PSTATE) {
-		if (data->currpstate == HW_PSTATE_INVALID) {
-			/* read (initial) hw pstate if not yet set */
-			rdmsr(MSR_PSTATE_STATUS, lo, hi);
-			i = lo & HW_PSTATE_MASK;
-
-			/*
-			 * a workaround for family 11h erratum 311 might cause
-			 * an "out-of-range Pstate if the core is in Pstate-0
-			 */
-			if (i >= data->numps)
-				data->currpstate = HW_PSTATE_0;
-			else
-				data->currpstate = i;
-		}
+		rdmsr(MSR_PSTATE_STATUS, lo, hi);
+		i = lo & HW_PSTATE_MASK;
+		data->currpstate = i;
+
+		/*
+		 * a workaround for family 11h erratum 311 might cause
+		 * an "out-of-range Pstate if the core is in Pstate-0
+		 */
+		if ((boot_cpu_data.x86 == 0x11) && (i >= data->numps))
+			data->currpstate = HW_PSTATE_0;
+
 		return 0;
 	}
 	do {
@@ -510,41 +508,34 @@ static int core_voltage_post_transition(struct powernow_k8_data *data,
 	return 0;
 }
 
-static int check_supported_cpu(unsigned int cpu)
+static void check_supported_cpu(void *_rc)
 {
-	cpumask_t oldmask;
 	u32 eax, ebx, ecx, edx;
-	unsigned int rc = 0;
-
-	oldmask = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+	int *rc = _rc;
 
-	if (smp_processor_id() != cpu) {
-		printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu);
-		goto out;
-	}
+	*rc = -ENODEV;
 
 	if (current_cpu_data.x86_vendor != X86_VENDOR_AMD)
-		goto out;
+		return;
 
 	eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
 	if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) &&
 	    ((eax & CPUID_XFAM) < CPUID_XFAM_10H))
-		goto out;
+		return;
 
 	if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) {
 		if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
 		    ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) {
 			printk(KERN_INFO PFX
 				"Processor cpuid %x not supported\n", eax);
-			goto out;
+			return;
 		}
 
 		eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES);
 		if (eax < CPUID_FREQ_VOLT_CAPABILITIES) {
 			printk(KERN_INFO PFX
 			       "No frequency change capabilities detected\n");
-			goto out;
+			return;
 		}
 
 		cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
@@ -552,21 +543,17 @@ static int check_supported_cpu(unsigned int cpu)
 		    != P_STATE_TRANSITION_CAPABLE) {
 			printk(KERN_INFO PFX
 				"Power state transitions not supported\n");
-			goto out;
+			return;
 		}
 	} else { /* must be a HW Pstate capable processor */
 		cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
 		if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE)
 			cpu_family = CPU_HW_PSTATE;
 		else
-			goto out;
+			return;
 	}
 
-	rc = 1;
-
-out:
-	set_cpus_allowed_ptr(current, &oldmask);
-	return rc;
+	*rc = 0;
 }
 
 static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst,
@@ -823,13 +810,14 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data,
 	if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
 		return;
 
-	control = data->acpi_data.states[index].control; data->irt = (control
-		>> IRT_SHIFT) & IRT_MASK; data->rvo = (control >>
-		RVO_SHIFT) & RVO_MASK; data->exttype = (control
-		>> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
-	data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK; data->vidmvs = 1
-		<< ((control >> MVS_SHIFT) & MVS_MASK); data->vstable =
-		(control >> VST_SHIFT) & VST_MASK; }
+	control = data->acpi_data.states[index].control;
+	data->irt = (control >> IRT_SHIFT) & IRT_MASK;
+	data->rvo = (control >> RVO_SHIFT) & RVO_MASK;
+	data->exttype = (control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
+	data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK;
+	data->vidmvs = 1 << ((control >> MVS_SHIFT) & MVS_MASK);
+	data->vstable = (control >> VST_SHIFT) & VST_MASK;
+}
 
 static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 {
@@ -1046,6 +1034,19 @@ static int get_transition_latency(struct powernow_k8_data *data)
 		if (cur_latency > max_latency)
 			max_latency = cur_latency;
 	}
+	if (max_latency == 0) {
+		/*
+		 * Fam 11h always returns 0 as transition latency.
+		 * This is intended and means "very fast". While cpufreq core
+		 * and governors currently can handle that gracefully, better
+		 * set it to 1 to avoid problems in the future.
+		 * For all others it's a BIOS bug.
+		 */
+		if (!boot_cpu_data.x86 == 0x11)
+			printk(KERN_ERR FW_WARN PFX "Invalid zero transition "
+					"latency\n");
+		max_latency = 1;
+	}
 	/* value in usecs, needs to be in nanoseconds */
 	return 1000 * max_latency;
 }
@@ -1093,7 +1094,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data,
 	freqs.old = find_khz_freq_from_fid(data->currfid);
 	freqs.new = find_khz_freq_from_fid(fid);
 
-	for_each_cpu_mask_nr(i, *(data->available_cores)) {
+	for_each_cpu(i, data->available_cores) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 	}
@@ -1101,7 +1102,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data,
 	res = transition_fid_vid(data, fid, vid);
 	freqs.new = find_khz_freq_from_fid(data->currfid);
 
-	for_each_cpu_mask_nr(i, *(data->available_cores)) {
+	for_each_cpu(i, data->available_cores) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 	}
@@ -1126,7 +1127,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data,
 			data->currpstate);
 	freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
 
-	for_each_cpu_mask_nr(i, *(data->available_cores)) {
+	for_each_cpu(i, data->available_cores) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 	}
@@ -1134,7 +1135,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data,
 	res = transition_pstate(data, pstate);
 	freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
 
-	for_each_cpu_mask_nr(i, *(data->available_cores)) {
+	for_each_cpu(i, data->available_cores) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 	}
@@ -1235,21 +1236,47 @@ static int powernowk8_verify(struct cpufreq_policy *pol)
 	return cpufreq_frequency_table_verify(pol, data->powernow_table);
 }
 
-static const char ACPI_PSS_BIOS_BUG_MSG[] =
-	KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n"
-	KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n";
+struct init_on_cpu {
+	struct powernow_k8_data *data;
+	int rc;
+};
+
+static void __cpuinit powernowk8_cpu_init_on_cpu(void *_init_on_cpu)
+{
+	struct init_on_cpu *init_on_cpu = _init_on_cpu;
+
+	if (pending_bit_stuck()) {
+		printk(KERN_ERR PFX "failing init, change pending bit set\n");
+		init_on_cpu->rc = -ENODEV;
+		return;
+	}
+
+	if (query_current_values_with_pending_wait(init_on_cpu->data)) {
+		init_on_cpu->rc = -ENODEV;
+		return;
+	}
+
+	if (cpu_family == CPU_OPTERON)
+		fidvid_msr_init();
+
+	init_on_cpu->rc = 0;
+}
 
 /* per CPU init entry point to the driver */
 static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 {
+	static const char ACPI_PSS_BIOS_BUG_MSG[] =
+		KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n"
+		KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n";
 	struct powernow_k8_data *data;
-	cpumask_t oldmask;
+	struct init_on_cpu init_on_cpu;
 	int rc;
 
 	if (!cpu_online(pol->cpu))
 		return -ENODEV;
 
-	if (!check_supported_cpu(pol->cpu))
+	smp_call_function_single(pol->cpu, check_supported_cpu, &rc, 1);
+	if (rc)
 		return -ENODEV;
 
 	data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL);
@@ -1289,27 +1316,12 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1289 | pol->cpuinfo.transition_latency = get_transition_latency(data); | 1316 | pol->cpuinfo.transition_latency = get_transition_latency(data); |
1290 | 1317 | ||
1291 | /* only run on specific CPU from here on */ | 1318 | /* only run on specific CPU from here on */ |
1292 | oldmask = current->cpus_allowed; | 1319 | init_on_cpu.data = data; |
1293 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); | 1320 | smp_call_function_single(data->cpu, powernowk8_cpu_init_on_cpu, |
1294 | 1321 | &init_on_cpu, 1); | |
1295 | if (smp_processor_id() != pol->cpu) { | 1322 | rc = init_on_cpu.rc; |
1296 | printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); | 1323 | if (rc != 0) |
1297 | goto err_out_unmask; | 1324 | goto err_out_exit_acpi; |
1298 | } | ||
1299 | |||
1300 | if (pending_bit_stuck()) { | ||
1301 | printk(KERN_ERR PFX "failing init, change pending bit set\n"); | ||
1302 | goto err_out_unmask; | ||
1303 | } | ||
1304 | |||
1305 | if (query_current_values_with_pending_wait(data)) | ||
1306 | goto err_out_unmask; | ||
1307 | |||
1308 | if (cpu_family == CPU_OPTERON) | ||
1309 | fidvid_msr_init(); | ||
1310 | |||
1311 | /* run on any CPU again */ | ||
1312 | set_cpus_allowed_ptr(current, &oldmask); | ||
1313 | 1325 | ||
1314 | if (cpu_family == CPU_HW_PSTATE) | 1326 | if (cpu_family == CPU_HW_PSTATE) |
1315 | cpumask_copy(pol->cpus, cpumask_of(pol->cpu)); | 1327 | cpumask_copy(pol->cpus, cpumask_of(pol->cpu)); |
@@ -1346,8 +1358,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1346 | 1358 | ||
1347 | return 0; | 1359 | return 0; |
1348 | 1360 | ||
1349 | err_out_unmask: | 1361 | err_out_exit_acpi: |
1350 | set_cpus_allowed_ptr(current, &oldmask); | ||
1351 | powernow_k8_cpu_exit_acpi(data); | 1362 | powernow_k8_cpu_exit_acpi(data); |
1352 | 1363 | ||
1353 | err_out: | 1364 | err_out: |
@@ -1372,28 +1383,25 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol) | |||
1372 | return 0; | 1383 | return 0; |
1373 | } | 1384 | } |
1374 | 1385 | ||
1386 | static void query_values_on_cpu(void *_err) | ||
1387 | { | ||
1388 | int *err = _err; | ||
1389 | struct powernow_k8_data *data = __get_cpu_var(powernow_data); | ||
1390 | |||
1391 | *err = query_current_values_with_pending_wait(data); | ||
1392 | } | ||
1393 | |||
1375 | static unsigned int powernowk8_get(unsigned int cpu) | 1394 | static unsigned int powernowk8_get(unsigned int cpu) |
1376 | { | 1395 | { |
1377 | struct powernow_k8_data *data; | 1396 | struct powernow_k8_data *data = per_cpu(powernow_data, cpu); |
1378 | cpumask_t oldmask = current->cpus_allowed; | ||
1379 | unsigned int khz = 0; | 1397 | unsigned int khz = 0; |
1380 | unsigned int first; | 1398 | int err; |
1381 | |||
1382 | first = cpumask_first(cpu_core_mask(cpu)); | ||
1383 | data = per_cpu(powernow_data, first); | ||
1384 | 1399 | ||
1385 | if (!data) | 1400 | if (!data) |
1386 | return -EINVAL; | 1401 | return -EINVAL; |
1387 | 1402 | ||
1388 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | 1403 | smp_call_function_single(cpu, query_values_on_cpu, &err, true); |
1389 | if (smp_processor_id() != cpu) { | 1404 | if (err) |
1390 | printk(KERN_ERR PFX | ||
1391 | "limiting to CPU %d failed in powernowk8_get\n", cpu); | ||
1392 | set_cpus_allowed_ptr(current, &oldmask); | ||
1393 | return 0; | ||
1394 | } | ||
1395 | |||
1396 | if (query_current_values_with_pending_wait(data)) | ||
1397 | goto out; | 1405 | goto out; |
1398 | 1406 | ||
1399 | if (cpu_family == CPU_HW_PSTATE) | 1407 | if (cpu_family == CPU_HW_PSTATE) |
@@ -1404,7 +1412,6 @@ static unsigned int powernowk8_get(unsigned int cpu) | |||
1404 | 1412 | ||
1405 | 1413 | ||
1406 | out: | 1414 | out: |
1407 | set_cpus_allowed_ptr(current, &oldmask); | ||
1408 | return khz; | 1415 | return khz; |
1409 | } | 1416 | } |
1410 | 1417 | ||
@@ -1430,7 +1437,9 @@ static int __cpuinit powernowk8_init(void) | |||
1430 | unsigned int i, supported_cpus = 0; | 1437 | unsigned int i, supported_cpus = 0; |
1431 | 1438 | ||
1432 | for_each_online_cpu(i) { | 1439 | for_each_online_cpu(i) { |
1433 | if (check_supported_cpu(i)) | 1440 | int rc; |
1441 | smp_call_function_single(i, check_supported_cpu, &rc, 1); | ||
1442 | if (rc == 0) | ||
1434 | supported_cpus++; | 1443 | supported_cpus++; |
1435 | } | 1444 | } |
1436 | 1445 | ||
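The powernow-k8 hunks above all make the same conversion: instead of migrating the calling task with set_cpus_allowed_ptr(), the per-CPU work is packed into a small struct and executed on the target CPU via smp_call_function_single(). A minimal sketch of that pattern, under the assumption that the names used here (struct init_result, do_init_on_cpu, run_init_on_cpu) are made up and not part of the driver:

/*
 * Illustrative sketch only -- not the driver's code.  Shows the
 * smp_call_function_single() pattern used throughout this series.
 */
#include <linux/smp.h>
#include <linux/errno.h>

struct init_result {
	int rc;
};

static void do_init_on_cpu(void *_res)
{
	struct init_result *res = _res;

	/* Runs on the target CPU in interrupt context; keep it short. */
	res->rc = 0;	/* e.g. check MSRs or query hardware state */
}

static int run_init_on_cpu(unsigned int cpu)
{
	struct init_result res;

	/* Replaces set_cpus_allowed_ptr(current, cpumask_of(cpu)) and
	 * doing the work in the (possibly migrated) caller. */
	if (smp_call_function_single(cpu, do_init_on_cpu, &res, 1))
		return -ENODEV;	/* CPU offline or call failed */
	return res.rc;
}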
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index 6c6698feade1..c9c1190b5e1f 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h | |||
@@ -223,14 +223,3 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned | |||
223 | 223 | ||
224 | static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); | 224 | static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); |
225 | static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); | 225 | static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); |
226 | |||
227 | #ifdef CONFIG_SMP | ||
228 | static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) | ||
229 | { | ||
230 | } | ||
231 | #else | ||
232 | static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) | ||
233 | { | ||
234 | cpu_set(0, cpu_sharedcore_mask[0]); | ||
235 | } | ||
236 | #endif | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 55c831ed71ce..8d672ef162ce 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | |||
@@ -323,14 +323,8 @@ static unsigned int get_cur_freq(unsigned int cpu) | |||
323 | { | 323 | { |
324 | unsigned l, h; | 324 | unsigned l, h; |
325 | unsigned clock_freq; | 325 | unsigned clock_freq; |
326 | cpumask_t saved_mask; | ||
327 | 326 | ||
328 | saved_mask = current->cpus_allowed; | 327 | rdmsr_on_cpu(cpu, MSR_IA32_PERF_STATUS, &l, &h); |
329 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | ||
330 | if (smp_processor_id() != cpu) | ||
331 | return 0; | ||
332 | |||
333 | rdmsr(MSR_IA32_PERF_STATUS, l, h); | ||
334 | clock_freq = extract_clock(l, cpu, 0); | 328 | clock_freq = extract_clock(l, cpu, 0); |
335 | 329 | ||
336 | if (unlikely(clock_freq == 0)) { | 330 | if (unlikely(clock_freq == 0)) { |
@@ -340,11 +334,9 @@ static unsigned int get_cur_freq(unsigned int cpu) | |||
340 | * P-state transition (like TM2). Get the last freq set | 334 | * P-state transition (like TM2). Get the last freq set |
341 | * in PERF_CTL. | 335 | * in PERF_CTL. |
342 | */ | 336 | */ |
343 | rdmsr(MSR_IA32_PERF_CTL, l, h); | 337 | rdmsr_on_cpu(cpu, MSR_IA32_PERF_CTL, &l, &h); |
344 | clock_freq = extract_clock(l, cpu, 1); | 338 | clock_freq = extract_clock(l, cpu, 1); |
345 | } | 339 | } |
346 | |||
347 | set_cpus_allowed_ptr(current, &saved_mask); | ||
348 | return clock_freq; | 340 | return clock_freq; |
349 | } | 341 | } |
350 | 342 | ||
@@ -467,15 +459,10 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
467 | struct cpufreq_freqs freqs; | 459 | struct cpufreq_freqs freqs; |
468 | int retval = 0; | 460 | int retval = 0; |
469 | unsigned int j, k, first_cpu, tmp; | 461 | unsigned int j, k, first_cpu, tmp; |
470 | cpumask_var_t saved_mask, covered_cpus; | 462 | cpumask_var_t covered_cpus; |
471 | 463 | ||
472 | if (unlikely(!alloc_cpumask_var(&saved_mask, GFP_KERNEL))) | 464 | if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))) |
473 | return -ENOMEM; | ||
474 | if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))) { | ||
475 | free_cpumask_var(saved_mask); | ||
476 | return -ENOMEM; | 465 | return -ENOMEM; |
477 | } | ||
478 | cpumask_copy(saved_mask, ¤t->cpus_allowed); | ||
479 | 466 | ||
480 | if (unlikely(per_cpu(centrino_model, cpu) == NULL)) { | 467 | if (unlikely(per_cpu(centrino_model, cpu) == NULL)) { |
481 | retval = -ENODEV; | 468 | retval = -ENODEV; |
@@ -493,7 +480,7 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
493 | 480 | ||
494 | first_cpu = 1; | 481 | first_cpu = 1; |
495 | for_each_cpu(j, policy->cpus) { | 482 | for_each_cpu(j, policy->cpus) { |
496 | const struct cpumask *mask; | 483 | int good_cpu; |
497 | 484 | ||
498 | /* cpufreq holds the hotplug lock, so we are safe here */ | 485 | /* cpufreq holds the hotplug lock, so we are safe here */ |
499 | if (!cpu_online(j)) | 486 | if (!cpu_online(j)) |
@@ -504,32 +491,30 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
504 | * Make sure we are running on CPU that wants to change freq | 491 | * Make sure we are running on CPU that wants to change freq |
505 | */ | 492 | */ |
506 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) | 493 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) |
507 | mask = policy->cpus; | 494 | good_cpu = cpumask_any_and(policy->cpus, |
495 | cpu_online_mask); | ||
508 | else | 496 | else |
509 | mask = cpumask_of(j); | 497 | good_cpu = j; |
510 | 498 | ||
511 | set_cpus_allowed_ptr(current, mask); | 499 | if (good_cpu >= nr_cpu_ids) { |
512 | preempt_disable(); | ||
513 | if (unlikely(!cpu_isset(smp_processor_id(), *mask))) { | ||
514 | dprintk("couldn't limit to CPUs in this domain\n"); | 500 | dprintk("couldn't limit to CPUs in this domain\n"); |
515 | retval = -EAGAIN; | 501 | retval = -EAGAIN; |
516 | if (first_cpu) { | 502 | if (first_cpu) { |
517 | /* We haven't started the transition yet. */ | 503 | /* We haven't started the transition yet. */ |
518 | goto migrate_end; | 504 | goto out; |
519 | } | 505 | } |
520 | preempt_enable(); | ||
521 | break; | 506 | break; |
522 | } | 507 | } |
523 | 508 | ||
524 | msr = per_cpu(centrino_model, cpu)->op_points[newstate].index; | 509 | msr = per_cpu(centrino_model, cpu)->op_points[newstate].index; |
525 | 510 | ||
526 | if (first_cpu) { | 511 | if (first_cpu) { |
527 | rdmsr(MSR_IA32_PERF_CTL, oldmsr, h); | 512 | rdmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, &oldmsr, &h); |
528 | if (msr == (oldmsr & 0xffff)) { | 513 | if (msr == (oldmsr & 0xffff)) { |
529 | dprintk("no change needed - msr was and needs " | 514 | dprintk("no change needed - msr was and needs " |
530 | "to be %x\n", oldmsr); | 515 | "to be %x\n", oldmsr); |
531 | retval = 0; | 516 | retval = 0; |
532 | goto migrate_end; | 517 | goto out; |
533 | } | 518 | } |
534 | 519 | ||
535 | freqs.old = extract_clock(oldmsr, cpu, 0); | 520 | freqs.old = extract_clock(oldmsr, cpu, 0); |
@@ -553,14 +538,11 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
553 | oldmsr |= msr; | 538 | oldmsr |= msr; |
554 | } | 539 | } |
555 | 540 | ||
556 | wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); | 541 | wrmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, oldmsr, h); |
557 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { | 542 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) |
558 | preempt_enable(); | ||
559 | break; | 543 | break; |
560 | } | ||
561 | 544 | ||
562 | cpu_set(j, *covered_cpus); | 545 | cpumask_set_cpu(j, covered_cpus); |
563 | preempt_enable(); | ||
564 | } | 546 | } |
565 | 547 | ||
566 | for_each_cpu(k, policy->cpus) { | 548 | for_each_cpu(k, policy->cpus) { |
@@ -578,10 +560,8 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
578 | * Best effort undo.. | 560 | * Best effort undo.. |
579 | */ | 561 | */ |
580 | 562 | ||
581 | for_each_cpu_mask_nr(j, *covered_cpus) { | 563 | for_each_cpu(j, covered_cpus) |
582 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(j)); | 564 | wrmsr_on_cpu(j, MSR_IA32_PERF_CTL, oldmsr, h); |
583 | wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); | ||
584 | } | ||
585 | 565 | ||
586 | tmp = freqs.new; | 566 | tmp = freqs.new; |
587 | freqs.new = freqs.old; | 567 | freqs.new = freqs.old; |
@@ -593,15 +573,9 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
593 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 573 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
594 | } | 574 | } |
595 | } | 575 | } |
596 | set_cpus_allowed_ptr(current, saved_mask); | ||
597 | retval = 0; | 576 | retval = 0; |
598 | goto out; | ||
599 | 577 | ||
600 | migrate_end: | ||
601 | preempt_enable(); | ||
602 | set_cpus_allowed_ptr(current, saved_mask); | ||
603 | out: | 578 | out: |
604 | free_cpumask_var(saved_mask); | ||
605 | free_cpumask_var(covered_cpus); | 579 | free_cpumask_var(covered_cpus); |
606 | return retval; | 580 | return retval; |
607 | } | 581 | } |
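The speedstep-centrino conversion above leans on rdmsr_on_cpu()/wrmsr_on_cpu(), which perform the MSR access on the target CPU through an IPI instead of migrating the caller. A hedged sketch of how such a helper is typically used; read_perf_status() is a hypothetical name, the MSR constant matches the one in the hunk:

/* Illustrative sketch only; read_perf_status() is not a kernel function. */
#include <linux/types.h>
#include <asm/msr.h>

static u32 read_perf_status(unsigned int cpu)
{
	u32 lo, hi;

	/* rdmsr_on_cpu() does the read on the target CPU and returns
	 * non-zero on failure (e.g. the CPU is offline). */
	if (rdmsr_on_cpu(cpu, MSR_IA32_PERF_STATUS, &lo, &hi))
		return 0;
	return lo;
}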
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 016c1a4fa3fc..6911e91fb4f6 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | |||
@@ -89,7 +89,8 @@ static int speedstep_find_register(void) | |||
89 | * speedstep_set_state - set the SpeedStep state | 89 | * speedstep_set_state - set the SpeedStep state |
90 | * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) | 90 | * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) |
91 | * | 91 | * |
92 | * Tries to change the SpeedStep state. | 92 | * Tries to change the SpeedStep state. Can be called from |
93 | * smp_call_function_single. | ||
93 | */ | 94 | */ |
94 | static void speedstep_set_state(unsigned int state) | 95 | static void speedstep_set_state(unsigned int state) |
95 | { | 96 | { |
@@ -143,6 +144,11 @@ static void speedstep_set_state(unsigned int state) | |||
143 | return; | 144 | return; |
144 | } | 145 | } |
145 | 146 | ||
147 | /* Wrapper for smp_call_function_single. */ | ||
148 | static void _speedstep_set_state(void *_state) | ||
149 | { | ||
150 | speedstep_set_state(*(unsigned int *)_state); | ||
151 | } | ||
146 | 152 | ||
147 | /** | 153 | /** |
148 | * speedstep_activate - activate SpeedStep control in the chipset | 154 | * speedstep_activate - activate SpeedStep control in the chipset |
@@ -226,22 +232,28 @@ static unsigned int speedstep_detect_chipset(void) | |||
226 | return 0; | 232 | return 0; |
227 | } | 233 | } |
228 | 234 | ||
229 | static unsigned int _speedstep_get(const struct cpumask *cpus) | 235 | struct get_freq_data { |
230 | { | ||
231 | unsigned int speed; | 236 | unsigned int speed; |
232 | cpumask_t cpus_allowed; | 237 | unsigned int processor; |
233 | 238 | }; | |
234 | cpus_allowed = current->cpus_allowed; | 239 | |
235 | set_cpus_allowed_ptr(current, cpus); | 240 | static void get_freq_data(void *_data) |
236 | speed = speedstep_get_frequency(speedstep_processor); | 241 | { |
237 | set_cpus_allowed_ptr(current, &cpus_allowed); | 242 | struct get_freq_data *data = _data; |
238 | dprintk("detected %u kHz as current frequency\n", speed); | 243 | |
239 | return speed; | 244 | data->speed = speedstep_get_frequency(data->processor); |
240 | } | 245 | } |
241 | 246 | ||
242 | static unsigned int speedstep_get(unsigned int cpu) | 247 | static unsigned int speedstep_get(unsigned int cpu) |
243 | { | 248 | { |
244 | return _speedstep_get(cpumask_of(cpu)); | 249 | struct get_freq_data data = { .processor = cpu }; |
250 | |||
251 | /* You're supposed to ensure CPU is online. */ | ||
252 | if (smp_call_function_single(cpu, get_freq_data, &data, 1) != 0) | ||
253 | BUG(); | ||
254 | |||
255 | dprintk("detected %u kHz as current frequency\n", data.speed); | ||
256 | return data.speed; | ||
245 | } | 257 | } |
246 | 258 | ||
247 | /** | 259 | /** |
@@ -257,16 +269,16 @@ static int speedstep_target(struct cpufreq_policy *policy, | |||
257 | unsigned int target_freq, | 269 | unsigned int target_freq, |
258 | unsigned int relation) | 270 | unsigned int relation) |
259 | { | 271 | { |
260 | unsigned int newstate = 0; | 272 | unsigned int newstate = 0, policy_cpu; |
261 | struct cpufreq_freqs freqs; | 273 | struct cpufreq_freqs freqs; |
262 | cpumask_t cpus_allowed; | ||
263 | int i; | 274 | int i; |
264 | 275 | ||
265 | if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], | 276 | if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], |
266 | target_freq, relation, &newstate)) | 277 | target_freq, relation, &newstate)) |
267 | return -EINVAL; | 278 | return -EINVAL; |
268 | 279 | ||
269 | freqs.old = _speedstep_get(policy->cpus); | 280 | policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask); |
281 | freqs.old = speedstep_get(policy_cpu); | ||
270 | freqs.new = speedstep_freqs[newstate].frequency; | 282 | freqs.new = speedstep_freqs[newstate].frequency; |
271 | freqs.cpu = policy->cpu; | 283 | freqs.cpu = policy->cpu; |
272 | 284 | ||
@@ -276,20 +288,13 @@ static int speedstep_target(struct cpufreq_policy *policy, | |||
276 | if (freqs.old == freqs.new) | 288 | if (freqs.old == freqs.new) |
277 | return 0; | 289 | return 0; |
278 | 290 | ||
279 | cpus_allowed = current->cpus_allowed; | ||
280 | |||
281 | for_each_cpu(i, policy->cpus) { | 291 | for_each_cpu(i, policy->cpus) { |
282 | freqs.cpu = i; | 292 | freqs.cpu = i; |
283 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 293 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
284 | } | 294 | } |
285 | 295 | ||
286 | /* switch to physical CPU where state is to be changed */ | 296 | smp_call_function_single(policy_cpu, _speedstep_set_state, &newstate, |
287 | set_cpus_allowed_ptr(current, policy->cpus); | 297 | true); |
288 | |||
289 | speedstep_set_state(newstate); | ||
290 | |||
291 | /* allow to be run on all CPUs */ | ||
292 | set_cpus_allowed_ptr(current, &cpus_allowed); | ||
293 | 298 | ||
294 | for_each_cpu(i, policy->cpus) { | 299 | for_each_cpu(i, policy->cpus) { |
295 | freqs.cpu = i; | 300 | freqs.cpu = i; |
@@ -312,33 +317,43 @@ static int speedstep_verify(struct cpufreq_policy *policy) | |||
312 | return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); | 317 | return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); |
313 | } | 318 | } |
314 | 319 | ||
320 | struct get_freqs { | ||
321 | struct cpufreq_policy *policy; | ||
322 | int ret; | ||
323 | }; | ||
324 | |||
325 | static void get_freqs_on_cpu(void *_get_freqs) | ||
326 | { | ||
327 | struct get_freqs *get_freqs = _get_freqs; | ||
328 | |||
329 | get_freqs->ret = | ||
330 | speedstep_get_freqs(speedstep_processor, | ||
331 | &speedstep_freqs[SPEEDSTEP_LOW].frequency, | ||
332 | &speedstep_freqs[SPEEDSTEP_HIGH].frequency, | ||
333 | &get_freqs->policy->cpuinfo.transition_latency, | ||
334 | &speedstep_set_state); | ||
335 | } | ||
315 | 336 | ||
316 | static int speedstep_cpu_init(struct cpufreq_policy *policy) | 337 | static int speedstep_cpu_init(struct cpufreq_policy *policy) |
317 | { | 338 | { |
318 | int result = 0; | 339 | int result; |
319 | unsigned int speed; | 340 | unsigned int policy_cpu, speed; |
320 | cpumask_t cpus_allowed; | 341 | struct get_freqs gf; |
321 | 342 | ||
322 | /* only run on CPU to be set, or on its sibling */ | 343 | /* only run on CPU to be set, or on its sibling */ |
323 | #ifdef CONFIG_SMP | 344 | #ifdef CONFIG_SMP |
324 | cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu)); | 345 | cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu)); |
325 | #endif | 346 | #endif |
326 | 347 | policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask); | |
327 | cpus_allowed = current->cpus_allowed; | ||
328 | set_cpus_allowed_ptr(current, policy->cpus); | ||
329 | 348 | ||
330 | /* detect low and high frequency and transition latency */ | 349 | /* detect low and high frequency and transition latency */ |
331 | result = speedstep_get_freqs(speedstep_processor, | 350 | gf.policy = policy; |
332 | &speedstep_freqs[SPEEDSTEP_LOW].frequency, | 351 | smp_call_function_single(policy_cpu, get_freqs_on_cpu, &gf, 1); |
333 | &speedstep_freqs[SPEEDSTEP_HIGH].frequency, | 352 | if (gf.ret) |
334 | &policy->cpuinfo.transition_latency, | 353 | return gf.ret; |
335 | &speedstep_set_state); | ||
336 | set_cpus_allowed_ptr(current, &cpus_allowed); | ||
337 | if (result) | ||
338 | return result; | ||
339 | 354 | ||
340 | /* get current speed setting */ | 355 | /* get current speed setting */ |
341 | speed = _speedstep_get(policy->cpus); | 356 | speed = speedstep_get(policy_cpu); |
342 | if (!speed) | 357 | if (!speed) |
343 | return -EIO; | 358 | return -EIO; |
344 | 359 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index 2e3c6862657b..f4c290b8482f 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | |||
@@ -226,6 +226,7 @@ static unsigned int pentium4_get_frequency(void) | |||
226 | } | 226 | } |
227 | 227 | ||
228 | 228 | ||
229 | /* Warning: may get called from smp_call_function_single. */ | ||
229 | unsigned int speedstep_get_frequency(unsigned int processor) | 230 | unsigned int speedstep_get_frequency(unsigned int processor) |
230 | { | 231 | { |
231 | switch (processor) { | 232 | switch (processor) { |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index daed39ba2614..3260ab044996 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -86,6 +86,29 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | |||
86 | */ | 86 | */ |
87 | if (c->x86 == 6 && c->x86_model < 15) | 87 | if (c->x86 == 6 && c->x86_model < 15) |
88 | clear_cpu_cap(c, X86_FEATURE_PAT); | 88 | clear_cpu_cap(c, X86_FEATURE_PAT); |
89 | |||
90 | #ifdef CONFIG_KMEMCHECK | ||
91 | /* | ||
92 | * P4s have a "fast strings" feature which causes single- | ||
93 | * stepping REP instructions to only generate a #DB on | ||
94 | * cache-line boundaries. | ||
95 | * | ||
96 | * Ingo Molnar reported a Pentium D (model 6) and a Xeon | ||
97 | * (model 2) with the same problem. | ||
98 | */ | ||
99 | if (c->x86 == 15) { | ||
100 | u64 misc_enable; | ||
101 | |||
102 | rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); | ||
103 | |||
104 | if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) { | ||
105 | printk(KERN_INFO "kmemcheck: Disabling fast string operations\n"); | ||
106 | |||
107 | misc_enable &= ~MSR_IA32_MISC_ENABLE_FAST_STRING; | ||
108 | wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); | ||
109 | } | ||
110 | } | ||
111 | #endif | ||
89 | } | 112 | } |
90 | 113 | ||
91 | #ifdef CONFIG_X86_32 | 114 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index 45004faf67ea..188a1ca5ad2b 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile | |||
@@ -1,11 +1,12 @@ | |||
1 | obj-y = mce.o therm_throt.o | 1 | obj-y = mce.o |
2 | 2 | ||
3 | obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o | 3 | obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o |
4 | obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o | 4 | obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o |
5 | obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o | 5 | obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o |
6 | obj-$(CONFIG_X86_MCE_P4THERMAL) += mce_intel.o | 6 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o |
7 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o mce_intel.o | 7 | obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o |
8 | obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o | ||
9 | obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o | 8 | obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o |
10 | obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o | 9 | obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o |
11 | obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o | 10 | obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o |
11 | |||
12 | obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o | ||
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c index 89e510424152..b945d5dbc609 100644 --- a/arch/x86/kernel/cpu/mcheck/k7.c +++ b/arch/x86/kernel/cpu/mcheck/k7.c | |||
@@ -10,10 +10,9 @@ | |||
10 | 10 | ||
11 | #include <asm/processor.h> | 11 | #include <asm/processor.h> |
12 | #include <asm/system.h> | 12 | #include <asm/system.h> |
13 | #include <asm/mce.h> | ||
13 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
14 | 15 | ||
15 | #include "mce.h" | ||
16 | |||
17 | /* Machine Check Handler For AMD Athlon/Duron: */ | 16 | /* Machine Check Handler For AMD Athlon/Duron: */ |
18 | static void k7_machine_check(struct pt_regs *regs, long error_code) | 17 | static void k7_machine_check(struct pt_regs *regs, long error_code) |
19 | { | 18 | { |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index fabba15e4558..284d1de968bc 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -44,7 +44,6 @@ | |||
44 | #include <asm/msr.h> | 44 | #include <asm/msr.h> |
45 | 45 | ||
46 | #include "mce-internal.h" | 46 | #include "mce-internal.h" |
47 | #include "mce.h" | ||
48 | 47 | ||
49 | /* Handle unconfigured int18 (should never happen) */ | 48 | /* Handle unconfigured int18 (should never happen) */ |
50 | static void unexpected_machine_check(struct pt_regs *regs, long error_code) | 49 | static void unexpected_machine_check(struct pt_regs *regs, long error_code) |
@@ -57,7 +56,7 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code) | |||
57 | void (*machine_check_vector)(struct pt_regs *, long error_code) = | 56 | void (*machine_check_vector)(struct pt_regs *, long error_code) = |
58 | unexpected_machine_check; | 57 | unexpected_machine_check; |
59 | 58 | ||
60 | int mce_disabled; | 59 | int mce_disabled __read_mostly; |
61 | 60 | ||
62 | #ifdef CONFIG_X86_NEW_MCE | 61 | #ifdef CONFIG_X86_NEW_MCE |
63 | 62 | ||
@@ -76,21 +75,22 @@ DEFINE_PER_CPU(unsigned, mce_exception_count); | |||
76 | * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors | 75 | * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors |
77 | * 3: never panic or SIGBUS, log all errors (for testing only) | 76 | * 3: never panic or SIGBUS, log all errors (for testing only) |
78 | */ | 77 | */ |
79 | static int tolerant = 1; | 78 | static int tolerant __read_mostly = 1; |
80 | static int banks; | 79 | static int banks __read_mostly; |
81 | static u64 *bank; | 80 | static u64 *bank __read_mostly; |
82 | static unsigned long notify_user; | 81 | static int rip_msr __read_mostly; |
83 | static int rip_msr; | 82 | static int mce_bootlog __read_mostly = -1; |
84 | static int mce_bootlog = -1; | 83 | static int monarch_timeout __read_mostly = -1; |
85 | static int monarch_timeout = -1; | 84 | static int mce_panic_timeout __read_mostly; |
86 | static int mce_panic_timeout; | 85 | static int mce_dont_log_ce __read_mostly; |
87 | static int mce_dont_log_ce; | 86 | int mce_cmci_disabled __read_mostly; |
88 | int mce_cmci_disabled; | 87 | int mce_ignore_ce __read_mostly; |
89 | int mce_ignore_ce; | 88 | int mce_ser __read_mostly; |
90 | int mce_ser; | 89 | |
91 | 90 | /* User mode helper program triggered by machine check event */ | |
92 | static char trigger[128]; | 91 | static unsigned long mce_need_notify; |
93 | static char *trigger_argv[2] = { trigger, NULL }; | 92 | static char mce_helper[128]; |
93 | static char *mce_helper_argv[2] = { mce_helper, NULL }; | ||
94 | 94 | ||
95 | static unsigned long dont_init_banks; | 95 | static unsigned long dont_init_banks; |
96 | 96 | ||
@@ -180,7 +180,7 @@ void mce_log(struct mce *mce) | |||
180 | wmb(); | 180 | wmb(); |
181 | 181 | ||
182 | mce->finished = 1; | 182 | mce->finished = 1; |
183 | set_bit(0, ¬ify_user); | 183 | set_bit(0, &mce_need_notify); |
184 | } | 184 | } |
185 | 185 | ||
186 | static void print_mce(struct mce *m) | 186 | static void print_mce(struct mce *m) |
@@ -691,18 +691,21 @@ static atomic_t global_nwo; | |||
691 | * in the entry order. | 691 | * in the entry order. |
692 | * TBD double check parallel CPU hotunplug | 692 | * TBD double check parallel CPU hotunplug |
693 | */ | 693 | */ |
694 | static int mce_start(int no_way_out, int *order) | 694 | static int mce_start(int *no_way_out) |
695 | { | 695 | { |
696 | int nwo; | 696 | int order; |
697 | int cpus = num_online_cpus(); | 697 | int cpus = num_online_cpus(); |
698 | u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; | 698 | u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; |
699 | 699 | ||
700 | if (!timeout) { | 700 | if (!timeout) |
701 | *order = -1; | 701 | return -1; |
702 | return no_way_out; | ||
703 | } | ||
704 | 702 | ||
705 | atomic_add(no_way_out, &global_nwo); | 703 | atomic_add(*no_way_out, &global_nwo); |
704 | /* | ||
705 | * global_nwo should be updated before mce_callin | ||
706 | */ | ||
707 | smp_wmb(); | ||
708 | order = atomic_add_return(1, &mce_callin); | ||
706 | 709 | ||
707 | /* | 710 | /* |
708 | * Wait for everyone. | 711 | * Wait for everyone. |
@@ -710,40 +713,43 @@ static int mce_start(int no_way_out, int *order) | |||
710 | while (atomic_read(&mce_callin) != cpus) { | 713 | while (atomic_read(&mce_callin) != cpus) { |
711 | if (mce_timed_out(&timeout)) { | 714 | if (mce_timed_out(&timeout)) { |
712 | atomic_set(&global_nwo, 0); | 715 | atomic_set(&global_nwo, 0); |
713 | *order = -1; | 716 | return -1; |
714 | return no_way_out; | ||
715 | } | 717 | } |
716 | ndelay(SPINUNIT); | 718 | ndelay(SPINUNIT); |
717 | } | 719 | } |
718 | 720 | ||
719 | /* | 721 | /* |
720 | * Cache the global no_way_out state. | 722 | * mce_callin should be read before global_nwo |
721 | */ | 723 | */ |
722 | nwo = atomic_read(&global_nwo); | 724 | smp_rmb(); |
723 | 725 | ||
724 | /* | 726 | if (order == 1) { |
725 | * Monarch starts executing now, the others wait. | 727 | /* |
726 | */ | 728 | * Monarch: Starts executing now, the others wait. |
727 | if (*order == 1) { | 729 | */ |
728 | atomic_set(&mce_executing, 1); | 730 | atomic_set(&mce_executing, 1); |
729 | return nwo; | 731 | } else { |
732 | /* | ||
733 | * Subject: Now start the scanning loop one by one in | ||
734 | * the original callin order. | ||
735 | * This way when there are any shared banks it will be | ||
736 | * only seen by one CPU before cleared, avoiding duplicates. | ||
737 | */ | ||
738 | while (atomic_read(&mce_executing) < order) { | ||
739 | if (mce_timed_out(&timeout)) { | ||
740 | atomic_set(&global_nwo, 0); | ||
741 | return -1; | ||
742 | } | ||
743 | ndelay(SPINUNIT); | ||
744 | } | ||
730 | } | 745 | } |
731 | 746 | ||
732 | /* | 747 | /* |
733 | * Now start the scanning loop one by one | 748 | * Cache the global no_way_out state. |
734 | * in the original callin order. | ||
735 | * This way when there are any shared banks it will | ||
736 | * be only seen by one CPU before cleared, avoiding duplicates. | ||
737 | */ | 749 | */ |
738 | while (atomic_read(&mce_executing) < *order) { | 750 | *no_way_out = atomic_read(&global_nwo); |
739 | if (mce_timed_out(&timeout)) { | 751 | |
740 | atomic_set(&global_nwo, 0); | 752 | return order; |
741 | *order = -1; | ||
742 | return no_way_out; | ||
743 | } | ||
744 | ndelay(SPINUNIT); | ||
745 | } | ||
746 | return nwo; | ||
747 | } | 753 | } |
748 | 754 | ||
749 | /* | 755 | /* |
@@ -863,7 +869,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
863 | * check handler. | 869 | * check handler. |
864 | */ | 870 | */ |
865 | int order; | 871 | int order; |
866 | |||
867 | /* | 872 | /* |
868 | * If no_way_out gets set, there is no safe way to recover from this | 873 | * If no_way_out gets set, there is no safe way to recover from this |
869 | * MCE. If tolerant is cranked up, we'll try anyway. | 874 | * MCE. If tolerant is cranked up, we'll try anyway. |
@@ -887,7 +892,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
887 | if (!banks) | 892 | if (!banks) |
888 | goto out; | 893 | goto out; |
889 | 894 | ||
890 | order = atomic_add_return(1, &mce_callin); | ||
891 | mce_setup(&m); | 895 | mce_setup(&m); |
892 | 896 | ||
893 | m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); | 897 | m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); |
@@ -909,7 +913,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
909 | * This way we don't report duplicated events on shared banks | 913 | * This way we don't report duplicated events on shared banks |
910 | * because the first one to see it will clear it. | 914 | * because the first one to see it will clear it. |
911 | */ | 915 | */ |
912 | no_way_out = mce_start(no_way_out, &order); | 916 | order = mce_start(&no_way_out); |
913 | for (i = 0; i < banks; i++) { | 917 | for (i = 0; i < banks; i++) { |
914 | __clear_bit(i, toclear); | 918 | __clear_bit(i, toclear); |
915 | if (!bank[i]) | 919 | if (!bank[i]) |
@@ -1118,7 +1122,7 @@ static void mcheck_timer(unsigned long data) | |||
1118 | 1122 | ||
1119 | static void mce_do_trigger(struct work_struct *work) | 1123 | static void mce_do_trigger(struct work_struct *work) |
1120 | { | 1124 | { |
1121 | call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); | 1125 | call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT); |
1122 | } | 1126 | } |
1123 | 1127 | ||
1124 | static DECLARE_WORK(mce_trigger_work, mce_do_trigger); | 1128 | static DECLARE_WORK(mce_trigger_work, mce_do_trigger); |
@@ -1135,7 +1139,7 @@ int mce_notify_irq(void) | |||
1135 | 1139 | ||
1136 | clear_thread_flag(TIF_MCE_NOTIFY); | 1140 | clear_thread_flag(TIF_MCE_NOTIFY); |
1137 | 1141 | ||
1138 | if (test_and_clear_bit(0, ¬ify_user)) { | 1142 | if (test_and_clear_bit(0, &mce_need_notify)) { |
1139 | wake_up_interruptible(&mce_wait); | 1143 | wake_up_interruptible(&mce_wait); |
1140 | 1144 | ||
1141 | /* | 1145 | /* |
@@ -1143,7 +1147,7 @@ int mce_notify_irq(void) | |||
1143 | * work_pending is always cleared before the function is | 1147 | * work_pending is always cleared before the function is |
1144 | * executed. | 1148 | * executed. |
1145 | */ | 1149 | */ |
1146 | if (trigger[0] && !work_pending(&mce_trigger_work)) | 1150 | if (mce_helper[0] && !work_pending(&mce_trigger_work)) |
1147 | schedule_work(&mce_trigger_work); | 1151 | schedule_work(&mce_trigger_work); |
1148 | 1152 | ||
1149 | if (__ratelimit(&ratelimit)) | 1153 | if (__ratelimit(&ratelimit)) |
@@ -1245,7 +1249,7 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c) | |||
1245 | * Various K7s with broken bank 0 around. Always disable | 1249 | * Various K7s with broken bank 0 around. Always disable |
1246 | * by default. | 1250 | * by default. |
1247 | */ | 1251 | */ |
1248 | if (c->x86 == 6) | 1252 | if (c->x86 == 6 && banks > 0) |
1249 | bank[0] = 0; | 1253 | bank[0] = 0; |
1250 | } | 1254 | } |
1251 | 1255 | ||
@@ -1282,8 +1286,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) | |||
1282 | return; | 1286 | return; |
1283 | switch (c->x86_vendor) { | 1287 | switch (c->x86_vendor) { |
1284 | case X86_VENDOR_INTEL: | 1288 | case X86_VENDOR_INTEL: |
1285 | if (mce_p5_enabled()) | 1289 | intel_p5_mcheck_init(c); |
1286 | intel_p5_mcheck_init(c); | ||
1287 | break; | 1290 | break; |
1288 | case X86_VENDOR_CENTAUR: | 1291 | case X86_VENDOR_CENTAUR: |
1289 | winchip_mcheck_init(c); | 1292 | winchip_mcheck_init(c); |
@@ -1609,8 +1612,9 @@ static int mce_resume(struct sys_device *dev) | |||
1609 | static void mce_cpu_restart(void *data) | 1612 | static void mce_cpu_restart(void *data) |
1610 | { | 1613 | { |
1611 | del_timer_sync(&__get_cpu_var(mce_timer)); | 1614 | del_timer_sync(&__get_cpu_var(mce_timer)); |
1612 | if (mce_available(¤t_cpu_data)) | 1615 | if (!mce_available(¤t_cpu_data)) |
1613 | mce_init(); | 1616 | return; |
1617 | mce_init(); | ||
1614 | mce_init_timer(); | 1618 | mce_init_timer(); |
1615 | } | 1619 | } |
1616 | 1620 | ||
@@ -1620,6 +1624,26 @@ static void mce_restart(void) | |||
1620 | on_each_cpu(mce_cpu_restart, NULL, 1); | 1624 | on_each_cpu(mce_cpu_restart, NULL, 1); |
1621 | } | 1625 | } |
1622 | 1626 | ||
1627 | /* Toggle features for corrected errors */ | ||
1628 | static void mce_disable_ce(void *all) | ||
1629 | { | ||
1630 | if (!mce_available(¤t_cpu_data)) | ||
1631 | return; | ||
1632 | if (all) | ||
1633 | del_timer_sync(&__get_cpu_var(mce_timer)); | ||
1634 | cmci_clear(); | ||
1635 | } | ||
1636 | |||
1637 | static void mce_enable_ce(void *all) | ||
1638 | { | ||
1639 | if (!mce_available(¤t_cpu_data)) | ||
1640 | return; | ||
1641 | cmci_reenable(); | ||
1642 | cmci_recheck(); | ||
1643 | if (all) | ||
1644 | mce_init_timer(); | ||
1645 | } | ||
1646 | |||
1623 | static struct sysdev_class mce_sysclass = { | 1647 | static struct sysdev_class mce_sysclass = { |
1624 | .suspend = mce_suspend, | 1648 | .suspend = mce_suspend, |
1625 | .shutdown = mce_shutdown, | 1649 | .shutdown = mce_shutdown, |
@@ -1659,9 +1683,9 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, | |||
1659 | static ssize_t | 1683 | static ssize_t |
1660 | show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) | 1684 | show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) |
1661 | { | 1685 | { |
1662 | strcpy(buf, trigger); | 1686 | strcpy(buf, mce_helper); |
1663 | strcat(buf, "\n"); | 1687 | strcat(buf, "\n"); |
1664 | return strlen(trigger) + 1; | 1688 | return strlen(mce_helper) + 1; |
1665 | } | 1689 | } |
1666 | 1690 | ||
1667 | static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, | 1691 | static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, |
@@ -1670,10 +1694,10 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, | |||
1670 | char *p; | 1694 | char *p; |
1671 | int len; | 1695 | int len; |
1672 | 1696 | ||
1673 | strncpy(trigger, buf, sizeof(trigger)); | 1697 | strncpy(mce_helper, buf, sizeof(mce_helper)); |
1674 | trigger[sizeof(trigger)-1] = 0; | 1698 | mce_helper[sizeof(mce_helper)-1] = 0; |
1675 | len = strlen(trigger); | 1699 | len = strlen(mce_helper); |
1676 | p = strchr(trigger, '\n'); | 1700 | p = strchr(mce_helper, '\n'); |
1677 | 1701 | ||
1678 | if (*p) | 1702 | if (*p) |
1679 | *p = 0; | 1703 | *p = 0; |
@@ -1681,6 +1705,52 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, | |||
1681 | return len; | 1705 | return len; |
1682 | } | 1706 | } |
1683 | 1707 | ||
1708 | static ssize_t set_ignore_ce(struct sys_device *s, | ||
1709 | struct sysdev_attribute *attr, | ||
1710 | const char *buf, size_t size) | ||
1711 | { | ||
1712 | u64 new; | ||
1713 | |||
1714 | if (strict_strtoull(buf, 0, &new) < 0) | ||
1715 | return -EINVAL; | ||
1716 | |||
1717 | if (mce_ignore_ce ^ !!new) { | ||
1718 | if (new) { | ||
1719 | /* disable ce features */ | ||
1720 | on_each_cpu(mce_disable_ce, (void *)1, 1); | ||
1721 | mce_ignore_ce = 1; | ||
1722 | } else { | ||
1723 | /* enable ce features */ | ||
1724 | mce_ignore_ce = 0; | ||
1725 | on_each_cpu(mce_enable_ce, (void *)1, 1); | ||
1726 | } | ||
1727 | } | ||
1728 | return size; | ||
1729 | } | ||
1730 | |||
1731 | static ssize_t set_cmci_disabled(struct sys_device *s, | ||
1732 | struct sysdev_attribute *attr, | ||
1733 | const char *buf, size_t size) | ||
1734 | { | ||
1735 | u64 new; | ||
1736 | |||
1737 | if (strict_strtoull(buf, 0, &new) < 0) | ||
1738 | return -EINVAL; | ||
1739 | |||
1740 | if (mce_cmci_disabled ^ !!new) { | ||
1741 | if (new) { | ||
1742 | /* disable cmci */ | ||
1743 | on_each_cpu(mce_disable_ce, NULL, 1); | ||
1744 | mce_cmci_disabled = 1; | ||
1745 | } else { | ||
1746 | /* enable cmci */ | ||
1747 | mce_cmci_disabled = 0; | ||
1748 | on_each_cpu(mce_enable_ce, NULL, 1); | ||
1749 | } | ||
1750 | } | ||
1751 | return size; | ||
1752 | } | ||
1753 | |||
1684 | static ssize_t store_int_with_restart(struct sys_device *s, | 1754 | static ssize_t store_int_with_restart(struct sys_device *s, |
1685 | struct sysdev_attribute *attr, | 1755 | struct sysdev_attribute *attr, |
1686 | const char *buf, size_t size) | 1756 | const char *buf, size_t size) |
@@ -1693,6 +1763,7 @@ static ssize_t store_int_with_restart(struct sys_device *s, | |||
1693 | static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); | 1763 | static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); |
1694 | static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); | 1764 | static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); |
1695 | static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout); | 1765 | static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout); |
1766 | static SYSDEV_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce); | ||
1696 | 1767 | ||
1697 | static struct sysdev_ext_attribute attr_check_interval = { | 1768 | static struct sysdev_ext_attribute attr_check_interval = { |
1698 | _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int, | 1769 | _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int, |
@@ -1700,9 +1771,24 @@ static struct sysdev_ext_attribute attr_check_interval = { | |||
1700 | &check_interval | 1771 | &check_interval |
1701 | }; | 1772 | }; |
1702 | 1773 | ||
1774 | static struct sysdev_ext_attribute attr_ignore_ce = { | ||
1775 | _SYSDEV_ATTR(ignore_ce, 0644, sysdev_show_int, set_ignore_ce), | ||
1776 | &mce_ignore_ce | ||
1777 | }; | ||
1778 | |||
1779 | static struct sysdev_ext_attribute attr_cmci_disabled = { | ||
1780 | _SYSDEV_ATTR(cmci_disabled, 0644, sysdev_show_int, set_cmci_disabled), | ||
1781 | &mce_cmci_disabled | ||
1782 | }; | ||
1783 | |||
1703 | static struct sysdev_attribute *mce_attrs[] = { | 1784 | static struct sysdev_attribute *mce_attrs[] = { |
1704 | &attr_tolerant.attr, &attr_check_interval.attr, &attr_trigger, | 1785 | &attr_tolerant.attr, |
1786 | &attr_check_interval.attr, | ||
1787 | &attr_trigger, | ||
1705 | &attr_monarch_timeout.attr, | 1788 | &attr_monarch_timeout.attr, |
1789 | &attr_dont_log_ce.attr, | ||
1790 | &attr_ignore_ce.attr, | ||
1791 | &attr_cmci_disabled.attr, | ||
1706 | NULL | 1792 | NULL |
1707 | }; | 1793 | }; |
1708 | 1794 | ||
@@ -1712,7 +1798,7 @@ static cpumask_var_t mce_dev_initialized; | |||
1712 | static __cpuinit int mce_create_device(unsigned int cpu) | 1798 | static __cpuinit int mce_create_device(unsigned int cpu) |
1713 | { | 1799 | { |
1714 | int err; | 1800 | int err; |
1715 | int i; | 1801 | int i, j; |
1716 | 1802 | ||
1717 | if (!mce_available(&boot_cpu_data)) | 1803 | if (!mce_available(&boot_cpu_data)) |
1718 | return -EIO; | 1804 | return -EIO; |
@@ -1730,9 +1816,9 @@ static __cpuinit int mce_create_device(unsigned int cpu) | |||
1730 | if (err) | 1816 | if (err) |
1731 | goto error; | 1817 | goto error; |
1732 | } | 1818 | } |
1733 | for (i = 0; i < banks; i++) { | 1819 | for (j = 0; j < banks; j++) { |
1734 | err = sysdev_create_file(&per_cpu(mce_dev, cpu), | 1820 | err = sysdev_create_file(&per_cpu(mce_dev, cpu), |
1735 | &bank_attrs[i]); | 1821 | &bank_attrs[j]); |
1736 | if (err) | 1822 | if (err) |
1737 | goto error2; | 1823 | goto error2; |
1738 | } | 1824 | } |
@@ -1740,8 +1826,8 @@ static __cpuinit int mce_create_device(unsigned int cpu) | |||
1740 | 1826 | ||
1741 | return 0; | 1827 | return 0; |
1742 | error2: | 1828 | error2: |
1743 | while (--i >= 0) | 1829 | while (--j >= 0) |
1744 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]); | 1830 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[j]); |
1745 | error: | 1831 | error: |
1746 | while (--i >= 0) | 1832 | while (--i >= 0) |
1747 | sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); | 1833 | sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); |
@@ -1883,7 +1969,7 @@ static __init int mce_init_device(void) | |||
1883 | if (!mce_available(&boot_cpu_data)) | 1969 | if (!mce_available(&boot_cpu_data)) |
1884 | return -EIO; | 1970 | return -EIO; |
1885 | 1971 | ||
1886 | alloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); | 1972 | zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); |
1887 | 1973 | ||
1888 | err = mce_init_banks(); | 1974 | err = mce_init_banks(); |
1889 | if (err) | 1975 | if (err) |
@@ -1915,7 +2001,7 @@ EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ | |||
1915 | /* This has to be run for each processor */ | 2001 | /* This has to be run for each processor */ |
1916 | void mcheck_init(struct cpuinfo_x86 *c) | 2002 | void mcheck_init(struct cpuinfo_x86 *c) |
1917 | { | 2003 | { |
1918 | if (mce_disabled == 1) | 2004 | if (mce_disabled) |
1919 | return; | 2005 | return; |
1920 | 2006 | ||
1921 | switch (c->x86_vendor) { | 2007 | switch (c->x86_vendor) { |
@@ -1945,10 +2031,9 @@ void mcheck_init(struct cpuinfo_x86 *c) | |||
1945 | 2031 | ||
1946 | static int __init mcheck_enable(char *str) | 2032 | static int __init mcheck_enable(char *str) |
1947 | { | 2033 | { |
1948 | mce_disabled = -1; | 2034 | mce_p5_enabled = 1; |
1949 | return 1; | 2035 | return 1; |
1950 | } | 2036 | } |
1951 | |||
1952 | __setup("mce", mcheck_enable); | 2037 | __setup("mce", mcheck_enable); |
1953 | 2038 | ||
1954 | #endif /* CONFIG_X86_OLD_MCE */ | 2039 | #endif /* CONFIG_X86_OLD_MCE */ |
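The mce_start() rework above adds an smp_wmb()/smp_rmb() pair so that global_nwo is published before a CPU announces itself through mce_callin, and is re-read only after all CPUs have called in. A stripped-down sketch of just that ordering, with made-up function names (mce_announce, mce_read_nwo) and without the timeout handling of the real code:

/* Illustrative sketch of the barrier pairing only -- not the MCE code. */
#include <asm/atomic.h>
#include <asm/system.h>
#include <asm/processor.h>

static atomic_t global_nwo;
static atomic_t mce_callin;

static int mce_announce(int no_way_out)
{
	atomic_add(no_way_out, &global_nwo);
	smp_wmb();	/* make global_nwo visible before bumping mce_callin */
	return atomic_add_return(1, &mce_callin);	/* this CPU's order */
}

static int mce_read_nwo(int cpus)
{
	while (atomic_read(&mce_callin) != cpus)
		cpu_relax();	/* wait for every CPU to announce itself */
	smp_rmb();	/* pairs with the smp_wmb() in mce_announce() */
	return atomic_read(&global_nwo);
}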
diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h deleted file mode 100644 index 84a552b458c8..000000000000 --- a/arch/x86/kernel/cpu/mcheck/mce.h +++ /dev/null | |||
@@ -1,38 +0,0 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <asm/mce.h> | ||
3 | |||
4 | #ifdef CONFIG_X86_OLD_MCE | ||
5 | void amd_mcheck_init(struct cpuinfo_x86 *c); | ||
6 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c); | ||
7 | void intel_p6_mcheck_init(struct cpuinfo_x86 *c); | ||
8 | #endif | ||
9 | |||
10 | #ifdef CONFIG_X86_ANCIENT_MCE | ||
11 | void intel_p5_mcheck_init(struct cpuinfo_x86 *c); | ||
12 | void winchip_mcheck_init(struct cpuinfo_x86 *c); | ||
13 | extern int mce_p5_enable; | ||
14 | static inline int mce_p5_enabled(void) { return mce_p5_enable; } | ||
15 | static inline void enable_p5_mce(void) { mce_p5_enable = 1; } | ||
16 | #else | ||
17 | static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {} | ||
18 | static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} | ||
19 | static inline int mce_p5_enabled(void) { return 0; } | ||
20 | static inline void enable_p5_mce(void) { } | ||
21 | #endif | ||
22 | |||
23 | /* Call the installed machine check handler for this CPU setup. */ | ||
24 | extern void (*machine_check_vector)(struct pt_regs *, long error_code); | ||
25 | |||
26 | #ifdef CONFIG_X86_OLD_MCE | ||
27 | |||
28 | extern int nr_mce_banks; | ||
29 | |||
30 | void intel_set_thermal_handler(void); | ||
31 | |||
32 | #else | ||
33 | |||
34 | static inline void intel_set_thermal_handler(void) { } | ||
35 | |||
36 | #endif | ||
37 | |||
38 | void intel_init_thermal(struct cpuinfo_x86 *c); | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index ddae21620bda..ddae21620bda 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 2b011d2d8579..e1acec0f7a32 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
@@ -1,74 +1,226 @@ | |||
1 | /* | 1 | /* |
2 | * Common code for Intel machine checks | 2 | * Intel specific MCE features. |
3 | * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca> | ||
4 | * Copyright (C) 2008, 2009 Intel Corporation | ||
5 | * Author: Andi Kleen | ||
3 | */ | 6 | */ |
4 | #include <linux/interrupt.h> | ||
5 | #include <linux/kernel.h> | ||
6 | #include <linux/types.h> | ||
7 | #include <linux/init.h> | ||
8 | #include <linux/smp.h> | ||
9 | 7 | ||
10 | #include <asm/therm_throt.h> | 8 | #include <linux/init.h> |
11 | #include <asm/processor.h> | 9 | #include <linux/interrupt.h> |
12 | #include <asm/system.h> | 10 | #include <linux/percpu.h> |
13 | #include <asm/apic.h> | 11 | #include <asm/apic.h> |
12 | #include <asm/processor.h> | ||
14 | #include <asm/msr.h> | 13 | #include <asm/msr.h> |
14 | #include <asm/mce.h> | ||
15 | |||
16 | /* | ||
17 | * Support for Intel Correct Machine Check Interrupts. This allows | ||
18 | * the CPU to raise an interrupt when a corrected machine check happened. | ||
19 | * Normally we pick those up using a regular polling timer. | ||
20 | * Also supports reliable discovery of shared banks. | ||
21 | */ | ||
15 | 22 | ||
16 | #include "mce.h" | 23 | static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); |
17 | 24 | ||
18 | void intel_init_thermal(struct cpuinfo_x86 *c) | 25 | /* |
26 | * cmci_discover_lock protects against parallel discovery attempts | ||
27 | * which could race against each other. | ||
28 | */ | ||
29 | static DEFINE_SPINLOCK(cmci_discover_lock); | ||
30 | |||
31 | #define CMCI_THRESHOLD 1 | ||
32 | |||
33 | static int cmci_supported(int *banks) | ||
19 | { | 34 | { |
20 | unsigned int cpu = smp_processor_id(); | 35 | u64 cap; |
21 | int tm2 = 0; | ||
22 | u32 l, h; | ||
23 | 36 | ||
24 | /* Thermal monitoring depends on ACPI and clock modulation*/ | 37 | if (mce_cmci_disabled || mce_ignore_ce) |
25 | if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) | 38 | return 0; |
26 | return; | ||
27 | 39 | ||
28 | /* | 40 | /* |
29 | * First check if its enabled already, in which case there might | 41 | * Vendor check is not strictly needed, but the initial |
30 | * be some SMM goo which handles it, so we can't even put a handler | 42 | * initialization is vendor keyed and this |
31 | * since it might be delivered via SMI already: | 43 | * makes sure none of the backdoors are entered otherwise. |
32 | */ | 44 | */ |
33 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | 45 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) |
34 | h = apic_read(APIC_LVTTHMR); | 46 | return 0; |
35 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { | 47 | if (!cpu_has_apic || lapic_get_maxlvt() < 6) |
36 | printk(KERN_DEBUG | 48 | return 0; |
37 | "CPU%d: Thermal monitoring handled by SMI\n", cpu); | 49 | rdmsrl(MSR_IA32_MCG_CAP, cap); |
38 | return; | 50 | *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff); |
51 | return !!(cap & MCG_CMCI_P); | ||
52 | } | ||
53 | |||
54 | /* | ||
55 | * The interrupt handler. This is called on every event. | ||
56 | * Just call the poller directly to log any events. | ||
57 | * This could in theory increase the threshold under high load, | ||
58 | * but doesn't for now. | ||
59 | */ | ||
60 | static void intel_threshold_interrupt(void) | ||
61 | { | ||
62 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); | ||
63 | mce_notify_irq(); | ||
64 | } | ||
65 | |||
66 | static void print_update(char *type, int *hdr, int num) | ||
67 | { | ||
68 | if (*hdr == 0) | ||
69 | printk(KERN_INFO "CPU %d MCA banks", smp_processor_id()); | ||
70 | *hdr = 1; | ||
71 | printk(KERN_CONT " %s:%d", type, num); | ||
72 | } | ||
73 | |||
74 | /* | ||
75 | * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks | ||
76 | * on this CPU. Use the algorithm recommended in the SDM to discover shared | ||
77 | * banks. | ||
78 | */ | ||
79 | static void cmci_discover(int banks, int boot) | ||
80 | { | ||
81 | unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); | ||
82 | unsigned long flags; | ||
83 | int hdr = 0; | ||
84 | int i; | ||
85 | |||
86 | spin_lock_irqsave(&cmci_discover_lock, flags); | ||
87 | for (i = 0; i < banks; i++) { | ||
88 | u64 val; | ||
89 | |||
90 | if (test_bit(i, owned)) | ||
91 | continue; | ||
92 | |||
93 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
94 | |||
95 | /* Already owned by someone else? */ | ||
96 | if (val & CMCI_EN) { | ||
97 | if (test_and_clear_bit(i, owned) || boot) | ||
98 | print_update("SHD", &hdr, i); | ||
99 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | ||
100 | continue; | ||
101 | } | ||
102 | |||
103 | val |= CMCI_EN | CMCI_THRESHOLD; | ||
104 | wrmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
105 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
106 | |||
107 | /* Did the enable bit stick? -- the bank supports CMCI */ | ||
108 | if (val & CMCI_EN) { | ||
109 | if (!test_and_set_bit(i, owned) || boot) | ||
110 | print_update("CMCI", &hdr, i); | ||
111 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | ||
112 | } else { | ||
113 | WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); | ||
114 | } | ||
39 | } | 115 | } |
116 | spin_unlock_irqrestore(&cmci_discover_lock, flags); | ||
117 | if (hdr) | ||
118 | printk(KERN_CONT "\n"); | ||
119 | } | ||
120 | |||
121 | /* | ||
122 | * Just in case we missed an event during initialization check | ||
123 | * all the CMCI owned banks. | ||
124 | */ | ||
125 | void cmci_recheck(void) | ||
126 | { | ||
127 | unsigned long flags; | ||
128 | int banks; | ||
129 | |||
130 | if (!mce_available(¤t_cpu_data) || !cmci_supported(&banks)) | ||
131 | return; | ||
132 | local_irq_save(flags); | ||
133 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); | ||
134 | local_irq_restore(flags); | ||
135 | } | ||
40 | 136 | ||
41 | if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) | 137 | /* |
42 | tm2 = 1; | 138 | * Disable CMCI on this CPU for all banks it owns when it goes down. |
139 | * This allows other CPUs to claim the banks on rediscovery. | ||
140 | */ | ||
141 | void cmci_clear(void) | ||
142 | { | ||
143 | unsigned long flags; | ||
144 | int i; | ||
145 | int banks; | ||
146 | u64 val; | ||
43 | 147 | ||
44 | /* Check whether a vector already exists */ | 148 | if (!cmci_supported(&banks)) |
45 | if (h & APIC_VECTOR_MASK) { | ||
46 | printk(KERN_DEBUG | ||
47 | "CPU%d: Thermal LVT vector (%#x) already installed\n", | ||
48 | cpu, (h & APIC_VECTOR_MASK)); | ||
49 | return; | 149 | return; |
150 | spin_lock_irqsave(&cmci_discover_lock, flags); | ||
151 | for (i = 0; i < banks; i++) { | ||
152 | if (!test_bit(i, __get_cpu_var(mce_banks_owned))) | ||
153 | continue; | ||
154 | /* Disable CMCI */ | ||
155 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
156 | val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); | ||
157 | wrmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
158 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); | ||
50 | } | 159 | } |
160 | spin_unlock_irqrestore(&cmci_discover_lock, flags); | ||
161 | } | ||
162 | |||
163 | /* | ||
164 | * After a CPU went down cycle through all the others and rediscover | ||
165 | * Must run in process context. | ||
166 | */ | ||
167 | void cmci_rediscover(int dying) | ||
168 | { | ||
169 | int banks; | ||
170 | int cpu; | ||
171 | cpumask_var_t old; | ||
172 | |||
173 | if (!cmci_supported(&banks)) | ||
174 | return; | ||
175 | if (!alloc_cpumask_var(&old, GFP_KERNEL)) | ||
176 | return; | ||
177 | cpumask_copy(old, &current->cpus_allowed); | ||
51 | 178 | ||
52 | /* We'll mask the thermal vector in the lapic till we're ready: */ | 179 | for_each_online_cpu(cpu) { |
53 | h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; | 180 | if (cpu == dying) |
54 | apic_write(APIC_LVTTHMR, h); | 181 | continue; |
182 | if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) | ||
183 | continue; | ||
184 | /* Recheck banks in case CPUs don't all have the same */ | ||
185 | if (cmci_supported(&banks)) | ||
186 | cmci_discover(banks, 0); | ||
187 | } | ||
55 | 188 | ||
56 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); | 189 | set_cpus_allowed_ptr(current, old); |
57 | wrmsr(MSR_IA32_THERM_INTERRUPT, | 190 | free_cpumask_var(old); |
58 | l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); | 191 | } |
59 | 192 | ||
60 | intel_set_thermal_handler(); | 193 | /* |
194 | * Reenable CMCI on this CPU in case a CPU down failed. | ||
195 | */ | ||
196 | void cmci_reenable(void) | ||
197 | { | ||
198 | int banks; | ||
199 | if (cmci_supported(&banks)) | ||
200 | cmci_discover(banks, 0); | ||
201 | } | ||
61 | 202 | ||
62 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | 203 | static void intel_init_cmci(void) |
63 | wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); | 204 | { |
205 | int banks; | ||
64 | 206 | ||
65 | /* Unmask the thermal vector: */ | 207 | if (!cmci_supported(&banks)) |
66 | l = apic_read(APIC_LVTTHMR); | 208 | return; |
67 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | ||
68 | 209 | ||
69 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", | 210 | mce_threshold_vector = intel_threshold_interrupt; |
70 | cpu, tm2 ? "TM2" : "TM1"); | 211 | cmci_discover(banks, 1); |
212 | /* | ||
213 | * For CPU #0 this runs with the APIC still disabled, but that's | ||
214 | * ok because only the vector is set up. We still do another | ||
215 | * check of the banks later for CPU #0 just to make sure | ||
216 | * we don't miss any events. | ||
217 | */ | ||
218 | apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED); | ||
219 | cmci_recheck(); | ||
220 | } | ||
71 | 221 | ||
72 | /* enable thermal throttle processing */ | 222 | void mce_intel_feature_init(struct cpuinfo_x86 *c) |
73 | atomic_set(&therm_throt_en, 1); | 223 | { |
224 | intel_init_thermal(c); | ||
225 | intel_init_cmci(); | ||
74 | } | 226 | } |
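The cmci_discover() code added above follows a simple claim protocol for shared banks: a bank whose enable bit in its CTL2 MSR is already set is treated as owned by another CPU ("SHD"), otherwise the CPU tries to set the bit and checks whether it sticks. Below is a rough user-space toy model of that protocol, not kernel code; the bit position, the simulated register and all names are illustrative assumptions, and a CMCI-capable bank is assumed to always accept the write.

#include <stdbool.h>
#include <stdio.h>

#define CMCI_EN_BIT (1ULL << 30)	/* illustrative stand-in for the CTL2 enable bit */

static unsigned long long shared_ctl2;	/* one simulated, shared IA32_MCi_CTL2 register */

/* Returns true if this "CPU" ends up owning the bank. */
static bool claim_bank(int cpu)
{
	if (shared_ctl2 & CMCI_EN_BIT) {
		/* Enable bit already set: some other CPU owns the shared bank. */
		printf("cpu%d: bank is shared, owned elsewhere\n", cpu);
		return false;
	}

	shared_ctl2 |= CMCI_EN_BIT;		/* try to claim the bank */

	if (shared_ctl2 & CMCI_EN_BIT) {
		/* The enable bit stuck: this CPU now owns the bank. */
		printf("cpu%d: claimed the bank for CMCI\n", cpu);
		return true;
	}

	/* The bit did not stick: the bank has no CMCI support, keep polling it. */
	printf("cpu%d: bank does not support CMCI\n", cpu);
	return false;
}

int main(void)
{
	claim_bank(0);	/* the first CPU to try wins the shared bank */
	claim_bank(1);	/* later CPUs see it as already owned */
	return 0;
}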
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c deleted file mode 100644 index f2ef6952c400..000000000000 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ /dev/null | |||
@@ -1,248 +0,0 @@ | |||
1 | /* | ||
2 | * Intel specific MCE features. | ||
3 | * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca> | ||
4 | * Copyright (C) 2008, 2009 Intel Corporation | ||
5 | * Author: Andi Kleen | ||
6 | */ | ||
7 | |||
8 | #include <linux/init.h> | ||
9 | #include <linux/interrupt.h> | ||
10 | #include <linux/percpu.h> | ||
11 | #include <asm/processor.h> | ||
12 | #include <asm/apic.h> | ||
13 | #include <asm/msr.h> | ||
14 | #include <asm/mce.h> | ||
15 | #include <asm/hw_irq.h> | ||
16 | #include <asm/idle.h> | ||
17 | #include <asm/therm_throt.h> | ||
18 | |||
19 | #include "mce.h" | ||
20 | |||
21 | asmlinkage void smp_thermal_interrupt(void) | ||
22 | { | ||
23 | __u64 msr_val; | ||
24 | |||
25 | ack_APIC_irq(); | ||
26 | |||
27 | exit_idle(); | ||
28 | irq_enter(); | ||
29 | |||
30 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | ||
31 | if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT)) | ||
32 | mce_log_therm_throt_event(msr_val); | ||
33 | |||
34 | inc_irq_stat(irq_thermal_count); | ||
35 | irq_exit(); | ||
36 | } | ||
37 | |||
38 | /* | ||
39 | * Support for Intel Correct Machine Check Interrupts. This allows | ||
40 | * the CPU to raise an interrupt when a corrected machine check happened. | ||
41 | * Normally we pick those up using a regular polling timer. | ||
42 | * Also supports reliable discovery of shared banks. | ||
43 | */ | ||
44 | |||
45 | static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); | ||
46 | |||
47 | /* | ||
48 | * cmci_discover_lock protects against parallel discovery attempts | ||
49 | * which could race against each other. | ||
50 | */ | ||
51 | static DEFINE_SPINLOCK(cmci_discover_lock); | ||
52 | |||
53 | #define CMCI_THRESHOLD 1 | ||
54 | |||
55 | static int cmci_supported(int *banks) | ||
56 | { | ||
57 | u64 cap; | ||
58 | |||
59 | if (mce_cmci_disabled || mce_ignore_ce) | ||
60 | return 0; | ||
61 | |||
62 | /* | ||
63 | * Vendor check is not strictly needed, but the initial | ||
64 | * initialization is vendor keyed and this | ||
65 | * makes sure none of the backdoors are entered otherwise. | ||
66 | */ | ||
67 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) | ||
68 | return 0; | ||
69 | if (!cpu_has_apic || lapic_get_maxlvt() < 6) | ||
70 | return 0; | ||
71 | rdmsrl(MSR_IA32_MCG_CAP, cap); | ||
72 | *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff); | ||
73 | return !!(cap & MCG_CMCI_P); | ||
74 | } | ||
75 | |||
76 | /* | ||
77 | * The interrupt handler. This is called on every event. | ||
78 | * Just call the poller directly to log any events. | ||
79 | * This could in theory increase the threshold under high load, | ||
80 | * but doesn't for now. | ||
81 | */ | ||
82 | static void intel_threshold_interrupt(void) | ||
83 | { | ||
84 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); | ||
85 | mce_notify_irq(); | ||
86 | } | ||
87 | |||
88 | static void print_update(char *type, int *hdr, int num) | ||
89 | { | ||
90 | if (*hdr == 0) | ||
91 | printk(KERN_INFO "CPU %d MCA banks", smp_processor_id()); | ||
92 | *hdr = 1; | ||
93 | printk(KERN_CONT " %s:%d", type, num); | ||
94 | } | ||
95 | |||
96 | /* | ||
97 | * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks | ||
98 | * on this CPU. Use the algorithm recommended in the SDM to discover shared | ||
99 | * banks. | ||
100 | */ | ||
101 | static void cmci_discover(int banks, int boot) | ||
102 | { | ||
103 | unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); | ||
104 | unsigned long flags; | ||
105 | int hdr = 0; | ||
106 | int i; | ||
107 | |||
108 | spin_lock_irqsave(&cmci_discover_lock, flags); | ||
109 | for (i = 0; i < banks; i++) { | ||
110 | u64 val; | ||
111 | |||
112 | if (test_bit(i, owned)) | ||
113 | continue; | ||
114 | |||
115 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
116 | |||
117 | /* Already owned by someone else? */ | ||
118 | if (val & CMCI_EN) { | ||
119 | if (test_and_clear_bit(i, owned) || boot) | ||
120 | print_update("SHD", &hdr, i); | ||
121 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | ||
122 | continue; | ||
123 | } | ||
124 | |||
125 | val |= CMCI_EN | CMCI_THRESHOLD; | ||
126 | wrmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
127 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
128 | |||
129 | /* Did the enable bit stick? -- the bank supports CMCI */ | ||
130 | if (val & CMCI_EN) { | ||
131 | if (!test_and_set_bit(i, owned) || boot) | ||
132 | print_update("CMCI", &hdr, i); | ||
133 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | ||
134 | } else { | ||
135 | WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); | ||
136 | } | ||
137 | } | ||
138 | spin_unlock_irqrestore(&cmci_discover_lock, flags); | ||
139 | if (hdr) | ||
140 | printk(KERN_CONT "\n"); | ||
141 | } | ||
142 | |||
143 | /* | ||
144 | * Just in case we missed an event during initialization check | ||
145 | * all the CMCI owned banks. | ||
146 | */ | ||
147 | void cmci_recheck(void) | ||
148 | { | ||
149 | unsigned long flags; | ||
150 | int banks; | ||
151 | |||
152 | if (!mce_available(&current_cpu_data) || !cmci_supported(&banks)) | ||
153 | return; | ||
154 | local_irq_save(flags); | ||
155 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); | ||
156 | local_irq_restore(flags); | ||
157 | } | ||
158 | |||
159 | /* | ||
160 | * Disable CMCI on this CPU for all banks it owns when it goes down. | ||
161 | * This allows other CPUs to claim the banks on rediscovery. | ||
162 | */ | ||
163 | void cmci_clear(void) | ||
164 | { | ||
165 | unsigned long flags; | ||
166 | int i; | ||
167 | int banks; | ||
168 | u64 val; | ||
169 | |||
170 | if (!cmci_supported(&banks)) | ||
171 | return; | ||
172 | spin_lock_irqsave(&cmci_discover_lock, flags); | ||
173 | for (i = 0; i < banks; i++) { | ||
174 | if (!test_bit(i, __get_cpu_var(mce_banks_owned))) | ||
175 | continue; | ||
176 | /* Disable CMCI */ | ||
177 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
178 | val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); | ||
179 | wrmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
180 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); | ||
181 | } | ||
182 | spin_unlock_irqrestore(&cmci_discover_lock, flags); | ||
183 | } | ||
184 | |||
185 | /* | ||
186 | * After a CPU went down cycle through all the others and rediscover | ||
187 | * Must run in process context. | ||
188 | */ | ||
189 | void cmci_rediscover(int dying) | ||
190 | { | ||
191 | int banks; | ||
192 | int cpu; | ||
193 | cpumask_var_t old; | ||
194 | |||
195 | if (!cmci_supported(&banks)) | ||
196 | return; | ||
197 | if (!alloc_cpumask_var(&old, GFP_KERNEL)) | ||
198 | return; | ||
199 | cpumask_copy(old, &current->cpus_allowed); | ||
200 | |||
201 | for_each_online_cpu(cpu) { | ||
202 | if (cpu == dying) | ||
203 | continue; | ||
204 | if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) | ||
205 | continue; | ||
206 | /* Recheck banks in case CPUs don't all have the same */ | ||
207 | if (cmci_supported(&banks)) | ||
208 | cmci_discover(banks, 0); | ||
209 | } | ||
210 | |||
211 | set_cpus_allowed_ptr(current, old); | ||
212 | free_cpumask_var(old); | ||
213 | } | ||
214 | |||
215 | /* | ||
216 | * Reenable CMCI on this CPU in case a CPU down failed. | ||
217 | */ | ||
218 | void cmci_reenable(void) | ||
219 | { | ||
220 | int banks; | ||
221 | if (cmci_supported(&banks)) | ||
222 | cmci_discover(banks, 0); | ||
223 | } | ||
224 | |||
225 | static void intel_init_cmci(void) | ||
226 | { | ||
227 | int banks; | ||
228 | |||
229 | if (!cmci_supported(&banks)) | ||
230 | return; | ||
231 | |||
232 | mce_threshold_vector = intel_threshold_interrupt; | ||
233 | cmci_discover(banks, 1); | ||
234 | /* | ||
235 | * For CPU #0 this runs with still disabled APIC, but that's | ||
236 | * ok because only the vector is set up. We still do another | ||
237 | * check for the banks later for CPU #0 just to make sure | ||
238 | * to not miss any events. | ||
239 | */ | ||
240 | apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED); | ||
241 | cmci_recheck(); | ||
242 | } | ||
243 | |||
244 | void mce_intel_feature_init(struct cpuinfo_x86 *c) | ||
245 | { | ||
246 | intel_init_thermal(c); | ||
247 | intel_init_cmci(); | ||
248 | } | ||
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c index 70b710420f74..f5f2d6f71fb6 100644 --- a/arch/x86/kernel/cpu/mcheck/non-fatal.c +++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c | |||
@@ -17,10 +17,9 @@ | |||
17 | 17 | ||
18 | #include <asm/processor.h> | 18 | #include <asm/processor.h> |
19 | #include <asm/system.h> | 19 | #include <asm/system.h> |
20 | #include <asm/mce.h> | ||
20 | #include <asm/msr.h> | 21 | #include <asm/msr.h> |
21 | 22 | ||
22 | #include "mce.h" | ||
23 | |||
24 | static int firstbank; | 23 | static int firstbank; |
25 | 24 | ||
26 | #define MCE_RATE (15*HZ) /* timer rate is 15s */ | 25 | #define MCE_RATE (15*HZ) /* timer rate is 15s */ |
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index 82cee108a2d3..4482aea9aa2e 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c | |||
@@ -1,21 +1,15 @@ | |||
1 | /* | 1 | /* |
2 | * P4 specific Machine Check Exception Reporting | 2 | * P4 specific Machine Check Exception Reporting |
3 | */ | 3 | */ |
4 | |||
5 | #include <linux/interrupt.h> | ||
6 | #include <linux/kernel.h> | 4 | #include <linux/kernel.h> |
7 | #include <linux/types.h> | 5 | #include <linux/types.h> |
8 | #include <linux/init.h> | 6 | #include <linux/init.h> |
9 | #include <linux/smp.h> | 7 | #include <linux/smp.h> |
10 | 8 | ||
11 | #include <asm/therm_throt.h> | ||
12 | #include <asm/processor.h> | 9 | #include <asm/processor.h> |
13 | #include <asm/system.h> | 10 | #include <asm/mce.h> |
14 | #include <asm/apic.h> | ||
15 | #include <asm/msr.h> | 11 | #include <asm/msr.h> |
16 | 12 | ||
17 | #include "mce.h" | ||
18 | |||
19 | /* as supported by the P4/Xeon family */ | 13 | /* as supported by the P4/Xeon family */ |
20 | struct intel_mce_extended_msrs { | 14 | struct intel_mce_extended_msrs { |
21 | u32 eax; | 15 | u32 eax; |
@@ -33,46 +27,6 @@ struct intel_mce_extended_msrs { | |||
33 | 27 | ||
34 | static int mce_num_extended_msrs; | 28 | static int mce_num_extended_msrs; |
35 | 29 | ||
36 | |||
37 | #ifdef CONFIG_X86_MCE_P4THERMAL | ||
38 | |||
39 | static void unexpected_thermal_interrupt(struct pt_regs *regs) | ||
40 | { | ||
41 | printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", | ||
42 | smp_processor_id()); | ||
43 | add_taint(TAINT_MACHINE_CHECK); | ||
44 | } | ||
45 | |||
46 | /* P4/Xeon Thermal transition interrupt handler: */ | ||
47 | static void intel_thermal_interrupt(struct pt_regs *regs) | ||
48 | { | ||
49 | __u64 msr_val; | ||
50 | |||
51 | ack_APIC_irq(); | ||
52 | |||
53 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | ||
54 | therm_throt_process(msr_val & THERM_STATUS_PROCHOT); | ||
55 | } | ||
56 | |||
57 | /* Thermal interrupt handler for this CPU setup: */ | ||
58 | static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = | ||
59 | unexpected_thermal_interrupt; | ||
60 | |||
61 | void smp_thermal_interrupt(struct pt_regs *regs) | ||
62 | { | ||
63 | irq_enter(); | ||
64 | vendor_thermal_interrupt(regs); | ||
65 | __get_cpu_var(irq_stat).irq_thermal_count++; | ||
66 | irq_exit(); | ||
67 | } | ||
68 | |||
69 | void intel_set_thermal_handler(void) | ||
70 | { | ||
71 | vendor_thermal_interrupt = intel_thermal_interrupt; | ||
72 | } | ||
73 | |||
74 | #endif /* CONFIG_X86_MCE_P4THERMAL */ | ||
75 | |||
76 | /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ | 30 | /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ |
77 | static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) | 31 | static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) |
78 | { | 32 | { |
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index 015f481ab1b0..5c0e6533d9bc 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c | |||
@@ -10,12 +10,11 @@ | |||
10 | 10 | ||
11 | #include <asm/processor.h> | 11 | #include <asm/processor.h> |
12 | #include <asm/system.h> | 12 | #include <asm/system.h> |
13 | #include <asm/mce.h> | ||
13 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
14 | 15 | ||
15 | #include "mce.h" | ||
16 | |||
17 | /* By default disabled */ | 16 | /* By default disabled */ |
18 | int mce_p5_enable; | 17 | int mce_p5_enabled __read_mostly; |
19 | 18 | ||
20 | /* Machine check handler for Pentium class Intel CPUs: */ | 19 | /* Machine check handler for Pentium class Intel CPUs: */ |
21 | static void pentium_machine_check(struct pt_regs *regs, long error_code) | 20 | static void pentium_machine_check(struct pt_regs *regs, long error_code) |
@@ -43,15 +42,13 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c) | |||
43 | { | 42 | { |
44 | u32 l, h; | 43 | u32 l, h; |
45 | 44 | ||
46 | /* Check for MCE support: */ | 45 | /* Default P5 to off as it's often misconnected: */
47 | if (!cpu_has(c, X86_FEATURE_MCE)) | 46 | if (!mce_p5_enabled) |
48 | return; | 47 | return; |
49 | 48 | ||
50 | #ifdef CONFIG_X86_OLD_MCE | 49 | /* Check for MCE support: */ |
51 | /* Default P5 to off as its often misconnected: */ | 50 | if (!cpu_has(c, X86_FEATURE_MCE)) |
52 | if (mce_disabled != -1) | ||
53 | return; | 51 | return; |
54 | #endif | ||
55 | 52 | ||
56 | machine_check_vector = pentium_machine_check; | 53 | machine_check_vector = pentium_machine_check; |
57 | /* Make sure the vector pointer is visible before we enable MCEs: */ | 54 | /* Make sure the vector pointer is visible before we enable MCEs: */ |
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c index 43c24e667457..01e4f8178183 100644 --- a/arch/x86/kernel/cpu/mcheck/p6.c +++ b/arch/x86/kernel/cpu/mcheck/p6.c | |||
@@ -10,10 +10,9 @@ | |||
10 | 10 | ||
11 | #include <asm/processor.h> | 11 | #include <asm/processor.h> |
12 | #include <asm/system.h> | 12 | #include <asm/system.h> |
13 | #include <asm/mce.h> | ||
13 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
14 | 15 | ||
15 | #include "mce.h" | ||
16 | |||
17 | /* Machine Check Handler For PII/PIII */ | 16 | /* Machine Check Handler For PII/PIII */ |
18 | static void intel_machine_check(struct pt_regs *regs, long error_code) | 17 | static void intel_machine_check(struct pt_regs *regs, long error_code) |
19 | { | 18 | { |
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 7b1ae2e20ba5..bff8dd191dd5 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -13,13 +13,23 @@ | |||
13 | * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. | 13 | * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. |
14 | * Inspired by Ross Biro's and Al Borchers' counter code. | 14 | * Inspired by Ross Biro's and Al Borchers' counter code. |
15 | */ | 15 | */ |
16 | #include <linux/interrupt.h> | ||
16 | #include <linux/notifier.h> | 17 | #include <linux/notifier.h> |
17 | #include <linux/jiffies.h> | 18 | #include <linux/jiffies.h> |
19 | #include <linux/kernel.h> | ||
18 | #include <linux/percpu.h> | 20 | #include <linux/percpu.h> |
19 | #include <linux/sysdev.h> | 21 | #include <linux/sysdev.h> |
22 | #include <linux/types.h> | ||
23 | #include <linux/init.h> | ||
24 | #include <linux/smp.h> | ||
20 | #include <linux/cpu.h> | 25 | #include <linux/cpu.h> |
21 | 26 | ||
22 | #include <asm/therm_throt.h> | 27 | #include <asm/processor.h> |
28 | #include <asm/system.h> | ||
29 | #include <asm/apic.h> | ||
30 | #include <asm/idle.h> | ||
31 | #include <asm/mce.h> | ||
32 | #include <asm/msr.h> | ||
23 | 33 | ||
24 | /* How long to wait between reporting thermal events */ | 34 | /* How long to wait between reporting thermal events */ |
25 | #define CHECK_INTERVAL (300 * HZ) | 35 | #define CHECK_INTERVAL (300 * HZ) |
@@ -27,7 +37,7 @@ | |||
27 | static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; | 37 | static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; |
28 | static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); | 38 | static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); |
29 | 39 | ||
30 | atomic_t therm_throt_en = ATOMIC_INIT(0); | 40 | static atomic_t therm_throt_en = ATOMIC_INIT(0); |
31 | 41 | ||
32 | #ifdef CONFIG_SYSFS | 42 | #ifdef CONFIG_SYSFS |
33 | #define define_therm_throt_sysdev_one_ro(_name) \ | 43 | #define define_therm_throt_sysdev_one_ro(_name) \ |
@@ -82,7 +92,7 @@ static struct attribute_group thermal_throttle_attr_group = { | |||
82 | * 1 : Event should be logged further, and a message has been | 92 | * 1 : Event should be logged further, and a message has been |
83 | * printed to the syslog. | 93 | * printed to the syslog. |
84 | */ | 94 | */ |
85 | int therm_throt_process(int curr) | 95 | static int therm_throt_process(int curr) |
86 | { | 96 | { |
87 | unsigned int cpu = smp_processor_id(); | 97 | unsigned int cpu = smp_processor_id(); |
88 | __u64 tmp_jiffs = get_jiffies_64(); | 98 | __u64 tmp_jiffs = get_jiffies_64(); |
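therm_throt_process(), made static in the hunk above, only reports an event once the per-CPU next_check timestamp has passed, so the log gets at most one thermal message per CHECK_INTERVAL (300 * HZ) per CPU. The following is a minimal stand-alone model of that rate limiting; HZ, the time values and the function name are illustrative assumptions, not the kernel's:

#include <stdbool.h>
#include <stdio.h>

#define HZ		1000			/* assumed tick rate, for illustration only */
#define CHECK_INTERVAL	(300 * HZ)		/* mirrors the 300-second reporting interval */

static unsigned long long next_check;		/* per-CPU in the real code */

/* Return true if the caller may print a throttling message now. */
static bool may_report(unsigned long long now)
{
	if (now < next_check)
		return false;			/* still inside the quiet period */
	next_check = now + CHECK_INTERVAL;
	return true;
}

int main(void)
{
	unsigned long long t;

	/* Events arrive twice per interval; only every other one gets reported. */
	for (t = 0; t < 4ULL * CHECK_INTERVAL; t += CHECK_INTERVAL / 2)
		printf("t=%llu report=%d\n", t, (int)may_report(t));
	return 0;
}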
@@ -186,6 +196,94 @@ static __init int thermal_throttle_init_device(void) | |||
186 | 196 | ||
187 | return 0; | 197 | return 0; |
188 | } | 198 | } |
189 | |||
190 | device_initcall(thermal_throttle_init_device); | 199 | device_initcall(thermal_throttle_init_device); |
200 | |||
191 | #endif /* CONFIG_SYSFS */ | 201 | #endif /* CONFIG_SYSFS */ |
202 | |||
203 | /* Thermal transition interrupt handler */ | ||
204 | static void intel_thermal_interrupt(void) | ||
205 | { | ||
206 | __u64 msr_val; | ||
207 | |||
208 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | ||
209 | if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT)) | ||
210 | mce_log_therm_throt_event(msr_val); | ||
211 | } | ||
212 | |||
213 | static void unexpected_thermal_interrupt(void) | ||
214 | { | ||
215 | printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", | ||
216 | smp_processor_id()); | ||
217 | add_taint(TAINT_MACHINE_CHECK); | ||
218 | } | ||
219 | |||
220 | static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; | ||
221 | |||
222 | asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) | ||
223 | { | ||
224 | exit_idle(); | ||
225 | irq_enter(); | ||
226 | inc_irq_stat(irq_thermal_count); | ||
227 | smp_thermal_vector(); | ||
228 | irq_exit(); | ||
229 | /* Ack only at the end to avoid potential reentry */ | ||
230 | ack_APIC_irq(); | ||
231 | } | ||
232 | |||
233 | void intel_init_thermal(struct cpuinfo_x86 *c) | ||
234 | { | ||
235 | unsigned int cpu = smp_processor_id(); | ||
236 | int tm2 = 0; | ||
237 | u32 l, h; | ||
238 | |||
239 | /* Thermal monitoring depends on ACPI and clock modulation */ | ||
240 | if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) | ||
241 | return; | ||
242 | |||
243 | /* | ||
244 | * First check if it's enabled already, in which case there might | ||
245 | * be some SMM goo which handles it, so we can't even put a handler | ||
246 | * since it might be delivered via SMI already: | ||
247 | */ | ||
248 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
249 | h = apic_read(APIC_LVTTHMR); | ||
250 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { | ||
251 | printk(KERN_DEBUG | ||
252 | "CPU%d: Thermal monitoring handled by SMI\n", cpu); | ||
253 | return; | ||
254 | } | ||
255 | |||
256 | if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) | ||
257 | tm2 = 1; | ||
258 | |||
259 | /* Check whether a vector already exists */ | ||
260 | if (h & APIC_VECTOR_MASK) { | ||
261 | printk(KERN_DEBUG | ||
262 | "CPU%d: Thermal LVT vector (%#x) already installed\n", | ||
263 | cpu, (h & APIC_VECTOR_MASK)); | ||
264 | return; | ||
265 | } | ||
266 | |||
267 | /* We'll mask the thermal vector in the lapic till we're ready: */ | ||
268 | h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; | ||
269 | apic_write(APIC_LVTTHMR, h); | ||
270 | |||
271 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); | ||
272 | wrmsr(MSR_IA32_THERM_INTERRUPT, | ||
273 | l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); | ||
274 | |||
275 | smp_thermal_vector = intel_thermal_interrupt; | ||
276 | |||
277 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
278 | wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); | ||
279 | |||
280 | /* Unmask the thermal vector: */ | ||
281 | l = apic_read(APIC_LVTTHMR); | ||
282 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | ||
283 | |||
284 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", | ||
285 | cpu, tm2 ? "TM2" : "TM1"); | ||
286 | |||
287 | /* enable thermal throttle processing */ | ||
288 | atomic_set(&therm_throt_en, 1); | ||
289 | } | ||
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 81b02487090b..54060f565974 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c | |||
@@ -9,10 +9,9 @@ | |||
9 | 9 | ||
10 | #include <asm/processor.h> | 10 | #include <asm/processor.h> |
11 | #include <asm/system.h> | 11 | #include <asm/system.h> |
12 | #include <asm/mce.h> | ||
12 | #include <asm/msr.h> | 13 | #include <asm/msr.h> |
13 | 14 | ||
14 | #include "mce.h" | ||
15 | |||
16 | /* Machine check handler for WinChip C6: */ | 15 | /* Machine check handler for WinChip C6: */ |
17 | static void winchip_machine_check(struct pt_regs *regs, long error_code) | 16 | static void winchip_machine_check(struct pt_regs *regs, long error_code) |
18 | { | 17 | { |
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index d6f5b9fbde32..5c481f6205bf 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
@@ -716,11 +716,15 @@ static void probe_nmi_watchdog(void) | |||
716 | wd_ops = &k7_wd_ops; | 716 | wd_ops = &k7_wd_ops; |
717 | break; | 717 | break; |
718 | case X86_VENDOR_INTEL: | 718 | case X86_VENDOR_INTEL: |
719 | /* | 719 | /* Work around where perfctr1 doesn't have a working enable |
720 | * Work around Core Duo (Yonah) errata AE49 where perfctr1 | 720 | * bit as described in the following errata: |
721 | * doesn't have a working enable bit. | 721 | * AE49 Core Duo and Intel Core Solo 65 nm |
722 | * AN49 Intel Pentium Dual-Core | ||
723 | * AF49 Dual-Core Intel Xeon Processor LV | ||
722 | */ | 724 | */ |
723 | if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) { | 725 | if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) || |
726 | ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 && | ||
727 | boot_cpu_data.x86_mask == 4))) { | ||
724 | intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0; | 728 | intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0; |
725 | intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0; | 729 | intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0; |
726 | } | 730 | } |
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 2ac1f0c2beb3..b07af8861244 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c | |||
@@ -182,6 +182,11 @@ static struct notifier_block __refdata cpuid_class_cpu_notifier = | |||
182 | .notifier_call = cpuid_class_cpu_callback, | 182 | .notifier_call = cpuid_class_cpu_callback, |
183 | }; | 183 | }; |
184 | 184 | ||
185 | static char *cpuid_nodename(struct device *dev) | ||
186 | { | ||
187 | return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt)); | ||
188 | } | ||
189 | |||
185 | static int __init cpuid_init(void) | 190 | static int __init cpuid_init(void) |
186 | { | 191 | { |
187 | int i, err = 0; | 192 | int i, err = 0; |
@@ -198,6 +203,7 @@ static int __init cpuid_init(void) | |||
198 | err = PTR_ERR(cpuid_class); | 203 | err = PTR_ERR(cpuid_class); |
199 | goto out_chrdev; | 204 | goto out_chrdev; |
200 | } | 205 | } |
206 | cpuid_class->nodename = cpuid_nodename; | ||
201 | for_each_online_cpu(i) { | 207 | for_each_online_cpu(i) { |
202 | err = cpuid_device_create(i); | 208 | err = cpuid_device_create(i); |
203 | if (err != 0) | 209 | if (err != 0) |
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index ff958248e61d..5e409dc298a4 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <asm/cpu.h> | 27 | #include <asm/cpu.h> |
28 | #include <asm/reboot.h> | 28 | #include <asm/reboot.h> |
29 | #include <asm/virtext.h> | 29 | #include <asm/virtext.h> |
30 | #include <asm/iommu.h> | ||
30 | 31 | ||
31 | 32 | ||
32 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) | 33 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) |
@@ -103,5 +104,10 @@ void native_machine_crash_shutdown(struct pt_regs *regs) | |||
103 | #ifdef CONFIG_HPET_TIMER | 104 | #ifdef CONFIG_HPET_TIMER |
104 | hpet_disable(); | 105 | hpet_disable(); |
105 | #endif | 106 | #endif |
107 | |||
108 | #ifdef CONFIG_X86_64 | ||
109 | pci_iommu_shutdown(); | ||
110 | #endif | ||
111 | |||
106 | crash_save_cpu(regs, safe_smp_processor_id()); | 112 | crash_save_cpu(regs, safe_smp_processor_id()); |
107 | } | 113 | } |
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 1736acc4d7aa..96f7ac0bbf01 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c | |||
@@ -240,10 +240,35 @@ static void __init do_add_efi_memmap(void) | |||
240 | unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; | 240 | unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; |
241 | int e820_type; | 241 | int e820_type; |
242 | 242 | ||
243 | if (md->attribute & EFI_MEMORY_WB) | 243 | switch (md->type) { |
244 | e820_type = E820_RAM; | 244 | case EFI_LOADER_CODE: |
245 | else | 245 | case EFI_LOADER_DATA: |
246 | case EFI_BOOT_SERVICES_CODE: | ||
247 | case EFI_BOOT_SERVICES_DATA: | ||
248 | case EFI_CONVENTIONAL_MEMORY: | ||
249 | if (md->attribute & EFI_MEMORY_WB) | ||
250 | e820_type = E820_RAM; | ||
251 | else | ||
252 | e820_type = E820_RESERVED; | ||
253 | break; | ||
254 | case EFI_ACPI_RECLAIM_MEMORY: | ||
255 | e820_type = E820_ACPI; | ||
256 | break; | ||
257 | case EFI_ACPI_MEMORY_NVS: | ||
258 | e820_type = E820_NVS; | ||
259 | break; | ||
260 | case EFI_UNUSABLE_MEMORY: | ||
261 | e820_type = E820_UNUSABLE; | ||
262 | break; | ||
263 | default: | ||
264 | /* | ||
265 | * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE | ||
266 | * EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO | ||
267 | * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE | ||
268 | */ | ||
246 | e820_type = E820_RESERVED; | 269 | e820_type = E820_RESERVED; |
270 | break; | ||
271 | } | ||
247 | e820_add_region(start, size, e820_type); | 272 | e820_add_region(start, size, e820_type); |
248 | } | 273 | } |
249 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | 274 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 0d4b28564c14..c097e7d607c6 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -48,7 +48,6 @@ | |||
48 | #include <asm/segment.h> | 48 | #include <asm/segment.h> |
49 | #include <asm/smp.h> | 49 | #include <asm/smp.h> |
50 | #include <asm/page_types.h> | 50 | #include <asm/page_types.h> |
51 | #include <asm/desc.h> | ||
52 | #include <asm/percpu.h> | 51 | #include <asm/percpu.h> |
53 | #include <asm/dwarf2.h> | 52 | #include <asm/dwarf2.h> |
54 | #include <asm/processor-flags.h> | 53 | #include <asm/processor-flags.h> |
@@ -84,7 +83,7 @@ | |||
84 | #define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF | 83 | #define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF |
85 | #else | 84 | #else |
86 | #define preempt_stop(clobbers) | 85 | #define preempt_stop(clobbers) |
87 | #define resume_kernel restore_nocheck | 86 | #define resume_kernel restore_all |
88 | #endif | 87 | #endif |
89 | 88 | ||
90 | .macro TRACE_IRQS_IRET | 89 | .macro TRACE_IRQS_IRET |
@@ -372,7 +371,7 @@ END(ret_from_exception) | |||
372 | ENTRY(resume_kernel) | 371 | ENTRY(resume_kernel) |
373 | DISABLE_INTERRUPTS(CLBR_ANY) | 372 | DISABLE_INTERRUPTS(CLBR_ANY) |
374 | cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? | 373 | cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? |
375 | jnz restore_nocheck | 374 | jnz restore_all |
376 | need_resched: | 375 | need_resched: |
377 | movl TI_flags(%ebp), %ecx # need_resched set ? | 376 | movl TI_flags(%ebp), %ecx # need_resched set ? |
378 | testb $_TIF_NEED_RESCHED, %cl | 377 | testb $_TIF_NEED_RESCHED, %cl |
@@ -540,6 +539,8 @@ syscall_exit: | |||
540 | jne syscall_exit_work | 539 | jne syscall_exit_work |
541 | 540 | ||
542 | restore_all: | 541 | restore_all: |
542 | TRACE_IRQS_IRET | ||
543 | restore_all_notrace: | ||
543 | movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS | 544 | movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS |
544 | # Warning: PT_OLDSS(%esp) contains the wrong/random values if we | 545 | # Warning: PT_OLDSS(%esp) contains the wrong/random values if we |
545 | # are returning to the kernel. | 546 | # are returning to the kernel. |
@@ -551,8 +552,6 @@ restore_all: | |||
551 | CFI_REMEMBER_STATE | 552 | CFI_REMEMBER_STATE |
552 | je ldt_ss # returning to user-space with LDT SS | 553 | je ldt_ss # returning to user-space with LDT SS |
553 | restore_nocheck: | 554 | restore_nocheck: |
554 | TRACE_IRQS_IRET | ||
555 | restore_nocheck_notrace: | ||
556 | RESTORE_REGS 4 # skip orig_eax/error_code | 555 | RESTORE_REGS 4 # skip orig_eax/error_code |
557 | CFI_ADJUST_CFA_OFFSET -4 | 556 | CFI_ADJUST_CFA_OFFSET -4 |
558 | irq_return: | 557 | irq_return: |
@@ -588,22 +587,34 @@ ldt_ss: | |||
588 | jne restore_nocheck | 587 | jne restore_nocheck |
589 | #endif | 588 | #endif |
590 | 589 | ||
591 | /* If returning to userspace with 16bit stack, | 590 | /* |
592 | * try to fix the higher word of ESP, as the CPU | 591 | * Setup and switch to ESPFIX stack |
593 | * won't restore it. | 592 | * |
594 | * This is an "official" bug of all the x86-compatible | 593 | * We're returning to userspace with a 16 bit stack. The CPU will not |
595 | * CPUs, which we can try to work around to make | 594 | * restore the high word of ESP for us on executing iret... This is an |
596 | * dosemu and wine happy. */ | 595 | * "official" bug of all the x86-compatible CPUs, which we can work |
597 | movl PT_OLDESP(%esp), %eax | 596 | * around to make dosemu and wine happy. We do this by preloading the |
598 | movl %esp, %edx | 597 | * high word of ESP with the high word of the userspace ESP while |
599 | call patch_espfix_desc | 598 | * compensating for the offset by changing to the ESPFIX segment with |
599 | * a base address that matches for the difference. | ||
600 | */ | ||
601 | mov %esp, %edx /* load kernel esp */ | ||
602 | mov PT_OLDESP(%esp), %eax /* load userspace esp */ | ||
603 | mov %dx, %ax /* eax: new kernel esp */ | ||
604 | sub %eax, %edx /* offset (low word is 0) */ | ||
605 | PER_CPU(gdt_page, %ebx) | ||
606 | shr $16, %edx | ||
607 | mov %dl, GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx) /* bits 16..23 */ | ||
608 | mov %dh, GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx) /* bits 24..31 */ | ||
600 | pushl $__ESPFIX_SS | 609 | pushl $__ESPFIX_SS |
601 | CFI_ADJUST_CFA_OFFSET 4 | 610 | CFI_ADJUST_CFA_OFFSET 4 |
602 | pushl %eax | 611 | push %eax /* new kernel esp */ |
603 | CFI_ADJUST_CFA_OFFSET 4 | 612 | CFI_ADJUST_CFA_OFFSET 4 |
613 | /* Disable interrupts, but do not irqtrace this section: we | ||
614 | * will soon execute iret and the tracer was already set to | ||
615 | * the irqstate after the iret */ | ||
604 | DISABLE_INTERRUPTS(CLBR_EAX) | 616 | DISABLE_INTERRUPTS(CLBR_EAX) |
605 | TRACE_IRQS_OFF | 617 | lss (%esp), %esp /* switch to espfix segment */ |
606 | lss (%esp), %esp | ||
607 | CFI_ADJUST_CFA_OFFSET -8 | 618 | CFI_ADJUST_CFA_OFFSET -8 |
608 | jmp restore_nocheck | 619 | jmp restore_nocheck |
609 | CFI_ENDPROC | 620 | CFI_ENDPROC |
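The ldt_ss comment above reduces to a little address arithmetic: build a new ESP whose high word comes from the userspace ESP and whose low word comes from the current kernel ESP, then give the ESPFIX segment a base equal to the difference so that base plus new ESP still points at the real kernel stack. A small user-space model of that arithmetic follows; the two addresses are made-up example values, not anything the kernel computes:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t kernel_esp = 0xc1234f80;	/* example kernel stack pointer */
	uint32_t user_esp   = 0x5678abcd;	/* example ESP saved in the user frame */

	/* New ESP: high word from userspace, low word from the kernel stack. */
	uint32_t new_esp  = (user_esp & 0xffff0000u) | (kernel_esp & 0xffffu);

	/* ESPFIX segment base compensates for the difference (its low word is 0). */
	uint32_t seg_base = kernel_esp - new_esp;

	/* Base plus the new ESP still addresses the real kernel stack. */
	printf("base + new_esp = %#x, kernel_esp = %#x\n",
	       (unsigned int)(seg_base + new_esp), (unsigned int)kernel_esp);

	/*
	 * On iret to the 16-bit user stack segment the CPU only restores the
	 * low word of ESP; the high word was preloaded with user_esp's high
	 * word, so the final ESP equals the full userspace ESP.
	 */
	return 0;
}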
@@ -716,15 +727,24 @@ PTREGSCALL(vm86) | |||
716 | PTREGSCALL(vm86old) | 727 | PTREGSCALL(vm86old) |
717 | 728 | ||
718 | .macro FIXUP_ESPFIX_STACK | 729 | .macro FIXUP_ESPFIX_STACK |
719 | /* since we are on a wrong stack, we cant make it a C code :( */ | 730 | /* |
731 | * Switch back from the ESPFIX stack to the normal zero-based stack | ||
732 | * | ||
733 | * We can't call C functions using the ESPFIX stack. This code reads | ||
734 | * the high word of the segment base from the GDT and switches to the | ||
735 | * normal stack and adjusts ESP with the matching offset. | ||
736 | */ | ||
737 | /* fixup the stack */ | ||
720 | PER_CPU(gdt_page, %ebx) | 738 | PER_CPU(gdt_page, %ebx) |
721 | GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) | 739 | mov GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx), %al /* bits 16..23 */ |
722 | addl %esp, %eax | 740 | mov GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx), %ah /* bits 24..31 */ |
741 | shl $16, %eax | ||
742 | addl %esp, %eax /* the adjusted stack pointer */ | ||
723 | pushl $__KERNEL_DS | 743 | pushl $__KERNEL_DS |
724 | CFI_ADJUST_CFA_OFFSET 4 | 744 | CFI_ADJUST_CFA_OFFSET 4 |
725 | pushl %eax | 745 | pushl %eax |
726 | CFI_ADJUST_CFA_OFFSET 4 | 746 | CFI_ADJUST_CFA_OFFSET 4 |
727 | lss (%esp), %esp | 747 | lss (%esp), %esp /* switch to the normal stack segment */ |
728 | CFI_ADJUST_CFA_OFFSET -8 | 748 | CFI_ADJUST_CFA_OFFSET -8 |
729 | .endm | 749 | .endm |
730 | .macro UNWIND_ESPFIX_STACK | 750 | .macro UNWIND_ESPFIX_STACK |
@@ -1331,7 +1351,7 @@ nmi_stack_correct: | |||
1331 | xorl %edx,%edx # zero error code | 1351 | xorl %edx,%edx # zero error code |
1332 | movl %esp,%eax # pt_regs pointer | 1352 | movl %esp,%eax # pt_regs pointer |
1333 | call do_nmi | 1353 | call do_nmi |
1334 | jmp restore_nocheck_notrace | 1354 | jmp restore_all_notrace |
1335 | CFI_ENDPROC | 1355 | CFI_ENDPROC |
1336 | 1356 | ||
1337 | nmi_stack_fixup: | 1357 | nmi_stack_fixup: |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index dc5ed4bdd88d..8663afb56535 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -13,7 +13,6 @@ | |||
13 | #include <asm/segment.h> | 13 | #include <asm/segment.h> |
14 | #include <asm/page_types.h> | 14 | #include <asm/page_types.h> |
15 | #include <asm/pgtable_types.h> | 15 | #include <asm/pgtable_types.h> |
16 | #include <asm/desc.h> | ||
17 | #include <asm/cache.h> | 16 | #include <asm/cache.h> |
18 | #include <asm/thread_info.h> | 17 | #include <asm/thread_info.h> |
19 | #include <asm/asm-offsets.h> | 18 | #include <asm/asm-offsets.h> |
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 54b29bb24e71..fa54f78e2a05 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -12,7 +12,6 @@ | |||
12 | #include <linux/linkage.h> | 12 | #include <linux/linkage.h> |
13 | #include <linux/threads.h> | 13 | #include <linux/threads.h> |
14 | #include <linux/init.h> | 14 | #include <linux/init.h> |
15 | #include <asm/desc.h> | ||
16 | #include <asm/segment.h> | 15 | #include <asm/segment.h> |
17 | #include <asm/pgtable.h> | 16 | #include <asm/pgtable.h> |
18 | #include <asm/page.h> | 17 | #include <asm/page.h> |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 81408b93f887..dedc2bddf7a5 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -510,7 +510,8 @@ static int hpet_setup_irq(struct hpet_dev *dev) | |||
510 | { | 510 | { |
511 | 511 | ||
512 | if (request_irq(dev->irq, hpet_interrupt_handler, | 512 | if (request_irq(dev->irq, hpet_interrupt_handler, |
513 | IRQF_DISABLED|IRQF_NOBALANCING, dev->name, dev)) | 513 | IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING, |
514 | dev->name, dev)) | ||
514 | return -1; | 515 | return -1; |
515 | 516 | ||
516 | disable_irq(dev->irq); | 517 | disable_irq(dev->irq); |
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index c2e0bb0890d4..5cf36c053ac4 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/spinlock.h> | 7 | #include <linux/spinlock.h> |
8 | #include <linux/jiffies.h> | 8 | #include <linux/jiffies.h> |
9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
10 | #include <linux/timex.h> | ||
10 | #include <linux/delay.h> | 11 | #include <linux/delay.h> |
11 | #include <linux/init.h> | 12 | #include <linux/init.h> |
12 | #include <linux/io.h> | 13 | #include <linux/io.h> |
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c index df3bf269beab..270ff83efc11 100644 --- a/arch/x86/kernel/init_task.c +++ b/arch/x86/kernel/init_task.c | |||
@@ -12,7 +12,6 @@ | |||
12 | 12 | ||
13 | static struct signal_struct init_signals = INIT_SIGNALS(init_signals); | 13 | static struct signal_struct init_signals = INIT_SIGNALS(init_signals); |
14 | static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); | 14 | static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); |
15 | struct mm_struct init_mm = INIT_MM(init_mm); | ||
16 | 15 | ||
17 | /* | 16 | /* |
18 | * Initial thread structure. | 17 | * Initial thread structure. |
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 9c4461501fcb..9371448290ac 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
@@ -236,6 +236,7 @@ static const struct file_operations microcode_fops = { | |||
236 | static struct miscdevice microcode_dev = { | 236 | static struct miscdevice microcode_dev = { |
237 | .minor = MICROCODE_MINOR, | 237 | .minor = MICROCODE_MINOR, |
238 | .name = "microcode", | 238 | .name = "microcode", |
239 | .devnode = "cpu/microcode", | ||
239 | .fops = &microcode_fops, | 240 | .fops = &microcode_fops, |
240 | }; | 241 | }; |
241 | 242 | ||
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 3cf3413ec626..98fd6cd4e3a4 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
@@ -196,6 +196,11 @@ static struct notifier_block __refdata msr_class_cpu_notifier = { | |||
196 | .notifier_call = msr_class_cpu_callback, | 196 | .notifier_call = msr_class_cpu_callback, |
197 | }; | 197 | }; |
198 | 198 | ||
199 | static char *msr_nodename(struct device *dev) | ||
200 | { | ||
201 | return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt)); | ||
202 | } | ||
203 | |||
199 | static int __init msr_init(void) | 204 | static int __init msr_init(void) |
200 | { | 205 | { |
201 | int i, err = 0; | 206 | int i, err = 0; |
@@ -212,6 +217,7 @@ static int __init msr_init(void) | |||
212 | err = PTR_ERR(msr_class); | 217 | err = PTR_ERR(msr_class); |
213 | goto out_chrdev; | 218 | goto out_chrdev; |
214 | } | 219 | } |
220 | msr_class->nodename = msr_nodename; | ||
215 | for_each_online_cpu(i) { | 221 | for_each_online_cpu(i) { |
216 | err = msr_device_create(i); | 222 | err = msr_device_create(i); |
217 | if (err != 0) | 223 | if (err != 0) |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 745579bc8256..328592fb6044 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -290,6 +290,8 @@ static int __init pci_iommu_init(void) | |||
290 | void pci_iommu_shutdown(void) | 290 | void pci_iommu_shutdown(void) |
291 | { | 291 | { |
292 | gart_iommu_shutdown(); | 292 | gart_iommu_shutdown(); |
293 | |||
294 | amd_iommu_shutdown(); | ||
293 | } | 295 | } |
294 | /* Must execute after PCI subsystem */ | 296 | /* Must execute after PCI subsystem */ |
295 | fs_initcall(pci_iommu_init); | 297 | fs_initcall(pci_iommu_init); |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 3bb2be1649bd..994dd6a4a2a0 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -63,7 +63,7 @@ void arch_task_cache_init(void) | |||
63 | task_xstate_cachep = | 63 | task_xstate_cachep = |
64 | kmem_cache_create("task_xstate", xstate_size, | 64 | kmem_cache_create("task_xstate", xstate_size, |
65 | __alignof__(union thread_xstate), | 65 | __alignof__(union thread_xstate), |
66 | SLAB_PANIC, NULL); | 66 | SLAB_PANIC | SLAB_NOTRACK, NULL); |
67 | } | 67 | } |
68 | 68 | ||
69 | /* | 69 | /* |
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 4aaf7e48394f..c3eb207181fe 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c | |||
@@ -77,6 +77,13 @@ void save_stack_trace(struct stack_trace *trace) | |||
77 | } | 77 | } |
78 | EXPORT_SYMBOL_GPL(save_stack_trace); | 78 | EXPORT_SYMBOL_GPL(save_stack_trace); |
79 | 79 | ||
80 | void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp) | ||
81 | { | ||
82 | dump_trace(current, NULL, NULL, bp, &save_stack_ops, trace); | ||
83 | if (trace->nr_entries < trace->max_entries) | ||
84 | trace->entries[trace->nr_entries++] = ULONG_MAX; | ||
85 | } | ||
86 | |||
80 | void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) | 87 | void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) |
81 | { | 88 | { |
82 | dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); | 89 | dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 1e1e27b7d438..a0f48f5671c0 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include <linux/edac.h> | 45 | #include <linux/edac.h> |
46 | #endif | 46 | #endif |
47 | 47 | ||
48 | #include <asm/kmemcheck.h> | ||
48 | #include <asm/stacktrace.h> | 49 | #include <asm/stacktrace.h> |
49 | #include <asm/processor.h> | 50 | #include <asm/processor.h> |
50 | #include <asm/debugreg.h> | 51 | #include <asm/debugreg.h> |
@@ -53,6 +54,7 @@ | |||
53 | #include <asm/traps.h> | 54 | #include <asm/traps.h> |
54 | #include <asm/desc.h> | 55 | #include <asm/desc.h> |
55 | #include <asm/i387.h> | 56 | #include <asm/i387.h> |
57 | #include <asm/mce.h> | ||
56 | 58 | ||
57 | #include <asm/mach_traps.h> | 59 | #include <asm/mach_traps.h> |
58 | 60 | ||
@@ -64,8 +66,6 @@ | |||
64 | #include <asm/setup.h> | 66 | #include <asm/setup.h> |
65 | #include <asm/traps.h> | 67 | #include <asm/traps.h> |
66 | 68 | ||
67 | #include "cpu/mcheck/mce.h" | ||
68 | |||
69 | asmlinkage int system_call(void); | 69 | asmlinkage int system_call(void); |
70 | 70 | ||
71 | /* Do we ignore FPU interrupts ? */ | 71 | /* Do we ignore FPU interrupts ? */ |
@@ -534,6 +534,10 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
534 | 534 | ||
535 | get_debugreg(condition, 6); | 535 | get_debugreg(condition, 6); |
536 | 536 | ||
537 | /* Catch kmemcheck conditions first of all! */ | ||
538 | if (condition & DR_STEP && kmemcheck_trap(regs)) | ||
539 | return; | ||
540 | |||
537 | /* | 541 | /* |
538 | * The processor cleared BTF, so don't mark that we need it set. | 542 | * The processor cleared BTF, so don't mark that we need it set. |
539 | */ | 543 | */ |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 3e1c057e98fe..b0597ad02c93 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/delay.h> | 9 | #include <linux/delay.h> |
10 | #include <linux/clocksource.h> | 10 | #include <linux/clocksource.h> |
11 | #include <linux/percpu.h> | 11 | #include <linux/percpu.h> |
12 | #include <linux/timex.h> | ||
12 | 13 | ||
13 | #include <asm/hpet.h> | 14 | #include <asm/hpet.h> |
14 | #include <asm/timer.h> | 15 | #include <asm/timer.h> |
@@ -631,17 +632,15 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | |||
631 | void *data) | 632 | void *data) |
632 | { | 633 | { |
633 | struct cpufreq_freqs *freq = data; | 634 | struct cpufreq_freqs *freq = data; |
634 | unsigned long *lpj, dummy; | 635 | unsigned long *lpj; |
635 | 636 | ||
636 | if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC)) | 637 | if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC)) |
637 | return 0; | 638 | return 0; |
638 | 639 | ||
639 | lpj = &dummy; | 640 | lpj = &boot_cpu_data.loops_per_jiffy; |
640 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) | ||
641 | #ifdef CONFIG_SMP | 641 | #ifdef CONFIG_SMP |
642 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) | ||
642 | lpj = &cpu_data(freq->cpu).loops_per_jiffy; | 643 | lpj = &cpu_data(freq->cpu).loops_per_jiffy; |
643 | #else | ||
644 | lpj = &boot_cpu_data.loops_per_jiffy; | ||
645 | #endif | 644 | #endif |
646 | 645 | ||
647 | if (!ref_freq) { | 646 | if (!ref_freq) { |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 32d6ae8fb60e..e770bf349ec4 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -1277,7 +1277,7 @@ static struct vmcs *alloc_vmcs_cpu(int cpu) | |||
1277 | struct page *pages; | 1277 | struct page *pages; |
1278 | struct vmcs *vmcs; | 1278 | struct vmcs *vmcs; |
1279 | 1279 | ||
1280 | pages = alloc_pages_node(node, GFP_KERNEL, vmcs_config.order); | 1280 | pages = alloc_pages_exact_node(node, GFP_KERNEL, vmcs_config.order); |
1281 | if (!pages) | 1281 | if (!pages) |
1282 | return NULL; | 1282 | return NULL; |
1283 | vmcs = page_address(pages); | 1283 | vmcs = page_address(pages); |
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index fdd30d08ab52..eefdeee8a871 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
@@ -10,6 +10,8 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o | |||
10 | 10 | ||
11 | obj-$(CONFIG_HIGHMEM) += highmem_32.o | 11 | obj-$(CONFIG_HIGHMEM) += highmem_32.o |
12 | 12 | ||
13 | obj-$(CONFIG_KMEMCHECK) += kmemcheck/ | ||
14 | |||
13 | obj-$(CONFIG_MMIOTRACE) += mmiotrace.o | 15 | obj-$(CONFIG_MMIOTRACE) += mmiotrace.o |
14 | mmiotrace-y := kmmio.o pf_in.o mmio-mod.o | 16 | mmiotrace-y := kmmio.o pf_in.o mmio-mod.o |
15 | obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o | 17 | obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index c6acc6326374..c403526d5d15 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -14,6 +14,7 @@ | |||
14 | 14 | ||
15 | #include <asm/traps.h> /* dotraplinkage, ... */ | 15 | #include <asm/traps.h> /* dotraplinkage, ... */ |
16 | #include <asm/pgalloc.h> /* pgd_*(), ... */ | 16 | #include <asm/pgalloc.h> /* pgd_*(), ... */ |
17 | #include <asm/kmemcheck.h> /* kmemcheck_*(), ... */ | ||
17 | 18 | ||
18 | /* | 19 | /* |
19 | * Page fault error code bits: | 20 | * Page fault error code bits: |
@@ -951,11 +952,17 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
951 | tsk = current; | 952 | tsk = current; |
952 | mm = tsk->mm; | 953 | mm = tsk->mm; |
953 | 954 | ||
954 | prefetchw(&mm->mmap_sem); | ||
955 | |||
956 | /* Get the faulting address: */ | 955 | /* Get the faulting address: */ |
957 | address = read_cr2(); | 956 | address = read_cr2(); |
958 | 957 | ||
958 | /* | ||
959 | * Detect and handle instructions that would cause a page fault for | ||
960 | * both a tracked kernel page and a userspace page. | ||
961 | */ | ||
962 | if (kmemcheck_active(regs)) | ||
963 | kmemcheck_hide(regs); | ||
964 | prefetchw(&mm->mmap_sem); | ||
965 | |||
959 | if (unlikely(kmmio_fault(regs, address))) | 966 | if (unlikely(kmmio_fault(regs, address))) |
960 | return; | 967 | return; |
961 | 968 | ||
@@ -973,9 +980,13 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
973 | * protection error (error_code & 9) == 0. | 980 | * protection error (error_code & 9) == 0. |
974 | */ | 981 | */ |
975 | if (unlikely(fault_in_kernel_space(address))) { | 982 | if (unlikely(fault_in_kernel_space(address))) { |
976 | if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && | 983 | if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) { |
977 | vmalloc_fault(address) >= 0) | 984 | if (vmalloc_fault(address) >= 0) |
978 | return; | 985 | return; |
986 | |||
987 | if (kmemcheck_fault(regs, address, error_code)) | ||
988 | return; | ||
989 | } | ||
979 | 990 | ||
980 | /* Can handle a stale RO->RW TLB: */ | 991 | /* Can handle a stale RO->RW TLB: */ |
981 | if (spurious_fault(error_code, address)) | 992 | if (spurious_fault(error_code, address)) |
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 6340cef6798a..f97480941269 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c | |||
@@ -247,10 +247,15 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, | |||
247 | start &= PAGE_MASK; | 247 | start &= PAGE_MASK; |
248 | addr = start; | 248 | addr = start; |
249 | len = (unsigned long) nr_pages << PAGE_SHIFT; | 249 | len = (unsigned long) nr_pages << PAGE_SHIFT; |
250 | |||
250 | end = start + len; | 251 | end = start + len; |
251 | if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, | 252 | if (end < start) |
252 | (void __user *)start, len))) | 253 | goto slow_irqon; |
254 | |||
255 | #ifdef CONFIG_X86_64 | ||
256 | if (end >> __VIRTUAL_MASK_SHIFT) | ||
253 | goto slow_irqon; | 257 | goto slow_irqon; |
258 | #endif | ||
254 | 259 | ||
255 | /* | 260 | /* |
256 | * XXX: batch / limit 'nr', to avoid large irq off latency | 261 | * XXX: batch / limit 'nr', to avoid large irq off latency |
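The get_user_pages_fast() change above drops the access_ok() call in favour of an explicit wrap check: if start + len overflows the address space, end comes out smaller than start and the code falls back to the slow path (plus, on 64-bit, a check that end does not exceed the user virtual address mask). A tiny stand-alone illustration of the wrap check; the addresses are arbitrary example values:

#include <stdio.h>

int main(void)
{
	unsigned long start = ~0UL & ~0xfffUL;	/* last page of the address space */
	unsigned long len   = 0x2000UL;		/* two pages */
	unsigned long end   = start + len;	/* wraps around past zero */

	if (end < start)
		printf("range wrapped: start=%#lx end=%#lx -> take the slow path\n",
		       start, end);
	return 0;
}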
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 34c1bfb64f1c..f53b57e4086f 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -213,7 +213,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
213 | if (!after_bootmem) | 213 | if (!after_bootmem) |
214 | init_gbpages(); | 214 | init_gbpages(); |
215 | 215 | ||
216 | #ifdef CONFIG_DEBUG_PAGEALLOC | 216 | #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) |
217 | /* | 217 | /* |
218 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. | 218 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. |
219 | * This will simplify cpa(), which otherwise needs to support splitting | 219 | * This will simplify cpa(), which otherwise needs to support splitting |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 9ff3c0816d15..3cd7711bb949 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -111,7 +111,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) | |||
111 | pte_t *page_table = NULL; | 111 | pte_t *page_table = NULL; |
112 | 112 | ||
113 | if (after_bootmem) { | 113 | if (after_bootmem) { |
114 | #ifdef CONFIG_DEBUG_PAGEALLOC | 114 | #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) |
115 | page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); | 115 | page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); |
116 | #endif | 116 | #endif |
117 | if (!page_table) | 117 | if (!page_table) |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 52bb9519bb86..c4378f4fd4a5 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -104,7 +104,7 @@ static __ref void *spp_getpage(void) | |||
104 | void *ptr; | 104 | void *ptr; |
105 | 105 | ||
106 | if (after_bootmem) | 106 | if (after_bootmem) |
107 | ptr = (void *) get_zeroed_page(GFP_ATOMIC); | 107 | ptr = (void *) get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK); |
108 | else | 108 | else |
109 | ptr = alloc_bootmem_pages(PAGE_SIZE); | 109 | ptr = alloc_bootmem_pages(PAGE_SIZE); |
110 | 110 | ||
@@ -281,7 +281,7 @@ static __ref void *alloc_low_page(unsigned long *phys) | |||
281 | void *adr; | 281 | void *adr; |
282 | 282 | ||
283 | if (after_bootmem) { | 283 | if (after_bootmem) { |
284 | adr = (void *)get_zeroed_page(GFP_ATOMIC); | 284 | adr = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK); |
285 | *phys = __pa(adr); | 285 | *phys = __pa(adr); |
286 | 286 | ||
287 | return adr; | 287 | return adr; |
@@ -527,7 +527,7 @@ phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end, | |||
527 | return phys_pud_init(pud, addr, end, page_size_mask); | 527 | return phys_pud_init(pud, addr, end, page_size_mask); |
528 | } | 528 | } |
529 | 529 | ||
530 | unsigned long __init | 530 | unsigned long __meminit |
531 | kernel_physical_mapping_init(unsigned long start, | 531 | kernel_physical_mapping_init(unsigned long start, |
532 | unsigned long end, | 532 | unsigned long end, |
533 | unsigned long page_size_mask) | 533 | unsigned long page_size_mask) |
diff --git a/arch/x86/mm/kmemcheck/Makefile b/arch/x86/mm/kmemcheck/Makefile new file mode 100644 index 000000000000..520b3bce4095 --- /dev/null +++ b/arch/x86/mm/kmemcheck/Makefile | |||
@@ -0,0 +1 @@ | |||
obj-y := error.o kmemcheck.o opcode.o pte.o selftest.o shadow.o | |||
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c new file mode 100644 index 000000000000..4901d0dafda6 --- /dev/null +++ b/arch/x86/mm/kmemcheck/error.c | |||
@@ -0,0 +1,228 @@ | |||
1 | #include <linux/interrupt.h> | ||
2 | #include <linux/kdebug.h> | ||
3 | #include <linux/kmemcheck.h> | ||
4 | #include <linux/kernel.h> | ||
5 | #include <linux/types.h> | ||
6 | #include <linux/ptrace.h> | ||
7 | #include <linux/stacktrace.h> | ||
8 | #include <linux/string.h> | ||
9 | |||
10 | #include "error.h" | ||
11 | #include "shadow.h" | ||
12 | |||
13 | enum kmemcheck_error_type { | ||
14 | KMEMCHECK_ERROR_INVALID_ACCESS, | ||
15 | KMEMCHECK_ERROR_BUG, | ||
16 | }; | ||
17 | |||
18 | #define SHADOW_COPY_SIZE (1 << CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT) | ||
19 | |||
20 | struct kmemcheck_error { | ||
21 | enum kmemcheck_error_type type; | ||
22 | |||
23 | union { | ||
24 | /* KMEMCHECK_ERROR_INVALID_ACCESS */ | ||
25 | struct { | ||
26 | /* Kind of access that caused the error */ | ||
27 | enum kmemcheck_shadow state; | ||
28 | /* Address and size of the erroneous read */ | ||
29 | unsigned long address; | ||
30 | unsigned int size; | ||
31 | }; | ||
32 | }; | ||
33 | |||
34 | struct pt_regs regs; | ||
35 | struct stack_trace trace; | ||
36 | unsigned long trace_entries[32]; | ||
37 | |||
38 | /* We compress it to a char. */ | ||
39 | unsigned char shadow_copy[SHADOW_COPY_SIZE]; | ||
40 | unsigned char memory_copy[SHADOW_COPY_SIZE]; | ||
41 | }; | ||
42 | |||
43 | /* | ||
44 | * Create a ring queue of errors to output. We can't call printk() directly | ||
45 | * from the kmemcheck traps, since this may call the console drivers and | ||
46 | * result in a recursive fault. | ||
47 | */ | ||
48 | static struct kmemcheck_error error_fifo[CONFIG_KMEMCHECK_QUEUE_SIZE]; | ||
49 | static unsigned int error_count; | ||
50 | static unsigned int error_rd; | ||
51 | static unsigned int error_wr; | ||
52 | static unsigned int error_missed_count; | ||
53 | |||
54 | static struct kmemcheck_error *error_next_wr(void) | ||
55 | { | ||
56 | struct kmemcheck_error *e; | ||
57 | |||
58 | if (error_count == ARRAY_SIZE(error_fifo)) { | ||
59 | ++error_missed_count; | ||
60 | return NULL; | ||
61 | } | ||
62 | |||
63 | e = &error_fifo[error_wr]; | ||
64 | if (++error_wr == ARRAY_SIZE(error_fifo)) | ||
65 | error_wr = 0; | ||
66 | ++error_count; | ||
67 | return e; | ||
68 | } | ||
69 | |||
70 | static struct kmemcheck_error *error_next_rd(void) | ||
71 | { | ||
72 | struct kmemcheck_error *e; | ||
73 | |||
74 | if (error_count == 0) | ||
75 | return NULL; | ||
76 | |||
77 | e = &error_fifo[error_rd]; | ||
78 | if (++error_rd == ARRAY_SIZE(error_fifo)) | ||
79 | error_rd = 0; | ||
80 | --error_count; | ||
81 | return e; | ||
82 | } | ||
83 | |||
84 | void kmemcheck_error_recall(void) | ||
85 | { | ||
86 | static const char *desc[] = { | ||
87 | [KMEMCHECK_SHADOW_UNALLOCATED] = "unallocated", | ||
88 | [KMEMCHECK_SHADOW_UNINITIALIZED] = "uninitialized", | ||
89 | [KMEMCHECK_SHADOW_INITIALIZED] = "initialized", | ||
90 | [KMEMCHECK_SHADOW_FREED] = "freed", | ||
91 | }; | ||
92 | |||
93 | static const char short_desc[] = { | ||
94 | [KMEMCHECK_SHADOW_UNALLOCATED] = 'a', | ||
95 | [KMEMCHECK_SHADOW_UNINITIALIZED] = 'u', | ||
96 | [KMEMCHECK_SHADOW_INITIALIZED] = 'i', | ||
97 | [KMEMCHECK_SHADOW_FREED] = 'f', | ||
98 | }; | ||
99 | |||
100 | struct kmemcheck_error *e; | ||
101 | unsigned int i; | ||
102 | |||
103 | e = error_next_rd(); | ||
104 | if (!e) | ||
105 | return; | ||
106 | |||
107 | switch (e->type) { | ||
108 | case KMEMCHECK_ERROR_INVALID_ACCESS: | ||
109 | printk(KERN_ERR "WARNING: kmemcheck: Caught %d-bit read " | ||
110 | "from %s memory (%p)\n", | ||
111 | 8 * e->size, e->state < ARRAY_SIZE(desc) ? | ||
112 | desc[e->state] : "(invalid shadow state)", | ||
113 | (void *) e->address); | ||
114 | |||
115 | printk(KERN_INFO); | ||
116 | for (i = 0; i < SHADOW_COPY_SIZE; ++i) | ||
117 | printk("%02x", e->memory_copy[i]); | ||
118 | printk("\n"); | ||
119 | |||
120 | printk(KERN_INFO); | ||
121 | for (i = 0; i < SHADOW_COPY_SIZE; ++i) { | ||
122 | if (e->shadow_copy[i] < ARRAY_SIZE(short_desc)) | ||
123 | printk(" %c", short_desc[e->shadow_copy[i]]); | ||
124 | else | ||
125 | printk(" ?"); | ||
126 | } | ||
127 | printk("\n"); | ||
128 | printk(KERN_INFO "%*c\n", 2 + 2 | ||
129 | * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^'); | ||
130 | break; | ||
131 | case KMEMCHECK_ERROR_BUG: | ||
132 | printk(KERN_EMERG "ERROR: kmemcheck: Fatal error\n"); | ||
133 | break; | ||
134 | } | ||
135 | |||
136 | __show_regs(&e->regs, 1); | ||
137 | print_stack_trace(&e->trace, 0); | ||
138 | } | ||
139 | |||
140 | static void do_wakeup(unsigned long data) | ||
141 | { | ||
142 | while (error_count > 0) | ||
143 | kmemcheck_error_recall(); | ||
144 | |||
145 | if (error_missed_count > 0) { | ||
146 | printk(KERN_WARNING "kmemcheck: Lost %d error reports because " | ||
147 | "the queue was too small\n", error_missed_count); | ||
148 | error_missed_count = 0; | ||
149 | } | ||
150 | } | ||
151 | |||
152 | static DECLARE_TASKLET(kmemcheck_tasklet, &do_wakeup, 0); | ||
153 | |||
154 | /* | ||
155 | * Save the context of an error report. | ||
156 | */ | ||
157 | void kmemcheck_error_save(enum kmemcheck_shadow state, | ||
158 | unsigned long address, unsigned int size, struct pt_regs *regs) | ||
159 | { | ||
160 | static unsigned long prev_ip; | ||
161 | |||
162 | struct kmemcheck_error *e; | ||
163 | void *shadow_copy; | ||
164 | void *memory_copy; | ||
165 | |||
166 | /* Don't report several adjacent errors from the same EIP. */ | ||
167 | if (regs->ip == prev_ip) | ||
168 | return; | ||
169 | prev_ip = regs->ip; | ||
170 | |||
171 | e = error_next_wr(); | ||
172 | if (!e) | ||
173 | return; | ||
174 | |||
175 | e->type = KMEMCHECK_ERROR_INVALID_ACCESS; | ||
176 | |||
177 | e->state = state; | ||
178 | e->address = address; | ||
179 | e->size = size; | ||
180 | |||
181 | /* Save regs */ | ||
182 | memcpy(&e->regs, regs, sizeof(*regs)); | ||
183 | |||
184 | /* Save stack trace */ | ||
185 | e->trace.nr_entries = 0; | ||
186 | e->trace.entries = e->trace_entries; | ||
187 | e->trace.max_entries = ARRAY_SIZE(e->trace_entries); | ||
188 | e->trace.skip = 0; | ||
189 | save_stack_trace_bp(&e->trace, regs->bp); | ||
190 | |||
191 | /* Round the address down to a SHADOW_COPY_SIZE boundary */ | ||
192 | shadow_copy = kmemcheck_shadow_lookup(address | ||
193 | & ~(SHADOW_COPY_SIZE - 1)); | ||
194 | BUG_ON(!shadow_copy); | ||
195 | |||
196 | memcpy(e->shadow_copy, shadow_copy, SHADOW_COPY_SIZE); | ||
197 | |||
198 | kmemcheck_show_addr(address); | ||
199 | memory_copy = (void *) (address & ~(SHADOW_COPY_SIZE - 1)); | ||
200 | memcpy(e->memory_copy, memory_copy, SHADOW_COPY_SIZE); | ||
201 | kmemcheck_hide_addr(address); | ||
202 | |||
203 | tasklet_hi_schedule_first(&kmemcheck_tasklet); | ||
204 | } | ||
205 | |||
206 | /* | ||
207 | * Save the context of a kmemcheck bug. | ||
208 | */ | ||
209 | void kmemcheck_error_save_bug(struct pt_regs *regs) | ||
210 | { | ||
211 | struct kmemcheck_error *e; | ||
212 | |||
213 | e = error_next_wr(); | ||
214 | if (!e) | ||
215 | return; | ||
216 | |||
217 | e->type = KMEMCHECK_ERROR_BUG; | ||
218 | |||
219 | memcpy(&e->regs, regs, sizeof(*regs)); | ||
220 | |||
221 | e->trace.nr_entries = 0; | ||
222 | e->trace.entries = e->trace_entries; | ||
223 | e->trace.max_entries = ARRAY_SIZE(e->trace_entries); | ||
224 | e->trace.skip = 1; | ||
225 | save_stack_trace(&e->trace); | ||
226 | |||
227 | tasklet_hi_schedule_first(&kmemcheck_tasklet); | ||
228 | } | ||
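error.c never calls printk() from the trap context; reports are copied into a fixed-size ring (error_fifo) and drained later from a tasklet, because a console driver called from inside the #PF/#DB path could itself touch uninitialized or hidden memory and fault recursively. Note also the prev_ip check in kmemcheck_error_save(): repeated faults from the same instruction are collapsed into one report before the queue is even consulted. The same single-producer ring pattern, reduced to a self-contained userspace sketch (sizes and names here are illustrative, not the kernel's):

#include <stdio.h>

#define QUEUE_SIZE 64			/* stand-in for CONFIG_KMEMCHECK_QUEUE_SIZE */

struct report { unsigned long address; };

static struct report fifo[QUEUE_SIZE];
static unsigned int count, rd, wr, missed;

/* Reserve the next write slot, or count the report as lost when full. */
static struct report *next_wr(void)
{
	if (count == QUEUE_SIZE) {
		++missed;
		return NULL;
	}
	struct report *r = &fifo[wr];
	if (++wr == QUEUE_SIZE)
		wr = 0;
	++count;
	return r;
}

/* Drain one queued report; NULL means the queue is empty. */
static struct report *next_rd(void)
{
	if (count == 0)
		return NULL;
	struct report *r = &fifo[rd];
	if (++rd == QUEUE_SIZE)
		rd = 0;
	--count;
	return r;
}

int main(void)
{
	struct report *r = next_wr();
	if (r)
		r->address = 0xc0ffee;
	while ((r = next_rd()))
		printf("report at %#lx\n", r->address);
	printf("lost reports: %u\n", missed);
	return 0;
}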
diff --git a/arch/x86/mm/kmemcheck/error.h b/arch/x86/mm/kmemcheck/error.h new file mode 100644 index 000000000000..0efc2e8d0a20 --- /dev/null +++ b/arch/x86/mm/kmemcheck/error.h | |||
@@ -0,0 +1,15 @@ | |||
1 | #ifndef ARCH__X86__MM__KMEMCHECK__ERROR_H | ||
2 | #define ARCH__X86__MM__KMEMCHECK__ERROR_H | ||
3 | |||
4 | #include <linux/ptrace.h> | ||
5 | |||
6 | #include "shadow.h" | ||
7 | |||
8 | void kmemcheck_error_save(enum kmemcheck_shadow state, | ||
9 | unsigned long address, unsigned int size, struct pt_regs *regs); | ||
10 | |||
11 | void kmemcheck_error_save_bug(struct pt_regs *regs); | ||
12 | |||
13 | void kmemcheck_error_recall(void); | ||
14 | |||
15 | #endif | ||
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c new file mode 100644 index 000000000000..2c55ed098654 --- /dev/null +++ b/arch/x86/mm/kmemcheck/kmemcheck.c | |||
@@ -0,0 +1,640 @@ | |||
1 | /** | ||
2 | * kmemcheck - a heavyweight memory checker for the linux kernel | ||
3 | * Copyright (C) 2007, 2008 Vegard Nossum <vegardno@ifi.uio.no> | ||
4 | * (With a lot of help from Ingo Molnar and Pekka Enberg.) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License (version 2) as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <linux/init.h> | ||
12 | #include <linux/interrupt.h> | ||
13 | #include <linux/kallsyms.h> | ||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/kmemcheck.h> | ||
16 | #include <linux/mm.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/page-flags.h> | ||
19 | #include <linux/percpu.h> | ||
20 | #include <linux/ptrace.h> | ||
21 | #include <linux/string.h> | ||
22 | #include <linux/types.h> | ||
23 | |||
24 | #include <asm/cacheflush.h> | ||
25 | #include <asm/kmemcheck.h> | ||
26 | #include <asm/pgtable.h> | ||
27 | #include <asm/tlbflush.h> | ||
28 | |||
29 | #include "error.h" | ||
30 | #include "opcode.h" | ||
31 | #include "pte.h" | ||
32 | #include "selftest.h" | ||
33 | #include "shadow.h" | ||
34 | |||
35 | |||
36 | #ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT | ||
37 | # define KMEMCHECK_ENABLED 0 | ||
38 | #endif | ||
39 | |||
40 | #ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT | ||
41 | # define KMEMCHECK_ENABLED 1 | ||
42 | #endif | ||
43 | |||
44 | #ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT | ||
45 | # define KMEMCHECK_ENABLED 2 | ||
46 | #endif | ||
47 | |||
48 | int kmemcheck_enabled = KMEMCHECK_ENABLED; | ||
49 | |||
50 | int __init kmemcheck_init(void) | ||
51 | { | ||
52 | #ifdef CONFIG_SMP | ||
53 | /* | ||
54 | * Limit SMP to use a single CPU. We rely on the fact that this code | ||
55 | * runs before SMP is set up. | ||
56 | */ | ||
57 | if (setup_max_cpus > 1) { | ||
58 | printk(KERN_INFO | ||
59 | "kmemcheck: Limiting number of CPUs to 1.\n"); | ||
60 | setup_max_cpus = 1; | ||
61 | } | ||
62 | #endif | ||
63 | |||
64 | if (!kmemcheck_selftest()) { | ||
65 | printk(KERN_INFO "kmemcheck: self-tests failed; disabling\n"); | ||
66 | kmemcheck_enabled = 0; | ||
67 | return -EINVAL; | ||
68 | } | ||
69 | |||
70 | printk(KERN_INFO "kmemcheck: Initialized\n"); | ||
71 | return 0; | ||
72 | } | ||
73 | |||
74 | early_initcall(kmemcheck_init); | ||
75 | |||
76 | /* | ||
77 | * We need to parse the kmemcheck= option before any memory is allocated. | ||
78 | */ | ||
79 | static int __init param_kmemcheck(char *str) | ||
80 | { | ||
81 | if (!str) | ||
82 | return -EINVAL; | ||
83 | |||
84 | sscanf(str, "%d", &kmemcheck_enabled); | ||
85 | return 0; | ||
86 | } | ||
87 | |||
88 | early_param("kmemcheck", param_kmemcheck); | ||
89 | |||
90 | int kmemcheck_show_addr(unsigned long address) | ||
91 | { | ||
92 | pte_t *pte; | ||
93 | |||
94 | pte = kmemcheck_pte_lookup(address); | ||
95 | if (!pte) | ||
96 | return 0; | ||
97 | |||
98 | set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); | ||
99 | __flush_tlb_one(address); | ||
100 | return 1; | ||
101 | } | ||
102 | |||
103 | int kmemcheck_hide_addr(unsigned long address) | ||
104 | { | ||
105 | pte_t *pte; | ||
106 | |||
107 | pte = kmemcheck_pte_lookup(address); | ||
108 | if (!pte) | ||
109 | return 0; | ||
110 | |||
111 | set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); | ||
112 | __flush_tlb_one(address); | ||
113 | return 1; | ||
114 | } | ||
115 | |||
116 | struct kmemcheck_context { | ||
117 | bool busy; | ||
118 | int balance; | ||
119 | |||
120 | /* | ||
121 | * There can be at most two memory operands to an instruction, but | ||
122 | * each address can cross a page boundary -- so we may need up to | ||
123 | * four addresses that must be hidden/revealed for each fault. | ||
124 | */ | ||
125 | unsigned long addr[4]; | ||
126 | unsigned long n_addrs; | ||
127 | unsigned long flags; | ||
128 | |||
129 | /* Data size of the instruction that caused a fault. */ | ||
130 | unsigned int size; | ||
131 | }; | ||
132 | |||
133 | static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context); | ||
134 | |||
135 | bool kmemcheck_active(struct pt_regs *regs) | ||
136 | { | ||
137 | struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); | ||
138 | |||
139 | return data->balance > 0; | ||
140 | } | ||
141 | |||
142 | /* Save an address that needs to be shown/hidden */ | ||
143 | static void kmemcheck_save_addr(unsigned long addr) | ||
144 | { | ||
145 | struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); | ||
146 | |||
147 | BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr)); | ||
148 | data->addr[data->n_addrs++] = addr; | ||
149 | } | ||
150 | |||
151 | static unsigned int kmemcheck_show_all(void) | ||
152 | { | ||
153 | struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); | ||
154 | unsigned int i; | ||
155 | unsigned int n; | ||
156 | |||
157 | n = 0; | ||
158 | for (i = 0; i < data->n_addrs; ++i) | ||
159 | n += kmemcheck_show_addr(data->addr[i]); | ||
160 | |||
161 | return n; | ||
162 | } | ||
163 | |||
164 | static unsigned int kmemcheck_hide_all(void) | ||
165 | { | ||
166 | struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); | ||
167 | unsigned int i; | ||
168 | unsigned int n; | ||
169 | |||
170 | n = 0; | ||
171 | for (i = 0; i < data->n_addrs; ++i) | ||
172 | n += kmemcheck_hide_addr(data->addr[i]); | ||
173 | |||
174 | return n; | ||
175 | } | ||
176 | |||
177 | /* | ||
178 | * Called from the #PF handler. | ||
179 | */ | ||
180 | void kmemcheck_show(struct pt_regs *regs) | ||
181 | { | ||
182 | struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); | ||
183 | |||
184 | BUG_ON(!irqs_disabled()); | ||
185 | |||
186 | if (unlikely(data->balance != 0)) { | ||
187 | kmemcheck_show_all(); | ||
188 | kmemcheck_error_save_bug(regs); | ||
189 | data->balance = 0; | ||
190 | return; | ||
191 | } | ||
192 | |||
193 | /* | ||
194 | * None of the addresses actually belonged to kmemcheck. Note that | ||
195 | * this is not an error. | ||
196 | */ | ||
197 | if (kmemcheck_show_all() == 0) | ||
198 | return; | ||
199 | |||
200 | ++data->balance; | ||
201 | |||
202 | /* | ||
203 | * The IF needs to be cleared as well, so that the faulting | ||
204 | * instruction can run "uninterrupted". Otherwise, we might take | ||
205 | * an interrupt and start executing that before we've had a chance | ||
206 | * to hide the page again. | ||
207 | * | ||
208 | * NOTE: In the rare case of multiple faults, we must not override | ||
209 | * the original flags: | ||
210 | */ | ||
211 | if (!(regs->flags & X86_EFLAGS_TF)) | ||
212 | data->flags = regs->flags; | ||
213 | |||
214 | regs->flags |= X86_EFLAGS_TF; | ||
215 | regs->flags &= ~X86_EFLAGS_IF; | ||
216 | } | ||
217 | |||
218 | /* | ||
219 | * Called from the #DB handler. | ||
220 | */ | ||
221 | void kmemcheck_hide(struct pt_regs *regs) | ||
222 | { | ||
223 | struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); | ||
224 | int n; | ||
225 | |||
226 | BUG_ON(!irqs_disabled()); | ||
227 | |||
228 | if (data->balance == 0) | ||
229 | return; | ||
230 | |||
231 | if (unlikely(data->balance != 1)) { | ||
232 | kmemcheck_show_all(); | ||
233 | kmemcheck_error_save_bug(regs); | ||
234 | data->n_addrs = 0; | ||
235 | data->balance = 0; | ||
236 | |||
237 | if (!(data->flags & X86_EFLAGS_TF)) | ||
238 | regs->flags &= ~X86_EFLAGS_TF; | ||
239 | if (data->flags & X86_EFLAGS_IF) | ||
240 | regs->flags |= X86_EFLAGS_IF; | ||
241 | return; | ||
242 | } | ||
243 | |||
244 | if (kmemcheck_enabled) | ||
245 | n = kmemcheck_hide_all(); | ||
246 | else | ||
247 | n = kmemcheck_show_all(); | ||
248 | |||
249 | if (n == 0) | ||
250 | return; | ||
251 | |||
252 | --data->balance; | ||
253 | |||
254 | data->n_addrs = 0; | ||
255 | |||
256 | if (!(data->flags & X86_EFLAGS_TF)) | ||
257 | regs->flags &= ~X86_EFLAGS_TF; | ||
258 | if (data->flags & X86_EFLAGS_IF) | ||
259 | regs->flags |= X86_EFLAGS_IF; | ||
260 | } | ||
261 | |||
262 | void kmemcheck_show_pages(struct page *p, unsigned int n) | ||
263 | { | ||
264 | unsigned int i; | ||
265 | |||
266 | for (i = 0; i < n; ++i) { | ||
267 | unsigned long address; | ||
268 | pte_t *pte; | ||
269 | unsigned int level; | ||
270 | |||
271 | address = (unsigned long) page_address(&p[i]); | ||
272 | pte = lookup_address(address, &level); | ||
273 | BUG_ON(!pte); | ||
274 | BUG_ON(level != PG_LEVEL_4K); | ||
275 | |||
276 | set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); | ||
277 | set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN)); | ||
278 | __flush_tlb_one(address); | ||
279 | } | ||
280 | } | ||
281 | |||
282 | bool kmemcheck_page_is_tracked(struct page *p) | ||
283 | { | ||
284 | /* This will also check the "hidden" flag of the PTE. */ | ||
285 | return kmemcheck_pte_lookup((unsigned long) page_address(p)); | ||
286 | } | ||
287 | |||
288 | void kmemcheck_hide_pages(struct page *p, unsigned int n) | ||
289 | { | ||
290 | unsigned int i; | ||
291 | |||
292 | for (i = 0; i < n; ++i) { | ||
293 | unsigned long address; | ||
294 | pte_t *pte; | ||
295 | unsigned int level; | ||
296 | |||
297 | address = (unsigned long) page_address(&p[i]); | ||
298 | pte = lookup_address(address, &level); | ||
299 | BUG_ON(!pte); | ||
300 | BUG_ON(level != PG_LEVEL_4K); | ||
301 | |||
302 | set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); | ||
303 | set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN)); | ||
304 | __flush_tlb_one(address); | ||
305 | } | ||
306 | } | ||
307 | |||
308 | /* Access may NOT cross page boundary */ | ||
309 | static void kmemcheck_read_strict(struct pt_regs *regs, | ||
310 | unsigned long addr, unsigned int size) | ||
311 | { | ||
312 | void *shadow; | ||
313 | enum kmemcheck_shadow status; | ||
314 | |||
315 | shadow = kmemcheck_shadow_lookup(addr); | ||
316 | if (!shadow) | ||
317 | return; | ||
318 | |||
319 | kmemcheck_save_addr(addr); | ||
320 | status = kmemcheck_shadow_test(shadow, size); | ||
321 | if (status == KMEMCHECK_SHADOW_INITIALIZED) | ||
322 | return; | ||
323 | |||
324 | if (kmemcheck_enabled) | ||
325 | kmemcheck_error_save(status, addr, size, regs); | ||
326 | |||
327 | if (kmemcheck_enabled == 2) | ||
328 | kmemcheck_enabled = 0; | ||
329 | |||
330 | /* Don't warn about it again. */ | ||
331 | kmemcheck_shadow_set(shadow, size); | ||
332 | } | ||
333 | |||
334 | /* Access may cross page boundary */ | ||
335 | static void kmemcheck_read(struct pt_regs *regs, | ||
336 | unsigned long addr, unsigned int size) | ||
337 | { | ||
338 | unsigned long page = addr & PAGE_MASK; | ||
339 | unsigned long next_addr = addr + size - 1; | ||
340 | unsigned long next_page = next_addr & PAGE_MASK; | ||
341 | |||
342 | if (likely(page == next_page)) { | ||
343 | kmemcheck_read_strict(regs, addr, size); | ||
344 | return; | ||
345 | } | ||
346 | |||
347 | /* | ||
348 | * What we do is basically to split the access across the | ||
349 | * two pages and handle each part separately. Yes, this means | ||
350 | * that we may now see reads that are 3 + 5 bytes, for | ||
351 | * example (and if both are uninitialized, there will be two | ||
352 | * reports), but it makes the code a lot simpler. | ||
353 | */ | ||
354 | kmemcheck_read_strict(regs, addr, next_page - addr); | ||
355 | kmemcheck_read_strict(regs, next_page, next_addr - next_page); | ||
356 | } | ||
357 | |||
358 | static void kmemcheck_write_strict(struct pt_regs *regs, | ||
359 | unsigned long addr, unsigned int size) | ||
360 | { | ||
361 | void *shadow; | ||
362 | |||
363 | shadow = kmemcheck_shadow_lookup(addr); | ||
364 | if (!shadow) | ||
365 | return; | ||
366 | |||
367 | kmemcheck_save_addr(addr); | ||
368 | kmemcheck_shadow_set(shadow, size); | ||
369 | } | ||
370 | |||
371 | static void kmemcheck_write(struct pt_regs *regs, | ||
372 | unsigned long addr, unsigned int size) | ||
373 | { | ||
374 | unsigned long page = addr & PAGE_MASK; | ||
375 | unsigned long next_addr = addr + size - 1; | ||
376 | unsigned long next_page = next_addr & PAGE_MASK; | ||
377 | |||
378 | if (likely(page == next_page)) { | ||
379 | kmemcheck_write_strict(regs, addr, size); | ||
380 | return; | ||
381 | } | ||
382 | |||
383 | /* See comment in kmemcheck_read(). */ | ||
384 | kmemcheck_write_strict(regs, addr, next_page - addr); | ||
385 | kmemcheck_write_strict(regs, next_page, next_addr - next_page); | ||
386 | } | ||
387 | |||
388 | /* | ||
389 | * Copying is hard. We have two addresses, each of which may be split across | ||
390 | * a page (and each page will have different shadow addresses). | ||
391 | */ | ||
392 | static void kmemcheck_copy(struct pt_regs *regs, | ||
393 | unsigned long src_addr, unsigned long dst_addr, unsigned int size) | ||
394 | { | ||
395 | uint8_t shadow[8]; | ||
396 | enum kmemcheck_shadow status; | ||
397 | |||
398 | unsigned long page; | ||
399 | unsigned long next_addr; | ||
400 | unsigned long next_page; | ||
401 | |||
402 | uint8_t *x; | ||
403 | unsigned int i; | ||
404 | unsigned int n; | ||
405 | |||
406 | BUG_ON(size > sizeof(shadow)); | ||
407 | |||
408 | page = src_addr & PAGE_MASK; | ||
409 | next_addr = src_addr + size - 1; | ||
410 | next_page = next_addr & PAGE_MASK; | ||
411 | |||
412 | if (likely(page == next_page)) { | ||
413 | /* Same page */ | ||
414 | x = kmemcheck_shadow_lookup(src_addr); | ||
415 | if (x) { | ||
416 | kmemcheck_save_addr(src_addr); | ||
417 | for (i = 0; i < size; ++i) | ||
418 | shadow[i] = x[i]; | ||
419 | } else { | ||
420 | for (i = 0; i < size; ++i) | ||
421 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
422 | } | ||
423 | } else { | ||
424 | n = next_page - src_addr; | ||
425 | BUG_ON(n > sizeof(shadow)); | ||
426 | |||
427 | /* First page */ | ||
428 | x = kmemcheck_shadow_lookup(src_addr); | ||
429 | if (x) { | ||
430 | kmemcheck_save_addr(src_addr); | ||
431 | for (i = 0; i < n; ++i) | ||
432 | shadow[i] = x[i]; | ||
433 | } else { | ||
434 | /* Not tracked */ | ||
435 | for (i = 0; i < n; ++i) | ||
436 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
437 | } | ||
438 | |||
439 | /* Second page */ | ||
440 | x = kmemcheck_shadow_lookup(next_page); | ||
441 | if (x) { | ||
442 | kmemcheck_save_addr(next_page); | ||
443 | for (i = n; i < size; ++i) | ||
444 | shadow[i] = x[i - n]; | ||
445 | } else { | ||
446 | /* Not tracked */ | ||
447 | for (i = n; i < size; ++i) | ||
448 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
449 | } | ||
450 | } | ||
451 | |||
452 | page = dst_addr & PAGE_MASK; | ||
453 | next_addr = dst_addr + size - 1; | ||
454 | next_page = next_addr & PAGE_MASK; | ||
455 | |||
456 | if (likely(page == next_page)) { | ||
457 | /* Same page */ | ||
458 | x = kmemcheck_shadow_lookup(dst_addr); | ||
459 | if (x) { | ||
460 | kmemcheck_save_addr(dst_addr); | ||
461 | for (i = 0; i < size; ++i) { | ||
462 | x[i] = shadow[i]; | ||
463 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
464 | } | ||
465 | } | ||
466 | } else { | ||
467 | n = next_page - dst_addr; | ||
468 | BUG_ON(n > sizeof(shadow)); | ||
469 | |||
470 | /* First page */ | ||
471 | x = kmemcheck_shadow_lookup(dst_addr); | ||
472 | if (x) { | ||
473 | kmemcheck_save_addr(dst_addr); | ||
474 | for (i = 0; i < n; ++i) { | ||
475 | x[i] = shadow[i]; | ||
476 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
477 | } | ||
478 | } | ||
479 | |||
480 | /* Second page */ | ||
481 | x = kmemcheck_shadow_lookup(next_page); | ||
482 | if (x) { | ||
483 | kmemcheck_save_addr(next_page); | ||
484 | for (i = n; i < size; ++i) { | ||
485 | x[i - n] = shadow[i]; | ||
486 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
487 | } | ||
488 | } | ||
489 | } | ||
490 | |||
491 | status = kmemcheck_shadow_test(shadow, size); | ||
492 | if (status == KMEMCHECK_SHADOW_INITIALIZED) | ||
493 | return; | ||
494 | |||
495 | if (kmemcheck_enabled) | ||
496 | kmemcheck_error_save(status, src_addr, size, regs); | ||
497 | |||
498 | if (kmemcheck_enabled == 2) | ||
499 | kmemcheck_enabled = 0; | ||
500 | } | ||
501 | |||
502 | enum kmemcheck_method { | ||
503 | KMEMCHECK_READ, | ||
504 | KMEMCHECK_WRITE, | ||
505 | }; | ||
506 | |||
507 | static void kmemcheck_access(struct pt_regs *regs, | ||
508 | unsigned long fallback_address, enum kmemcheck_method fallback_method) | ||
509 | { | ||
510 | const uint8_t *insn; | ||
511 | const uint8_t *insn_primary; | ||
512 | unsigned int size; | ||
513 | |||
514 | struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); | ||
515 | |||
516 | /* Recursive fault -- ouch. */ | ||
517 | if (data->busy) { | ||
518 | kmemcheck_show_addr(fallback_address); | ||
519 | kmemcheck_error_save_bug(regs); | ||
520 | return; | ||
521 | } | ||
522 | |||
523 | data->busy = true; | ||
524 | |||
525 | insn = (const uint8_t *) regs->ip; | ||
526 | insn_primary = kmemcheck_opcode_get_primary(insn); | ||
527 | |||
528 | kmemcheck_opcode_decode(insn, &size); | ||
529 | |||
530 | switch (insn_primary[0]) { | ||
531 | #ifdef CONFIG_KMEMCHECK_BITOPS_OK | ||
532 | /* AND, OR, XOR */ | ||
533 | /* | ||
534 | * Unfortunately, these instructions have to be excluded from | ||
535 | * our regular checking since they access only some (and not | ||
536 | * all) bits. This clears out "bogus" bitfield-access warnings. | ||
537 | */ | ||
538 | case 0x80: | ||
539 | case 0x81: | ||
540 | case 0x82: | ||
541 | case 0x83: | ||
542 | switch ((insn_primary[1] >> 3) & 7) { | ||
543 | /* OR */ | ||
544 | case 1: | ||
545 | /* AND */ | ||
546 | case 4: | ||
547 | /* XOR */ | ||
548 | case 6: | ||
549 | kmemcheck_write(regs, fallback_address, size); | ||
550 | goto out; | ||
551 | |||
552 | /* ADD */ | ||
553 | case 0: | ||
554 | /* ADC */ | ||
555 | case 2: | ||
556 | /* SBB */ | ||
557 | case 3: | ||
558 | /* SUB */ | ||
559 | case 5: | ||
560 | /* CMP */ | ||
561 | case 7: | ||
562 | break; | ||
563 | } | ||
564 | break; | ||
565 | #endif | ||
566 | |||
567 | /* MOVS, MOVSB, MOVSW, MOVSD */ | ||
568 | case 0xa4: | ||
569 | case 0xa5: | ||
570 | /* | ||
571 | * These instructions are special because they take two | ||
572 | * addresses, but we only get one page fault. | ||
573 | */ | ||
574 | kmemcheck_copy(regs, regs->si, regs->di, size); | ||
575 | goto out; | ||
576 | |||
577 | /* CMPS, CMPSB, CMPSW, CMPSD */ | ||
578 | case 0xa6: | ||
579 | case 0xa7: | ||
580 | kmemcheck_read(regs, regs->si, size); | ||
581 | kmemcheck_read(regs, regs->di, size); | ||
582 | goto out; | ||
583 | } | ||
584 | |||
585 | /* | ||
586 | * If the opcode isn't special in any way, we use the data from the | ||
587 | * page fault handler to determine the address and type of memory | ||
588 | * access. | ||
589 | */ | ||
590 | switch (fallback_method) { | ||
591 | case KMEMCHECK_READ: | ||
592 | kmemcheck_read(regs, fallback_address, size); | ||
593 | goto out; | ||
594 | case KMEMCHECK_WRITE: | ||
595 | kmemcheck_write(regs, fallback_address, size); | ||
596 | goto out; | ||
597 | } | ||
598 | |||
599 | out: | ||
600 | data->busy = false; | ||
601 | } | ||
602 | |||
603 | bool kmemcheck_fault(struct pt_regs *regs, unsigned long address, | ||
604 | unsigned long error_code) | ||
605 | { | ||
606 | pte_t *pte; | ||
607 | |||
608 | /* | ||
609 | * XXX: Is it safe to assume that memory accesses from virtual 86 | ||
610 | * mode or non-kernel code segments will _never_ access kernel | ||
611 | * memory (e.g. tracked pages)? For now, we need this to avoid | ||
612 | * invoking kmemcheck for PnP BIOS calls. | ||
613 | */ | ||
614 | if (regs->flags & X86_VM_MASK) | ||
615 | return false; | ||
616 | if (regs->cs != __KERNEL_CS) | ||
617 | return false; | ||
618 | |||
619 | pte = kmemcheck_pte_lookup(address); | ||
620 | if (!pte) | ||
621 | return false; | ||
622 | |||
623 | if (error_code & 2) | ||
624 | kmemcheck_access(regs, address, KMEMCHECK_WRITE); | ||
625 | else | ||
626 | kmemcheck_access(regs, address, KMEMCHECK_READ); | ||
627 | |||
628 | kmemcheck_show(regs); | ||
629 | return true; | ||
630 | } | ||
631 | |||
632 | bool kmemcheck_trap(struct pt_regs *regs) | ||
633 | { | ||
634 | if (!kmemcheck_active(regs)) | ||
635 | return false; | ||
636 | |||
637 | /* We're done. */ | ||
638 | kmemcheck_hide(regs); | ||
639 | return true; | ||
640 | } | ||
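kmemcheck.c ties the mechanism together: tracked pages are kept non-present, the #PF side (kmemcheck_fault) decodes the faulting instruction, updates shadow, temporarily re-maps the hidden page(s), and sets TF with IF cleared so that exactly one instruction runs; the #DB side (kmemcheck_trap) then hides the pages again and restores the saved flags. A hedged sketch of how the arch fault paths are expected to call in (the wrapper names and bodies below are simplified stand-ins, not the real do_page_fault()/do_debug() code; only the kmemcheck_*() calls are real entry points):

static int page_fault_hook(struct pt_regs *regs, unsigned long address,
			   unsigned long error_code)
{
	/* If a single-step is still pending, finish it before anything else. */
	if (kmemcheck_active(regs))
		kmemcheck_hide(regs);

	/* Returns true when the fault hit a kmemcheck-hidden page: the page
	 * has been shown and TF set, so the instruction can now be retried. */
	if (kmemcheck_fault(regs, address, error_code))
		return 1;

	return 0;	/* a genuine fault, handle it normally */
}

static int debug_trap_hook(struct pt_regs *regs)
{
	/* After the single-stepped instruction retires, hide the page again. */
	return kmemcheck_trap(regs);
}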
diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c new file mode 100644 index 000000000000..63c19e27aa6f --- /dev/null +++ b/arch/x86/mm/kmemcheck/opcode.c | |||
@@ -0,0 +1,106 @@ | |||
1 | #include <linux/types.h> | ||
2 | |||
3 | #include "opcode.h" | ||
4 | |||
5 | static bool opcode_is_prefix(uint8_t b) | ||
6 | { | ||
7 | return | ||
8 | /* Group 1 */ | ||
9 | b == 0xf0 || b == 0xf2 || b == 0xf3 | ||
10 | /* Group 2 */ | ||
11 | || b == 0x2e || b == 0x36 || b == 0x3e || b == 0x26 | ||
12 | || b == 0x64 || b == 0x65 | ||
13 | /* Group 3 */ | ||
14 | || b == 0x66 | ||
15 | /* Group 4 */ | ||
16 | || b == 0x67; | ||
17 | } | ||
18 | |||
19 | #ifdef CONFIG_X86_64 | ||
20 | static bool opcode_is_rex_prefix(uint8_t b) | ||
21 | { | ||
22 | return (b & 0xf0) == 0x40; | ||
23 | } | ||
24 | #else | ||
25 | static bool opcode_is_rex_prefix(uint8_t b) | ||
26 | { | ||
27 | return false; | ||
28 | } | ||
29 | #endif | ||
30 | |||
31 | #define REX_W (1 << 3) | ||
32 | |||
33 | /* | ||
34 | * This is a VERY crude opcode decoder. We only need to find the size of the | ||
35 | * load/store that caused our #PF and this should work for all the opcodes | ||
36 | * that we care about. Moreover, the ones who invented this instruction set | ||
37 | * should be shot. | ||
38 | */ | ||
39 | void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size) | ||
40 | { | ||
41 | /* Default operand size */ | ||
42 | int operand_size_override = 4; | ||
43 | |||
44 | /* prefixes */ | ||
45 | for (; opcode_is_prefix(*op); ++op) { | ||
46 | if (*op == 0x66) | ||
47 | operand_size_override = 2; | ||
48 | } | ||
49 | |||
50 | /* REX prefix */ | ||
51 | if (opcode_is_rex_prefix(*op)) { | ||
52 | uint8_t rex = *op; | ||
53 | |||
54 | ++op; | ||
55 | if (rex & REX_W) { | ||
56 | switch (*op) { | ||
57 | case 0x63: | ||
58 | *size = 4; | ||
59 | return; | ||
60 | case 0x0f: | ||
61 | ++op; | ||
62 | |||
63 | switch (*op) { | ||
64 | case 0xb6: | ||
65 | case 0xbe: | ||
66 | *size = 1; | ||
67 | return; | ||
68 | case 0xb7: | ||
69 | case 0xbf: | ||
70 | *size = 2; | ||
71 | return; | ||
72 | } | ||
73 | |||
74 | break; | ||
75 | } | ||
76 | |||
77 | *size = 8; | ||
78 | return; | ||
79 | } | ||
80 | } | ||
81 | |||
82 | /* escape opcode */ | ||
83 | if (*op == 0x0f) { | ||
84 | ++op; | ||
85 | |||
86 | /* | ||
87 | * This is move with zero-extend and sign-extend, respectively; | ||
88 | * we don't have to think about 0xb6/0xbe, because this is | ||
89 | * already handled in the conditional below. | ||
90 | */ | ||
91 | if (*op == 0xb7 || *op == 0xbf) | ||
92 | operand_size_override = 2; | ||
93 | } | ||
94 | |||
95 | *size = (*op & 1) ? operand_size_override : 1; | ||
96 | } | ||
97 | |||
98 | const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op) | ||
99 | { | ||
100 | /* skip prefixes */ | ||
101 | while (opcode_is_prefix(*op)) | ||
102 | ++op; | ||
103 | if (opcode_is_rex_prefix(*op)) | ||
104 | ++op; | ||
105 | return op; | ||
106 | } | ||
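The decoder only answers one question: how wide is the memory access? The operand size defaults to 4 bytes, drops to 2 under a 0x66 prefix, becomes 8 under REX.W on x86_64, and becomes 1 when the opcode's low bit is clear; MOVZX/MOVSX are special-cased because their memory operand is narrower than the destination register. A few worked examples plus a usage sketch (the byte strings are hand-assembled for illustration and assume the decoder above is linked in):

/*   88 11        mov %dl,(%ecx)        -> size 1  (low opcode bit clear)
 *   89 11        mov %edx,(%ecx)       -> size 4  (default operand size)
 *   66 89 11     mov %dx,(%ecx)        -> size 2  (0x66 operand-size prefix)
 *   48 89 11     mov %rdx,(%rcx)       -> size 8  (REX.W, x86_64 only)
 *   0f b7 11     movzwl (%ecx),%edx    -> size 2  (16-bit MOVZX source)
 */
static void decode_demo(void)
{
	unsigned int size;

	kmemcheck_opcode_decode((const uint8_t *) "\x66\x89\x11", &size);
	/* size == 2 at this point */
}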
diff --git a/arch/x86/mm/kmemcheck/opcode.h b/arch/x86/mm/kmemcheck/opcode.h new file mode 100644 index 000000000000..6956aad66b5b --- /dev/null +++ b/arch/x86/mm/kmemcheck/opcode.h | |||
@@ -0,0 +1,9 @@ | |||
1 | #ifndef ARCH__X86__MM__KMEMCHECK__OPCODE_H | ||
2 | #define ARCH__X86__MM__KMEMCHECK__OPCODE_H | ||
3 | |||
4 | #include <linux/types.h> | ||
5 | |||
6 | void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size); | ||
7 | const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op); | ||
8 | |||
9 | #endif | ||
diff --git a/arch/x86/mm/kmemcheck/pte.c b/arch/x86/mm/kmemcheck/pte.c new file mode 100644 index 000000000000..4ead26eeaf96 --- /dev/null +++ b/arch/x86/mm/kmemcheck/pte.c | |||
@@ -0,0 +1,22 @@ | |||
1 | #include <linux/mm.h> | ||
2 | |||
3 | #include <asm/pgtable.h> | ||
4 | |||
5 | #include "pte.h" | ||
6 | |||
7 | pte_t *kmemcheck_pte_lookup(unsigned long address) | ||
8 | { | ||
9 | pte_t *pte; | ||
10 | unsigned int level; | ||
11 | |||
12 | pte = lookup_address(address, &level); | ||
13 | if (!pte) | ||
14 | return NULL; | ||
15 | if (level != PG_LEVEL_4K) | ||
16 | return NULL; | ||
17 | if (!pte_hidden(*pte)) | ||
18 | return NULL; | ||
19 | |||
20 | return pte; | ||
21 | } | ||
22 | |||
diff --git a/arch/x86/mm/kmemcheck/pte.h b/arch/x86/mm/kmemcheck/pte.h new file mode 100644 index 000000000000..9f5966456492 --- /dev/null +++ b/arch/x86/mm/kmemcheck/pte.h | |||
@@ -0,0 +1,10 @@ | |||
1 | #ifndef ARCH__X86__MM__KMEMCHECK__PTE_H | ||
2 | #define ARCH__X86__MM__KMEMCHECK__PTE_H | ||
3 | |||
4 | #include <linux/mm.h> | ||
5 | |||
6 | #include <asm/pgtable.h> | ||
7 | |||
8 | pte_t *kmemcheck_pte_lookup(unsigned long address); | ||
9 | |||
10 | #endif | ||
diff --git a/arch/x86/mm/kmemcheck/selftest.c b/arch/x86/mm/kmemcheck/selftest.c new file mode 100644 index 000000000000..036efbea8b28 --- /dev/null +++ b/arch/x86/mm/kmemcheck/selftest.c | |||
@@ -0,0 +1,69 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | |||
3 | #include "opcode.h" | ||
4 | #include "selftest.h" | ||
5 | |||
6 | struct selftest_opcode { | ||
7 | unsigned int expected_size; | ||
8 | const uint8_t *insn; | ||
9 | const char *desc; | ||
10 | }; | ||
11 | |||
12 | static const struct selftest_opcode selftest_opcodes[] = { | ||
13 | /* REP MOVS */ | ||
14 | {1, "\xf3\xa4", "rep movsb <mem8>, <mem8>"}, | ||
15 | {4, "\xf3\xa5", "rep movsl <mem32>, <mem32>"}, | ||
16 | |||
17 | /* MOVZX / MOVZXD */ | ||
18 | {1, "\x66\x0f\xb6\x51\xf8", "movzwq <mem8>, <reg16>"}, | ||
19 | {1, "\x0f\xb6\x51\xf8", "movzwq <mem8>, <reg32>"}, | ||
20 | |||
21 | /* MOVSX / MOVSXD */ | ||
22 | {1, "\x66\x0f\xbe\x51\xf8", "movswq <mem8>, <reg16>"}, | ||
23 | {1, "\x0f\xbe\x51\xf8", "movswq <mem8>, <reg32>"}, | ||
24 | |||
25 | #ifdef CONFIG_X86_64 | ||
26 | /* MOVZX / MOVZXD */ | ||
27 | {1, "\x49\x0f\xb6\x51\xf8", "movzbq <mem8>, <reg64>"}, | ||
28 | {2, "\x49\x0f\xb7\x51\xf8", "movzbq <mem16>, <reg64>"}, | ||
29 | |||
30 | /* MOVSX / MOVSXD */ | ||
31 | {1, "\x49\x0f\xbe\x51\xf8", "movsbq <mem8>, <reg64>"}, | ||
32 | {2, "\x49\x0f\xbf\x51\xf8", "movsbq <mem16>, <reg64>"}, | ||
33 | {4, "\x49\x63\x51\xf8", "movslq <mem32>, <reg64>"}, | ||
34 | #endif | ||
35 | }; | ||
36 | |||
37 | static bool selftest_opcode_one(const struct selftest_opcode *op) | ||
38 | { | ||
39 | unsigned size; | ||
40 | |||
41 | kmemcheck_opcode_decode(op->insn, &size); | ||
42 | |||
43 | if (size == op->expected_size) | ||
44 | return true; | ||
45 | |||
46 | printk(KERN_WARNING "kmemcheck: opcode %s: expected size %d, got %d\n", | ||
47 | op->desc, op->expected_size, size); | ||
48 | return false; | ||
49 | } | ||
50 | |||
51 | static bool selftest_opcodes_all(void) | ||
52 | { | ||
53 | bool pass = true; | ||
54 | unsigned int i; | ||
55 | |||
56 | for (i = 0; i < ARRAY_SIZE(selftest_opcodes); ++i) | ||
57 | pass = pass && selftest_opcode_one(&selftest_opcodes[i]); | ||
58 | |||
59 | return pass; | ||
60 | } | ||
61 | |||
62 | bool kmemcheck_selftest(void) | ||
63 | { | ||
64 | bool pass = true; | ||
65 | |||
66 | pass = pass && selftest_opcodes_all(); | ||
67 | |||
68 | return pass; | ||
69 | } | ||
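kmemcheck_selftest() feeds hand-assembled instruction bytes through the decoder and compares the reported width with the expected one; if any entry mismatches, kmemcheck_init() warns and disables the whole checker. Adding coverage is just a matter of extending the table. As a purely hypothetical example entry (bytes assembled by hand, not part of the kernel's table), a 16-bit MOV through the 0x66 prefix would look like:

	/* movw -0x8(%ecx),%dx: the 0x66 prefix should yield a 2-byte access */
	{2, "\x66\x8b\x51\xf8", "movw <mem16>, <reg16>"},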
diff --git a/arch/x86/mm/kmemcheck/selftest.h b/arch/x86/mm/kmemcheck/selftest.h new file mode 100644 index 000000000000..8fed4fe11f95 --- /dev/null +++ b/arch/x86/mm/kmemcheck/selftest.h | |||
@@ -0,0 +1,6 @@ | |||
1 | #ifndef ARCH_X86_MM_KMEMCHECK_SELFTEST_H | ||
2 | #define ARCH_X86_MM_KMEMCHECK_SELFTEST_H | ||
3 | |||
4 | bool kmemcheck_selftest(void); | ||
5 | |||
6 | #endif | ||
diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c new file mode 100644 index 000000000000..e773b6bd0079 --- /dev/null +++ b/arch/x86/mm/kmemcheck/shadow.c | |||
@@ -0,0 +1,162 @@ | |||
1 | #include <linux/kmemcheck.h> | ||
2 | #include <linux/module.h> | ||
3 | #include <linux/mm.h> | ||
4 | #include <linux/module.h> | ||
5 | |||
6 | #include <asm/page.h> | ||
7 | #include <asm/pgtable.h> | ||
8 | |||
9 | #include "pte.h" | ||
10 | #include "shadow.h" | ||
11 | |||
12 | /* | ||
13 | * Return the shadow address for the given address. Returns NULL if the | ||
14 | * address is not tracked. | ||
15 | * | ||
16 | * We need to be extremely careful not to follow any invalid pointers, | ||
17 | * because this function can be called for *any* possible address. | ||
18 | */ | ||
19 | void *kmemcheck_shadow_lookup(unsigned long address) | ||
20 | { | ||
21 | pte_t *pte; | ||
22 | struct page *page; | ||
23 | |||
24 | if (!virt_addr_valid(address)) | ||
25 | return NULL; | ||
26 | |||
27 | pte = kmemcheck_pte_lookup(address); | ||
28 | if (!pte) | ||
29 | return NULL; | ||
30 | |||
31 | page = virt_to_page(address); | ||
32 | if (!page->shadow) | ||
33 | return NULL; | ||
34 | return page->shadow + (address & (PAGE_SIZE - 1)); | ||
35 | } | ||
36 | |||
37 | static void mark_shadow(void *address, unsigned int n, | ||
38 | enum kmemcheck_shadow status) | ||
39 | { | ||
40 | unsigned long addr = (unsigned long) address; | ||
41 | unsigned long last_addr = addr + n - 1; | ||
42 | unsigned long page = addr & PAGE_MASK; | ||
43 | unsigned long last_page = last_addr & PAGE_MASK; | ||
44 | unsigned int first_n; | ||
45 | void *shadow; | ||
46 | |||
47 | /* If the memory range crosses a page boundary, stop there. */ | ||
48 | if (page == last_page) | ||
49 | first_n = n; | ||
50 | else | ||
51 | first_n = page + PAGE_SIZE - addr; | ||
52 | |||
53 | shadow = kmemcheck_shadow_lookup(addr); | ||
54 | if (shadow) | ||
55 | memset(shadow, status, first_n); | ||
56 | |||
57 | addr += first_n; | ||
58 | n -= first_n; | ||
59 | |||
60 | /* Do full-page memset()s. */ | ||
61 | while (n >= PAGE_SIZE) { | ||
62 | shadow = kmemcheck_shadow_lookup(addr); | ||
63 | if (shadow) | ||
64 | memset(shadow, status, PAGE_SIZE); | ||
65 | |||
66 | addr += PAGE_SIZE; | ||
67 | n -= PAGE_SIZE; | ||
68 | } | ||
69 | |||
70 | /* Do the remaining page, if any. */ | ||
71 | if (n > 0) { | ||
72 | shadow = kmemcheck_shadow_lookup(addr); | ||
73 | if (shadow) | ||
74 | memset(shadow, status, n); | ||
75 | } | ||
76 | } | ||
77 | |||
78 | void kmemcheck_mark_unallocated(void *address, unsigned int n) | ||
79 | { | ||
80 | mark_shadow(address, n, KMEMCHECK_SHADOW_UNALLOCATED); | ||
81 | } | ||
82 | |||
83 | void kmemcheck_mark_uninitialized(void *address, unsigned int n) | ||
84 | { | ||
85 | mark_shadow(address, n, KMEMCHECK_SHADOW_UNINITIALIZED); | ||
86 | } | ||
87 | |||
88 | /* | ||
89 | * Fill the shadow memory of the given address such that the memory at that | ||
90 | * address is marked as being initialized. | ||
91 | */ | ||
92 | void kmemcheck_mark_initialized(void *address, unsigned int n) | ||
93 | { | ||
94 | mark_shadow(address, n, KMEMCHECK_SHADOW_INITIALIZED); | ||
95 | } | ||
96 | EXPORT_SYMBOL_GPL(kmemcheck_mark_initialized); | ||
97 | |||
98 | void kmemcheck_mark_freed(void *address, unsigned int n) | ||
99 | { | ||
100 | mark_shadow(address, n, KMEMCHECK_SHADOW_FREED); | ||
101 | } | ||
102 | |||
103 | void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n) | ||
104 | { | ||
105 | unsigned int i; | ||
106 | |||
107 | for (i = 0; i < n; ++i) | ||
108 | kmemcheck_mark_unallocated(page_address(&p[i]), PAGE_SIZE); | ||
109 | } | ||
110 | |||
111 | void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n) | ||
112 | { | ||
113 | unsigned int i; | ||
114 | |||
115 | for (i = 0; i < n; ++i) | ||
116 | kmemcheck_mark_uninitialized(page_address(&p[i]), PAGE_SIZE); | ||
117 | } | ||
118 | |||
119 | void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n) | ||
120 | { | ||
121 | unsigned int i; | ||
122 | |||
123 | for (i = 0; i < n; ++i) | ||
124 | kmemcheck_mark_initialized(page_address(&p[i]), PAGE_SIZE); | ||
125 | } | ||
126 | |||
127 | enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size) | ||
128 | { | ||
129 | uint8_t *x; | ||
130 | unsigned int i; | ||
131 | |||
132 | x = shadow; | ||
133 | |||
134 | #ifdef CONFIG_KMEMCHECK_PARTIAL_OK | ||
135 | /* | ||
136 | * Make sure _some_ bytes are initialized. Gcc frequently generates | ||
137 | * code to access neighboring bytes. | ||
138 | */ | ||
139 | for (i = 0; i < size; ++i) { | ||
140 | if (x[i] == KMEMCHECK_SHADOW_INITIALIZED) | ||
141 | return x[i]; | ||
142 | } | ||
143 | #else | ||
144 | /* All bytes must be initialized. */ | ||
145 | for (i = 0; i < size; ++i) { | ||
146 | if (x[i] != KMEMCHECK_SHADOW_INITIALIZED) | ||
147 | return x[i]; | ||
148 | } | ||
149 | #endif | ||
150 | |||
151 | return x[0]; | ||
152 | } | ||
153 | |||
154 | void kmemcheck_shadow_set(void *shadow, unsigned int size) | ||
155 | { | ||
156 | uint8_t *x; | ||
157 | unsigned int i; | ||
158 | |||
159 | x = shadow; | ||
160 | for (i = 0; i < size; ++i) | ||
161 | x[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
162 | } | ||
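Every tracked data byte is paired with one shadow byte holding a kmemcheck_shadow state; page->shadow points at the shadow page, so kmemcheck_shadow_lookup() is simply "same offset, other page", and mark_shadow() splits ranges at page boundaries because neighbouring pages may have different shadow (or none at all). The state machine itself is small enough to model in a self-contained userspace sketch (the names and the 16-byte object are mine, not the kernel's):

#include <stdio.h>
#include <string.h>

enum shadow { UNALLOCATED, UNINITIALIZED, INITIALIZED, FREED };

static unsigned char object[16];	/* the tracked data */
static unsigned char shadow[16];	/* one state byte per data byte */

/* A read is clean only if every byte is INITIALIZED (this models the strict,
 * non-CONFIG_KMEMCHECK_PARTIAL_OK behaviour of kmemcheck_shadow_test()). */
static enum shadow check_read(unsigned int off, unsigned int size)
{
	for (unsigned int i = 0; i < size; ++i)
		if (shadow[off + i] != INITIALIZED)
			return shadow[off + i];
	return INITIALIZED;
}

int main(void)
{
	memset(shadow, UNINITIALIZED, sizeof(shadow));	/* fresh allocation */

	object[0] = 42;			/* a 1-byte write ...          */
	shadow[0] = INITIALIZED;	/* ... initializes its shadow  */

	/* A 4-byte read at offset 0 still trips over bytes 1..3. */
	printf("state = %d (%d means initialized)\n",
	       check_read(0, 4), INITIALIZED);
	return 0;
}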
diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h new file mode 100644 index 000000000000..af46d9ab9d86 --- /dev/null +++ b/arch/x86/mm/kmemcheck/shadow.h | |||
@@ -0,0 +1,16 @@ | |||
1 | #ifndef ARCH__X86__MM__KMEMCHECK__SHADOW_H | ||
2 | #define ARCH__X86__MM__KMEMCHECK__SHADOW_H | ||
3 | |||
4 | enum kmemcheck_shadow { | ||
5 | KMEMCHECK_SHADOW_UNALLOCATED, | ||
6 | KMEMCHECK_SHADOW_UNINITIALIZED, | ||
7 | KMEMCHECK_SHADOW_INITIALIZED, | ||
8 | KMEMCHECK_SHADOW_FREED, | ||
9 | }; | ||
10 | |||
11 | void *kmemcheck_shadow_lookup(unsigned long address); | ||
12 | |||
13 | enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size); | ||
14 | void kmemcheck_shadow_set(void *shadow, unsigned int size); | ||
15 | |||
16 | #endif | ||
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 6ce9518fe2ac..3cfe9ced8a4c 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -470,7 +470,7 @@ static int split_large_page(pte_t *kpte, unsigned long address) | |||
470 | 470 | ||
471 | if (!debug_pagealloc) | 471 | if (!debug_pagealloc) |
472 | spin_unlock(&cpa_lock); | 472 | spin_unlock(&cpa_lock); |
473 | base = alloc_pages(GFP_KERNEL, 0); | 473 | base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0); |
474 | if (!debug_pagealloc) | 474 | if (!debug_pagealloc) |
475 | spin_lock(&cpa_lock); | 475 | spin_lock(&cpa_lock); |
476 | if (!base) | 476 | if (!base) |
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 7aa03a5389f5..8e43bdd45456 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
@@ -4,9 +4,11 @@ | |||
4 | #include <asm/tlb.h> | 4 | #include <asm/tlb.h> |
5 | #include <asm/fixmap.h> | 5 | #include <asm/fixmap.h> |
6 | 6 | ||
7 | #define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO | ||
8 | |||
7 | pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) | 9 | pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) |
8 | { | 10 | { |
9 | return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); | 11 | return (pte_t *)__get_free_page(PGALLOC_GFP); |
10 | } | 12 | } |
11 | 13 | ||
12 | pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) | 14 | pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) |
@@ -14,9 +16,9 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) | |||
14 | struct page *pte; | 16 | struct page *pte; |
15 | 17 | ||
16 | #ifdef CONFIG_HIGHPTE | 18 | #ifdef CONFIG_HIGHPTE |
17 | pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0); | 19 | pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0); |
18 | #else | 20 | #else |
19 | pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); | 21 | pte = alloc_pages(PGALLOC_GFP, 0); |
20 | #endif | 22 | #endif |
21 | if (pte) | 23 | if (pte) |
22 | pgtable_page_ctor(pte); | 24 | pgtable_page_ctor(pte); |
@@ -161,7 +163,7 @@ static int preallocate_pmds(pmd_t *pmds[]) | |||
161 | bool failed = false; | 163 | bool failed = false; |
162 | 164 | ||
163 | for(i = 0; i < PREALLOCATED_PMDS; i++) { | 165 | for(i = 0; i < PREALLOCATED_PMDS; i++) { |
164 | pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); | 166 | pmd_t *pmd = (pmd_t *)__get_free_page(PGALLOC_GFP); |
165 | if (pmd == NULL) | 167 | if (pmd == NULL) |
166 | failed = true; | 168 | failed = true; |
167 | pmds[i] = pmd; | 169 | pmds[i] = pmd; |
@@ -228,7 +230,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) | |||
228 | pmd_t *pmds[PREALLOCATED_PMDS]; | 230 | pmd_t *pmds[PREALLOCATED_PMDS]; |
229 | unsigned long flags; | 231 | unsigned long flags; |
230 | 232 | ||
231 | pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); | 233 | pgd = (pgd_t *)__get_free_page(PGALLOC_GFP); |
232 | 234 | ||
233 | if (pgd == NULL) | 235 | if (pgd == NULL) |
234 | goto out; | 236 | goto out; |
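All page-table levels now allocate through a single PGALLOC_GFP, and the important addition for this series is __GFP_NOTRACK: page tables must remain walkable while a tracked page is non-present, so they can never be shadowed themselves. An illustrative breakdown of what each flag contributes (the GFP semantics are standard; the __GFP_NOTRACK meaning is what this commit introduces):

/* PGALLOC_GFP == GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
 *
 *   GFP_KERNEL     normal kernel allocation, may sleep
 *   __GFP_NOTRACK  never attach kmemcheck shadow to this page
 *   __GFP_REPEAT   retry harder; a failed page-table page fails the caller
 *   __GFP_ZERO     the table must start with no valid entries
 */
pte_t *pte = (pte_t *) __get_free_page(PGALLOC_GFP);

One side effect visible in the hunks above: pgd_alloc() previously used GFP_KERNEL | __GFP_ZERO without __GFP_REPEAT, so its allocation becomes slightly more persistent under the shared macro.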
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index a85bef20a3b9..0fb56db16d18 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c | |||
@@ -116,7 +116,7 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) | |||
116 | struct pci_bus *bus; | 116 | struct pci_bus *bus; |
117 | struct pci_dev *dev; | 117 | struct pci_dev *dev; |
118 | int idx; | 118 | int idx; |
119 | struct resource *r, *pr; | 119 | struct resource *r; |
120 | 120 | ||
121 | /* Depth-First Search on bus tree */ | 121 | /* Depth-First Search on bus tree */ |
122 | list_for_each_entry(bus, bus_list, node) { | 122 | list_for_each_entry(bus, bus_list, node) { |
@@ -126,9 +126,8 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) | |||
126 | r = &dev->resource[idx]; | 126 | r = &dev->resource[idx]; |
127 | if (!r->flags) | 127 | if (!r->flags) |
128 | continue; | 128 | continue; |
129 | pr = pci_find_parent_resource(dev, r); | 129 | if (!r->start || |
130 | if (!r->start || !pr || | 130 | pci_claim_resource(dev, idx) < 0) { |
131 | request_resource(pr, r) < 0) { | ||
132 | dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx); | 131 | dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx); |
133 | /* | 132 | /* |
134 | * Something is wrong with the region. | 133 | * Something is wrong with the region. |
@@ -149,7 +148,7 @@ static void __init pcibios_allocate_resources(int pass) | |||
149 | struct pci_dev *dev = NULL; | 148 | struct pci_dev *dev = NULL; |
150 | int idx, disabled; | 149 | int idx, disabled; |
151 | u16 command; | 150 | u16 command; |
152 | struct resource *r, *pr; | 151 | struct resource *r; |
153 | 152 | ||
154 | for_each_pci_dev(dev) { | 153 | for_each_pci_dev(dev) { |
155 | pci_read_config_word(dev, PCI_COMMAND, &command); | 154 | pci_read_config_word(dev, PCI_COMMAND, &command); |
@@ -168,8 +167,7 @@ static void __init pcibios_allocate_resources(int pass) | |||
168 | (unsigned long long) r->start, | 167 | (unsigned long long) r->start, |
169 | (unsigned long long) r->end, | 168 | (unsigned long long) r->end, |
170 | r->flags, disabled, pass); | 169 | r->flags, disabled, pass); |
171 | pr = pci_find_parent_resource(dev, r); | 170 | if (pci_claim_resource(dev, idx) < 0) { |
172 | if (!pr || request_resource(pr, r) < 0) { | ||
173 | dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx); | 171 | dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx); |
174 | /* We'll assign a new address later */ | 172 | /* We'll assign a new address later */ |
175 | r->end -= r->start; | 173 | r->end -= r->start; |
@@ -197,7 +195,7 @@ static void __init pcibios_allocate_resources(int pass) | |||
197 | static int __init pcibios_assign_resources(void) | 195 | static int __init pcibios_assign_resources(void) |
198 | { | 196 | { |
199 | struct pci_dev *dev = NULL; | 197 | struct pci_dev *dev = NULL; |
200 | struct resource *r, *pr; | 198 | struct resource *r; |
201 | 199 | ||
202 | if (!(pci_probe & PCI_ASSIGN_ROMS)) { | 200 | if (!(pci_probe & PCI_ASSIGN_ROMS)) { |
203 | /* | 201 | /* |
@@ -209,8 +207,7 @@ static int __init pcibios_assign_resources(void) | |||
209 | r = &dev->resource[PCI_ROM_RESOURCE]; | 207 | r = &dev->resource[PCI_ROM_RESOURCE]; |
210 | if (!r->flags || !r->start) | 208 | if (!r->flags || !r->start) |
211 | continue; | 209 | continue; |
212 | pr = pci_find_parent_resource(dev, r); | 210 | if (pci_claim_resource(dev, PCI_ROM_RESOURCE) < 0) { |
213 | if (!pr || request_resource(pr, r) < 0) { | ||
214 | r->end -= r->start; | 211 | r->end -= r->start; |
215 | r->start = 0; | 212 | r->start = 0; |
216 | } | 213 | } |
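The three PCI call sites stop open-coding "look up the parent resource, then request_resource() into it" and call pci_claim_resource() instead, which performs the same two steps in the PCI core and reports failures itself. Only as a sketch of the logic being replaced (an approximation reconstructed from the removed lines, not the actual body of pci_claim_resource() in drivers/pci/setup-res.c):

static int claim_resource_sketch(struct pci_dev *dev, int idx)
{
	struct resource *r = &dev->resource[idx];
	struct resource *pr = pci_find_parent_resource(dev, r);

	if (!pr || request_resource(pr, r) < 0)
		return -EINVAL;	/* callers clear r->start/r->end and reassign later */

	return 0;
}

This hunk is unrelated to kmemcheck; it is a separate resource-claiming cleanup carried in the same diff.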
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 16a9020c8f11..88112b49f02c 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile | |||
@@ -123,6 +123,7 @@ quiet_cmd_vdso = VDSO $@ | |||
123 | -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) | 123 | -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) |
124 | 124 | ||
125 | VDSO_LDFLAGS = -fPIC -shared $(call ld-option, -Wl$(comma)--hash-style=sysv) | 125 | VDSO_LDFLAGS = -fPIC -shared $(call ld-option, -Wl$(comma)--hash-style=sysv) |
126 | GCOV_PROFILE := n | ||
126 | 127 | ||
127 | # | 128 | # |
128 | # Install the unstripped copy of vdso*.so listed in $(vdso-install-y). | 129 | # Install the unstripped copy of vdso*.so listed in $(vdso-install-y). |