aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/Makefile5
-rw-r--r--arch/x86/boot/Makefile1
-rw-r--r--arch/x86/boot/bioscall.S2
-rw-r--r--arch/x86/boot/compressed/Makefile1
-rw-r--r--arch/x86/include/asm/amd_iommu.h2
-rw-r--r--arch/x86/include/asm/atomic_32.h3
-rw-r--r--arch/x86/include/asm/desc.h26
-rw-r--r--arch/x86/include/asm/dma-mapping.h168
-rw-r--r--arch/x86/include/asm/kmap_types.h23
-rw-r--r--arch/x86/include/asm/kmemcheck.h42
-rw-r--r--arch/x86/include/asm/mce.h63
-rw-r--r--arch/x86/include/asm/msr.h7
-rw-r--r--arch/x86/include/asm/pgtable.h5
-rw-r--r--arch/x86/include/asm/pgtable_types.h9
-rw-r--r--arch/x86/include/asm/string_32.h8
-rw-r--r--arch/x86/include/asm/string_64.h8
-rw-r--r--arch/x86/include/asm/therm_throt.h9
-rw-r--r--arch/x86/include/asm/thread_info.h4
-rw-r--r--arch/x86/include/asm/timex.h4
-rw-r--r--arch/x86/include/asm/xor.h5
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/acpi/realmode/Makefile1
-rw-r--r--arch/x86/kernel/amd_iommu.c16
-rw-r--r--arch/x86/kernel/amd_iommu_init.c26
-rw-r--r--arch/x86/kernel/apic/io_apic.c9
-rw-r--r--arch/x86/kernel/apic/probe_32.c11
-rw-r--r--arch/x86/kernel/apic/summit_32.c1
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c2
-rw-r--r--arch/x86/kernel/cpu/common.c16
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c191
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.h11
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c60
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-ich.c93
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-lib.c1
-rw-r--r--arch/x86/kernel/cpu/intel.c23
-rw-r--r--arch/x86/kernel/cpu/mcheck/Makefile9
-rw-r--r--arch/x86/kernel/cpu/mcheck/k7.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c233
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.h38
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c (renamed from arch/x86/kernel/cpu/mcheck/mce_amd_64.c)0
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c250
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel_64.c248
-rw-r--r--arch/x86/kernel/cpu/mcheck/non-fatal.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/p4.c48
-rw-r--r--arch/x86/kernel/cpu/mcheck/p5.c15
-rw-r--r--arch/x86/kernel/cpu/mcheck/p6.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c106
-rw-r--r--arch/x86/kernel/cpu/mcheck/winchip.c3
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c12
-rw-r--r--arch/x86/kernel/cpuid.c6
-rw-r--r--arch/x86/kernel/crash.c6
-rw-r--r--arch/x86/kernel/efi.c31
-rw-r--r--arch/x86/kernel/entry_32.S64
-rw-r--r--arch/x86/kernel/head_32.S1
-rw-r--r--arch/x86/kernel/head_64.S1
-rw-r--r--arch/x86/kernel/hpet.c3
-rw-r--r--arch/x86/kernel/i8253.c1
-rw-r--r--arch/x86/kernel/init_task.c1
-rw-r--r--arch/x86/kernel/microcode_core.c1
-rw-r--r--arch/x86/kernel/msr.c6
-rw-r--r--arch/x86/kernel/pci-dma.c2
-rw-r--r--arch/x86/kernel/process.c2
-rw-r--r--arch/x86/kernel/stacktrace.c7
-rw-r--r--arch/x86/kernel/traps.c8
-rw-r--r--arch/x86/kernel/tsc.c9
-rw-r--r--arch/x86/kvm/vmx.c2
-rw-r--r--arch/x86/mm/Makefile2
-rw-r--r--arch/x86/mm/fault.c21
-rw-r--r--arch/x86/mm/gup.c9
-rw-r--r--arch/x86/mm/init.c2
-rw-r--r--arch/x86/mm/init_32.c2
-rw-r--r--arch/x86/mm/init_64.c6
-rw-r--r--arch/x86/mm/kmemcheck/Makefile1
-rw-r--r--arch/x86/mm/kmemcheck/error.c228
-rw-r--r--arch/x86/mm/kmemcheck/error.h15
-rw-r--r--arch/x86/mm/kmemcheck/kmemcheck.c640
-rw-r--r--arch/x86/mm/kmemcheck/opcode.c106
-rw-r--r--arch/x86/mm/kmemcheck/opcode.h9
-rw-r--r--arch/x86/mm/kmemcheck/pte.c22
-rw-r--r--arch/x86/mm/kmemcheck/pte.h10
-rw-r--r--arch/x86/mm/kmemcheck/selftest.c69
-rw-r--r--arch/x86/mm/kmemcheck/selftest.h6
-rw-r--r--arch/x86/mm/kmemcheck/shadow.c162
-rw-r--r--arch/x86/mm/kmemcheck/shadow.h16
-rw-r--r--arch/x86/mm/pageattr.c2
-rw-r--r--arch/x86/mm/pgtable.c12
-rw-r--r--arch/x86/pci/i386.c17
-rw-r--r--arch/x86/vdso/Makefile1
89 files changed, 2325 insertions, 1015 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index fcf12af07427..d1430ef6b4f9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -28,6 +28,7 @@ config X86
28 select HAVE_KPROBES 28 select HAVE_KPROBES
29 select ARCH_WANT_OPTIONAL_GPIOLIB 29 select ARCH_WANT_OPTIONAL_GPIOLIB
30 select ARCH_WANT_FRAME_POINTERS 30 select ARCH_WANT_FRAME_POINTERS
31 select HAVE_DMA_ATTRS
31 select HAVE_KRETPROBES 32 select HAVE_KRETPROBES
32 select HAVE_FTRACE_MCOUNT_RECORD 33 select HAVE_FTRACE_MCOUNT_RECORD
33 select HAVE_DYNAMIC_FTRACE 34 select HAVE_DYNAMIC_FTRACE
@@ -47,6 +48,7 @@ config X86
47 select HAVE_KERNEL_GZIP 48 select HAVE_KERNEL_GZIP
48 select HAVE_KERNEL_BZIP2 49 select HAVE_KERNEL_BZIP2
49 select HAVE_KERNEL_LZMA 50 select HAVE_KERNEL_LZMA
51 select HAVE_ARCH_KMEMCHECK
50 52
51config OUTPUT_FORMAT 53config OUTPUT_FORMAT
52 string 54 string
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index edbd0ca62067..1b68659c41b4 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -81,6 +81,11 @@ ifdef CONFIG_CC_STACKPROTECTOR
81 endif 81 endif
82endif 82endif
83 83
84# Don't unroll struct assignments with kmemcheck enabled
85ifeq ($(CONFIG_KMEMCHECK),y)
86 KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy)
87endif
88
84# Stackpointer is addressed different for 32 bit and 64 bit x86 89# Stackpointer is addressed different for 32 bit and 64 bit x86
85sp-$(CONFIG_X86_32) := esp 90sp-$(CONFIG_X86_32) := esp
86sp-$(CONFIG_X86_64) := rsp 91sp-$(CONFIG_X86_64) := rsp
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 8d16ada25048..ec749c2bfdd7 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -70,6 +70,7 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
70 $(call cc-option, -mpreferred-stack-boundary=2) 70 $(call cc-option, -mpreferred-stack-boundary=2)
71KBUILD_CFLAGS += $(call cc-option, -m32) 71KBUILD_CFLAGS += $(call cc-option, -m32)
72KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ 72KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
73GCOV_PROFILE := n
73 74
74$(obj)/bzImage: asflags-y := $(SVGA_MODE) 75$(obj)/bzImage: asflags-y := $(SVGA_MODE)
75 76
diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S
index 507793739ea5..1dfbf64e52a2 100644
--- a/arch/x86/boot/bioscall.S
+++ b/arch/x86/boot/bioscall.S
@@ -13,7 +13,7 @@
13 * touching registers they shouldn't be. 13 * touching registers they shouldn't be.
14 */ 14 */
15 15
16 .code16 16 .code16gcc
17 .text 17 .text
18 .globl intcall 18 .globl intcall
19 .type intcall, @function 19 .type intcall, @function
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 49c8a4c37d7c..e2ff504b4ddc 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -15,6 +15,7 @@ KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
15KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector) 15KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector)
16 16
17KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ 17KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
18GCOV_PROFILE := n
18 19
19LDFLAGS := -m elf_$(UTS_MACHINE) 20LDFLAGS := -m elf_$(UTS_MACHINE)
20LDFLAGS_vmlinux := -T 21LDFLAGS_vmlinux := -T
diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h
index 262e02820049..bdf96f119f06 100644
--- a/arch/x86/include/asm/amd_iommu.h
+++ b/arch/x86/include/asm/amd_iommu.h
@@ -29,9 +29,11 @@ extern void amd_iommu_detect(void);
29extern irqreturn_t amd_iommu_int_handler(int irq, void *data); 29extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
30extern void amd_iommu_flush_all_domains(void); 30extern void amd_iommu_flush_all_domains(void);
31extern void amd_iommu_flush_all_devices(void); 31extern void amd_iommu_flush_all_devices(void);
32extern void amd_iommu_shutdown(void);
32#else 33#else
33static inline int amd_iommu_init(void) { return -ENODEV; } 34static inline int amd_iommu_init(void) { return -ENODEV; }
34static inline void amd_iommu_detect(void) { } 35static inline void amd_iommu_detect(void) { }
36static inline void amd_iommu_shutdown(void) { }
35#endif 37#endif
36 38
37#endif /* _ASM_X86_AMD_IOMMU_H */ 39#endif /* _ASM_X86_AMD_IOMMU_H */
diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h
index 8cb9c814e120..2503d4e64c2a 100644
--- a/arch/x86/include/asm/atomic_32.h
+++ b/arch/x86/include/asm/atomic_32.h
@@ -257,7 +257,7 @@ typedef struct {
257 257
258/** 258/**
259 * atomic64_read - read atomic64 variable 259 * atomic64_read - read atomic64 variable
260 * @v: pointer of type atomic64_t 260 * @ptr: pointer of type atomic64_t
261 * 261 *
262 * Atomically reads the value of @v. 262 * Atomically reads the value of @v.
263 * Doesn't imply a read memory barrier. 263 * Doesn't imply a read memory barrier.
@@ -294,7 +294,6 @@ atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val,
294 * atomic64_xchg - xchg atomic64 variable 294 * atomic64_xchg - xchg atomic64 variable
295 * @ptr: pointer to type atomic64_t 295 * @ptr: pointer to type atomic64_t
296 * @new_val: value to assign 296 * @new_val: value to assign
297 * @old_val: old value that was there
298 * 297 *
299 * Atomically xchgs the value of @ptr to @new_val and returns 298 * Atomically xchgs the value of @ptr to @new_val and returns
300 * the old value. 299 * the old value.
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index c45f415ce315..c993e9e0fed4 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -1,7 +1,6 @@
1#ifndef _ASM_X86_DESC_H 1#ifndef _ASM_X86_DESC_H
2#define _ASM_X86_DESC_H 2#define _ASM_X86_DESC_H
3 3
4#ifndef __ASSEMBLY__
5#include <asm/desc_defs.h> 4#include <asm/desc_defs.h>
6#include <asm/ldt.h> 5#include <asm/ldt.h>
7#include <asm/mmu.h> 6#include <asm/mmu.h>
@@ -380,29 +379,4 @@ static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
380 _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS); 379 _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
381} 380}
382 381
383#else
384/*
385 * GET_DESC_BASE reads the descriptor base of the specified segment.
386 *
387 * Args:
388 * idx - descriptor index
389 * gdt - GDT pointer
390 * base - 32bit register to which the base will be written
391 * lo_w - lo word of the "base" register
392 * lo_b - lo byte of the "base" register
393 * hi_b - hi byte of the low word of the "base" register
394 *
395 * Example:
396 * GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
397 * Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax.
398 */
399#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \
400 movb idx * 8 + 4(gdt), lo_b; \
401 movb idx * 8 + 7(gdt), hi_b; \
402 shll $16, base; \
403 movw idx * 8 + 2(gdt), lo_w;
404
405
406#endif /* __ASSEMBLY__ */
407
408#endif /* _ASM_X86_DESC_H */ 382#endif /* _ASM_X86_DESC_H */
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index f82fdc412c64..1c3f9435f1c9 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -6,6 +6,7 @@
6 * Documentation/DMA-API.txt for documentation. 6 * Documentation/DMA-API.txt for documentation.
7 */ 7 */
8 8
9#include <linux/kmemcheck.h>
9#include <linux/scatterlist.h> 10#include <linux/scatterlist.h>
10#include <linux/dma-debug.h> 11#include <linux/dma-debug.h>
11#include <linux/dma-attrs.h> 12#include <linux/dma-attrs.h>
@@ -32,6 +33,8 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev)
32#endif 33#endif
33} 34}
34 35
36#include <asm-generic/dma-mapping-common.h>
37
35/* Make sure we keep the same behaviour */ 38/* Make sure we keep the same behaviour */
36static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) 39static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
37{ 40{
@@ -52,171 +55,6 @@ extern int dma_set_mask(struct device *dev, u64 mask);
52extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, 55extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
53 dma_addr_t *dma_addr, gfp_t flag); 56 dma_addr_t *dma_addr, gfp_t flag);
54 57
55static inline dma_addr_t
56dma_map_single(struct device *hwdev, void *ptr, size_t size,
57 enum dma_data_direction dir)
58{
59 struct dma_map_ops *ops = get_dma_ops(hwdev);
60 dma_addr_t addr;
61
62 BUG_ON(!valid_dma_direction(dir));
63 addr = ops->map_page(hwdev, virt_to_page(ptr),
64 (unsigned long)ptr & ~PAGE_MASK, size,
65 dir, NULL);
66 debug_dma_map_page(hwdev, virt_to_page(ptr),
67 (unsigned long)ptr & ~PAGE_MASK, size,
68 dir, addr, true);
69 return addr;
70}
71
72static inline void
73dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size,
74 enum dma_data_direction dir)
75{
76 struct dma_map_ops *ops = get_dma_ops(dev);
77
78 BUG_ON(!valid_dma_direction(dir));
79 if (ops->unmap_page)
80 ops->unmap_page(dev, addr, size, dir, NULL);
81 debug_dma_unmap_page(dev, addr, size, dir, true);
82}
83
84static inline int
85dma_map_sg(struct device *hwdev, struct scatterlist *sg,
86 int nents, enum dma_data_direction dir)
87{
88 struct dma_map_ops *ops = get_dma_ops(hwdev);
89 int ents;
90
91 BUG_ON(!valid_dma_direction(dir));
92 ents = ops->map_sg(hwdev, sg, nents, dir, NULL);
93 debug_dma_map_sg(hwdev, sg, nents, ents, dir);
94
95 return ents;
96}
97
98static inline void
99dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
100 enum dma_data_direction dir)
101{
102 struct dma_map_ops *ops = get_dma_ops(hwdev);
103
104 BUG_ON(!valid_dma_direction(dir));
105 debug_dma_unmap_sg(hwdev, sg, nents, dir);
106 if (ops->unmap_sg)
107 ops->unmap_sg(hwdev, sg, nents, dir, NULL);
108}
109
110static inline void
111dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
112 size_t size, enum dma_data_direction dir)
113{
114 struct dma_map_ops *ops = get_dma_ops(hwdev);
115
116 BUG_ON(!valid_dma_direction(dir));
117 if (ops->sync_single_for_cpu)
118 ops->sync_single_for_cpu(hwdev, dma_handle, size, dir);
119 debug_dma_sync_single_for_cpu(hwdev, dma_handle, size, dir);
120 flush_write_buffers();
121}
122
123static inline void
124dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle,
125 size_t size, enum dma_data_direction dir)
126{
127 struct dma_map_ops *ops = get_dma_ops(hwdev);
128
129 BUG_ON(!valid_dma_direction(dir));
130 if (ops->sync_single_for_device)
131 ops->sync_single_for_device(hwdev, dma_handle, size, dir);
132 debug_dma_sync_single_for_device(hwdev, dma_handle, size, dir);
133 flush_write_buffers();
134}
135
136static inline void
137dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
138 unsigned long offset, size_t size,
139 enum dma_data_direction dir)
140{
141 struct dma_map_ops *ops = get_dma_ops(hwdev);
142
143 BUG_ON(!valid_dma_direction(dir));
144 if (ops->sync_single_range_for_cpu)
145 ops->sync_single_range_for_cpu(hwdev, dma_handle, offset,
146 size, dir);
147 debug_dma_sync_single_range_for_cpu(hwdev, dma_handle,
148 offset, size, dir);
149 flush_write_buffers();
150}
151
152static inline void
153dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle,
154 unsigned long offset, size_t size,
155 enum dma_data_direction dir)
156{
157 struct dma_map_ops *ops = get_dma_ops(hwdev);
158
159 BUG_ON(!valid_dma_direction(dir));
160 if (ops->sync_single_range_for_device)
161 ops->sync_single_range_for_device(hwdev, dma_handle,
162 offset, size, dir);
163 debug_dma_sync_single_range_for_device(hwdev, dma_handle,
164 offset, size, dir);
165 flush_write_buffers();
166}
167
168static inline void
169dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
170 int nelems, enum dma_data_direction dir)
171{
172 struct dma_map_ops *ops = get_dma_ops(hwdev);
173
174 BUG_ON(!valid_dma_direction(dir));
175 if (ops->sync_sg_for_cpu)
176 ops->sync_sg_for_cpu(hwdev, sg, nelems, dir);
177 debug_dma_sync_sg_for_cpu(hwdev, sg, nelems, dir);
178 flush_write_buffers();
179}
180
181static inline void
182dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
183 int nelems, enum dma_data_direction dir)
184{
185 struct dma_map_ops *ops = get_dma_ops(hwdev);
186
187 BUG_ON(!valid_dma_direction(dir));
188 if (ops->sync_sg_for_device)
189 ops->sync_sg_for_device(hwdev, sg, nelems, dir);
190 debug_dma_sync_sg_for_device(hwdev, sg, nelems, dir);
191
192 flush_write_buffers();
193}
194
195static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
196 size_t offset, size_t size,
197 enum dma_data_direction dir)
198{
199 struct dma_map_ops *ops = get_dma_ops(dev);
200 dma_addr_t addr;
201
202 BUG_ON(!valid_dma_direction(dir));
203 addr = ops->map_page(dev, page, offset, size, dir, NULL);
204 debug_dma_map_page(dev, page, offset, size, dir, addr, false);
205
206 return addr;
207}
208
209static inline void dma_unmap_page(struct device *dev, dma_addr_t addr,
210 size_t size, enum dma_data_direction dir)
211{
212 struct dma_map_ops *ops = get_dma_ops(dev);
213
214 BUG_ON(!valid_dma_direction(dir));
215 if (ops->unmap_page)
216 ops->unmap_page(dev, addr, size, dir, NULL);
217 debug_dma_unmap_page(dev, addr, size, dir, false);
218}
219
220static inline void 58static inline void
221dma_cache_sync(struct device *dev, void *vaddr, size_t size, 59dma_cache_sync(struct device *dev, void *vaddr, size_t size,
222 enum dma_data_direction dir) 60 enum dma_data_direction dir)
diff --git a/arch/x86/include/asm/kmap_types.h b/arch/x86/include/asm/kmap_types.h
index 5759c165a5cf..9e00a731a7fb 100644
--- a/arch/x86/include/asm/kmap_types.h
+++ b/arch/x86/include/asm/kmap_types.h
@@ -2,28 +2,11 @@
2#define _ASM_X86_KMAP_TYPES_H 2#define _ASM_X86_KMAP_TYPES_H
3 3
4#if defined(CONFIG_X86_32) && defined(CONFIG_DEBUG_HIGHMEM) 4#if defined(CONFIG_X86_32) && defined(CONFIG_DEBUG_HIGHMEM)
5# define D(n) __KM_FENCE_##n , 5#define __WITH_KM_FENCE
6#else
7# define D(n)
8#endif 6#endif
9 7
10enum km_type { 8#include <asm-generic/kmap_types.h>
11D(0) KM_BOUNCE_READ,
12D(1) KM_SKB_SUNRPC_DATA,
13D(2) KM_SKB_DATA_SOFTIRQ,
14D(3) KM_USER0,
15D(4) KM_USER1,
16D(5) KM_BIO_SRC_IRQ,
17D(6) KM_BIO_DST_IRQ,
18D(7) KM_PTE0,
19D(8) KM_PTE1,
20D(9) KM_IRQ0,
21D(10) KM_IRQ1,
22D(11) KM_SOFTIRQ0,
23D(12) KM_SOFTIRQ1,
24D(13) KM_TYPE_NR
25};
26 9
27#undef D 10#undef __WITH_KM_FENCE
28 11
29#endif /* _ASM_X86_KMAP_TYPES_H */ 12#endif /* _ASM_X86_KMAP_TYPES_H */
diff --git a/arch/x86/include/asm/kmemcheck.h b/arch/x86/include/asm/kmemcheck.h
new file mode 100644
index 000000000000..ed01518f297e
--- /dev/null
+++ b/arch/x86/include/asm/kmemcheck.h
@@ -0,0 +1,42 @@
1#ifndef ASM_X86_KMEMCHECK_H
2#define ASM_X86_KMEMCHECK_H
3
4#include <linux/types.h>
5#include <asm/ptrace.h>
6
7#ifdef CONFIG_KMEMCHECK
8bool kmemcheck_active(struct pt_regs *regs);
9
10void kmemcheck_show(struct pt_regs *regs);
11void kmemcheck_hide(struct pt_regs *regs);
12
13bool kmemcheck_fault(struct pt_regs *regs,
14 unsigned long address, unsigned long error_code);
15bool kmemcheck_trap(struct pt_regs *regs);
16#else
17static inline bool kmemcheck_active(struct pt_regs *regs)
18{
19 return false;
20}
21
22static inline void kmemcheck_show(struct pt_regs *regs)
23{
24}
25
26static inline void kmemcheck_hide(struct pt_regs *regs)
27{
28}
29
30static inline bool kmemcheck_fault(struct pt_regs *regs,
31 unsigned long address, unsigned long error_code)
32{
33 return false;
34}
35
36static inline bool kmemcheck_trap(struct pt_regs *regs)
37{
38 return false;
39}
40#endif /* CONFIG_KMEMCHECK */
41
42#endif
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 540a466e50f5..5cdd8d100ec9 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -102,15 +102,39 @@ struct mce_log {
102 102
103#ifdef __KERNEL__ 103#ifdef __KERNEL__
104 104
105#include <linux/percpu.h>
106#include <linux/init.h>
107#include <asm/atomic.h>
108
105extern int mce_disabled; 109extern int mce_disabled;
110extern int mce_p5_enabled;
106 111
107#include <asm/atomic.h> 112#ifdef CONFIG_X86_MCE
108#include <linux/percpu.h> 113void mcheck_init(struct cpuinfo_x86 *c);
114#else
115static inline void mcheck_init(struct cpuinfo_x86 *c) {}
116#endif
117
118#ifdef CONFIG_X86_OLD_MCE
119extern int nr_mce_banks;
120void amd_mcheck_init(struct cpuinfo_x86 *c);
121void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
122void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
123#endif
124
125#ifdef CONFIG_X86_ANCIENT_MCE
126void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
127void winchip_mcheck_init(struct cpuinfo_x86 *c);
128static inline void enable_p5_mce(void) { mce_p5_enabled = 1; }
129#else
130static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
131static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
132static inline void enable_p5_mce(void) {}
133#endif
109 134
110void mce_setup(struct mce *m); 135void mce_setup(struct mce *m);
111void mce_log(struct mce *m); 136void mce_log(struct mce *m);
112DECLARE_PER_CPU(struct sys_device, mce_dev); 137DECLARE_PER_CPU(struct sys_device, mce_dev);
113extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
114 138
115/* 139/*
116 * To support more than 128 would need to escape the predefined 140 * To support more than 128 would need to escape the predefined
@@ -145,12 +169,8 @@ int mce_available(struct cpuinfo_x86 *c);
145DECLARE_PER_CPU(unsigned, mce_exception_count); 169DECLARE_PER_CPU(unsigned, mce_exception_count);
146DECLARE_PER_CPU(unsigned, mce_poll_count); 170DECLARE_PER_CPU(unsigned, mce_poll_count);
147 171
148void mce_log_therm_throt_event(__u64 status);
149
150extern atomic_t mce_entry; 172extern atomic_t mce_entry;
151 173
152void do_machine_check(struct pt_regs *, long);
153
154typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS); 174typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
155DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); 175DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
156 176
@@ -167,13 +187,32 @@ void mce_notify_process(void);
167DECLARE_PER_CPU(struct mce, injectm); 187DECLARE_PER_CPU(struct mce, injectm);
168extern struct file_operations mce_chrdev_ops; 188extern struct file_operations mce_chrdev_ops;
169 189
170#ifdef CONFIG_X86_MCE 190/*
171void mcheck_init(struct cpuinfo_x86 *c); 191 * Exception handler
172#else 192 */
173#define mcheck_init(c) do { } while (0) 193
174#endif 194/* Call the installed machine check handler for this CPU setup. */
195extern void (*machine_check_vector)(struct pt_regs *, long error_code);
196void do_machine_check(struct pt_regs *, long);
197
198/*
199 * Threshold handler
200 */
175 201
176extern void (*mce_threshold_vector)(void); 202extern void (*mce_threshold_vector)(void);
203extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
204
205/*
206 * Thermal handler
207 */
208
209void intel_init_thermal(struct cpuinfo_x86 *c);
210
211#ifdef CONFIG_X86_NEW_MCE
212void mce_log_therm_throt_event(__u64 status);
213#else
214static inline void mce_log_therm_throt_event(__u64 status) {}
215#endif
177 216
178#endif /* __KERNEL__ */ 217#endif /* __KERNEL__ */
179#endif /* _ASM_X86_MCE_H */ 218#endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 22603764e7db..48ad9d29484a 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -3,13 +3,10 @@
3 3
4#include <asm/msr-index.h> 4#include <asm/msr-index.h>
5 5
6#ifndef __ASSEMBLY__
7# include <linux/types.h>
8#endif
9
10#ifdef __KERNEL__ 6#ifdef __KERNEL__
11#ifndef __ASSEMBLY__ 7#ifndef __ASSEMBLY__
12 8
9#include <linux/types.h>
13#include <asm/asm.h> 10#include <asm/asm.h>
14#include <asm/errno.h> 11#include <asm/errno.h>
15#include <asm/cpumask.h> 12#include <asm/cpumask.h>
@@ -264,6 +261,4 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
264#endif /* CONFIG_SMP */ 261#endif /* CONFIG_SMP */
265#endif /* __ASSEMBLY__ */ 262#endif /* __ASSEMBLY__ */
266#endif /* __KERNEL__ */ 263#endif /* __KERNEL__ */
267
268
269#endif /* _ASM_X86_MSR_H */ 264#endif /* _ASM_X86_MSR_H */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 18ef7ebf2631..3cc06e3fceb8 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -317,6 +317,11 @@ static inline int pte_present(pte_t a)
317 return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); 317 return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
318} 318}
319 319
320static inline int pte_hidden(pte_t pte)
321{
322 return pte_flags(pte) & _PAGE_HIDDEN;
323}
324
320static inline int pmd_present(pmd_t pmd) 325static inline int pmd_present(pmd_t pmd)
321{ 326{
322 return pmd_flags(pmd) & _PAGE_PRESENT; 327 return pmd_flags(pmd) & _PAGE_PRESENT;
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 4d258ad76a0f..54cb697f4900 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -18,7 +18,7 @@
18#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ 18#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
19#define _PAGE_BIT_UNUSED1 9 /* available for programmer */ 19#define _PAGE_BIT_UNUSED1 9 /* available for programmer */
20#define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */ 20#define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */
21#define _PAGE_BIT_UNUSED3 11 21#define _PAGE_BIT_HIDDEN 11 /* hidden by kmemcheck */
22#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ 22#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */
23#define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 23#define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1
24#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 24#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1
@@ -41,13 +41,18 @@
41#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) 41#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
42#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1) 42#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1)
43#define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP) 43#define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)
44#define _PAGE_UNUSED3 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3)
45#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) 44#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
46#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) 45#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
47#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) 46#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
48#define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) 47#define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST)
49#define __HAVE_ARCH_PTE_SPECIAL 48#define __HAVE_ARCH_PTE_SPECIAL
50 49
50#ifdef CONFIG_KMEMCHECK
51#define _PAGE_HIDDEN (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN)
52#else
53#define _PAGE_HIDDEN (_AT(pteval_t, 0))
54#endif
55
51#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) 56#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
52#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) 57#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX)
53#else 58#else
diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h
index 0e0e3ba827f7..c86f452256de 100644
--- a/arch/x86/include/asm/string_32.h
+++ b/arch/x86/include/asm/string_32.h
@@ -177,10 +177,18 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len)
177 * No 3D Now! 177 * No 3D Now!
178 */ 178 */
179 179
180#ifndef CONFIG_KMEMCHECK
180#define memcpy(t, f, n) \ 181#define memcpy(t, f, n) \
181 (__builtin_constant_p((n)) \ 182 (__builtin_constant_p((n)) \
182 ? __constant_memcpy((t), (f), (n)) \ 183 ? __constant_memcpy((t), (f), (n)) \
183 : __memcpy((t), (f), (n))) 184 : __memcpy((t), (f), (n)))
185#else
186/*
187 * kmemcheck becomes very happy if we use the REP instructions unconditionally,
188 * because it means that we know both memory operands in advance.
189 */
190#define memcpy(t, f, n) __memcpy((t), (f), (n))
191#endif
184 192
185#endif 193#endif
186 194
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 2afe164bf1e6..19e2c468fc2c 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -27,6 +27,7 @@ static __always_inline void *__inline_memcpy(void *to, const void *from, size_t
27 function. */ 27 function. */
28 28
29#define __HAVE_ARCH_MEMCPY 1 29#define __HAVE_ARCH_MEMCPY 1
30#ifndef CONFIG_KMEMCHECK
30#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4 31#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4
31extern void *memcpy(void *to, const void *from, size_t len); 32extern void *memcpy(void *to, const void *from, size_t len);
32#else 33#else
@@ -42,6 +43,13 @@ extern void *__memcpy(void *to, const void *from, size_t len);
42 __ret; \ 43 __ret; \
43}) 44})
44#endif 45#endif
46#else
47/*
48 * kmemcheck becomes very happy if we use the REP instructions unconditionally,
49 * because it means that we know both memory operands in advance.
50 */
51#define memcpy(dst, src, len) __inline_memcpy((dst), (src), (len))
52#endif
45 53
46#define __HAVE_ARCH_MEMSET 54#define __HAVE_ARCH_MEMSET
47void *memset(void *s, int c, size_t n); 55void *memset(void *s, int c, size_t n);
diff --git a/arch/x86/include/asm/therm_throt.h b/arch/x86/include/asm/therm_throt.h
deleted file mode 100644
index c62349ee7860..000000000000
--- a/arch/x86/include/asm/therm_throt.h
+++ /dev/null
@@ -1,9 +0,0 @@
1#ifndef _ASM_X86_THERM_THROT_H
2#define _ASM_X86_THERM_THROT_H
3
4#include <asm/atomic.h>
5
6extern atomic_t therm_throt_en;
7int therm_throt_process(int curr);
8
9#endif /* _ASM_X86_THERM_THROT_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 602c769fc98c..b0783520988b 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -154,9 +154,9 @@ struct thread_info {
154 154
155/* thread information allocation */ 155/* thread information allocation */
156#ifdef CONFIG_DEBUG_STACK_USAGE 156#ifdef CONFIG_DEBUG_STACK_USAGE
157#define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO) 157#define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
158#else 158#else
159#define THREAD_FLAGS GFP_KERNEL 159#define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK)
160#endif 160#endif
161 161
162#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR 162#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
diff --git a/arch/x86/include/asm/timex.h b/arch/x86/include/asm/timex.h
index b5c9d45c981f..1375cfc93960 100644
--- a/arch/x86/include/asm/timex.h
+++ b/arch/x86/include/asm/timex.h
@@ -4,9 +4,7 @@
4#include <asm/processor.h> 4#include <asm/processor.h>
5#include <asm/tsc.h> 5#include <asm/tsc.h>
6 6
7/* The PIT ticks at this frequency (in HZ): */ 7/* Assume we use the PIT time source for the clock tick */
8#define PIT_TICK_RATE 1193182
9
10#define CLOCK_TICK_RATE PIT_TICK_RATE 8#define CLOCK_TICK_RATE PIT_TICK_RATE
11 9
12#define ARCH_HAS_READ_CURRENT_TIMER 10#define ARCH_HAS_READ_CURRENT_TIMER
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h
index 11b3bb86e17b..7fcf6f3dbcc3 100644
--- a/arch/x86/include/asm/xor.h
+++ b/arch/x86/include/asm/xor.h
@@ -1,5 +1,10 @@
1#ifdef CONFIG_KMEMCHECK
2/* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */
3# include <asm-generic/xor.h>
4#else
1#ifdef CONFIG_X86_32 5#ifdef CONFIG_X86_32
2# include "xor_32.h" 6# include "xor_32.h"
3#else 7#else
4# include "xor_64.h" 8# include "xor_64.h"
5#endif 9#endif
10#endif
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index f3477bb84566..6c327b852e23 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -24,6 +24,8 @@ CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
24CFLAGS_hpet.o := $(nostackp) 24CFLAGS_hpet.o := $(nostackp)
25CFLAGS_tsc.o := $(nostackp) 25CFLAGS_tsc.o := $(nostackp)
26CFLAGS_paravirt.o := $(nostackp) 26CFLAGS_paravirt.o := $(nostackp)
27GCOV_PROFILE_vsyscall_64.o := n
28GCOV_PROFILE_hpet.o := n
27 29
28obj-y := process_$(BITS).o signal.o entry_$(BITS).o 30obj-y := process_$(BITS).o signal.o entry_$(BITS).o
29obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o 31obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
diff --git a/arch/x86/kernel/acpi/realmode/Makefile b/arch/x86/kernel/acpi/realmode/Makefile
index 167bc16ce0e5..6a564ac67ef5 100644
--- a/arch/x86/kernel/acpi/realmode/Makefile
+++ b/arch/x86/kernel/acpi/realmode/Makefile
@@ -42,6 +42,7 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D_WAKEUP -D__KERNEL__ \
42 $(call cc-option, -mpreferred-stack-boundary=2) 42 $(call cc-option, -mpreferred-stack-boundary=2)
43KBUILD_CFLAGS += $(call cc-option, -m32) 43KBUILD_CFLAGS += $(call cc-option, -m32)
44KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ 44KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
45GCOV_PROFILE := n
45 46
46WAKEUP_OBJS = $(addprefix $(obj)/,$(wakeup-y)) 47WAKEUP_OBJS = $(addprefix $(obj)/,$(wakeup-y))
47 48
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 1c60554537c3..9372f0406ad4 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -434,6 +434,16 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
434 iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); 434 iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
435} 435}
436 436
437/* Flush the whole IO/TLB for a given protection domain - including PDE */
438static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid)
439{
440 u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
441
442 INC_STATS_COUNTER(domain_flush_single);
443
444 iommu_queue_inv_iommu_pages(iommu, address, domid, 1, 1);
445}
446
437/* 447/*
438 * This function is used to flush the IO/TLB for a given protection domain 448 * This function is used to flush the IO/TLB for a given protection domain
439 * on every IOMMU in the system 449 * on every IOMMU in the system
@@ -1078,7 +1088,13 @@ static void attach_device(struct amd_iommu *iommu,
1078 amd_iommu_pd_table[devid] = domain; 1088 amd_iommu_pd_table[devid] = domain;
1079 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 1089 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1080 1090
1091 /*
1092 * We might boot into a crash-kernel here. The crashed kernel
1093 * left the caches in the IOMMU dirty. So we have to flush
1094 * here to evict all dirty stuff.
1095 */
1081 iommu_queue_inv_dev_entry(iommu, devid); 1096 iommu_queue_inv_dev_entry(iommu, devid);
1097 iommu_flush_tlb_pde(iommu, domain->id);
1082} 1098}
1083 1099
1084/* 1100/*
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 238989ec077d..10b2accd12ea 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -260,6 +260,14 @@ static void iommu_enable(struct amd_iommu *iommu)
260 260
261static void iommu_disable(struct amd_iommu *iommu) 261static void iommu_disable(struct amd_iommu *iommu)
262{ 262{
263 /* Disable command buffer */
264 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
265
266 /* Disable event logging and event interrupts */
267 iommu_feature_disable(iommu, CONTROL_EVT_INT_EN);
268 iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
269
270 /* Disable IOMMU hardware itself */
263 iommu_feature_disable(iommu, CONTROL_IOMMU_EN); 271 iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
264} 272}
265 273
@@ -478,6 +486,10 @@ static void iommu_enable_event_buffer(struct amd_iommu *iommu)
478 memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, 486 memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
479 &entry, sizeof(entry)); 487 &entry, sizeof(entry));
480 488
489 /* set head and tail to zero manually */
490 writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
491 writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
492
481 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); 493 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
482} 494}
483 495
@@ -1042,6 +1054,7 @@ static void enable_iommus(void)
1042 struct amd_iommu *iommu; 1054 struct amd_iommu *iommu;
1043 1055
1044 for_each_iommu(iommu) { 1056 for_each_iommu(iommu) {
1057 iommu_disable(iommu);
1045 iommu_set_device_table(iommu); 1058 iommu_set_device_table(iommu);
1046 iommu_enable_command_buffer(iommu); 1059 iommu_enable_command_buffer(iommu);
1047 iommu_enable_event_buffer(iommu); 1060 iommu_enable_event_buffer(iommu);
@@ -1066,12 +1079,6 @@ static void disable_iommus(void)
1066 1079
1067static int amd_iommu_resume(struct sys_device *dev) 1080static int amd_iommu_resume(struct sys_device *dev)
1068{ 1081{
1069 /*
1070 * Disable IOMMUs before reprogramming the hardware registers.
1071 * IOMMU is still enabled from the resume kernel.
1072 */
1073 disable_iommus();
1074
1075 /* re-load the hardware */ 1082 /* re-load the hardware */
1076 enable_iommus(); 1083 enable_iommus();
1077 1084
@@ -1079,8 +1086,8 @@ static int amd_iommu_resume(struct sys_device *dev)
1079 * we have to flush after the IOMMUs are enabled because a 1086 * we have to flush after the IOMMUs are enabled because a
1080 * disabled IOMMU will never execute the commands we send 1087 * disabled IOMMU will never execute the commands we send
1081 */ 1088 */
1082 amd_iommu_flush_all_domains();
1083 amd_iommu_flush_all_devices(); 1089 amd_iommu_flush_all_devices();
1090 amd_iommu_flush_all_domains();
1084 1091
1085 return 0; 1092 return 0;
1086} 1093}
@@ -1273,6 +1280,11 @@ free:
1273 goto out; 1280 goto out;
1274} 1281}
1275 1282
1283void amd_iommu_shutdown(void)
1284{
1285 disable_iommus();
1286}
1287
1276/**************************************************************************** 1288/****************************************************************************
1277 * 1289 *
1278 * Early detect code. This code runs at IOMMU detection time in the DMA 1290 * Early detect code. This code runs at IOMMU detection time in the DMA
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index ef8d9290c7ea..b7a79207295e 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -462,7 +462,8 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
462static void 462static void
463__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) 463__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
464{ 464{
465 union entry_union eu; 465 union entry_union eu = {{0, 0}};
466
466 eu.entry = e; 467 eu.entry = e;
467 io_apic_write(apic, 0x11 + 2*pin, eu.w2); 468 io_apic_write(apic, 0x11 + 2*pin, eu.w2);
468 io_apic_write(apic, 0x10 + 2*pin, eu.w1); 469 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
@@ -2003,7 +2004,9 @@ void disable_IO_APIC(void)
2003 /* 2004 /*
2004 * Use virtual wire A mode when interrupt remapping is enabled. 2005 * Use virtual wire A mode when interrupt remapping is enabled.
2005 */ 2006 */
2006 disconnect_bsp_APIC(!intr_remapping_enabled && ioapic_i8259.pin != -1); 2007 if (cpu_has_apic)
2008 disconnect_bsp_APIC(!intr_remapping_enabled &&
2009 ioapic_i8259.pin != -1);
2007} 2010}
2008 2011
2009#ifdef CONFIG_X86_32 2012#ifdef CONFIG_X86_32
@@ -3567,7 +3570,7 @@ static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3567 3570
3568#endif /* CONFIG_SMP */ 3571#endif /* CONFIG_SMP */
3569 3572
3570struct irq_chip dmar_msi_type = { 3573static struct irq_chip dmar_msi_type = {
3571 .name = "DMAR_MSI", 3574 .name = "DMAR_MSI",
3572 .unmask = dmar_msi_unmask, 3575 .unmask = dmar_msi_unmask,
3573 .mask = dmar_msi_mask, 3576 .mask = dmar_msi_mask,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 440a8bccd91a..0c0182cc947d 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -20,23 +20,12 @@
20#include <asm/apic.h> 20#include <asm/apic.h>
21#include <asm/setup.h> 21#include <asm/setup.h>
22 22
23#include <linux/threads.h>
24#include <linux/cpumask.h>
25#include <asm/mpspec.h>
26#include <asm/fixmap.h>
27#include <asm/apicdef.h>
28#include <linux/kernel.h>
29#include <linux/string.h>
30#include <linux/smp.h> 23#include <linux/smp.h>
31#include <linux/init.h>
32#include <asm/ipi.h> 24#include <asm/ipi.h>
33 25
34#include <linux/smp.h>
35#include <linux/init.h>
36#include <linux/interrupt.h> 26#include <linux/interrupt.h>
37#include <asm/acpi.h> 27#include <asm/acpi.h>
38#include <asm/e820.h> 28#include <asm/e820.h>
39#include <asm/setup.h>
40 29
41#ifdef CONFIG_HOTPLUG_CPU 30#ifdef CONFIG_HOTPLUG_CPU
42#define DEFAULT_SEND_IPI (1) 31#define DEFAULT_SEND_IPI (1)
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 344eee4ac0a4..eafdfbd1ea95 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -44,7 +44,6 @@
44#include <asm/ipi.h> 44#include <asm/ipi.h>
45#include <linux/kernel.h> 45#include <linux/kernel.h>
46#include <linux/string.h> 46#include <linux/string.h>
47#include <linux/init.h>
48#include <linux/gfp.h> 47#include <linux/gfp.h>
49#include <linux/smp.h> 48#include <linux/smp.h>
50 49
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index ef0ae207a7c8..096d19aea2f7 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -463,7 +463,7 @@ static void uv_heartbeat(unsigned long ignored)
463 uv_set_scir_bits(bits); 463 uv_set_scir_bits(bits);
464 464
465 /* enable next timer period */ 465 /* enable next timer period */
466 mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL); 466 mod_timer_pinned(timer, jiffies + SCIR_CPU_HB_INTERVAL);
467} 467}
468 468
469static void __cpuinit uv_heartbeat_enable(int cpu) 469static void __cpuinit uv_heartbeat_enable(int cpu)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 3ffdcfa9abdf..6b26d4deada0 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -108,7 +108,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
108 /* data */ 108 /* data */
109 [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, 109 [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
110 110
111 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, 111 [GDT_ENTRY_ESPFIX_SS] = { { { 0x0000ffff, 0x00cf9200 } } },
112 [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, 112 [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
113 GDT_STACK_CANARY_INIT 113 GDT_STACK_CANARY_INIT
114#endif 114#endif
@@ -487,7 +487,6 @@ out:
487static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) 487static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
488{ 488{
489 char *v = c->x86_vendor_id; 489 char *v = c->x86_vendor_id;
490 static int printed;
491 int i; 490 int i;
492 491
493 for (i = 0; i < X86_VENDOR_NUM; i++) { 492 for (i = 0; i < X86_VENDOR_NUM; i++) {
@@ -504,13 +503,9 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
504 } 503 }
505 } 504 }
506 505
507 if (!printed) { 506 printk_once(KERN_ERR
508 printed++; 507 "CPU: vendor_id '%s' unknown, using generic init.\n" \
509 printk(KERN_ERR 508 "CPU: Your system may be unstable.\n", v);
510 "CPU: vendor_id '%s' unknown, using generic init.\n", v);
511
512 printk(KERN_ERR "CPU: Your system may be unstable.\n");
513 }
514 509
515 c->x86_vendor = X86_VENDOR_UNKNOWN; 510 c->x86_vendor = X86_VENDOR_UNKNOWN;
516 this_cpu = &default_cpu; 511 this_cpu = &default_cpu;
@@ -853,6 +848,9 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
853#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) 848#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
854 numa_add_cpu(smp_processor_id()); 849 numa_add_cpu(smp_processor_id());
855#endif 850#endif
851
852 /* Cap the iomem address space to what is addressable on all CPUs */
853 iomem_resource.end &= (1ULL << c->x86_phys_bits) - 1;
856} 854}
857 855
858#ifdef CONFIG_X86_64 856#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index cf52215d9eb1..81cbe64ed6b4 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1,3 +1,4 @@
1
1/* 2/*
2 * (c) 2003-2006 Advanced Micro Devices, Inc. 3 * (c) 2003-2006 Advanced Micro Devices, Inc.
3 * Your use of this code is subject to the terms and conditions of the 4 * Your use of this code is subject to the terms and conditions of the
@@ -117,20 +118,17 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
117 u32 i = 0; 118 u32 i = 0;
118 119
119 if (cpu_family == CPU_HW_PSTATE) { 120 if (cpu_family == CPU_HW_PSTATE) {
120 if (data->currpstate == HW_PSTATE_INVALID) { 121 rdmsr(MSR_PSTATE_STATUS, lo, hi);
121 /* read (initial) hw pstate if not yet set */ 122 i = lo & HW_PSTATE_MASK;
122 rdmsr(MSR_PSTATE_STATUS, lo, hi); 123 data->currpstate = i;
123 i = lo & HW_PSTATE_MASK; 124
124 125 /*
125 /* 126 * a workaround for family 11h erratum 311 might cause
126 * a workaround for family 11h erratum 311 might cause 127 * an "out-of-range Pstate if the core is in Pstate-0
127 * an "out-of-range Pstate if the core is in Pstate-0 128 */
128 */ 129 if ((boot_cpu_data.x86 == 0x11) && (i >= data->numps))
129 if (i >= data->numps) 130 data->currpstate = HW_PSTATE_0;
130 data->currpstate = HW_PSTATE_0; 131
131 else
132 data->currpstate = i;
133 }
134 return 0; 132 return 0;
135 } 133 }
136 do { 134 do {
@@ -510,41 +508,34 @@ static int core_voltage_post_transition(struct powernow_k8_data *data,
510 return 0; 508 return 0;
511} 509}
512 510
513static int check_supported_cpu(unsigned int cpu) 511static void check_supported_cpu(void *_rc)
514{ 512{
515 cpumask_t oldmask;
516 u32 eax, ebx, ecx, edx; 513 u32 eax, ebx, ecx, edx;
517 unsigned int rc = 0; 514 int *rc = _rc;
518
519 oldmask = current->cpus_allowed;
520 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
521 515
522 if (smp_processor_id() != cpu) { 516 *rc = -ENODEV;
523 printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu);
524 goto out;
525 }
526 517
527 if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) 518 if (current_cpu_data.x86_vendor != X86_VENDOR_AMD)
528 goto out; 519 return;
529 520
530 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); 521 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
531 if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) && 522 if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) &&
532 ((eax & CPUID_XFAM) < CPUID_XFAM_10H)) 523 ((eax & CPUID_XFAM) < CPUID_XFAM_10H))
533 goto out; 524 return;
534 525
535 if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) { 526 if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) {
536 if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || 527 if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
537 ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) { 528 ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) {
538 printk(KERN_INFO PFX 529 printk(KERN_INFO PFX
539 "Processor cpuid %x not supported\n", eax); 530 "Processor cpuid %x not supported\n", eax);
540 goto out; 531 return;
541 } 532 }
542 533
543 eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES); 534 eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES);
544 if (eax < CPUID_FREQ_VOLT_CAPABILITIES) { 535 if (eax < CPUID_FREQ_VOLT_CAPABILITIES) {
545 printk(KERN_INFO PFX 536 printk(KERN_INFO PFX
546 "No frequency change capabilities detected\n"); 537 "No frequency change capabilities detected\n");
547 goto out; 538 return;
548 } 539 }
549 540
550 cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); 541 cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
@@ -552,21 +543,17 @@ static int check_supported_cpu(unsigned int cpu)
552 != P_STATE_TRANSITION_CAPABLE) { 543 != P_STATE_TRANSITION_CAPABLE) {
553 printk(KERN_INFO PFX 544 printk(KERN_INFO PFX
554 "Power state transitions not supported\n"); 545 "Power state transitions not supported\n");
555 goto out; 546 return;
556 } 547 }
557 } else { /* must be a HW Pstate capable processor */ 548 } else { /* must be a HW Pstate capable processor */
558 cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); 549 cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
559 if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE) 550 if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE)
560 cpu_family = CPU_HW_PSTATE; 551 cpu_family = CPU_HW_PSTATE;
561 else 552 else
562 goto out; 553 return;
563 } 554 }
564 555
565 rc = 1; 556 *rc = 0;
566
567out:
568 set_cpus_allowed_ptr(current, &oldmask);
569 return rc;
570} 557}
571 558
572static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, 559static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst,
@@ -823,13 +810,14 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data,
823 if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) 810 if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
824 return; 811 return;
825 812
826 control = data->acpi_data.states[index].control; data->irt = (control 813 control = data->acpi_data.states[index].control;
827 >> IRT_SHIFT) & IRT_MASK; data->rvo = (control >> 814 data->irt = (control >> IRT_SHIFT) & IRT_MASK;
828 RVO_SHIFT) & RVO_MASK; data->exttype = (control 815 data->rvo = (control >> RVO_SHIFT) & RVO_MASK;
829 >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; 816 data->exttype = (control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
830 data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK; data->vidmvs = 1 817 data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK;
831 << ((control >> MVS_SHIFT) & MVS_MASK); data->vstable = 818 data->vidmvs = 1 << ((control >> MVS_SHIFT) & MVS_MASK);
832 (control >> VST_SHIFT) & VST_MASK; } 819 data->vstable = (control >> VST_SHIFT) & VST_MASK;
820}
833 821
834static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) 822static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
835{ 823{
@@ -1046,6 +1034,19 @@ static int get_transition_latency(struct powernow_k8_data *data)
1046 if (cur_latency > max_latency) 1034 if (cur_latency > max_latency)
1047 max_latency = cur_latency; 1035 max_latency = cur_latency;
1048 } 1036 }
1037 if (max_latency == 0) {
1038 /*
1039 * Fam 11h always returns 0 as transition latency.
1040 * This is intended and means "very fast". While cpufreq core
1041 * and governors currently can handle that gracefully, better
1042 * set it to 1 to avoid problems in the future.
1043 * For all others it's a BIOS bug.
1044 */
1045 if (!boot_cpu_data.x86 == 0x11)
1046 printk(KERN_ERR FW_WARN PFX "Invalid zero transition "
1047 "latency\n");
1048 max_latency = 1;
1049 }
1049 /* value in usecs, needs to be in nanoseconds */ 1050 /* value in usecs, needs to be in nanoseconds */
1050 return 1000 * max_latency; 1051 return 1000 * max_latency;
1051} 1052}
@@ -1093,7 +1094,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data,
1093 freqs.old = find_khz_freq_from_fid(data->currfid); 1094 freqs.old = find_khz_freq_from_fid(data->currfid);
1094 freqs.new = find_khz_freq_from_fid(fid); 1095 freqs.new = find_khz_freq_from_fid(fid);
1095 1096
1096 for_each_cpu_mask_nr(i, *(data->available_cores)) { 1097 for_each_cpu(i, data->available_cores) {
1097 freqs.cpu = i; 1098 freqs.cpu = i;
1098 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 1099 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
1099 } 1100 }
@@ -1101,7 +1102,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data,
1101 res = transition_fid_vid(data, fid, vid); 1102 res = transition_fid_vid(data, fid, vid);
1102 freqs.new = find_khz_freq_from_fid(data->currfid); 1103 freqs.new = find_khz_freq_from_fid(data->currfid);
1103 1104
1104 for_each_cpu_mask_nr(i, *(data->available_cores)) { 1105 for_each_cpu(i, data->available_cores) {
1105 freqs.cpu = i; 1106 freqs.cpu = i;
1106 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 1107 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
1107 } 1108 }
@@ -1126,7 +1127,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data,
1126 data->currpstate); 1127 data->currpstate);
1127 freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); 1128 freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
1128 1129
1129 for_each_cpu_mask_nr(i, *(data->available_cores)) { 1130 for_each_cpu(i, data->available_cores) {
1130 freqs.cpu = i; 1131 freqs.cpu = i;
1131 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 1132 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
1132 } 1133 }
@@ -1134,7 +1135,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data,
1134 res = transition_pstate(data, pstate); 1135 res = transition_pstate(data, pstate);
1135 freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); 1136 freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
1136 1137
1137 for_each_cpu_mask_nr(i, *(data->available_cores)) { 1138 for_each_cpu(i, data->available_cores) {
1138 freqs.cpu = i; 1139 freqs.cpu = i;
1139 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 1140 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
1140 } 1141 }
@@ -1235,21 +1236,47 @@ static int powernowk8_verify(struct cpufreq_policy *pol)
1235 return cpufreq_frequency_table_verify(pol, data->powernow_table); 1236 return cpufreq_frequency_table_verify(pol, data->powernow_table);
1236} 1237}
1237 1238
1238static const char ACPI_PSS_BIOS_BUG_MSG[] = 1239struct init_on_cpu {
1239 KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n" 1240 struct powernow_k8_data *data;
1240 KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n"; 1241 int rc;
1242};
1243
1244static void __cpuinit powernowk8_cpu_init_on_cpu(void *_init_on_cpu)
1245{
1246 struct init_on_cpu *init_on_cpu = _init_on_cpu;
1247
1248 if (pending_bit_stuck()) {
1249 printk(KERN_ERR PFX "failing init, change pending bit set\n");
1250 init_on_cpu->rc = -ENODEV;
1251 return;
1252 }
1253
1254 if (query_current_values_with_pending_wait(init_on_cpu->data)) {
1255 init_on_cpu->rc = -ENODEV;
1256 return;
1257 }
1258
1259 if (cpu_family == CPU_OPTERON)
1260 fidvid_msr_init();
1261
1262 init_on_cpu->rc = 0;
1263}
1241 1264
1242/* per CPU init entry point to the driver */ 1265/* per CPU init entry point to the driver */
1243static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) 1266static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1244{ 1267{
1268 static const char ACPI_PSS_BIOS_BUG_MSG[] =
1269 KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n"
1270 KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n";
1245 struct powernow_k8_data *data; 1271 struct powernow_k8_data *data;
1246 cpumask_t oldmask; 1272 struct init_on_cpu init_on_cpu;
1247 int rc; 1273 int rc;
1248 1274
1249 if (!cpu_online(pol->cpu)) 1275 if (!cpu_online(pol->cpu))
1250 return -ENODEV; 1276 return -ENODEV;
1251 1277
1252 if (!check_supported_cpu(pol->cpu)) 1278 smp_call_function_single(pol->cpu, check_supported_cpu, &rc, 1);
1279 if (rc)
1253 return -ENODEV; 1280 return -ENODEV;
1254 1281
1255 data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL); 1282 data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL);
@@ -1289,27 +1316,12 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1289 pol->cpuinfo.transition_latency = get_transition_latency(data); 1316 pol->cpuinfo.transition_latency = get_transition_latency(data);
1290 1317
1291 /* only run on specific CPU from here on */ 1318 /* only run on specific CPU from here on */
1292 oldmask = current->cpus_allowed; 1319 init_on_cpu.data = data;
1293 set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); 1320 smp_call_function_single(data->cpu, powernowk8_cpu_init_on_cpu,
1294 1321 &init_on_cpu, 1);
1295 if (smp_processor_id() != pol->cpu) { 1322 rc = init_on_cpu.rc;
1296 printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); 1323 if (rc != 0)
1297 goto err_out_unmask; 1324 goto err_out_exit_acpi;
1298 }
1299
1300 if (pending_bit_stuck()) {
1301 printk(KERN_ERR PFX "failing init, change pending bit set\n");
1302 goto err_out_unmask;
1303 }
1304
1305 if (query_current_values_with_pending_wait(data))
1306 goto err_out_unmask;
1307
1308 if (cpu_family == CPU_OPTERON)
1309 fidvid_msr_init();
1310
1311 /* run on any CPU again */
1312 set_cpus_allowed_ptr(current, &oldmask);
1313 1325
1314 if (cpu_family == CPU_HW_PSTATE) 1326 if (cpu_family == CPU_HW_PSTATE)
1315 cpumask_copy(pol->cpus, cpumask_of(pol->cpu)); 1327 cpumask_copy(pol->cpus, cpumask_of(pol->cpu));
@@ -1346,8 +1358,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1346 1358
1347 return 0; 1359 return 0;
1348 1360
1349err_out_unmask: 1361err_out_exit_acpi:
1350 set_cpus_allowed_ptr(current, &oldmask);
1351 powernow_k8_cpu_exit_acpi(data); 1362 powernow_k8_cpu_exit_acpi(data);
1352 1363
1353err_out: 1364err_out:
@@ -1372,28 +1383,25 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
1372 return 0; 1383 return 0;
1373} 1384}
1374 1385
1386static void query_values_on_cpu(void *_err)
1387{
1388 int *err = _err;
1389 struct powernow_k8_data *data = __get_cpu_var(powernow_data);
1390
1391 *err = query_current_values_with_pending_wait(data);
1392}
1393
1375static unsigned int powernowk8_get(unsigned int cpu) 1394static unsigned int powernowk8_get(unsigned int cpu)
1376{ 1395{
1377 struct powernow_k8_data *data; 1396 struct powernow_k8_data *data = per_cpu(powernow_data, cpu);
1378 cpumask_t oldmask = current->cpus_allowed;
1379 unsigned int khz = 0; 1397 unsigned int khz = 0;
1380 unsigned int first; 1398 int err;
1381
1382 first = cpumask_first(cpu_core_mask(cpu));
1383 data = per_cpu(powernow_data, first);
1384 1399
1385 if (!data) 1400 if (!data)
1386 return -EINVAL; 1401 return -EINVAL;
1387 1402
1388 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); 1403 smp_call_function_single(cpu, query_values_on_cpu, &err, true);
1389 if (smp_processor_id() != cpu) { 1404 if (err)
1390 printk(KERN_ERR PFX
1391 "limiting to CPU %d failed in powernowk8_get\n", cpu);
1392 set_cpus_allowed_ptr(current, &oldmask);
1393 return 0;
1394 }
1395
1396 if (query_current_values_with_pending_wait(data))
1397 goto out; 1405 goto out;
1398 1406
1399 if (cpu_family == CPU_HW_PSTATE) 1407 if (cpu_family == CPU_HW_PSTATE)
@@ -1404,7 +1412,6 @@ static unsigned int powernowk8_get(unsigned int cpu)
1404 1412
1405 1413
1406out: 1414out:
1407 set_cpus_allowed_ptr(current, &oldmask);
1408 return khz; 1415 return khz;
1409} 1416}
1410 1417
@@ -1430,7 +1437,9 @@ static int __cpuinit powernowk8_init(void)
1430 unsigned int i, supported_cpus = 0; 1437 unsigned int i, supported_cpus = 0;
1431 1438
1432 for_each_online_cpu(i) { 1439 for_each_online_cpu(i) {
1433 if (check_supported_cpu(i)) 1440 int rc;
1441 smp_call_function_single(i, check_supported_cpu, &rc, 1);
1442 if (rc == 0)
1434 supported_cpus++; 1443 supported_cpus++;
1435 } 1444 }
1436 1445
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index 6c6698feade1..c9c1190b5e1f 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -223,14 +223,3 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned
223 223
224static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); 224static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
225static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); 225static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
226
227#ifdef CONFIG_SMP
228static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[])
229{
230}
231#else
232static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[])
233{
234 cpu_set(0, cpu_sharedcore_mask[0]);
235}
236#endif
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index 55c831ed71ce..8d672ef162ce 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -323,14 +323,8 @@ static unsigned int get_cur_freq(unsigned int cpu)
323{ 323{
324 unsigned l, h; 324 unsigned l, h;
325 unsigned clock_freq; 325 unsigned clock_freq;
326 cpumask_t saved_mask;
327 326
328 saved_mask = current->cpus_allowed; 327 rdmsr_on_cpu(cpu, MSR_IA32_PERF_STATUS, &l, &h);
329 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
330 if (smp_processor_id() != cpu)
331 return 0;
332
333 rdmsr(MSR_IA32_PERF_STATUS, l, h);
334 clock_freq = extract_clock(l, cpu, 0); 328 clock_freq = extract_clock(l, cpu, 0);
335 329
336 if (unlikely(clock_freq == 0)) { 330 if (unlikely(clock_freq == 0)) {
@@ -340,11 +334,9 @@ static unsigned int get_cur_freq(unsigned int cpu)
340 * P-state transition (like TM2). Get the last freq set 334 * P-state transition (like TM2). Get the last freq set
341 * in PERF_CTL. 335 * in PERF_CTL.
342 */ 336 */
343 rdmsr(MSR_IA32_PERF_CTL, l, h); 337 rdmsr_on_cpu(cpu, MSR_IA32_PERF_CTL, &l, &h);
344 clock_freq = extract_clock(l, cpu, 1); 338 clock_freq = extract_clock(l, cpu, 1);
345 } 339 }
346
347 set_cpus_allowed_ptr(current, &saved_mask);
348 return clock_freq; 340 return clock_freq;
349} 341}
350 342
@@ -467,15 +459,10 @@ static int centrino_target (struct cpufreq_policy *policy,
467 struct cpufreq_freqs freqs; 459 struct cpufreq_freqs freqs;
468 int retval = 0; 460 int retval = 0;
469 unsigned int j, k, first_cpu, tmp; 461 unsigned int j, k, first_cpu, tmp;
470 cpumask_var_t saved_mask, covered_cpus; 462 cpumask_var_t covered_cpus;
471 463
472 if (unlikely(!alloc_cpumask_var(&saved_mask, GFP_KERNEL))) 464 if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL)))
473 return -ENOMEM;
474 if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))) {
475 free_cpumask_var(saved_mask);
476 return -ENOMEM; 465 return -ENOMEM;
477 }
478 cpumask_copy(saved_mask, &current->cpus_allowed);
479 466
480 if (unlikely(per_cpu(centrino_model, cpu) == NULL)) { 467 if (unlikely(per_cpu(centrino_model, cpu) == NULL)) {
481 retval = -ENODEV; 468 retval = -ENODEV;
@@ -493,7 +480,7 @@ static int centrino_target (struct cpufreq_policy *policy,
493 480
494 first_cpu = 1; 481 first_cpu = 1;
495 for_each_cpu(j, policy->cpus) { 482 for_each_cpu(j, policy->cpus) {
496 const struct cpumask *mask; 483 int good_cpu;
497 484
498 /* cpufreq holds the hotplug lock, so we are safe here */ 485 /* cpufreq holds the hotplug lock, so we are safe here */
499 if (!cpu_online(j)) 486 if (!cpu_online(j))
@@ -504,32 +491,30 @@ static int centrino_target (struct cpufreq_policy *policy,
504 * Make sure we are running on CPU that wants to change freq 491 * Make sure we are running on CPU that wants to change freq
505 */ 492 */
506 if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) 493 if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
507 mask = policy->cpus; 494 good_cpu = cpumask_any_and(policy->cpus,
495 cpu_online_mask);
508 else 496 else
509 mask = cpumask_of(j); 497 good_cpu = j;
510 498
511 set_cpus_allowed_ptr(current, mask); 499 if (good_cpu >= nr_cpu_ids) {
512 preempt_disable();
513 if (unlikely(!cpu_isset(smp_processor_id(), *mask))) {
514 dprintk("couldn't limit to CPUs in this domain\n"); 500 dprintk("couldn't limit to CPUs in this domain\n");
515 retval = -EAGAIN; 501 retval = -EAGAIN;
516 if (first_cpu) { 502 if (first_cpu) {
517 /* We haven't started the transition yet. */ 503 /* We haven't started the transition yet. */
518 goto migrate_end; 504 goto out;
519 } 505 }
520 preempt_enable();
521 break; 506 break;
522 } 507 }
523 508
524 msr = per_cpu(centrino_model, cpu)->op_points[newstate].index; 509 msr = per_cpu(centrino_model, cpu)->op_points[newstate].index;
525 510
526 if (first_cpu) { 511 if (first_cpu) {
527 rdmsr(MSR_IA32_PERF_CTL, oldmsr, h); 512 rdmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, &oldmsr, &h);
528 if (msr == (oldmsr & 0xffff)) { 513 if (msr == (oldmsr & 0xffff)) {
529 dprintk("no change needed - msr was and needs " 514 dprintk("no change needed - msr was and needs "
530 "to be %x\n", oldmsr); 515 "to be %x\n", oldmsr);
531 retval = 0; 516 retval = 0;
532 goto migrate_end; 517 goto out;
533 } 518 }
534 519
535 freqs.old = extract_clock(oldmsr, cpu, 0); 520 freqs.old = extract_clock(oldmsr, cpu, 0);
@@ -553,14 +538,11 @@ static int centrino_target (struct cpufreq_policy *policy,
553 oldmsr |= msr; 538 oldmsr |= msr;
554 } 539 }
555 540
556 wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); 541 wrmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, oldmsr, h);
557 if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { 542 if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
558 preempt_enable();
559 break; 543 break;
560 }
561 544
562 cpu_set(j, *covered_cpus); 545 cpumask_set_cpu(j, covered_cpus);
563 preempt_enable();
564 } 546 }
565 547
566 for_each_cpu(k, policy->cpus) { 548 for_each_cpu(k, policy->cpus) {
@@ -578,10 +560,8 @@ static int centrino_target (struct cpufreq_policy *policy,
578 * Best effort undo.. 560 * Best effort undo..
579 */ 561 */
580 562
581 for_each_cpu_mask_nr(j, *covered_cpus) { 563 for_each_cpu(j, covered_cpus)
582 set_cpus_allowed_ptr(current, &cpumask_of_cpu(j)); 564 wrmsr_on_cpu(j, MSR_IA32_PERF_CTL, oldmsr, h);
583 wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
584 }
585 565
586 tmp = freqs.new; 566 tmp = freqs.new;
587 freqs.new = freqs.old; 567 freqs.new = freqs.old;
@@ -593,15 +573,9 @@ static int centrino_target (struct cpufreq_policy *policy,
593 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 573 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
594 } 574 }
595 } 575 }
596 set_cpus_allowed_ptr(current, saved_mask);
597 retval = 0; 576 retval = 0;
598 goto out;
599 577
600migrate_end:
601 preempt_enable();
602 set_cpus_allowed_ptr(current, saved_mask);
603out: 578out:
604 free_cpumask_var(saved_mask);
605 free_cpumask_var(covered_cpus); 579 free_cpumask_var(covered_cpus);
606 return retval; 580 return retval;
607} 581}
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 016c1a4fa3fc..6911e91fb4f6 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -89,7 +89,8 @@ static int speedstep_find_register(void)
89 * speedstep_set_state - set the SpeedStep state 89 * speedstep_set_state - set the SpeedStep state
90 * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) 90 * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
91 * 91 *
92 * Tries to change the SpeedStep state. 92 * Tries to change the SpeedStep state. Can be called from
93 * smp_call_function_single.
93 */ 94 */
94static void speedstep_set_state(unsigned int state) 95static void speedstep_set_state(unsigned int state)
95{ 96{
@@ -143,6 +144,11 @@ static void speedstep_set_state(unsigned int state)
143 return; 144 return;
144} 145}
145 146
147/* Wrapper for smp_call_function_single. */
148static void _speedstep_set_state(void *_state)
149{
150 speedstep_set_state(*(unsigned int *)_state);
151}
146 152
147/** 153/**
148 * speedstep_activate - activate SpeedStep control in the chipset 154 * speedstep_activate - activate SpeedStep control in the chipset
@@ -226,22 +232,28 @@ static unsigned int speedstep_detect_chipset(void)
226 return 0; 232 return 0;
227} 233}
228 234
229static unsigned int _speedstep_get(const struct cpumask *cpus) 235struct get_freq_data {
230{
231 unsigned int speed; 236 unsigned int speed;
232 cpumask_t cpus_allowed; 237 unsigned int processor;
233 238};
234 cpus_allowed = current->cpus_allowed; 239
235 set_cpus_allowed_ptr(current, cpus); 240static void get_freq_data(void *_data)
236 speed = speedstep_get_frequency(speedstep_processor); 241{
237 set_cpus_allowed_ptr(current, &cpus_allowed); 242 struct get_freq_data *data = _data;
238 dprintk("detected %u kHz as current frequency\n", speed); 243
239 return speed; 244 data->speed = speedstep_get_frequency(data->processor);
240} 245}
241 246
242static unsigned int speedstep_get(unsigned int cpu) 247static unsigned int speedstep_get(unsigned int cpu)
243{ 248{
244 return _speedstep_get(cpumask_of(cpu)); 249 struct get_freq_data data = { .processor = cpu };
250
251 /* You're supposed to ensure CPU is online. */
252 if (smp_call_function_single(cpu, get_freq_data, &data, 1) != 0)
253 BUG();
254
255 dprintk("detected %u kHz as current frequency\n", data.speed);
256 return data.speed;
245} 257}
246 258
247/** 259/**
@@ -257,16 +269,16 @@ static int speedstep_target(struct cpufreq_policy *policy,
257 unsigned int target_freq, 269 unsigned int target_freq,
258 unsigned int relation) 270 unsigned int relation)
259{ 271{
260 unsigned int newstate = 0; 272 unsigned int newstate = 0, policy_cpu;
261 struct cpufreq_freqs freqs; 273 struct cpufreq_freqs freqs;
262 cpumask_t cpus_allowed;
263 int i; 274 int i;
264 275
265 if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], 276 if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0],
266 target_freq, relation, &newstate)) 277 target_freq, relation, &newstate))
267 return -EINVAL; 278 return -EINVAL;
268 279
269 freqs.old = _speedstep_get(policy->cpus); 280 policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask);
281 freqs.old = speedstep_get(policy_cpu);
270 freqs.new = speedstep_freqs[newstate].frequency; 282 freqs.new = speedstep_freqs[newstate].frequency;
271 freqs.cpu = policy->cpu; 283 freqs.cpu = policy->cpu;
272 284
@@ -276,20 +288,13 @@ static int speedstep_target(struct cpufreq_policy *policy,
276 if (freqs.old == freqs.new) 288 if (freqs.old == freqs.new)
277 return 0; 289 return 0;
278 290
279 cpus_allowed = current->cpus_allowed;
280
281 for_each_cpu(i, policy->cpus) { 291 for_each_cpu(i, policy->cpus) {
282 freqs.cpu = i; 292 freqs.cpu = i;
283 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 293 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
284 } 294 }
285 295
286 /* switch to physical CPU where state is to be changed */ 296 smp_call_function_single(policy_cpu, _speedstep_set_state, &newstate,
287 set_cpus_allowed_ptr(current, policy->cpus); 297 true);
288
289 speedstep_set_state(newstate);
290
291 /* allow to be run on all CPUs */
292 set_cpus_allowed_ptr(current, &cpus_allowed);
293 298
294 for_each_cpu(i, policy->cpus) { 299 for_each_cpu(i, policy->cpus) {
295 freqs.cpu = i; 300 freqs.cpu = i;
@@ -312,33 +317,43 @@ static int speedstep_verify(struct cpufreq_policy *policy)
312 return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); 317 return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
313} 318}
314 319
320struct get_freqs {
321 struct cpufreq_policy *policy;
322 int ret;
323};
324
325static void get_freqs_on_cpu(void *_get_freqs)
326{
327 struct get_freqs *get_freqs = _get_freqs;
328
329 get_freqs->ret =
330 speedstep_get_freqs(speedstep_processor,
331 &speedstep_freqs[SPEEDSTEP_LOW].frequency,
332 &speedstep_freqs[SPEEDSTEP_HIGH].frequency,
333 &get_freqs->policy->cpuinfo.transition_latency,
334 &speedstep_set_state);
335}
315 336
316static int speedstep_cpu_init(struct cpufreq_policy *policy) 337static int speedstep_cpu_init(struct cpufreq_policy *policy)
317{ 338{
318 int result = 0; 339 int result;
319 unsigned int speed; 340 unsigned int policy_cpu, speed;
320 cpumask_t cpus_allowed; 341 struct get_freqs gf;
321 342
322 /* only run on CPU to be set, or on its sibling */ 343 /* only run on CPU to be set, or on its sibling */
323#ifdef CONFIG_SMP 344#ifdef CONFIG_SMP
324 cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu)); 345 cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu));
325#endif 346#endif
326 347 policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask);
327 cpus_allowed = current->cpus_allowed;
328 set_cpus_allowed_ptr(current, policy->cpus);
329 348
330 /* detect low and high frequency and transition latency */ 349 /* detect low and high frequency and transition latency */
331 result = speedstep_get_freqs(speedstep_processor, 350 gf.policy = policy;
332 &speedstep_freqs[SPEEDSTEP_LOW].frequency, 351 smp_call_function_single(policy_cpu, get_freqs_on_cpu, &gf, 1);
333 &speedstep_freqs[SPEEDSTEP_HIGH].frequency, 352 if (gf.ret)
334 &policy->cpuinfo.transition_latency, 353 return gf.ret;
335 &speedstep_set_state);
336 set_cpus_allowed_ptr(current, &cpus_allowed);
337 if (result)
338 return result;
339 354
340 /* get current speed setting */ 355 /* get current speed setting */
341 speed = _speedstep_get(policy->cpus); 356 speed = speedstep_get(policy_cpu);
342 if (!speed) 357 if (!speed)
343 return -EIO; 358 return -EIO;
344 359
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
index 2e3c6862657b..f4c290b8482f 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
@@ -226,6 +226,7 @@ static unsigned int pentium4_get_frequency(void)
226} 226}
227 227
228 228
229/* Warning: may get called from smp_call_function_single. */
229unsigned int speedstep_get_frequency(unsigned int processor) 230unsigned int speedstep_get_frequency(unsigned int processor)
230{ 231{
231 switch (processor) { 232 switch (processor) {
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index daed39ba2614..3260ab044996 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -86,6 +86,29 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
86 */ 86 */
87 if (c->x86 == 6 && c->x86_model < 15) 87 if (c->x86 == 6 && c->x86_model < 15)
88 clear_cpu_cap(c, X86_FEATURE_PAT); 88 clear_cpu_cap(c, X86_FEATURE_PAT);
89
90#ifdef CONFIG_KMEMCHECK
91 /*
92 * P4s have a "fast strings" feature which causes single-
93 * stepping REP instructions to only generate a #DB on
94 * cache-line boundaries.
95 *
96 * Ingo Molnar reported a Pentium D (model 6) and a Xeon
97 * (model 2) with the same problem.
98 */
99 if (c->x86 == 15) {
100 u64 misc_enable;
101
102 rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
103
104 if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) {
105 printk(KERN_INFO "kmemcheck: Disabling fast string operations\n");
106
107 misc_enable &= ~MSR_IA32_MISC_ENABLE_FAST_STRING;
108 wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
109 }
110 }
111#endif
89} 112}
90 113
91#ifdef CONFIG_X86_32 114#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index 45004faf67ea..188a1ca5ad2b 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -1,11 +1,12 @@
1obj-y = mce.o therm_throt.o 1obj-y = mce.o
2 2
3obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o 3obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o
4obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o 4obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o
5obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o 5obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o
6obj-$(CONFIG_X86_MCE_P4THERMAL) += mce_intel.o 6obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
7obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o mce_intel.o 7obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
8obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o
9obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o 8obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
10obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o 9obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
11obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o 10obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o
11
12obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
index 89e510424152..b945d5dbc609 100644
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@@ -10,10 +10,9 @@
10 10
11#include <asm/processor.h> 11#include <asm/processor.h>
12#include <asm/system.h> 12#include <asm/system.h>
13#include <asm/mce.h>
13#include <asm/msr.h> 14#include <asm/msr.h>
14 15
15#include "mce.h"
16
17/* Machine Check Handler For AMD Athlon/Duron: */ 16/* Machine Check Handler For AMD Athlon/Duron: */
18static void k7_machine_check(struct pt_regs *regs, long error_code) 17static void k7_machine_check(struct pt_regs *regs, long error_code)
19{ 18{
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index fabba15e4558..284d1de968bc 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -44,7 +44,6 @@
44#include <asm/msr.h> 44#include <asm/msr.h>
45 45
46#include "mce-internal.h" 46#include "mce-internal.h"
47#include "mce.h"
48 47
49/* Handle unconfigured int18 (should never happen) */ 48/* Handle unconfigured int18 (should never happen) */
50static void unexpected_machine_check(struct pt_regs *regs, long error_code) 49static void unexpected_machine_check(struct pt_regs *regs, long error_code)
@@ -57,7 +56,7 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code)
57void (*machine_check_vector)(struct pt_regs *, long error_code) = 56void (*machine_check_vector)(struct pt_regs *, long error_code) =
58 unexpected_machine_check; 57 unexpected_machine_check;
59 58
60int mce_disabled; 59int mce_disabled __read_mostly;
61 60
62#ifdef CONFIG_X86_NEW_MCE 61#ifdef CONFIG_X86_NEW_MCE
63 62
@@ -76,21 +75,22 @@ DEFINE_PER_CPU(unsigned, mce_exception_count);
76 * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors 75 * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors
77 * 3: never panic or SIGBUS, log all errors (for testing only) 76 * 3: never panic or SIGBUS, log all errors (for testing only)
78 */ 77 */
79static int tolerant = 1; 78static int tolerant __read_mostly = 1;
80static int banks; 79static int banks __read_mostly;
81static u64 *bank; 80static u64 *bank __read_mostly;
82static unsigned long notify_user; 81static int rip_msr __read_mostly;
83static int rip_msr; 82static int mce_bootlog __read_mostly = -1;
84static int mce_bootlog = -1; 83static int monarch_timeout __read_mostly = -1;
85static int monarch_timeout = -1; 84static int mce_panic_timeout __read_mostly;
86static int mce_panic_timeout; 85static int mce_dont_log_ce __read_mostly;
87static int mce_dont_log_ce; 86int mce_cmci_disabled __read_mostly;
88int mce_cmci_disabled; 87int mce_ignore_ce __read_mostly;
89int mce_ignore_ce; 88int mce_ser __read_mostly;
90int mce_ser; 89
91 90/* User mode helper program triggered by machine check event */
92static char trigger[128]; 91static unsigned long mce_need_notify;
93static char *trigger_argv[2] = { trigger, NULL }; 92static char mce_helper[128];
93static char *mce_helper_argv[2] = { mce_helper, NULL };
94 94
95static unsigned long dont_init_banks; 95static unsigned long dont_init_banks;
96 96
@@ -180,7 +180,7 @@ void mce_log(struct mce *mce)
180 wmb(); 180 wmb();
181 181
182 mce->finished = 1; 182 mce->finished = 1;
183 set_bit(0, &notify_user); 183 set_bit(0, &mce_need_notify);
184} 184}
185 185
186static void print_mce(struct mce *m) 186static void print_mce(struct mce *m)
@@ -691,18 +691,21 @@ static atomic_t global_nwo;
691 * in the entry order. 691 * in the entry order.
692 * TBD double check parallel CPU hotunplug 692 * TBD double check parallel CPU hotunplug
693 */ 693 */
694static int mce_start(int no_way_out, int *order) 694static int mce_start(int *no_way_out)
695{ 695{
696 int nwo; 696 int order;
697 int cpus = num_online_cpus(); 697 int cpus = num_online_cpus();
698 u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; 698 u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC;
699 699
700 if (!timeout) { 700 if (!timeout)
701 *order = -1; 701 return -1;
702 return no_way_out;
703 }
704 702
705 atomic_add(no_way_out, &global_nwo); 703 atomic_add(*no_way_out, &global_nwo);
704 /*
705 * global_nwo should be updated before mce_callin
706 */
707 smp_wmb();
708 order = atomic_add_return(1, &mce_callin);
706 709
707 /* 710 /*
708 * Wait for everyone. 711 * Wait for everyone.
@@ -710,40 +713,43 @@ static int mce_start(int no_way_out, int *order)
710 while (atomic_read(&mce_callin) != cpus) { 713 while (atomic_read(&mce_callin) != cpus) {
711 if (mce_timed_out(&timeout)) { 714 if (mce_timed_out(&timeout)) {
712 atomic_set(&global_nwo, 0); 715 atomic_set(&global_nwo, 0);
713 *order = -1; 716 return -1;
714 return no_way_out;
715 } 717 }
716 ndelay(SPINUNIT); 718 ndelay(SPINUNIT);
717 } 719 }
718 720
719 /* 721 /*
720 * Cache the global no_way_out state. 722 * mce_callin should be read before global_nwo
721 */ 723 */
722 nwo = atomic_read(&global_nwo); 724 smp_rmb();
723 725
724 /* 726 if (order == 1) {
725 * Monarch starts executing now, the others wait. 727 /*
726 */ 728 * Monarch: Starts executing now, the others wait.
727 if (*order == 1) { 729 */
728 atomic_set(&mce_executing, 1); 730 atomic_set(&mce_executing, 1);
729 return nwo; 731 } else {
732 /*
733 * Subject: Now start the scanning loop one by one in
734 * the original callin order.
735 * This way when there are any shared banks it will be
736 * only seen by one CPU before cleared, avoiding duplicates.
737 */
738 while (atomic_read(&mce_executing) < order) {
739 if (mce_timed_out(&timeout)) {
740 atomic_set(&global_nwo, 0);
741 return -1;
742 }
743 ndelay(SPINUNIT);
744 }
730 } 745 }
731 746
732 /* 747 /*
733 * Now start the scanning loop one by one 748 * Cache the global no_way_out state.
734 * in the original callin order.
735 * This way when there are any shared banks it will
736 * be only seen by one CPU before cleared, avoiding duplicates.
737 */ 749 */
738 while (atomic_read(&mce_executing) < *order) { 750 *no_way_out = atomic_read(&global_nwo);
739 if (mce_timed_out(&timeout)) { 751
740 atomic_set(&global_nwo, 0); 752 return order;
741 *order = -1;
742 return no_way_out;
743 }
744 ndelay(SPINUNIT);
745 }
746 return nwo;
747} 753}
748 754
749/* 755/*
@@ -863,7 +869,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
863 * check handler. 869 * check handler.
864 */ 870 */
865 int order; 871 int order;
866
867 /* 872 /*
868 * If no_way_out gets set, there is no safe way to recover from this 873 * If no_way_out gets set, there is no safe way to recover from this
869 * MCE. If tolerant is cranked up, we'll try anyway. 874 * MCE. If tolerant is cranked up, we'll try anyway.
@@ -887,7 +892,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
887 if (!banks) 892 if (!banks)
888 goto out; 893 goto out;
889 894
890 order = atomic_add_return(1, &mce_callin);
891 mce_setup(&m); 895 mce_setup(&m);
892 896
893 m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); 897 m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
@@ -909,7 +913,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
909 * This way we don't report duplicated events on shared banks 913 * This way we don't report duplicated events on shared banks
910 * because the first one to see it will clear it. 914 * because the first one to see it will clear it.
911 */ 915 */
912 no_way_out = mce_start(no_way_out, &order); 916 order = mce_start(&no_way_out);
913 for (i = 0; i < banks; i++) { 917 for (i = 0; i < banks; i++) {
914 __clear_bit(i, toclear); 918 __clear_bit(i, toclear);
915 if (!bank[i]) 919 if (!bank[i])
@@ -1118,7 +1122,7 @@ static void mcheck_timer(unsigned long data)
1118 1122
1119static void mce_do_trigger(struct work_struct *work) 1123static void mce_do_trigger(struct work_struct *work)
1120{ 1124{
1121 call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); 1125 call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT);
1122} 1126}
1123 1127
1124static DECLARE_WORK(mce_trigger_work, mce_do_trigger); 1128static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
@@ -1135,7 +1139,7 @@ int mce_notify_irq(void)
1135 1139
1136 clear_thread_flag(TIF_MCE_NOTIFY); 1140 clear_thread_flag(TIF_MCE_NOTIFY);
1137 1141
1138 if (test_and_clear_bit(0, &notify_user)) { 1142 if (test_and_clear_bit(0, &mce_need_notify)) {
1139 wake_up_interruptible(&mce_wait); 1143 wake_up_interruptible(&mce_wait);
1140 1144
1141 /* 1145 /*
@@ -1143,7 +1147,7 @@ int mce_notify_irq(void)
1143 * work_pending is always cleared before the function is 1147 * work_pending is always cleared before the function is
1144 * executed. 1148 * executed.
1145 */ 1149 */
1146 if (trigger[0] && !work_pending(&mce_trigger_work)) 1150 if (mce_helper[0] && !work_pending(&mce_trigger_work))
1147 schedule_work(&mce_trigger_work); 1151 schedule_work(&mce_trigger_work);
1148 1152
1149 if (__ratelimit(&ratelimit)) 1153 if (__ratelimit(&ratelimit))
@@ -1245,7 +1249,7 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c)
1245 * Various K7s with broken bank 0 around. Always disable 1249 * Various K7s with broken bank 0 around. Always disable
1246 * by default. 1250 * by default.
1247 */ 1251 */
1248 if (c->x86 == 6) 1252 if (c->x86 == 6 && banks > 0)
1249 bank[0] = 0; 1253 bank[0] = 0;
1250 } 1254 }
1251 1255
@@ -1282,8 +1286,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
1282 return; 1286 return;
1283 switch (c->x86_vendor) { 1287 switch (c->x86_vendor) {
1284 case X86_VENDOR_INTEL: 1288 case X86_VENDOR_INTEL:
1285 if (mce_p5_enabled()) 1289 intel_p5_mcheck_init(c);
1286 intel_p5_mcheck_init(c);
1287 break; 1290 break;
1288 case X86_VENDOR_CENTAUR: 1291 case X86_VENDOR_CENTAUR:
1289 winchip_mcheck_init(c); 1292 winchip_mcheck_init(c);
@@ -1609,8 +1612,9 @@ static int mce_resume(struct sys_device *dev)
1609static void mce_cpu_restart(void *data) 1612static void mce_cpu_restart(void *data)
1610{ 1613{
1611 del_timer_sync(&__get_cpu_var(mce_timer)); 1614 del_timer_sync(&__get_cpu_var(mce_timer));
1612 if (mce_available(&current_cpu_data)) 1615 if (!mce_available(&current_cpu_data))
1613 mce_init(); 1616 return;
1617 mce_init();
1614 mce_init_timer(); 1618 mce_init_timer();
1615} 1619}
1616 1620
@@ -1620,6 +1624,26 @@ static void mce_restart(void)
1620 on_each_cpu(mce_cpu_restart, NULL, 1); 1624 on_each_cpu(mce_cpu_restart, NULL, 1);
1621} 1625}
1622 1626
1627/* Toggle features for corrected errors */
1628static void mce_disable_ce(void *all)
1629{
1630 if (!mce_available(&current_cpu_data))
1631 return;
1632 if (all)
1633 del_timer_sync(&__get_cpu_var(mce_timer));
1634 cmci_clear();
1635}
1636
1637static void mce_enable_ce(void *all)
1638{
1639 if (!mce_available(&current_cpu_data))
1640 return;
1641 cmci_reenable();
1642 cmci_recheck();
1643 if (all)
1644 mce_init_timer();
1645}
1646
1623static struct sysdev_class mce_sysclass = { 1647static struct sysdev_class mce_sysclass = {
1624 .suspend = mce_suspend, 1648 .suspend = mce_suspend,
1625 .shutdown = mce_shutdown, 1649 .shutdown = mce_shutdown,
@@ -1659,9 +1683,9 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
1659static ssize_t 1683static ssize_t
1660show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) 1684show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf)
1661{ 1685{
1662 strcpy(buf, trigger); 1686 strcpy(buf, mce_helper);
1663 strcat(buf, "\n"); 1687 strcat(buf, "\n");
1664 return strlen(trigger) + 1; 1688 return strlen(mce_helper) + 1;
1665} 1689}
1666 1690
1667static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, 1691static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
@@ -1670,10 +1694,10 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
1670 char *p; 1694 char *p;
1671 int len; 1695 int len;
1672 1696
1673 strncpy(trigger, buf, sizeof(trigger)); 1697 strncpy(mce_helper, buf, sizeof(mce_helper));
1674 trigger[sizeof(trigger)-1] = 0; 1698 mce_helper[sizeof(mce_helper)-1] = 0;
1675 len = strlen(trigger); 1699 len = strlen(mce_helper);
1676 p = strchr(trigger, '\n'); 1700 p = strchr(mce_helper, '\n');
1677 1701
1678 if (*p) 1702 if (*p)
1679 *p = 0; 1703 *p = 0;
@@ -1681,6 +1705,52 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
1681 return len; 1705 return len;
1682} 1706}
1683 1707
1708static ssize_t set_ignore_ce(struct sys_device *s,
1709 struct sysdev_attribute *attr,
1710 const char *buf, size_t size)
1711{
1712 u64 new;
1713
1714 if (strict_strtoull(buf, 0, &new) < 0)
1715 return -EINVAL;
1716
1717 if (mce_ignore_ce ^ !!new) {
1718 if (new) {
1719 /* disable ce features */
1720 on_each_cpu(mce_disable_ce, (void *)1, 1);
1721 mce_ignore_ce = 1;
1722 } else {
1723 /* enable ce features */
1724 mce_ignore_ce = 0;
1725 on_each_cpu(mce_enable_ce, (void *)1, 1);
1726 }
1727 }
1728 return size;
1729}
1730
1731static ssize_t set_cmci_disabled(struct sys_device *s,
1732 struct sysdev_attribute *attr,
1733 const char *buf, size_t size)
1734{
1735 u64 new;
1736
1737 if (strict_strtoull(buf, 0, &new) < 0)
1738 return -EINVAL;
1739
1740 if (mce_cmci_disabled ^ !!new) {
1741 if (new) {
1742 /* disable cmci */
1743 on_each_cpu(mce_disable_ce, NULL, 1);
1744 mce_cmci_disabled = 1;
1745 } else {
1746 /* enable cmci */
1747 mce_cmci_disabled = 0;
1748 on_each_cpu(mce_enable_ce, NULL, 1);
1749 }
1750 }
1751 return size;
1752}
1753
1684static ssize_t store_int_with_restart(struct sys_device *s, 1754static ssize_t store_int_with_restart(struct sys_device *s,
1685 struct sysdev_attribute *attr, 1755 struct sysdev_attribute *attr,
1686 const char *buf, size_t size) 1756 const char *buf, size_t size)
@@ -1693,6 +1763,7 @@ static ssize_t store_int_with_restart(struct sys_device *s,
1693static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); 1763static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
1694static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); 1764static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
1695static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout); 1765static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout);
1766static SYSDEV_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce);
1696 1767
1697static struct sysdev_ext_attribute attr_check_interval = { 1768static struct sysdev_ext_attribute attr_check_interval = {
1698 _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int, 1769 _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int,
@@ -1700,9 +1771,24 @@ static struct sysdev_ext_attribute attr_check_interval = {
1700 &check_interval 1771 &check_interval
1701}; 1772};
1702 1773
1774static struct sysdev_ext_attribute attr_ignore_ce = {
1775 _SYSDEV_ATTR(ignore_ce, 0644, sysdev_show_int, set_ignore_ce),
1776 &mce_ignore_ce
1777};
1778
1779static struct sysdev_ext_attribute attr_cmci_disabled = {
1780 _SYSDEV_ATTR(cmci_disabled, 0644, sysdev_show_int, set_cmci_disabled),
1781 &mce_cmci_disabled
1782};
1783
1703static struct sysdev_attribute *mce_attrs[] = { 1784static struct sysdev_attribute *mce_attrs[] = {
1704 &attr_tolerant.attr, &attr_check_interval.attr, &attr_trigger, 1785 &attr_tolerant.attr,
1786 &attr_check_interval.attr,
1787 &attr_trigger,
1705 &attr_monarch_timeout.attr, 1788 &attr_monarch_timeout.attr,
1789 &attr_dont_log_ce.attr,
1790 &attr_ignore_ce.attr,
1791 &attr_cmci_disabled.attr,
1706 NULL 1792 NULL
1707}; 1793};
1708 1794
@@ -1712,7 +1798,7 @@ static cpumask_var_t mce_dev_initialized;
1712static __cpuinit int mce_create_device(unsigned int cpu) 1798static __cpuinit int mce_create_device(unsigned int cpu)
1713{ 1799{
1714 int err; 1800 int err;
1715 int i; 1801 int i, j;
1716 1802
1717 if (!mce_available(&boot_cpu_data)) 1803 if (!mce_available(&boot_cpu_data))
1718 return -EIO; 1804 return -EIO;
@@ -1730,9 +1816,9 @@ static __cpuinit int mce_create_device(unsigned int cpu)
1730 if (err) 1816 if (err)
1731 goto error; 1817 goto error;
1732 } 1818 }
1733 for (i = 0; i < banks; i++) { 1819 for (j = 0; j < banks; j++) {
1734 err = sysdev_create_file(&per_cpu(mce_dev, cpu), 1820 err = sysdev_create_file(&per_cpu(mce_dev, cpu),
1735 &bank_attrs[i]); 1821 &bank_attrs[j]);
1736 if (err) 1822 if (err)
1737 goto error2; 1823 goto error2;
1738 } 1824 }
@@ -1740,8 +1826,8 @@ static __cpuinit int mce_create_device(unsigned int cpu)
1740 1826
1741 return 0; 1827 return 0;
1742error2: 1828error2:
1743 while (--i >= 0) 1829 while (--j >= 0)
1744 sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]); 1830 sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[j]);
1745error: 1831error:
1746 while (--i >= 0) 1832 while (--i >= 0)
1747 sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); 1833 sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
@@ -1883,7 +1969,7 @@ static __init int mce_init_device(void)
1883 if (!mce_available(&boot_cpu_data)) 1969 if (!mce_available(&boot_cpu_data))
1884 return -EIO; 1970 return -EIO;
1885 1971
1886 alloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); 1972 zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
1887 1973
1888 err = mce_init_banks(); 1974 err = mce_init_banks();
1889 if (err) 1975 if (err)
@@ -1915,7 +2001,7 @@ EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
1915/* This has to be run for each processor */ 2001/* This has to be run for each processor */
1916void mcheck_init(struct cpuinfo_x86 *c) 2002void mcheck_init(struct cpuinfo_x86 *c)
1917{ 2003{
1918 if (mce_disabled == 1) 2004 if (mce_disabled)
1919 return; 2005 return;
1920 2006
1921 switch (c->x86_vendor) { 2007 switch (c->x86_vendor) {
@@ -1945,10 +2031,9 @@ void mcheck_init(struct cpuinfo_x86 *c)
1945 2031
1946static int __init mcheck_enable(char *str) 2032static int __init mcheck_enable(char *str)
1947{ 2033{
1948 mce_disabled = -1; 2034 mce_p5_enabled = 1;
1949 return 1; 2035 return 1;
1950} 2036}
1951
1952__setup("mce", mcheck_enable); 2037__setup("mce", mcheck_enable);
1953 2038
1954#endif /* CONFIG_X86_OLD_MCE */ 2039#endif /* CONFIG_X86_OLD_MCE */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h
deleted file mode 100644
index 84a552b458c8..000000000000
--- a/arch/x86/kernel/cpu/mcheck/mce.h
+++ /dev/null
@@ -1,38 +0,0 @@
1#include <linux/init.h>
2#include <asm/mce.h>
3
4#ifdef CONFIG_X86_OLD_MCE
5void amd_mcheck_init(struct cpuinfo_x86 *c);
6void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
7void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
8#endif
9
10#ifdef CONFIG_X86_ANCIENT_MCE
11void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
12void winchip_mcheck_init(struct cpuinfo_x86 *c);
13extern int mce_p5_enable;
14static inline int mce_p5_enabled(void) { return mce_p5_enable; }
15static inline void enable_p5_mce(void) { mce_p5_enable = 1; }
16#else
17static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
18static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
19static inline int mce_p5_enabled(void) { return 0; }
20static inline void enable_p5_mce(void) { }
21#endif
22
23/* Call the installed machine check handler for this CPU setup. */
24extern void (*machine_check_vector)(struct pt_regs *, long error_code);
25
26#ifdef CONFIG_X86_OLD_MCE
27
28extern int nr_mce_banks;
29
30void intel_set_thermal_handler(void);
31
32#else
33
34static inline void intel_set_thermal_handler(void) { }
35
36#endif
37
38void intel_init_thermal(struct cpuinfo_x86 *c);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index ddae21620bda..ddae21620bda 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 2b011d2d8579..e1acec0f7a32 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -1,74 +1,226 @@
1/* 1/*
2 * Common code for Intel machine checks 2 * Intel specific MCE features.
3 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
4 * Copyright (C) 2008, 2009 Intel Corporation
5 * Author: Andi Kleen
3 */ 6 */
4#include <linux/interrupt.h>
5#include <linux/kernel.h>
6#include <linux/types.h>
7#include <linux/init.h>
8#include <linux/smp.h>
9 7
10#include <asm/therm_throt.h> 8#include <linux/init.h>
11#include <asm/processor.h> 9#include <linux/interrupt.h>
12#include <asm/system.h> 10#include <linux/percpu.h>
13#include <asm/apic.h> 11#include <asm/apic.h>
12#include <asm/processor.h>
14#include <asm/msr.h> 13#include <asm/msr.h>
14#include <asm/mce.h>
15
16/*
17 * Support for Intel Correct Machine Check Interrupts. This allows
18 * the CPU to raise an interrupt when a corrected machine check happened.
19 * Normally we pick those up using a regular polling timer.
20 * Also supports reliable discovery of shared banks.
21 */
15 22
16#include "mce.h" 23static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
17 24
18void intel_init_thermal(struct cpuinfo_x86 *c) 25/*
26 * cmci_discover_lock protects against parallel discovery attempts
27 * which could race against each other.
28 */
29static DEFINE_SPINLOCK(cmci_discover_lock);
30
31#define CMCI_THRESHOLD 1
32
33static int cmci_supported(int *banks)
19{ 34{
20 unsigned int cpu = smp_processor_id(); 35 u64 cap;
21 int tm2 = 0;
22 u32 l, h;
23 36
24 /* Thermal monitoring depends on ACPI and clock modulation*/ 37 if (mce_cmci_disabled || mce_ignore_ce)
25 if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) 38 return 0;
26 return;
27 39
28 /* 40 /*
29 * First check if its enabled already, in which case there might 41 * Vendor check is not strictly needed, but the initial
30 * be some SMM goo which handles it, so we can't even put a handler 42 * initialization is vendor keyed and this
31 * since it might be delivered via SMI already: 43 * makes sure none of the backdoors are entered otherwise.
32 */ 44 */
33 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 45 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
34 h = apic_read(APIC_LVTTHMR); 46 return 0;
35 if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { 47 if (!cpu_has_apic || lapic_get_maxlvt() < 6)
36 printk(KERN_DEBUG 48 return 0;
37 "CPU%d: Thermal monitoring handled by SMI\n", cpu); 49 rdmsrl(MSR_IA32_MCG_CAP, cap);
38 return; 50 *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
51 return !!(cap & MCG_CMCI_P);
52}
53
54/*
55 * The interrupt handler. This is called on every event.
56 * Just call the poller directly to log any events.
57 * This could in theory increase the threshold under high load,
58 * but doesn't for now.
59 */
60static void intel_threshold_interrupt(void)
61{
62 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
63 mce_notify_irq();
64}
65
66static void print_update(char *type, int *hdr, int num)
67{
68 if (*hdr == 0)
69 printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
70 *hdr = 1;
71 printk(KERN_CONT " %s:%d", type, num);
72}
73
74/*
75 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
76 * on this CPU. Use the algorithm recommended in the SDM to discover shared
77 * banks.
78 */
79static void cmci_discover(int banks, int boot)
80{
81 unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
82 unsigned long flags;
83 int hdr = 0;
84 int i;
85
86 spin_lock_irqsave(&cmci_discover_lock, flags);
87 for (i = 0; i < banks; i++) {
88 u64 val;
89
90 if (test_bit(i, owned))
91 continue;
92
93 rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
94
95 /* Already owned by someone else? */
96 if (val & CMCI_EN) {
97 if (test_and_clear_bit(i, owned) || boot)
98 print_update("SHD", &hdr, i);
99 __clear_bit(i, __get_cpu_var(mce_poll_banks));
100 continue;
101 }
102
103 val |= CMCI_EN | CMCI_THRESHOLD;
104 wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
105 rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
106
107 /* Did the enable bit stick? -- the bank supports CMCI */
108 if (val & CMCI_EN) {
109 if (!test_and_set_bit(i, owned) || boot)
110 print_update("CMCI", &hdr, i);
111 __clear_bit(i, __get_cpu_var(mce_poll_banks));
112 } else {
113 WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
114 }
39 } 115 }
116 spin_unlock_irqrestore(&cmci_discover_lock, flags);
117 if (hdr)
118 printk(KERN_CONT "\n");
119}
120
121/*
122 * Just in case we missed an event during initialization check
123 * all the CMCI owned banks.
124 */
125void cmci_recheck(void)
126{
127 unsigned long flags;
128 int banks;
129
130 if (!mce_available(&current_cpu_data) || !cmci_supported(&banks))
131 return;
132 local_irq_save(flags);
133 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
134 local_irq_restore(flags);
135}
40 136
41 if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) 137/*
42 tm2 = 1; 138 * Disable CMCI on this CPU for all banks it owns when it goes down.
139 * This allows other CPUs to claim the banks on rediscovery.
140 */
141void cmci_clear(void)
142{
143 unsigned long flags;
144 int i;
145 int banks;
146 u64 val;
43 147
44 /* Check whether a vector already exists */ 148 if (!cmci_supported(&banks))
45 if (h & APIC_VECTOR_MASK) {
46 printk(KERN_DEBUG
47 "CPU%d: Thermal LVT vector (%#x) already installed\n",
48 cpu, (h & APIC_VECTOR_MASK));
49 return; 149 return;
150 spin_lock_irqsave(&cmci_discover_lock, flags);
151 for (i = 0; i < banks; i++) {
152 if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
153 continue;
154 /* Disable CMCI */
155 rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
156 val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
157 wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
158 __clear_bit(i, __get_cpu_var(mce_banks_owned));
50 } 159 }
160 spin_unlock_irqrestore(&cmci_discover_lock, flags);
161}
162
163/*
164 * After a CPU went down cycle through all the others and rediscover
165 * Must run in process context.
166 */
167void cmci_rediscover(int dying)
168{
169 int banks;
170 int cpu;
171 cpumask_var_t old;
172
173 if (!cmci_supported(&banks))
174 return;
175 if (!alloc_cpumask_var(&old, GFP_KERNEL))
176 return;
177 cpumask_copy(old, &current->cpus_allowed);
51 178
52 /* We'll mask the thermal vector in the lapic till we're ready: */ 179 for_each_online_cpu(cpu) {
53 h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; 180 if (cpu == dying)
54 apic_write(APIC_LVTTHMR, h); 181 continue;
182 if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
183 continue;
184 /* Recheck banks in case CPUs don't all have the same */
185 if (cmci_supported(&banks))
186 cmci_discover(banks, 0);
187 }
55 188
56 rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); 189 set_cpus_allowed_ptr(current, old);
57 wrmsr(MSR_IA32_THERM_INTERRUPT, 190 free_cpumask_var(old);
58 l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); 191}
59 192
60 intel_set_thermal_handler(); 193/*
194 * Reenable CMCI on this CPU in case a CPU down failed.
195 */
196void cmci_reenable(void)
197{
198 int banks;
199 if (cmci_supported(&banks))
200 cmci_discover(banks, 0);
201}
61 202
62 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 203static void intel_init_cmci(void)
63 wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); 204{
205 int banks;
64 206
65 /* Unmask the thermal vector: */ 207 if (!cmci_supported(&banks))
66 l = apic_read(APIC_LVTTHMR); 208 return;
67 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
68 209
69 printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", 210 mce_threshold_vector = intel_threshold_interrupt;
70 cpu, tm2 ? "TM2" : "TM1"); 211 cmci_discover(banks, 1);
212 /*
213 * For CPU #0 this runs with still disabled APIC, but that's
214 * ok because only the vector is set up. We still do another
215 * check for the banks later for CPU #0 just to make sure
216 * to not miss any events.
217 */
218 apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
219 cmci_recheck();
220}
71 221
72 /* enable thermal throttle processing */ 222void mce_intel_feature_init(struct cpuinfo_x86 *c)
73 atomic_set(&therm_throt_en, 1); 223{
224 intel_init_thermal(c);
225 intel_init_cmci();
74} 226}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
deleted file mode 100644
index f2ef6952c400..000000000000
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ /dev/null
@@ -1,248 +0,0 @@
1/*
2 * Intel specific MCE features.
3 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
4 * Copyright (C) 2008, 2009 Intel Corporation
5 * Author: Andi Kleen
6 */
7
8#include <linux/init.h>
9#include <linux/interrupt.h>
10#include <linux/percpu.h>
11#include <asm/processor.h>
12#include <asm/apic.h>
13#include <asm/msr.h>
14#include <asm/mce.h>
15#include <asm/hw_irq.h>
16#include <asm/idle.h>
17#include <asm/therm_throt.h>
18
19#include "mce.h"
20
21asmlinkage void smp_thermal_interrupt(void)
22{
23 __u64 msr_val;
24
25 ack_APIC_irq();
26
27 exit_idle();
28 irq_enter();
29
30 rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
31 if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
32 mce_log_therm_throt_event(msr_val);
33
34 inc_irq_stat(irq_thermal_count);
35 irq_exit();
36}
37
38/*
39 * Support for Intel Correct Machine Check Interrupts. This allows
40 * the CPU to raise an interrupt when a corrected machine check happened.
41 * Normally we pick those up using a regular polling timer.
42 * Also supports reliable discovery of shared banks.
43 */
44
45static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
46
47/*
48 * cmci_discover_lock protects against parallel discovery attempts
49 * which could race against each other.
50 */
51static DEFINE_SPINLOCK(cmci_discover_lock);
52
53#define CMCI_THRESHOLD 1
54
55static int cmci_supported(int *banks)
56{
57 u64 cap;
58
59 if (mce_cmci_disabled || mce_ignore_ce)
60 return 0;
61
62 /*
63 * Vendor check is not strictly needed, but the initial
64 * initialization is vendor keyed and this
65 * makes sure none of the backdoors are entered otherwise.
66 */
67 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
68 return 0;
69 if (!cpu_has_apic || lapic_get_maxlvt() < 6)
70 return 0;
71 rdmsrl(MSR_IA32_MCG_CAP, cap);
72 *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
73 return !!(cap & MCG_CMCI_P);
74}
75
76/*
77 * The interrupt handler. This is called on every event.
78 * Just call the poller directly to log any events.
79 * This could in theory increase the threshold under high load,
80 * but doesn't for now.
81 */
82static void intel_threshold_interrupt(void)
83{
84 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
85 mce_notify_irq();
86}
87
88static void print_update(char *type, int *hdr, int num)
89{
90 if (*hdr == 0)
91 printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
92 *hdr = 1;
93 printk(KERN_CONT " %s:%d", type, num);
94}
95
96/*
97 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
98 * on this CPU. Use the algorithm recommended in the SDM to discover shared
99 * banks.
100 */
101static void cmci_discover(int banks, int boot)
102{
103 unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
104 unsigned long flags;
105 int hdr = 0;
106 int i;
107
108 spin_lock_irqsave(&cmci_discover_lock, flags);
109 for (i = 0; i < banks; i++) {
110 u64 val;
111
112 if (test_bit(i, owned))
113 continue;
114
115 rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
116
117 /* Already owned by someone else? */
118 if (val & CMCI_EN) {
119 if (test_and_clear_bit(i, owned) || boot)
120 print_update("SHD", &hdr, i);
121 __clear_bit(i, __get_cpu_var(mce_poll_banks));
122 continue;
123 }
124
125 val |= CMCI_EN | CMCI_THRESHOLD;
126 wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
127 rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
128
129 /* Did the enable bit stick? -- the bank supports CMCI */
130 if (val & CMCI_EN) {
131 if (!test_and_set_bit(i, owned) || boot)
132 print_update("CMCI", &hdr, i);
133 __clear_bit(i, __get_cpu_var(mce_poll_banks));
134 } else {
135 WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
136 }
137 }
138 spin_unlock_irqrestore(&cmci_discover_lock, flags);
139 if (hdr)
140 printk(KERN_CONT "\n");
141}
142
143/*
144 * Just in case we missed an event during initialization check
145 * all the CMCI owned banks.
146 */
147void cmci_recheck(void)
148{
149 unsigned long flags;
150 int banks;
151
152 if (!mce_available(&current_cpu_data) || !cmci_supported(&banks))
153 return;
154 local_irq_save(flags);
155 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
156 local_irq_restore(flags);
157}
158
159/*
160 * Disable CMCI on this CPU for all banks it owns when it goes down.
161 * This allows other CPUs to claim the banks on rediscovery.
162 */
163void cmci_clear(void)
164{
165 unsigned long flags;
166 int i;
167 int banks;
168 u64 val;
169
170 if (!cmci_supported(&banks))
171 return;
172 spin_lock_irqsave(&cmci_discover_lock, flags);
173 for (i = 0; i < banks; i++) {
174 if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
175 continue;
176 /* Disable CMCI */
177 rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
178 val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
179 wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
180 __clear_bit(i, __get_cpu_var(mce_banks_owned));
181 }
182 spin_unlock_irqrestore(&cmci_discover_lock, flags);
183}
184
185/*
186 * After a CPU went down cycle through all the others and rediscover
187 * Must run in process context.
188 */
189void cmci_rediscover(int dying)
190{
191 int banks;
192 int cpu;
193 cpumask_var_t old;
194
195 if (!cmci_supported(&banks))
196 return;
197 if (!alloc_cpumask_var(&old, GFP_KERNEL))
198 return;
199 cpumask_copy(old, &current->cpus_allowed);
200
201 for_each_online_cpu(cpu) {
202 if (cpu == dying)
203 continue;
204 if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
205 continue;
206 /* Recheck banks in case CPUs don't all have the same */
207 if (cmci_supported(&banks))
208 cmci_discover(banks, 0);
209 }
210
211 set_cpus_allowed_ptr(current, old);
212 free_cpumask_var(old);
213}
214
215/*
216 * Reenable CMCI on this CPU in case a CPU down failed.
217 */
218void cmci_reenable(void)
219{
220 int banks;
221 if (cmci_supported(&banks))
222 cmci_discover(banks, 0);
223}
224
225static void intel_init_cmci(void)
226{
227 int banks;
228
229 if (!cmci_supported(&banks))
230 return;
231
232 mce_threshold_vector = intel_threshold_interrupt;
233 cmci_discover(banks, 1);
234 /*
235 * For CPU #0 this runs with still disabled APIC, but that's
236 * ok because only the vector is set up. We still do another
237 * check for the banks later for CPU #0 just to make sure
238 * to not miss any events.
239 */
240 apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
241 cmci_recheck();
242}
243
244void mce_intel_feature_init(struct cpuinfo_x86 *c)
245{
246 intel_init_thermal(c);
247 intel_init_cmci();
248}
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
index 70b710420f74..f5f2d6f71fb6 100644
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c
@@ -17,10 +17,9 @@
17 17
18#include <asm/processor.h> 18#include <asm/processor.h>
19#include <asm/system.h> 19#include <asm/system.h>
20#include <asm/mce.h>
20#include <asm/msr.h> 21#include <asm/msr.h>
21 22
22#include "mce.h"
23
24static int firstbank; 23static int firstbank;
25 24
26#define MCE_RATE (15*HZ) /* timer rate is 15s */ 25#define MCE_RATE (15*HZ) /* timer rate is 15s */
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index 82cee108a2d3..4482aea9aa2e 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -1,21 +1,15 @@
1/* 1/*
2 * P4 specific Machine Check Exception Reporting 2 * P4 specific Machine Check Exception Reporting
3 */ 3 */
4
5#include <linux/interrupt.h>
6#include <linux/kernel.h> 4#include <linux/kernel.h>
7#include <linux/types.h> 5#include <linux/types.h>
8#include <linux/init.h> 6#include <linux/init.h>
9#include <linux/smp.h> 7#include <linux/smp.h>
10 8
11#include <asm/therm_throt.h>
12#include <asm/processor.h> 9#include <asm/processor.h>
13#include <asm/system.h> 10#include <asm/mce.h>
14#include <asm/apic.h>
15#include <asm/msr.h> 11#include <asm/msr.h>
16 12
17#include "mce.h"
18
19/* as supported by the P4/Xeon family */ 13/* as supported by the P4/Xeon family */
20struct intel_mce_extended_msrs { 14struct intel_mce_extended_msrs {
21 u32 eax; 15 u32 eax;
@@ -33,46 +27,6 @@ struct intel_mce_extended_msrs {
33 27
34static int mce_num_extended_msrs; 28static int mce_num_extended_msrs;
35 29
36
37#ifdef CONFIG_X86_MCE_P4THERMAL
38
39static void unexpected_thermal_interrupt(struct pt_regs *regs)
40{
41 printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
42 smp_processor_id());
43 add_taint(TAINT_MACHINE_CHECK);
44}
45
46/* P4/Xeon Thermal transition interrupt handler: */
47static void intel_thermal_interrupt(struct pt_regs *regs)
48{
49 __u64 msr_val;
50
51 ack_APIC_irq();
52
53 rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
54 therm_throt_process(msr_val & THERM_STATUS_PROCHOT);
55}
56
57/* Thermal interrupt handler for this CPU setup: */
58static void (*vendor_thermal_interrupt)(struct pt_regs *regs) =
59 unexpected_thermal_interrupt;
60
61void smp_thermal_interrupt(struct pt_regs *regs)
62{
63 irq_enter();
64 vendor_thermal_interrupt(regs);
65 __get_cpu_var(irq_stat).irq_thermal_count++;
66 irq_exit();
67}
68
69void intel_set_thermal_handler(void)
70{
71 vendor_thermal_interrupt = intel_thermal_interrupt;
72}
73
74#endif /* CONFIG_X86_MCE_P4THERMAL */
75
76/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ 30/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
77static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) 31static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
78{ 32{
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index 015f481ab1b0..5c0e6533d9bc 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -10,12 +10,11 @@
10 10
11#include <asm/processor.h> 11#include <asm/processor.h>
12#include <asm/system.h> 12#include <asm/system.h>
13#include <asm/mce.h>
13#include <asm/msr.h> 14#include <asm/msr.h>
14 15
15#include "mce.h"
16
17/* By default disabled */ 16/* By default disabled */
18int mce_p5_enable; 17int mce_p5_enabled __read_mostly;
19 18
20/* Machine check handler for Pentium class Intel CPUs: */ 19/* Machine check handler for Pentium class Intel CPUs: */
21static void pentium_machine_check(struct pt_regs *regs, long error_code) 20static void pentium_machine_check(struct pt_regs *regs, long error_code)
@@ -43,15 +42,13 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
43{ 42{
44 u32 l, h; 43 u32 l, h;
45 44
46 /* Check for MCE support: */ 45 /* Default P5 to off as its often misconnected: */
47 if (!cpu_has(c, X86_FEATURE_MCE)) 46 if (!mce_p5_enabled)
48 return; 47 return;
49 48
50#ifdef CONFIG_X86_OLD_MCE 49 /* Check for MCE support: */
51 /* Default P5 to off as its often misconnected: */ 50 if (!cpu_has(c, X86_FEATURE_MCE))
52 if (mce_disabled != -1)
53 return; 51 return;
54#endif
55 52
56 machine_check_vector = pentium_machine_check; 53 machine_check_vector = pentium_machine_check;
57 /* Make sure the vector pointer is visible before we enable MCEs: */ 54 /* Make sure the vector pointer is visible before we enable MCEs: */
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c
index 43c24e667457..01e4f8178183 100644
--- a/arch/x86/kernel/cpu/mcheck/p6.c
+++ b/arch/x86/kernel/cpu/mcheck/p6.c
@@ -10,10 +10,9 @@
10 10
11#include <asm/processor.h> 11#include <asm/processor.h>
12#include <asm/system.h> 12#include <asm/system.h>
13#include <asm/mce.h>
13#include <asm/msr.h> 14#include <asm/msr.h>
14 15
15#include "mce.h"
16
17/* Machine Check Handler For PII/PIII */ 16/* Machine Check Handler For PII/PIII */
18static void intel_machine_check(struct pt_regs *regs, long error_code) 17static void intel_machine_check(struct pt_regs *regs, long error_code)
19{ 18{
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 7b1ae2e20ba5..bff8dd191dd5 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -13,13 +13,23 @@
13 * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. 13 * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
14 * Inspired by Ross Biro's and Al Borchers' counter code. 14 * Inspired by Ross Biro's and Al Borchers' counter code.
15 */ 15 */
16#include <linux/interrupt.h>
16#include <linux/notifier.h> 17#include <linux/notifier.h>
17#include <linux/jiffies.h> 18#include <linux/jiffies.h>
19#include <linux/kernel.h>
18#include <linux/percpu.h> 20#include <linux/percpu.h>
19#include <linux/sysdev.h> 21#include <linux/sysdev.h>
22#include <linux/types.h>
23#include <linux/init.h>
24#include <linux/smp.h>
20#include <linux/cpu.h> 25#include <linux/cpu.h>
21 26
22#include <asm/therm_throt.h> 27#include <asm/processor.h>
28#include <asm/system.h>
29#include <asm/apic.h>
30#include <asm/idle.h>
31#include <asm/mce.h>
32#include <asm/msr.h>
23 33
24/* How long to wait between reporting thermal events */ 34/* How long to wait between reporting thermal events */
25#define CHECK_INTERVAL (300 * HZ) 35#define CHECK_INTERVAL (300 * HZ)
@@ -27,7 +37,7 @@
27static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; 37static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
28static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); 38static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
29 39
30atomic_t therm_throt_en = ATOMIC_INIT(0); 40static atomic_t therm_throt_en = ATOMIC_INIT(0);
31 41
32#ifdef CONFIG_SYSFS 42#ifdef CONFIG_SYSFS
33#define define_therm_throt_sysdev_one_ro(_name) \ 43#define define_therm_throt_sysdev_one_ro(_name) \
@@ -82,7 +92,7 @@ static struct attribute_group thermal_throttle_attr_group = {
82 * 1 : Event should be logged further, and a message has been 92 * 1 : Event should be logged further, and a message has been
83 * printed to the syslog. 93 * printed to the syslog.
84 */ 94 */
85int therm_throt_process(int curr) 95static int therm_throt_process(int curr)
86{ 96{
87 unsigned int cpu = smp_processor_id(); 97 unsigned int cpu = smp_processor_id();
88 __u64 tmp_jiffs = get_jiffies_64(); 98 __u64 tmp_jiffs = get_jiffies_64();
@@ -186,6 +196,94 @@ static __init int thermal_throttle_init_device(void)
186 196
187 return 0; 197 return 0;
188} 198}
189
190device_initcall(thermal_throttle_init_device); 199device_initcall(thermal_throttle_init_device);
200
191#endif /* CONFIG_SYSFS */ 201#endif /* CONFIG_SYSFS */
202
203/* Thermal transition interrupt handler */
204static void intel_thermal_interrupt(void)
205{
206 __u64 msr_val;
207
208 rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
209 if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
210 mce_log_therm_throt_event(msr_val);
211}
212
213static void unexpected_thermal_interrupt(void)
214{
215 printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
216 smp_processor_id());
217 add_taint(TAINT_MACHINE_CHECK);
218}
219
220static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
221
222asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
223{
224 exit_idle();
225 irq_enter();
226 inc_irq_stat(irq_thermal_count);
227 smp_thermal_vector();
228 irq_exit();
229 /* Ack only at the end to avoid potential reentry */
230 ack_APIC_irq();
231}
232
233void intel_init_thermal(struct cpuinfo_x86 *c)
234{
235 unsigned int cpu = smp_processor_id();
236 int tm2 = 0;
237 u32 l, h;
238
239 /* Thermal monitoring depends on ACPI and clock modulation*/
240 if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
241 return;
242
243 /*
244 * First check if its enabled already, in which case there might
245 * be some SMM goo which handles it, so we can't even put a handler
246 * since it might be delivered via SMI already:
247 */
248 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
249 h = apic_read(APIC_LVTTHMR);
250 if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
251 printk(KERN_DEBUG
252 "CPU%d: Thermal monitoring handled by SMI\n", cpu);
253 return;
254 }
255
256 if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
257 tm2 = 1;
258
259 /* Check whether a vector already exists */
260 if (h & APIC_VECTOR_MASK) {
261 printk(KERN_DEBUG
262 "CPU%d: Thermal LVT vector (%#x) already installed\n",
263 cpu, (h & APIC_VECTOR_MASK));
264 return;
265 }
266
267 /* We'll mask the thermal vector in the lapic till we're ready: */
268 h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
269 apic_write(APIC_LVTTHMR, h);
270
271 rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
272 wrmsr(MSR_IA32_THERM_INTERRUPT,
273 l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
274
275 smp_thermal_vector = intel_thermal_interrupt;
276
277 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
278 wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
279
280 /* Unmask the thermal vector: */
281 l = apic_read(APIC_LVTTHMR);
282 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
283
284 printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
285 cpu, tm2 ? "TM2" : "TM1");
286
287 /* enable thermal throttle processing */
288 atomic_set(&therm_throt_en, 1);
289}
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 81b02487090b..54060f565974 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -9,10 +9,9 @@
9 9
10#include <asm/processor.h> 10#include <asm/processor.h>
11#include <asm/system.h> 11#include <asm/system.h>
12#include <asm/mce.h>
12#include <asm/msr.h> 13#include <asm/msr.h>
13 14
14#include "mce.h"
15
16/* Machine check handler for WinChip C6: */ 15/* Machine check handler for WinChip C6: */
17static void winchip_machine_check(struct pt_regs *regs, long error_code) 16static void winchip_machine_check(struct pt_regs *regs, long error_code)
18{ 17{
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index d6f5b9fbde32..5c481f6205bf 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -716,11 +716,15 @@ static void probe_nmi_watchdog(void)
716 wd_ops = &k7_wd_ops; 716 wd_ops = &k7_wd_ops;
717 break; 717 break;
718 case X86_VENDOR_INTEL: 718 case X86_VENDOR_INTEL:
719 /* 719 /* Work around where perfctr1 doesn't have a working enable
720 * Work around Core Duo (Yonah) errata AE49 where perfctr1 720 * bit as described in the following errata:
721 * doesn't have a working enable bit. 721 * AE49 Core Duo and Intel Core Solo 65 nm
722 * AN49 Intel Pentium Dual-Core
723 * AF49 Dual-Core Intel Xeon Processor LV
722 */ 724 */
723 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) { 725 if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) ||
726 ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 &&
727 boot_cpu_data.x86_mask == 4))) {
724 intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0; 728 intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
725 intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0; 729 intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
726 } 730 }
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 2ac1f0c2beb3..b07af8861244 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -182,6 +182,11 @@ static struct notifier_block __refdata cpuid_class_cpu_notifier =
182 .notifier_call = cpuid_class_cpu_callback, 182 .notifier_call = cpuid_class_cpu_callback,
183}; 183};
184 184
185static char *cpuid_nodename(struct device *dev)
186{
187 return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt));
188}
189
185static int __init cpuid_init(void) 190static int __init cpuid_init(void)
186{ 191{
187 int i, err = 0; 192 int i, err = 0;
@@ -198,6 +203,7 @@ static int __init cpuid_init(void)
198 err = PTR_ERR(cpuid_class); 203 err = PTR_ERR(cpuid_class);
199 goto out_chrdev; 204 goto out_chrdev;
200 } 205 }
206 cpuid_class->nodename = cpuid_nodename;
201 for_each_online_cpu(i) { 207 for_each_online_cpu(i) {
202 err = cpuid_device_create(i); 208 err = cpuid_device_create(i);
203 if (err != 0) 209 if (err != 0)
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index ff958248e61d..5e409dc298a4 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -27,6 +27,7 @@
27#include <asm/cpu.h> 27#include <asm/cpu.h>
28#include <asm/reboot.h> 28#include <asm/reboot.h>
29#include <asm/virtext.h> 29#include <asm/virtext.h>
30#include <asm/iommu.h>
30 31
31 32
32#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) 33#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
@@ -103,5 +104,10 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
103#ifdef CONFIG_HPET_TIMER 104#ifdef CONFIG_HPET_TIMER
104 hpet_disable(); 105 hpet_disable();
105#endif 106#endif
107
108#ifdef CONFIG_X86_64
109 pci_iommu_shutdown();
110#endif
111
106 crash_save_cpu(regs, safe_smp_processor_id()); 112 crash_save_cpu(regs, safe_smp_processor_id());
107} 113}
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 1736acc4d7aa..96f7ac0bbf01 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -240,10 +240,35 @@ static void __init do_add_efi_memmap(void)
240 unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; 240 unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
241 int e820_type; 241 int e820_type;
242 242
243 if (md->attribute & EFI_MEMORY_WB) 243 switch (md->type) {
244 e820_type = E820_RAM; 244 case EFI_LOADER_CODE:
245 else 245 case EFI_LOADER_DATA:
246 case EFI_BOOT_SERVICES_CODE:
247 case EFI_BOOT_SERVICES_DATA:
248 case EFI_CONVENTIONAL_MEMORY:
249 if (md->attribute & EFI_MEMORY_WB)
250 e820_type = E820_RAM;
251 else
252 e820_type = E820_RESERVED;
253 break;
254 case EFI_ACPI_RECLAIM_MEMORY:
255 e820_type = E820_ACPI;
256 break;
257 case EFI_ACPI_MEMORY_NVS:
258 e820_type = E820_NVS;
259 break;
260 case EFI_UNUSABLE_MEMORY:
261 e820_type = E820_UNUSABLE;
262 break;
263 default:
264 /*
265 * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE
266 * EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO
267 * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE
268 */
246 e820_type = E820_RESERVED; 269 e820_type = E820_RESERVED;
270 break;
271 }
247 e820_add_region(start, size, e820_type); 272 e820_add_region(start, size, e820_type);
248 } 273 }
249 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); 274 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 0d4b28564c14..c097e7d607c6 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -48,7 +48,6 @@
48#include <asm/segment.h> 48#include <asm/segment.h>
49#include <asm/smp.h> 49#include <asm/smp.h>
50#include <asm/page_types.h> 50#include <asm/page_types.h>
51#include <asm/desc.h>
52#include <asm/percpu.h> 51#include <asm/percpu.h>
53#include <asm/dwarf2.h> 52#include <asm/dwarf2.h>
54#include <asm/processor-flags.h> 53#include <asm/processor-flags.h>
@@ -84,7 +83,7 @@
84#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF 83#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
85#else 84#else
86#define preempt_stop(clobbers) 85#define preempt_stop(clobbers)
87#define resume_kernel restore_nocheck 86#define resume_kernel restore_all
88#endif 87#endif
89 88
90.macro TRACE_IRQS_IRET 89.macro TRACE_IRQS_IRET
@@ -372,7 +371,7 @@ END(ret_from_exception)
372ENTRY(resume_kernel) 371ENTRY(resume_kernel)
373 DISABLE_INTERRUPTS(CLBR_ANY) 372 DISABLE_INTERRUPTS(CLBR_ANY)
374 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? 373 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
375 jnz restore_nocheck 374 jnz restore_all
376need_resched: 375need_resched:
377 movl TI_flags(%ebp), %ecx # need_resched set ? 376 movl TI_flags(%ebp), %ecx # need_resched set ?
378 testb $_TIF_NEED_RESCHED, %cl 377 testb $_TIF_NEED_RESCHED, %cl
@@ -540,6 +539,8 @@ syscall_exit:
540 jne syscall_exit_work 539 jne syscall_exit_work
541 540
542restore_all: 541restore_all:
542 TRACE_IRQS_IRET
543restore_all_notrace:
543 movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS 544 movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
544 # Warning: PT_OLDSS(%esp) contains the wrong/random values if we 545 # Warning: PT_OLDSS(%esp) contains the wrong/random values if we
545 # are returning to the kernel. 546 # are returning to the kernel.
@@ -551,8 +552,6 @@ restore_all:
551 CFI_REMEMBER_STATE 552 CFI_REMEMBER_STATE
552 je ldt_ss # returning to user-space with LDT SS 553 je ldt_ss # returning to user-space with LDT SS
553restore_nocheck: 554restore_nocheck:
554 TRACE_IRQS_IRET
555restore_nocheck_notrace:
556 RESTORE_REGS 4 # skip orig_eax/error_code 555 RESTORE_REGS 4 # skip orig_eax/error_code
557 CFI_ADJUST_CFA_OFFSET -4 556 CFI_ADJUST_CFA_OFFSET -4
558irq_return: 557irq_return:
@@ -588,22 +587,34 @@ ldt_ss:
588 jne restore_nocheck 587 jne restore_nocheck
589#endif 588#endif
590 589
591 /* If returning to userspace with 16bit stack, 590/*
592 * try to fix the higher word of ESP, as the CPU 591 * Setup and switch to ESPFIX stack
593 * won't restore it. 592 *
594 * This is an "official" bug of all the x86-compatible 593 * We're returning to userspace with a 16 bit stack. The CPU will not
595 * CPUs, which we can try to work around to make 594 * restore the high word of ESP for us on executing iret... This is an
596 * dosemu and wine happy. */ 595 * "official" bug of all the x86-compatible CPUs, which we can work
597 movl PT_OLDESP(%esp), %eax 596 * around to make dosemu and wine happy. We do this by preloading the
598 movl %esp, %edx 597 * high word of ESP with the high word of the userspace ESP while
599 call patch_espfix_desc 598 * compensating for the offset by changing to the ESPFIX segment with
599 * a base address that matches for the difference.
600 */
601 mov %esp, %edx /* load kernel esp */
602 mov PT_OLDESP(%esp), %eax /* load userspace esp */
603 mov %dx, %ax /* eax: new kernel esp */
604 sub %eax, %edx /* offset (low word is 0) */
605 PER_CPU(gdt_page, %ebx)
606 shr $16, %edx
607 mov %dl, GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx) /* bits 16..23 */
608 mov %dh, GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx) /* bits 24..31 */
600 pushl $__ESPFIX_SS 609 pushl $__ESPFIX_SS
601 CFI_ADJUST_CFA_OFFSET 4 610 CFI_ADJUST_CFA_OFFSET 4
602 pushl %eax 611 push %eax /* new kernel esp */
603 CFI_ADJUST_CFA_OFFSET 4 612 CFI_ADJUST_CFA_OFFSET 4
613 /* Disable interrupts, but do not irqtrace this section: we
614 * will soon execute iret and the tracer was already set to
615 * the irqstate after the iret */
604 DISABLE_INTERRUPTS(CLBR_EAX) 616 DISABLE_INTERRUPTS(CLBR_EAX)
605 TRACE_IRQS_OFF 617 lss (%esp), %esp /* switch to espfix segment */
606 lss (%esp), %esp
607 CFI_ADJUST_CFA_OFFSET -8 618 CFI_ADJUST_CFA_OFFSET -8
608 jmp restore_nocheck 619 jmp restore_nocheck
609 CFI_ENDPROC 620 CFI_ENDPROC
@@ -716,15 +727,24 @@ PTREGSCALL(vm86)
716PTREGSCALL(vm86old) 727PTREGSCALL(vm86old)
717 728
718.macro FIXUP_ESPFIX_STACK 729.macro FIXUP_ESPFIX_STACK
719 /* since we are on a wrong stack, we cant make it a C code :( */ 730/*
731 * Switch back from the ESPFIX stack to the normal zero-based stack
732 *
733 * We can't call C functions using the ESPFIX stack. This code reads
734 * the high word of the segment base from the GDT and switches to the
735 * normal stack and adjusts ESP with the matching offset.
736 */
737 /* fixup the stack */
720 PER_CPU(gdt_page, %ebx) 738 PER_CPU(gdt_page, %ebx)
721 GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) 739 mov GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx), %al /* bits 16..23 */
722 addl %esp, %eax 740 mov GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx), %ah /* bits 24..31 */
741 shl $16, %eax
742 addl %esp, %eax /* the adjusted stack pointer */
723 pushl $__KERNEL_DS 743 pushl $__KERNEL_DS
724 CFI_ADJUST_CFA_OFFSET 4 744 CFI_ADJUST_CFA_OFFSET 4
725 pushl %eax 745 pushl %eax
726 CFI_ADJUST_CFA_OFFSET 4 746 CFI_ADJUST_CFA_OFFSET 4
727 lss (%esp), %esp 747 lss (%esp), %esp /* switch to the normal stack segment */
728 CFI_ADJUST_CFA_OFFSET -8 748 CFI_ADJUST_CFA_OFFSET -8
729.endm 749.endm
730.macro UNWIND_ESPFIX_STACK 750.macro UNWIND_ESPFIX_STACK
@@ -1331,7 +1351,7 @@ nmi_stack_correct:
1331 xorl %edx,%edx # zero error code 1351 xorl %edx,%edx # zero error code
1332 movl %esp,%eax # pt_regs pointer 1352 movl %esp,%eax # pt_regs pointer
1333 call do_nmi 1353 call do_nmi
1334 jmp restore_nocheck_notrace 1354 jmp restore_all_notrace
1335 CFI_ENDPROC 1355 CFI_ENDPROC
1336 1356
1337nmi_stack_fixup: 1357nmi_stack_fixup:
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index dc5ed4bdd88d..8663afb56535 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -13,7 +13,6 @@
13#include <asm/segment.h> 13#include <asm/segment.h>
14#include <asm/page_types.h> 14#include <asm/page_types.h>
15#include <asm/pgtable_types.h> 15#include <asm/pgtable_types.h>
16#include <asm/desc.h>
17#include <asm/cache.h> 16#include <asm/cache.h>
18#include <asm/thread_info.h> 17#include <asm/thread_info.h>
19#include <asm/asm-offsets.h> 18#include <asm/asm-offsets.h>
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 54b29bb24e71..fa54f78e2a05 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -12,7 +12,6 @@
12#include <linux/linkage.h> 12#include <linux/linkage.h>
13#include <linux/threads.h> 13#include <linux/threads.h>
14#include <linux/init.h> 14#include <linux/init.h>
15#include <asm/desc.h>
16#include <asm/segment.h> 15#include <asm/segment.h>
17#include <asm/pgtable.h> 16#include <asm/pgtable.h>
18#include <asm/page.h> 17#include <asm/page.h>
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 81408b93f887..dedc2bddf7a5 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -510,7 +510,8 @@ static int hpet_setup_irq(struct hpet_dev *dev)
510{ 510{
511 511
512 if (request_irq(dev->irq, hpet_interrupt_handler, 512 if (request_irq(dev->irq, hpet_interrupt_handler,
513 IRQF_DISABLED|IRQF_NOBALANCING, dev->name, dev)) 513 IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING,
514 dev->name, dev))
514 return -1; 515 return -1;
515 516
516 disable_irq(dev->irq); 517 disable_irq(dev->irq);
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index c2e0bb0890d4..5cf36c053ac4 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -7,6 +7,7 @@
7#include <linux/spinlock.h> 7#include <linux/spinlock.h>
8#include <linux/jiffies.h> 8#include <linux/jiffies.h>
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/timex.h>
10#include <linux/delay.h> 11#include <linux/delay.h>
11#include <linux/init.h> 12#include <linux/init.h>
12#include <linux/io.h> 13#include <linux/io.h>
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index df3bf269beab..270ff83efc11 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -12,7 +12,6 @@
12 12
13static struct signal_struct init_signals = INIT_SIGNALS(init_signals); 13static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
14static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 14static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
15struct mm_struct init_mm = INIT_MM(init_mm);
16 15
17/* 16/*
18 * Initial thread structure. 17 * Initial thread structure.
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 9c4461501fcb..9371448290ac 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -236,6 +236,7 @@ static const struct file_operations microcode_fops = {
236static struct miscdevice microcode_dev = { 236static struct miscdevice microcode_dev = {
237 .minor = MICROCODE_MINOR, 237 .minor = MICROCODE_MINOR,
238 .name = "microcode", 238 .name = "microcode",
239 .devnode = "cpu/microcode",
239 .fops = &microcode_fops, 240 .fops = &microcode_fops,
240}; 241};
241 242
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 3cf3413ec626..98fd6cd4e3a4 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -196,6 +196,11 @@ static struct notifier_block __refdata msr_class_cpu_notifier = {
196 .notifier_call = msr_class_cpu_callback, 196 .notifier_call = msr_class_cpu_callback,
197}; 197};
198 198
199static char *msr_nodename(struct device *dev)
200{
201 return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt));
202}
203
199static int __init msr_init(void) 204static int __init msr_init(void)
200{ 205{
201 int i, err = 0; 206 int i, err = 0;
@@ -212,6 +217,7 @@ static int __init msr_init(void)
212 err = PTR_ERR(msr_class); 217 err = PTR_ERR(msr_class);
213 goto out_chrdev; 218 goto out_chrdev;
214 } 219 }
220 msr_class->nodename = msr_nodename;
215 for_each_online_cpu(i) { 221 for_each_online_cpu(i) {
216 err = msr_device_create(i); 222 err = msr_device_create(i);
217 if (err != 0) 223 if (err != 0)
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 745579bc8256..328592fb6044 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -290,6 +290,8 @@ static int __init pci_iommu_init(void)
290void pci_iommu_shutdown(void) 290void pci_iommu_shutdown(void)
291{ 291{
292 gart_iommu_shutdown(); 292 gart_iommu_shutdown();
293
294 amd_iommu_shutdown();
293} 295}
294/* Must execute after PCI subsystem */ 296/* Must execute after PCI subsystem */
295fs_initcall(pci_iommu_init); 297fs_initcall(pci_iommu_init);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 3bb2be1649bd..994dd6a4a2a0 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -63,7 +63,7 @@ void arch_task_cache_init(void)
63 task_xstate_cachep = 63 task_xstate_cachep =
64 kmem_cache_create("task_xstate", xstate_size, 64 kmem_cache_create("task_xstate", xstate_size,
65 __alignof__(union thread_xstate), 65 __alignof__(union thread_xstate),
66 SLAB_PANIC, NULL); 66 SLAB_PANIC | SLAB_NOTRACK, NULL);
67} 67}
68 68
69/* 69/*
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 4aaf7e48394f..c3eb207181fe 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -77,6 +77,13 @@ void save_stack_trace(struct stack_trace *trace)
77} 77}
78EXPORT_SYMBOL_GPL(save_stack_trace); 78EXPORT_SYMBOL_GPL(save_stack_trace);
79 79
80void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp)
81{
82 dump_trace(current, NULL, NULL, bp, &save_stack_ops, trace);
83 if (trace->nr_entries < trace->max_entries)
84 trace->entries[trace->nr_entries++] = ULONG_MAX;
85}
86
80void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) 87void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
81{ 88{
82 dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); 89 dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 1e1e27b7d438..a0f48f5671c0 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -45,6 +45,7 @@
45#include <linux/edac.h> 45#include <linux/edac.h>
46#endif 46#endif
47 47
48#include <asm/kmemcheck.h>
48#include <asm/stacktrace.h> 49#include <asm/stacktrace.h>
49#include <asm/processor.h> 50#include <asm/processor.h>
50#include <asm/debugreg.h> 51#include <asm/debugreg.h>
@@ -53,6 +54,7 @@
53#include <asm/traps.h> 54#include <asm/traps.h>
54#include <asm/desc.h> 55#include <asm/desc.h>
55#include <asm/i387.h> 56#include <asm/i387.h>
57#include <asm/mce.h>
56 58
57#include <asm/mach_traps.h> 59#include <asm/mach_traps.h>
58 60
@@ -64,8 +66,6 @@
64#include <asm/setup.h> 66#include <asm/setup.h>
65#include <asm/traps.h> 67#include <asm/traps.h>
66 68
67#include "cpu/mcheck/mce.h"
68
69asmlinkage int system_call(void); 69asmlinkage int system_call(void);
70 70
71/* Do we ignore FPU interrupts ? */ 71/* Do we ignore FPU interrupts ? */
@@ -534,6 +534,10 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
534 534
535 get_debugreg(condition, 6); 535 get_debugreg(condition, 6);
536 536
537 /* Catch kmemcheck conditions first of all! */
538 if (condition & DR_STEP && kmemcheck_trap(regs))
539 return;
540
537 /* 541 /*
538 * The processor cleared BTF, so don't mark that we need it set. 542 * The processor cleared BTF, so don't mark that we need it set.
539 */ 543 */
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 3e1c057e98fe..b0597ad02c93 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -9,6 +9,7 @@
9#include <linux/delay.h> 9#include <linux/delay.h>
10#include <linux/clocksource.h> 10#include <linux/clocksource.h>
11#include <linux/percpu.h> 11#include <linux/percpu.h>
12#include <linux/timex.h>
12 13
13#include <asm/hpet.h> 14#include <asm/hpet.h>
14#include <asm/timer.h> 15#include <asm/timer.h>
@@ -631,17 +632,15 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
631 void *data) 632 void *data)
632{ 633{
633 struct cpufreq_freqs *freq = data; 634 struct cpufreq_freqs *freq = data;
634 unsigned long *lpj, dummy; 635 unsigned long *lpj;
635 636
636 if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC)) 637 if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC))
637 return 0; 638 return 0;
638 639
639 lpj = &dummy; 640 lpj = &boot_cpu_data.loops_per_jiffy;
640 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
641#ifdef CONFIG_SMP 641#ifdef CONFIG_SMP
642 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
642 lpj = &cpu_data(freq->cpu).loops_per_jiffy; 643 lpj = &cpu_data(freq->cpu).loops_per_jiffy;
643#else
644 lpj = &boot_cpu_data.loops_per_jiffy;
645#endif 644#endif
646 645
647 if (!ref_freq) { 646 if (!ref_freq) {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 32d6ae8fb60e..e770bf349ec4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1277,7 +1277,7 @@ static struct vmcs *alloc_vmcs_cpu(int cpu)
1277 struct page *pages; 1277 struct page *pages;
1278 struct vmcs *vmcs; 1278 struct vmcs *vmcs;
1279 1279
1280 pages = alloc_pages_node(node, GFP_KERNEL, vmcs_config.order); 1280 pages = alloc_pages_exact_node(node, GFP_KERNEL, vmcs_config.order);
1281 if (!pages) 1281 if (!pages)
1282 return NULL; 1282 return NULL;
1283 vmcs = page_address(pages); 1283 vmcs = page_address(pages);
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index fdd30d08ab52..eefdeee8a871 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -10,6 +10,8 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
10 10
11obj-$(CONFIG_HIGHMEM) += highmem_32.o 11obj-$(CONFIG_HIGHMEM) += highmem_32.o
12 12
13obj-$(CONFIG_KMEMCHECK) += kmemcheck/
14
13obj-$(CONFIG_MMIOTRACE) += mmiotrace.o 15obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
14mmiotrace-y := kmmio.o pf_in.o mmio-mod.o 16mmiotrace-y := kmmio.o pf_in.o mmio-mod.o
15obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o 17obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index c6acc6326374..c403526d5d15 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -14,6 +14,7 @@
14 14
15#include <asm/traps.h> /* dotraplinkage, ... */ 15#include <asm/traps.h> /* dotraplinkage, ... */
16#include <asm/pgalloc.h> /* pgd_*(), ... */ 16#include <asm/pgalloc.h> /* pgd_*(), ... */
17#include <asm/kmemcheck.h> /* kmemcheck_*(), ... */
17 18
18/* 19/*
19 * Page fault error code bits: 20 * Page fault error code bits:
@@ -951,11 +952,17 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
951 tsk = current; 952 tsk = current;
952 mm = tsk->mm; 953 mm = tsk->mm;
953 954
954 prefetchw(&mm->mmap_sem);
955
956 /* Get the faulting address: */ 955 /* Get the faulting address: */
957 address = read_cr2(); 956 address = read_cr2();
958 957
958 /*
959 * Detect and handle instructions that would cause a page fault for
960 * both a tracked kernel page and a userspace page.
961 */
962 if (kmemcheck_active(regs))
963 kmemcheck_hide(regs);
964 prefetchw(&mm->mmap_sem);
965
959 if (unlikely(kmmio_fault(regs, address))) 966 if (unlikely(kmmio_fault(regs, address)))
960 return; 967 return;
961 968
@@ -973,9 +980,13 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
973 * protection error (error_code & 9) == 0. 980 * protection error (error_code & 9) == 0.
974 */ 981 */
975 if (unlikely(fault_in_kernel_space(address))) { 982 if (unlikely(fault_in_kernel_space(address))) {
976 if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && 983 if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) {
977 vmalloc_fault(address) >= 0) 984 if (vmalloc_fault(address) >= 0)
978 return; 985 return;
986
987 if (kmemcheck_fault(regs, address, error_code))
988 return;
989 }
979 990
980 /* Can handle a stale RO->RW TLB: */ 991 /* Can handle a stale RO->RW TLB: */
981 if (spurious_fault(error_code, address)) 992 if (spurious_fault(error_code, address))
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 6340cef6798a..f97480941269 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -247,10 +247,15 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
247 start &= PAGE_MASK; 247 start &= PAGE_MASK;
248 addr = start; 248 addr = start;
249 len = (unsigned long) nr_pages << PAGE_SHIFT; 249 len = (unsigned long) nr_pages << PAGE_SHIFT;
250
250 end = start + len; 251 end = start + len;
251 if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, 252 if (end < start)
252 (void __user *)start, len))) 253 goto slow_irqon;
254
255#ifdef CONFIG_X86_64
256 if (end >> __VIRTUAL_MASK_SHIFT)
253 goto slow_irqon; 257 goto slow_irqon;
258#endif
254 259
255 /* 260 /*
256 * XXX: batch / limit 'nr', to avoid large irq off latency 261 * XXX: batch / limit 'nr', to avoid large irq off latency
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 34c1bfb64f1c..f53b57e4086f 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -213,7 +213,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
213 if (!after_bootmem) 213 if (!after_bootmem)
214 init_gbpages(); 214 init_gbpages();
215 215
216#ifdef CONFIG_DEBUG_PAGEALLOC 216#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
217 /* 217 /*
218 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. 218 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
219 * This will simplify cpa(), which otherwise needs to support splitting 219 * This will simplify cpa(), which otherwise needs to support splitting
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 9ff3c0816d15..3cd7711bb949 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -111,7 +111,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
111 pte_t *page_table = NULL; 111 pte_t *page_table = NULL;
112 112
113 if (after_bootmem) { 113 if (after_bootmem) {
114#ifdef CONFIG_DEBUG_PAGEALLOC 114#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
115 page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); 115 page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
116#endif 116#endif
117 if (!page_table) 117 if (!page_table)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 52bb9519bb86..c4378f4fd4a5 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -104,7 +104,7 @@ static __ref void *spp_getpage(void)
104 void *ptr; 104 void *ptr;
105 105
106 if (after_bootmem) 106 if (after_bootmem)
107 ptr = (void *) get_zeroed_page(GFP_ATOMIC); 107 ptr = (void *) get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK);
108 else 108 else
109 ptr = alloc_bootmem_pages(PAGE_SIZE); 109 ptr = alloc_bootmem_pages(PAGE_SIZE);
110 110
@@ -281,7 +281,7 @@ static __ref void *alloc_low_page(unsigned long *phys)
281 void *adr; 281 void *adr;
282 282
283 if (after_bootmem) { 283 if (after_bootmem) {
284 adr = (void *)get_zeroed_page(GFP_ATOMIC); 284 adr = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK);
285 *phys = __pa(adr); 285 *phys = __pa(adr);
286 286
287 return adr; 287 return adr;
@@ -527,7 +527,7 @@ phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
527 return phys_pud_init(pud, addr, end, page_size_mask); 527 return phys_pud_init(pud, addr, end, page_size_mask);
528} 528}
529 529
530unsigned long __init 530unsigned long __meminit
531kernel_physical_mapping_init(unsigned long start, 531kernel_physical_mapping_init(unsigned long start,
532 unsigned long end, 532 unsigned long end,
533 unsigned long page_size_mask) 533 unsigned long page_size_mask)
diff --git a/arch/x86/mm/kmemcheck/Makefile b/arch/x86/mm/kmemcheck/Makefile
new file mode 100644
index 000000000000..520b3bce4095
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/Makefile
@@ -0,0 +1 @@
obj-y := error.o kmemcheck.o opcode.o pte.o selftest.o shadow.o
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c
new file mode 100644
index 000000000000..4901d0dafda6
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/error.c
@@ -0,0 +1,228 @@
1#include <linux/interrupt.h>
2#include <linux/kdebug.h>
3#include <linux/kmemcheck.h>
4#include <linux/kernel.h>
5#include <linux/types.h>
6#include <linux/ptrace.h>
7#include <linux/stacktrace.h>
8#include <linux/string.h>
9
10#include "error.h"
11#include "shadow.h"
12
/* Discriminates the kinds of report stored in a struct kmemcheck_error. */
13enum kmemcheck_error_type {
/* Set by kmemcheck_error_save(): a read that hit non-initialized memory. */
14 KMEMCHECK_ERROR_INVALID_ACCESS,
/* Set by kmemcheck_error_save_bug(): an internal kmemcheck failure. */
15 KMEMCHECK_ERROR_BUG,
16};
17
18#define SHADOW_COPY_SIZE (1 << CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT)
19
/*
 * One queued error report. Reports are filled in by kmemcheck_error_save()
 * or kmemcheck_error_save_bug() and printed later by
 * kmemcheck_error_recall().
 */
20struct kmemcheck_error {
21 enum kmemcheck_error_type type;
22
23 union {
24 /* KMEMCHECK_ERROR_INVALID_ACCESS */
25 struct {
26 /* Shadow state of the memory that was read */
27 enum kmemcheck_shadow state;
28 /* Address and size (in bytes) of the erroneous read */
29 unsigned long address;
30 unsigned int size;
31 };
32 };
33
/* Register snapshot and stack trace taken when the error was saved. */
34 struct pt_regs regs;
35 struct stack_trace trace;
/* Backing storage that trace.entries is pointed at. */
36 unsigned long trace_entries[32];
37
/*
 * Copies of the shadow bytes and of the memory contents for the
 * SHADOW_COPY_SIZE-aligned window containing the faulting address.
 */
38 /* We compress it to a char. */
39 unsigned char shadow_copy[SHADOW_COPY_SIZE];
40 unsigned char memory_copy[SHADOW_COPY_SIZE];
41};
42
43/*
44 * Create a ring queue of errors to output. We can't call printk() directly
45 * from the kmemcheck traps, since this may call the console drivers and
46 * result in a recursive fault.
47 */
48static struct kmemcheck_error error_fifo[CONFIG_KMEMCHECK_QUEUE_SIZE];
49static unsigned int error_count;
50static unsigned int error_rd;
51static unsigned int error_wr;
52static unsigned int error_missed_count;
53
54static struct kmemcheck_error *error_next_wr(void)
55{
56 struct kmemcheck_error *e;
57
58 if (error_count == ARRAY_SIZE(error_fifo)) {
59 ++error_missed_count;
60 return NULL;
61 }
62
63 e = &error_fifo[error_wr];
64 if (++error_wr == ARRAY_SIZE(error_fifo))
65 error_wr = 0;
66 ++error_count;
67 return e;
68}
69
70static struct kmemcheck_error *error_next_rd(void)
71{
72 struct kmemcheck_error *e;
73
74 if (error_count == 0)
75 return NULL;
76
77 e = &error_fifo[error_rd];
78 if (++error_rd == ARRAY_SIZE(error_fifo))
79 error_rd = 0;
80 --error_count;
81 return e;
82}
83
84void kmemcheck_error_recall(void)
85{
86 static const char *desc[] = {
87 [KMEMCHECK_SHADOW_UNALLOCATED] = "unallocated",
88 [KMEMCHECK_SHADOW_UNINITIALIZED] = "uninitialized",
89 [KMEMCHECK_SHADOW_INITIALIZED] = "initialized",
90 [KMEMCHECK_SHADOW_FREED] = "freed",
91 };
92
93 static const char short_desc[] = {
94 [KMEMCHECK_SHADOW_UNALLOCATED] = 'a',
95 [KMEMCHECK_SHADOW_UNINITIALIZED] = 'u',
96 [KMEMCHECK_SHADOW_INITIALIZED] = 'i',
97 [KMEMCHECK_SHADOW_FREED] = 'f',
98 };
99
100 struct kmemcheck_error *e;
101 unsigned int i;
102
103 e = error_next_rd();
104 if (!e)
105 return;
106
107 switch (e->type) {
108 case KMEMCHECK_ERROR_INVALID_ACCESS:
109 printk(KERN_ERR "WARNING: kmemcheck: Caught %d-bit read "
110 "from %s memory (%p)\n",
111 8 * e->size, e->state < ARRAY_SIZE(desc) ?
112 desc[e->state] : "(invalid shadow state)",
113 (void *) e->address);
114
115 printk(KERN_INFO);
116 for (i = 0; i < SHADOW_COPY_SIZE; ++i)
117 printk("%02x", e->memory_copy[i]);
118 printk("\n");
119
120 printk(KERN_INFO);
121 for (i = 0; i < SHADOW_COPY_SIZE; ++i) {
122 if (e->shadow_copy[i] < ARRAY_SIZE(short_desc))
123 printk(" %c", short_desc[e->shadow_copy[i]]);
124 else
125 printk(" ?");
126 }
127 printk("\n");
128 printk(KERN_INFO "%*c\n", 2 + 2
129 * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^');
130 break;
131 case KMEMCHECK_ERROR_BUG:
132 printk(KERN_EMERG "ERROR: kmemcheck: Fatal error\n");
133 break;
134 }
135
136 __show_regs(&e->regs, 1);
137 print_stack_trace(&e->trace, 0);
138}
139
140static void do_wakeup(unsigned long data)
141{
142 while (error_count > 0)
143 kmemcheck_error_recall();
144
145 if (error_missed_count > 0) {
146 printk(KERN_WARNING "kmemcheck: Lost %d error reports because "
147 "the queue was too small\n", error_missed_count);
148 error_missed_count = 0;
149 }
150}
151
152static DECLARE_TASKLET(kmemcheck_tasklet, &do_wakeup, 0);
153
154/*
155 * Save the context of an error report.
156 */
157void kmemcheck_error_save(enum kmemcheck_shadow state,
158 unsigned long address, unsigned int size, struct pt_regs *regs)
159{
160 static unsigned long prev_ip;
161
162 struct kmemcheck_error *e;
163 void *shadow_copy;
164 void *memory_copy;
165
166 /* Don't report several adjacent errors from the same EIP. */
167 if (regs->ip == prev_ip)
168 return;
169 prev_ip = regs->ip;
170
171 e = error_next_wr();
172 if (!e)
173 return;
174
175 e->type = KMEMCHECK_ERROR_INVALID_ACCESS;
176
177 e->state = state;
178 e->address = address;
179 e->size = size;
180
181 /* Save regs */
182 memcpy(&e->regs, regs, sizeof(*regs));
183
184 /* Save stack trace */
185 e->trace.nr_entries = 0;
186 e->trace.entries = e->trace_entries;
187 e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
188 e->trace.skip = 0;
189 save_stack_trace_bp(&e->trace, regs->bp);
190
191 /* Round address down to nearest 16 bytes */
192 shadow_copy = kmemcheck_shadow_lookup(address
193 & ~(SHADOW_COPY_SIZE - 1));
194 BUG_ON(!shadow_copy);
195
196 memcpy(e->shadow_copy, shadow_copy, SHADOW_COPY_SIZE);
197
198 kmemcheck_show_addr(address);
199 memory_copy = (void *) (address & ~(SHADOW_COPY_SIZE - 1));
200 memcpy(e->memory_copy, memory_copy, SHADOW_COPY_SIZE);
201 kmemcheck_hide_addr(address);
202
203 tasklet_hi_schedule_first(&kmemcheck_tasklet);
204}
205
206/*
207 * Save the context of a kmemcheck bug.
208 */
209void kmemcheck_error_save_bug(struct pt_regs *regs)
210{
211 struct kmemcheck_error *e;
212
213 e = error_next_wr();
214 if (!e)
215 return;
216
217 e->type = KMEMCHECK_ERROR_BUG;
218
219 memcpy(&e->regs, regs, sizeof(*regs));
220
221 e->trace.nr_entries = 0;
222 e->trace.entries = e->trace_entries;
223 e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
224 e->trace.skip = 1;
225 save_stack_trace(&e->trace);
226
227 tasklet_hi_schedule_first(&kmemcheck_tasklet);
228}
diff --git a/arch/x86/mm/kmemcheck/error.h b/arch/x86/mm/kmemcheck/error.h
new file mode 100644
index 000000000000..0efc2e8d0a20
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/error.h
@@ -0,0 +1,15 @@
1#ifndef ARCH__X86__MM__KMEMCHECK__ERROR_H
2#define ARCH__X86__MM__KMEMCHECK__ERROR_H
3
4#include <linux/ptrace.h>
5
6#include "shadow.h"
7
8void kmemcheck_error_save(enum kmemcheck_shadow state,
9 unsigned long address, unsigned int size, struct pt_regs *regs);
10
11void kmemcheck_error_save_bug(struct pt_regs *regs);
12
13void kmemcheck_error_recall(void);
14
15#endif
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
new file mode 100644
index 000000000000..2c55ed098654
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -0,0 +1,640 @@
1/**
2 * kmemcheck - a heavyweight memory checker for the linux kernel
3 * Copyright (C) 2007, 2008 Vegard Nossum <vegardno@ifi.uio.no>
4 * (With a lot of help from Ingo Molnar and Pekka Enberg.)
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2) as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/init.h>
12#include <linux/interrupt.h>
13#include <linux/kallsyms.h>
14#include <linux/kernel.h>
15#include <linux/kmemcheck.h>
16#include <linux/mm.h>
17#include <linux/module.h>
18#include <linux/page-flags.h>
19#include <linux/percpu.h>
20#include <linux/ptrace.h>
21#include <linux/string.h>
22#include <linux/types.h>
23
24#include <asm/cacheflush.h>
25#include <asm/kmemcheck.h>
26#include <asm/pgtable.h>
27#include <asm/tlbflush.h>
28
29#include "error.h"
30#include "opcode.h"
31#include "pte.h"
32#include "selftest.h"
33#include "shadow.h"
34
35
36#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT
37# define KMEMCHECK_ENABLED 0
38#endif
39
40#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT
41# define KMEMCHECK_ENABLED 1
42#endif
43
44#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT
45# define KMEMCHECK_ENABLED 2
46#endif
47
48int kmemcheck_enabled = KMEMCHECK_ENABLED;
49
50int __init kmemcheck_init(void)
51{
52#ifdef CONFIG_SMP
53 /*
54 * Limit SMP to use a single CPU. We rely on the fact that this code
55 * runs before SMP is set up.
56 */
57 if (setup_max_cpus > 1) {
58 printk(KERN_INFO
59 "kmemcheck: Limiting number of CPUs to 1.\n");
60 setup_max_cpus = 1;
61 }
62#endif
63
64 if (!kmemcheck_selftest()) {
65 printk(KERN_INFO "kmemcheck: self-tests failed; disabling\n");
66 kmemcheck_enabled = 0;
67 return -EINVAL;
68 }
69
70 printk(KERN_INFO "kmemcheck: Initialized\n");
71 return 0;
72}
73
74early_initcall(kmemcheck_init);
75
76/*
77 * We need to parse the kmemcheck= option before any memory is allocated.
78 */
79static int __init param_kmemcheck(char *str)
80{
81 if (!str)
82 return -EINVAL;
83
84 sscanf(str, "%d", &kmemcheck_enabled);
85 return 0;
86}
87
88early_param("kmemcheck", param_kmemcheck);
89
90int kmemcheck_show_addr(unsigned long address)
91{
92 pte_t *pte;
93
94 pte = kmemcheck_pte_lookup(address);
95 if (!pte)
96 return 0;
97
98 set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
99 __flush_tlb_one(address);
100 return 1;
101}
102
103int kmemcheck_hide_addr(unsigned long address)
104{
105 pte_t *pte;
106
107 pte = kmemcheck_pte_lookup(address);
108 if (!pte)
109 return 0;
110
111 set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
112 __flush_tlb_one(address);
113 return 1;
114}
115
/*
 * Per-CPU bookkeeping for the fault currently being handled: which
 * addresses were revealed and the flags to restore afterwards.
 */
116struct kmemcheck_context {
/* NOTE(review): not referenced by the code visible here; confirm its use. */
117 bool busy;
/* Incremented by kmemcheck_show(), decremented by kmemcheck_hide(). */
118 int balance;
119
120 /*
121 * There can be at most two memory operands to an instruction, but
122 * each address can cross a page boundary -- so we may need up to
123 * four addresses that must be hidden/revealed for each fault.
124 */
125 unsigned long addr[4];
/* Number of valid entries in addr[]. */
126 unsigned long n_addrs;
/* regs->flags as saved by kmemcheck_show(), consulted by kmemcheck_hide(). */
127 unsigned long flags;
128
129 /* Data size of the instruction that caused a fault. */
130 unsigned int size;
131};
132
133static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context);
134
135bool kmemcheck_active(struct pt_regs *regs)
136{
137 struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
138
139 return data->balance > 0;
140}
141
142/* Save an address that needs to be shown/hidden */
143static void kmemcheck_save_addr(unsigned long addr)
144{
145 struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
146
147 BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr));
148 data->addr[data->n_addrs++] = addr;
149}
150
151static unsigned int kmemcheck_show_all(void)
152{
153 struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
154 unsigned int i;
155 unsigned int n;
156
157 n = 0;
158 for (i = 0; i < data->n_addrs; ++i)
159 n += kmemcheck_show_addr(data->addr[i]);
160
161 return n;
162}
163
164static unsigned int kmemcheck_hide_all(void)
165{
166 struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
167 unsigned int i;
168 unsigned int n;
169
170 n = 0;
171 for (i = 0; i < data->n_addrs; ++i)
172 n += kmemcheck_hide_addr(data->addr[i]);
173
174 return n;
175}
176
177/*
178 * Called from the #PF handler.
179 */
180void kmemcheck_show(struct pt_regs *regs)
181{
182 struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
183
184 BUG_ON(!irqs_disabled());
185
186 if (unlikely(data->balance != 0)) {
187 kmemcheck_show_all();
188 kmemcheck_error_save_bug(regs);
189 data->balance = 0;
190 return;
191 }
192
193 /*
194 * None of the addresses actually belonged to kmemcheck. Note that
195 * this is not an error.
196 */
197 if (kmemcheck_show_all() == 0)
198 return;
199
200 ++data->balance;
201
202 /*
203 * The IF needs to be cleared as well, so that the faulting
204 * instruction can run "uninterrupted". Otherwise, we might take
205 * an interrupt and start executing that before we've had a chance
206 * to hide the page again.
207 *
208 * NOTE: In the rare case of multiple faults, we must not override
209 * the original flags:
210 */
211 if (!(regs->flags & X86_EFLAGS_TF))
212 data->flags = regs->flags;
213
214 regs->flags |= X86_EFLAGS_TF;
215 regs->flags &= ~X86_EFLAGS_IF;
216}
217
218/*
219 * Called from the #DB handler.
220 */
221void kmemcheck_hide(struct pt_regs *regs)
222{
223 struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
224 int n;
225
226 BUG_ON(!irqs_disabled());
227
228 if (data->balance == 0)
229 return;
230
231 if (unlikely(data->balance != 1)) {
232 kmemcheck_show_all();
233 kmemcheck_error_save_bug(regs);
234 data->n_addrs = 0;
235 data->balance = 0;
236
237 if (!(data->flags & X86_EFLAGS_TF))
238 regs->flags &= ~X86_EFLAGS_TF;
239 if (data->flags & X86_EFLAGS_IF)
240 regs->flags |= X86_EFLAGS_IF;
241 return;
242 }
243
244 if (kmemcheck_enabled)
245 n = kmemcheck_hide_all();
246 else
247 n = kmemcheck_show_all();
248
249 if (n == 0)
250 return;
251
252 --data->balance;
253
254 data->n_addrs = 0;
255
256 if (!(data->flags & X86_EFLAGS_TF))
257 regs->flags &= ~X86_EFLAGS_TF;
258 if (data->flags & X86_EFLAGS_IF)
259 regs->flags |= X86_EFLAGS_IF;
260}
261
262void kmemcheck_show_pages(struct page *p, unsigned int n)
263{
264 unsigned int i;
265
266 for (i = 0; i < n; ++i) {
267 unsigned long address;
268 pte_t *pte;
269 unsigned int level;
270
271 address = (unsigned long) page_address(&p[i]);
272 pte = lookup_address(address, &level);
273 BUG_ON(!pte);
274 BUG_ON(level != PG_LEVEL_4K);
275
276 set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
277 set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN));
278 __flush_tlb_one(address);
279 }
280}
281
/*
 * Returns true if kmemcheck_pte_lookup() finds a PTE for the page's
 * address; that lookup also checks the "hidden" flag of the PTE.
 */
bool kmemcheck_page_is_tracked(struct page *p)
{
	unsigned long vaddr = (unsigned long) page_address(p);

	return kmemcheck_pte_lookup(vaddr) != NULL;
}
287
288void kmemcheck_hide_pages(struct page *p, unsigned int n)
289{
290 unsigned int i;
291
292 for (i = 0; i < n; ++i) {
293 unsigned long address;
294 pte_t *pte;
295 unsigned int level;
296
297 address = (unsigned long) page_address(&p[i]);
298 pte = lookup_address(address, &level);
299 BUG_ON(!pte);
300 BUG_ON(level != PG_LEVEL_4K);
301
302 set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
303 set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN));
304 __flush_tlb_one(address);
305 }
306}
307
308/* Access may NOT cross page boundary */
309static void kmemcheck_read_strict(struct pt_regs *regs,
310 unsigned long addr, unsigned int size)
311{
312 void *shadow;
313 enum kmemcheck_shadow status;
314
315 shadow = kmemcheck_shadow_lookup(addr);
316 if (!shadow)
317 return;
318
319 kmemcheck_save_addr(addr);
320 status = kmemcheck_shadow_test(shadow, size);
321 if (status == KMEMCHECK_SHADOW_INITIALIZED)
322 return;
323
324 if (kmemcheck_enabled)
325 kmemcheck_error_save(status, addr, size, regs);
326
327 if (kmemcheck_enabled == 2)
328 kmemcheck_enabled = 0;
329
330 /* Don't warn about it again. */
331 kmemcheck_shadow_set(shadow, size);
332}
333
334/* Access may cross page boundary */
335static void kmemcheck_read(struct pt_regs *regs,
336 unsigned long addr, unsigned int size)
337{
338 unsigned long page = addr & PAGE_MASK;
339 unsigned long next_addr = addr + size - 1;
340 unsigned long next_page = next_addr & PAGE_MASK;
341
342 if (likely(page == next_page)) {
343 kmemcheck_read_strict(regs, addr, size);
344 return;
345 }
346
347 /*
348 * What we do is basically to split the access across the
349 * two pages and handle each part separately. Yes, this means
350 * that we may now see reads that are 3 + 5 bytes, for
351 * example (and if both are uninitialized, there will be two
352 * reports), but it makes the code a lot simpler.
353 */
354 kmemcheck_read_strict(regs, addr, next_page - addr);
355 kmemcheck_read_strict(regs, next_page, next_addr - next_page);
356}
357
/*
 * Handle a write of @size bytes at @addr that does not cross a page
 * boundary: if the address has shadow memory, save the address for
 * show/hide and mark the written bytes as initialized. @regs is unused.
 */
static void kmemcheck_write_strict(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	void *shadow_ptr = kmemcheck_shadow_lookup(addr);

	if (shadow_ptr != NULL) {
		kmemcheck_save_addr(addr);
		kmemcheck_shadow_set(shadow_ptr, size);
	}
}
370
371static void kmemcheck_write(struct pt_regs *regs,
372 unsigned long addr, unsigned int size)
373{
374 unsigned long page = addr & PAGE_MASK;
375 unsigned long next_addr = addr + size - 1;
376 unsigned long next_page = next_addr & PAGE_MASK;
377
378 if (likely(page == next_page)) {
379 kmemcheck_write_strict(regs, addr, size);
380 return;
381 }
382
383 /* See comment in kmemcheck_read(). */
384 kmemcheck_write_strict(regs, addr, next_page - addr);
385 kmemcheck_write_strict(regs, next_page, next_addr - next_page);
386}
387
388/*
389 * Copying is hard. We have two addresses, each of which may be split across
390 * a page (and each page will have different shadow addresses).
391 */
392static void kmemcheck_copy(struct pt_regs *regs,
393 unsigned long src_addr, unsigned long dst_addr, unsigned int size)
394{
395 uint8_t shadow[8];
396 enum kmemcheck_shadow status;
397
398 unsigned long page;
399 unsigned long next_addr;
400 unsigned long next_page;
401
402 uint8_t *x;
403 unsigned int i;
404 unsigned int n;
405
406 BUG_ON(size > sizeof(shadow));
407
408 page = src_addr & PAGE_MASK;
409 next_addr = src_addr + size - 1;
410 next_page = next_addr & PAGE_MASK;
411
412 if (likely(page == next_page)) {
413 /* Same page */
414 x = kmemcheck_shadow_lookup(src_addr);
415 if (x) {
416 kmemcheck_save_addr(src_addr);
417 for (i = 0; i < size; ++i)
418 shadow[i] = x[i];
419 } else {
420 for (i = 0; i < size; ++i)
421 shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
422 }
423 } else {
424 n = next_page - src_addr;
425 BUG_ON(n > sizeof(shadow));
426
427 /* First page */
428 x = kmemcheck_shadow_lookup(src_addr);
429 if (x) {
430 kmemcheck_save_addr(src_addr);
431 for (i = 0; i < n; ++i)
432 shadow[i] = x[i];
433 } else {
434 /* Not tracked */
435 for (i = 0; i < n; ++i)
436 shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
437 }
438
439 /* Second page */
440 x = kmemcheck_shadow_lookup(next_page);
441 if (x) {
442 kmemcheck_save_addr(next_page);
443 for (i = n; i < size; ++i)
444 shadow[i] = x[i - n];
445 } else {
446 /* Not tracked */
447 for (i = n; i < size; ++i)
448 shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
449 }
450 }
451
452 page = dst_addr & PAGE_MASK;
453 next_addr = dst_addr + size - 1;
454 next_page = next_addr & PAGE_MASK;
455
456 if (likely(page == next_page)) {
457 /* Same page */
458 x = kmemcheck_shadow_lookup(dst_addr);
459 if (x) {
460 kmemcheck_save_addr(dst_addr);
461 for (i = 0; i < size; ++i) {
462 x[i] = shadow[i];
463 shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
464 }
465 }
466 } else {
467 n = next_page - dst_addr;
468 BUG_ON(n > sizeof(shadow));
469
470 /* First page */
471 x = kmemcheck_shadow_lookup(dst_addr);
472 if (x) {
473 kmemcheck_save_addr(dst_addr);
474 for (i = 0; i < n; ++i) {
475 x[i] = shadow[i];
476 shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
477 }
478 }
479
480 /* Second page */
481 x = kmemcheck_shadow_lookup(next_page);
482 if (x) {
483 kmemcheck_save_addr(next_page);
484 for (i = n; i < size; ++i) {
485 x[i - n] = shadow[i];
486 shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
487 }
488 }
489 }
490
491 status = kmemcheck_shadow_test(shadow, size);
492 if (status == KMEMCHECK_SHADOW_INITIALIZED)
493 return;
494
495 if (kmemcheck_enabled)
496 kmemcheck_error_save(status, src_addr, size, regs);
497
498 if (kmemcheck_enabled == 2)
499 kmemcheck_enabled = 0;
500}
501
/* Kind of memory access, as passed to kmemcheck_access(). */
enum kmemcheck_method {
	KMEMCHECK_READ,		/* handled by kmemcheck_read() */
	KMEMCHECK_WRITE,	/* handled by kmemcheck_write() */
};
506
507static void kmemcheck_access(struct pt_regs *regs,
508 unsigned long fallback_address, enum kmemcheck_method fallback_method)
509{
510 const uint8_t *insn;
511 const uint8_t *insn_primary;
512 unsigned int size;
513
514 struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
515
516 /* Recursive fault -- ouch. */
517 if (data->busy) {
518 kmemcheck_show_addr(fallback_address);
519 kmemcheck_error_save_bug(regs);
520 return;
521 }
522
523 data->busy = true;
524
525 insn = (const uint8_t *) regs->ip;
526 insn_primary = kmemcheck_opcode_get_primary(insn);
527
528 kmemcheck_opcode_decode(insn, &size);
529
530 switch (insn_primary[0]) {
531#ifdef CONFIG_KMEMCHECK_BITOPS_OK
532 /* AND, OR, XOR */
533 /*
534 * Unfortunately, these instructions have to be excluded from
535 * our regular checking since they access only some (and not
536 * all) bits. This clears out "bogus" bitfield-access warnings.
537 */
538 case 0x80:
539 case 0x81:
540 case 0x82:
541 case 0x83:
542 switch ((insn_primary[1] >> 3) & 7) {
543 /* OR */
544 case 1:
545 /* AND */
546 case 4:
547 /* XOR */
548 case 6:
549 kmemcheck_write(regs, fallback_address, size);
550 goto out;
551
552 /* ADD */
553 case 0:
554 /* ADC */
555 case 2:
556 /* SBB */
557 case 3:
558 /* SUB */
559 case 5:
560 /* CMP */
561 case 7:
562 break;
563 }
564 break;
565#endif
566
567 /* MOVS, MOVSB, MOVSW, MOVSD */
568 case 0xa4:
569 case 0xa5:
570 /*
571 * These instructions are special because they take two
572 * addresses, but we only get one page fault.
573 */
574 kmemcheck_copy(regs, regs->si, regs->di, size);
575 goto out;
576
577 /* CMPS, CMPSB, CMPSW, CMPSD */
578 case 0xa6:
579 case 0xa7:
580 kmemcheck_read(regs, regs->si, size);
581 kmemcheck_read(regs, regs->di, size);
582 goto out;
583 }
584
585 /*
586 * If the opcode isn't special in any way, we use the data from the
587 * page fault handler to determine the address and type of memory
588 * access.
589 */
590 switch (fallback_method) {
591 case KMEMCHECK_READ:
592 kmemcheck_read(regs, fallback_address, size);
593 goto out;
594 case KMEMCHECK_WRITE:
595 kmemcheck_write(regs, fallback_address, size);
596 goto out;
597 }
598
599out:
600 data->busy = false;
601}
602
603bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
604 unsigned long error_code)
605{
606 pte_t *pte;
607
608 /*
609 * XXX: Is it safe to assume that memory accesses from virtual 86
610 * mode or non-kernel code segments will _never_ access kernel
611 * memory (e.g. tracked pages)? For now, we need this to avoid
612 * invoking kmemcheck for PnP BIOS calls.
613 */
614 if (regs->flags & X86_VM_MASK)
615 return false;
616 if (regs->cs != __KERNEL_CS)
617 return false;
618
619 pte = kmemcheck_pte_lookup(address);
620 if (!pte)
621 return false;
622
623 if (error_code & 2)
624 kmemcheck_access(regs, address, KMEMCHECK_WRITE);
625 else
626 kmemcheck_access(regs, address, KMEMCHECK_READ);
627
628 kmemcheck_show(regs);
629 return true;
630}
631
/*
 * Trap entry point: if kmemcheck is active on this CPU, run
 * kmemcheck_hide() and return true; otherwise return false.
 */
bool kmemcheck_trap(struct pt_regs *regs)
{
	if (kmemcheck_active(regs)) {
		/* We're done. */
		kmemcheck_hide(regs);
		return true;
	}

	return false;
}
diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c
new file mode 100644
index 000000000000..63c19e27aa6f
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/opcode.c
@@ -0,0 +1,106 @@
1#include <linux/types.h>
2
3#include "opcode.h"
4
/*
 * Returns true if @b is one of the instruction prefix bytes listed below.
 * REX prefixes are handled separately by opcode_is_rex_prefix().
 */
static bool opcode_is_prefix(uint8_t b)
{
	switch (b) {
	/* Group 1 */
	case 0xf0:
	case 0xf2:
	case 0xf3:
	/* Group 2 */
	case 0x2e:
	case 0x36:
	case 0x3e:
	case 0x26:
	case 0x64:
	case 0x65:
	/* Group 3 */
	case 0x66:
	/* Group 4 */
	case 0x67:
		return true;
	default:
		return false;
	}
}
18
/*
 * Returns true if @b is a REX prefix, i.e. its high nibble is 0x4.
 * Without CONFIG_X86_64 this always returns false.
 */
static bool opcode_is_rex_prefix(uint8_t b)
{
#ifdef CONFIG_X86_64
	return (b >> 4) == 0x4;
#else
	return false;
#endif
}
30
/* The W bit of a REX prefix byte. */
#define REX_W (1 << 3)

/*
 * This is a VERY crude opcode decoder. We only need to find the size of the
 * load/store that caused our #PF and this should work for all the opcodes
 * that we care about. Moreover, the ones who invented this instruction set
 * should be shot.
 *
 * @op points at the first byte of the instruction (prefixes included);
 * the operand size in bytes is stored in *@size.
 */
void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size)
{
	/* Default operand size; a 0x66 prefix overrides it to 2. */
	int operand_size = 4;
	const uint8_t *p = op;

	/* prefixes */
	while (opcode_is_prefix(*p)) {
		if (*p == 0x66)
			operand_size = 2;
		++p;
	}

	/* REX prefix */
	if (opcode_is_rex_prefix(*p)) {
		uint8_t rex = *p++;

		if (rex & REX_W) {
			if (*p == 0x63) {
				*size = 4;
				return;
			}

			if (*p == 0x0f) {
				uint8_t second = p[1];

				if (second == 0xb6 || second == 0xbe) {
					*size = 1;
					return;
				}
				if (second == 0xb7 || second == 0xbf) {
					*size = 2;
					return;
				}
			}

			/* Everything else with REX.W set is treated as 8 bytes. */
			*size = 8;
			return;
		}
	}

	/* escape opcode */
	if (*p == 0x0f) {
		++p;

		/*
		 * This is move with zero-extend and sign-extend, respectively;
		 * we don't have to think about 0xb6/0xbe, because this is
		 * already handled in the conditional below.
		 */
		if (*p == 0xb7 || *p == 0xbf)
			operand_size = 2;
	}

	/* An even opcode byte means a 1-byte access here. */
	*size = (*p & 1) ? operand_size : 1;
}
97
/*
 * Returns a pointer to the first byte of the instruction at @op that
 * follows all prefix bytes and at most one REX prefix.
 */
const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op)
{
	/* skip prefixes */
	for (; opcode_is_prefix(*op); ++op)
		;

	return opcode_is_rex_prefix(*op) ? op + 1 : op;
}
diff --git a/arch/x86/mm/kmemcheck/opcode.h b/arch/x86/mm/kmemcheck/opcode.h
new file mode 100644
index 000000000000..6956aad66b5b
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/opcode.h
@@ -0,0 +1,9 @@
1#ifndef ARCH__X86__MM__KMEMCHECK__OPCODE_H
2#define ARCH__X86__MM__KMEMCHECK__OPCODE_H
3
4#include <linux/types.h>
5
6void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size);
7const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op);
8
9#endif
diff --git a/arch/x86/mm/kmemcheck/pte.c b/arch/x86/mm/kmemcheck/pte.c
new file mode 100644
index 000000000000..4ead26eeaf96
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/pte.c
@@ -0,0 +1,22 @@
1#include <linux/mm.h>
2
3#include <asm/pgtable.h>
4
5#include "pte.h"
6
7pte_t *kmemcheck_pte_lookup(unsigned long address)
8{
9 pte_t *pte;
10 unsigned int level;
11
12 pte = lookup_address(address, &level);
13 if (!pte)
14 return NULL;
15 if (level != PG_LEVEL_4K)
16 return NULL;
17 if (!pte_hidden(*pte))
18 return NULL;
19
20 return pte;
21}
22
diff --git a/arch/x86/mm/kmemcheck/pte.h b/arch/x86/mm/kmemcheck/pte.h
new file mode 100644
index 000000000000..9f5966456492
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/pte.h
@@ -0,0 +1,10 @@
1#ifndef ARCH__X86__MM__KMEMCHECK__PTE_H
2#define ARCH__X86__MM__KMEMCHECK__PTE_H
3
4#include <linux/mm.h>
5
6#include <asm/pgtable.h>
7
8pte_t *kmemcheck_pte_lookup(unsigned long address);
9
10#endif
diff --git a/arch/x86/mm/kmemcheck/selftest.c b/arch/x86/mm/kmemcheck/selftest.c
new file mode 100644
index 000000000000..036efbea8b28
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/selftest.c
@@ -0,0 +1,69 @@
1#include <linux/kernel.h>
2
3#include "opcode.h"
4#include "selftest.h"
5
/* One opcode-decoder test case. */
struct selftest_opcode {
	/* Size that kmemcheck_opcode_decode() is expected to report. */
	unsigned int expected_size;
	/* Raw instruction bytes handed to the decoder. */
	const uint8_t *insn;
	/* Human-readable description, printed when the test fails. */
	const char *desc;
};
11
12static const struct selftest_opcode selftest_opcodes[] = {
13 /* REP MOVS */
14 {1, "\xf3\xa4", "rep movsb <mem8>, <mem8>"},
15 {4, "\xf3\xa5", "rep movsl <mem32>, <mem32>"},
16
17 /* MOVZX / MOVZXD */
18 {1, "\x66\x0f\xb6\x51\xf8", "movzwq <mem8>, <reg16>"},
19 {1, "\x0f\xb6\x51\xf8", "movzwq <mem8>, <reg32>"},
20
21 /* MOVSX / MOVSXD */
22 {1, "\x66\x0f\xbe\x51\xf8", "movswq <mem8>, <reg16>"},
23 {1, "\x0f\xbe\x51\xf8", "movswq <mem8>, <reg32>"},
24
25#ifdef CONFIG_X86_64
26 /* MOVZX / MOVZXD */
27 {1, "\x49\x0f\xb6\x51\xf8", "movzbq <mem8>, <reg64>"},
28 {2, "\x49\x0f\xb7\x51\xf8", "movzbq <mem16>, <reg64>"},
29
30 /* MOVSX / MOVSXD */
31 {1, "\x49\x0f\xbe\x51\xf8", "movsbq <mem8>, <reg64>"},
32 {2, "\x49\x0f\xbf\x51\xf8", "movsbq <mem16>, <reg64>"},
33 {4, "\x49\x63\x51\xf8", "movslq <mem32>, <reg64>"},
34#endif
35};
36
37static bool selftest_opcode_one(const struct selftest_opcode *op)
38{
39 unsigned size;
40
41 kmemcheck_opcode_decode(op->insn, &size);
42
43 if (size == op->expected_size)
44 return true;
45
46 printk(KERN_WARNING "kmemcheck: opcode %s: expected size %d, got %d\n",
47 op->desc, op->expected_size, size);
48 return false;
49}
50
51static bool selftest_opcodes_all(void)
52{
53 bool pass = true;
54 unsigned int i;
55
56 for (i = 0; i < ARRAY_SIZE(selftest_opcodes); ++i)
57 pass = pass && selftest_opcode_one(&selftest_opcodes[i]);
58
59 return pass;
60}
61
/*
 * Run all kmemcheck self-tests (currently only the opcode decoder tests).
 * Returns true if everything passed.
 */
bool kmemcheck_selftest(void)
{
	return selftest_opcodes_all();
}
diff --git a/arch/x86/mm/kmemcheck/selftest.h b/arch/x86/mm/kmemcheck/selftest.h
new file mode 100644
index 000000000000..8fed4fe11f95
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/selftest.h
@@ -0,0 +1,6 @@
1#ifndef ARCH_X86_MM_KMEMCHECK_SELFTEST_H
2#define ARCH_X86_MM_KMEMCHECK_SELFTEST_H
3
4bool kmemcheck_selftest(void);
5
6#endif
diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c
new file mode 100644
index 000000000000..e773b6bd0079
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/shadow.c
@@ -0,0 +1,162 @@
1#include <linux/kmemcheck.h>
2#include <linux/module.h>
3#include <linux/mm.h>
4#include <linux/module.h>
5
6#include <asm/page.h>
7#include <asm/pgtable.h>
8
9#include "pte.h"
10#include "shadow.h"
11
12/*
13 * Return the shadow address for the given address. Returns NULL if the
14 * address is not tracked.
15 *
16 * We need to be extremely careful not to follow any invalid pointers,
17 * because this function can be called for *any* possible address.
18 */
19void *kmemcheck_shadow_lookup(unsigned long address)
20{
21 pte_t *pte;
22 struct page *page;
23
24 if (!virt_addr_valid(address))
25 return NULL;
26
27 pte = kmemcheck_pte_lookup(address);
28 if (!pte)
29 return NULL;
30
31 page = virt_to_page(address);
32 if (!page->shadow)
33 return NULL;
34 return page->shadow + (address & (PAGE_SIZE - 1));
35}
36
37static void mark_shadow(void *address, unsigned int n,
38 enum kmemcheck_shadow status)
39{
40 unsigned long addr = (unsigned long) address;
41 unsigned long last_addr = addr + n - 1;
42 unsigned long page = addr & PAGE_MASK;
43 unsigned long last_page = last_addr & PAGE_MASK;
44 unsigned int first_n;
45 void *shadow;
46
47 /* If the memory range crosses a page boundary, stop there. */
48 if (page == last_page)
49 first_n = n;
50 else
51 first_n = page + PAGE_SIZE - addr;
52
53 shadow = kmemcheck_shadow_lookup(addr);
54 if (shadow)
55 memset(shadow, status, first_n);
56
57 addr += first_n;
58 n -= first_n;
59
60 /* Do full-page memset()s. */
61 while (n >= PAGE_SIZE) {
62 shadow = kmemcheck_shadow_lookup(addr);
63 if (shadow)
64 memset(shadow, status, PAGE_SIZE);
65
66 addr += PAGE_SIZE;
67 n -= PAGE_SIZE;
68 }
69
70 /* Do the remaining page, if any. */
71 if (n > 0) {
72 shadow = kmemcheck_shadow_lookup(addr);
73 if (shadow)
74 memset(shadow, status, n);
75 }
76}
77
78void kmemcheck_mark_unallocated(void *address, unsigned int n)
79{
80 mark_shadow(address, n, KMEMCHECK_SHADOW_UNALLOCATED);
81}
82
83void kmemcheck_mark_uninitialized(void *address, unsigned int n)
84{
85 mark_shadow(address, n, KMEMCHECK_SHADOW_UNINITIALIZED);
86}
87
88/*
89 * Fill the shadow memory of the given address such that the memory at that
90 * address is marked as being initialized.
91 */
92void kmemcheck_mark_initialized(void *address, unsigned int n)
93{
94 mark_shadow(address, n, KMEMCHECK_SHADOW_INITIALIZED);
95}
96EXPORT_SYMBOL_GPL(kmemcheck_mark_initialized);
97
98void kmemcheck_mark_freed(void *address, unsigned int n)
99{
100 mark_shadow(address, n, KMEMCHECK_SHADOW_FREED);
101}
102
103void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n)
104{
105 unsigned int i;
106
107 for (i = 0; i < n; ++i)
108 kmemcheck_mark_unallocated(page_address(&p[i]), PAGE_SIZE);
109}
110
111void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n)
112{
113 unsigned int i;
114
115 for (i = 0; i < n; ++i)
116 kmemcheck_mark_uninitialized(page_address(&p[i]), PAGE_SIZE);
117}
118
119void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n)
120{
121 unsigned int i;
122
123 for (i = 0; i < n; ++i)
124 kmemcheck_mark_initialized(page_address(&p[i]), PAGE_SIZE);
125}
126
127enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size)
128{
129 uint8_t *x;
130 unsigned int i;
131
132 x = shadow;
133
134#ifdef CONFIG_KMEMCHECK_PARTIAL_OK
135 /*
136 * Make sure _some_ bytes are initialized. Gcc frequently generates
137 * code to access neighboring bytes.
138 */
139 for (i = 0; i < size; ++i) {
140 if (x[i] == KMEMCHECK_SHADOW_INITIALIZED)
141 return x[i];
142 }
143#else
144 /* All bytes must be initialized. */
145 for (i = 0; i < size; ++i) {
146 if (x[i] != KMEMCHECK_SHADOW_INITIALIZED)
147 return x[i];
148 }
149#endif
150
151 return x[0];
152}
153
154void kmemcheck_shadow_set(void *shadow, unsigned int size)
155{
156 uint8_t *x;
157 unsigned int i;
158
159 x = shadow;
160 for (i = 0; i < size; ++i)
161 x[i] = KMEMCHECK_SHADOW_INITIALIZED;
162}
diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h
new file mode 100644
index 000000000000..af46d9ab9d86
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/shadow.h
@@ -0,0 +1,16 @@
1#ifndef ARCH__X86__MM__KMEMCHECK__SHADOW_H
2#define ARCH__X86__MM__KMEMCHECK__SHADOW_H
3
/* Status stored in each shadow byte, one per byte of tracked memory. */
enum kmemcheck_shadow {
	KMEMCHECK_SHADOW_UNALLOCATED,	/* written by kmemcheck_mark_unallocated() */
	KMEMCHECK_SHADOW_UNINITIALIZED,	/* written by kmemcheck_mark_uninitialized() */
	KMEMCHECK_SHADOW_INITIALIZED,	/* written by kmemcheck_mark_initialized() */
	KMEMCHECK_SHADOW_FREED,		/* written by kmemcheck_mark_freed() */
};
10
11void *kmemcheck_shadow_lookup(unsigned long address);
12
13enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size);
14void kmemcheck_shadow_set(void *shadow, unsigned int size);
15
16#endif
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 6ce9518fe2ac..3cfe9ced8a4c 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -470,7 +470,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
470 470
471 if (!debug_pagealloc) 471 if (!debug_pagealloc)
472 spin_unlock(&cpa_lock); 472 spin_unlock(&cpa_lock);
473 base = alloc_pages(GFP_KERNEL, 0); 473 base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
474 if (!debug_pagealloc) 474 if (!debug_pagealloc)
475 spin_lock(&cpa_lock); 475 spin_lock(&cpa_lock);
476 if (!base) 476 if (!base)
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 7aa03a5389f5..8e43bdd45456 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -4,9 +4,11 @@
4#include <asm/tlb.h> 4#include <asm/tlb.h>
5#include <asm/fixmap.h> 5#include <asm/fixmap.h>
6 6
7#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
8
7pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) 9pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
8{ 10{
9 return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); 11 return (pte_t *)__get_free_page(PGALLOC_GFP);
10} 12}
11 13
12pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) 14pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
@@ -14,9 +16,9 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
14 struct page *pte; 16 struct page *pte;
15 17
16#ifdef CONFIG_HIGHPTE 18#ifdef CONFIG_HIGHPTE
17 pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0); 19 pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0);
18#else 20#else
19 pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); 21 pte = alloc_pages(PGALLOC_GFP, 0);
20#endif 22#endif
21 if (pte) 23 if (pte)
22 pgtable_page_ctor(pte); 24 pgtable_page_ctor(pte);
@@ -161,7 +163,7 @@ static int preallocate_pmds(pmd_t *pmds[])
161 bool failed = false; 163 bool failed = false;
162 164
163 for(i = 0; i < PREALLOCATED_PMDS; i++) { 165 for(i = 0; i < PREALLOCATED_PMDS; i++) {
164 pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); 166 pmd_t *pmd = (pmd_t *)__get_free_page(PGALLOC_GFP);
165 if (pmd == NULL) 167 if (pmd == NULL)
166 failed = true; 168 failed = true;
167 pmds[i] = pmd; 169 pmds[i] = pmd;
@@ -228,7 +230,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
228 pmd_t *pmds[PREALLOCATED_PMDS]; 230 pmd_t *pmds[PREALLOCATED_PMDS];
229 unsigned long flags; 231 unsigned long flags;
230 232
231 pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); 233 pgd = (pgd_t *)__get_free_page(PGALLOC_GFP);
232 234
233 if (pgd == NULL) 235 if (pgd == NULL)
234 goto out; 236 goto out;
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index a85bef20a3b9..0fb56db16d18 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -116,7 +116,7 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
116 struct pci_bus *bus; 116 struct pci_bus *bus;
117 struct pci_dev *dev; 117 struct pci_dev *dev;
118 int idx; 118 int idx;
119 struct resource *r, *pr; 119 struct resource *r;
120 120
121 /* Depth-First Search on bus tree */ 121 /* Depth-First Search on bus tree */
122 list_for_each_entry(bus, bus_list, node) { 122 list_for_each_entry(bus, bus_list, node) {
@@ -126,9 +126,8 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
126 r = &dev->resource[idx]; 126 r = &dev->resource[idx];
127 if (!r->flags) 127 if (!r->flags)
128 continue; 128 continue;
129 pr = pci_find_parent_resource(dev, r); 129 if (!r->start ||
130 if (!r->start || !pr || 130 pci_claim_resource(dev, idx) < 0) {
131 request_resource(pr, r) < 0) {
132 dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx); 131 dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx);
133 /* 132 /*
134 * Something is wrong with the region. 133 * Something is wrong with the region.
@@ -149,7 +148,7 @@ static void __init pcibios_allocate_resources(int pass)
149 struct pci_dev *dev = NULL; 148 struct pci_dev *dev = NULL;
150 int idx, disabled; 149 int idx, disabled;
151 u16 command; 150 u16 command;
152 struct resource *r, *pr; 151 struct resource *r;
153 152
154 for_each_pci_dev(dev) { 153 for_each_pci_dev(dev) {
155 pci_read_config_word(dev, PCI_COMMAND, &command); 154 pci_read_config_word(dev, PCI_COMMAND, &command);
@@ -168,8 +167,7 @@ static void __init pcibios_allocate_resources(int pass)
168 (unsigned long long) r->start, 167 (unsigned long long) r->start,
169 (unsigned long long) r->end, 168 (unsigned long long) r->end,
170 r->flags, disabled, pass); 169 r->flags, disabled, pass);
171 pr = pci_find_parent_resource(dev, r); 170 if (pci_claim_resource(dev, idx) < 0) {
172 if (!pr || request_resource(pr, r) < 0) {
173 dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx); 171 dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx);
174 /* We'll assign a new address later */ 172 /* We'll assign a new address later */
175 r->end -= r->start; 173 r->end -= r->start;
@@ -197,7 +195,7 @@ static void __init pcibios_allocate_resources(int pass)
197static int __init pcibios_assign_resources(void) 195static int __init pcibios_assign_resources(void)
198{ 196{
199 struct pci_dev *dev = NULL; 197 struct pci_dev *dev = NULL;
200 struct resource *r, *pr; 198 struct resource *r;
201 199
202 if (!(pci_probe & PCI_ASSIGN_ROMS)) { 200 if (!(pci_probe & PCI_ASSIGN_ROMS)) {
203 /* 201 /*
@@ -209,8 +207,7 @@ static int __init pcibios_assign_resources(void)
209 r = &dev->resource[PCI_ROM_RESOURCE]; 207 r = &dev->resource[PCI_ROM_RESOURCE];
210 if (!r->flags || !r->start) 208 if (!r->flags || !r->start)
211 continue; 209 continue;
212 pr = pci_find_parent_resource(dev, r); 210 if (pci_claim_resource(dev, PCI_ROM_RESOURCE) < 0) {
213 if (!pr || request_resource(pr, r) < 0) {
214 r->end -= r->start; 211 r->end -= r->start;
215 r->start = 0; 212 r->start = 0;
216 } 213 }
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 16a9020c8f11..88112b49f02c 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -123,6 +123,7 @@ quiet_cmd_vdso = VDSO $@
123 -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) 123 -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^)
124 124
125VDSO_LDFLAGS = -fPIC -shared $(call ld-option, -Wl$(comma)--hash-style=sysv) 125VDSO_LDFLAGS = -fPIC -shared $(call ld-option, -Wl$(comma)--hash-style=sysv)
126GCOV_PROFILE := n
126 127
127# 128#
128# Install the unstripped copy of vdso*.so listed in $(vdso-install-y). 129# Install the unstripped copy of vdso*.so listed in $(vdso-install-y).