author		Benjamin Herrenschmidt <benh@kernel.crashing.org>	2009-06-17 21:16:55 -0400
committer	Benjamin Herrenschmidt <benh@kernel.crashing.org>	2009-06-17 21:16:55 -0400
commit		4b337c5f245b6587ba844ac7bb13c313a2912f7b (patch)
tree		999c6a6580b76a083c8efb9dabff709d1c49fcd0 /arch/x86
parent		492b057c426e4aa747484958e18e9da29003985d (diff)
parent		3fe0344faf7fdcb158bd5c1a9aec960a8d70c8e8 (diff)
Merge commit 'origin/master' into next
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig                                   |   1
-rw-r--r--  arch/x86/Makefile                                  |   5
-rw-r--r--  arch/x86/include/asm/dma-mapping.h                 |   7
-rw-r--r--  arch/x86/include/asm/kmap_types.h                  |  23
-rw-r--r--  arch/x86/include/asm/kmemcheck.h                   |  42
-rw-r--r--  arch/x86/include/asm/pgtable.h                     |   5
-rw-r--r--  arch/x86/include/asm/pgtable_types.h               |   9
-rw-r--r--  arch/x86/include/asm/string_32.h                   |   8
-rw-r--r--  arch/x86/include/asm/string_64.h                   |   8
-rw-r--r--  arch/x86/include/asm/thread_info.h                 |   4
-rw-r--r--  arch/x86/include/asm/timex.h                       |   4
-rw-r--r--  arch/x86/include/asm/xor.h                         |   5
-rw-r--r--  arch/x86/kernel/cpu/common.c                       |  11
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.c          | 191
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.h          |  11
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c   |  60
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-ich.c        |  93
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-lib.c        |   1
-rw-r--r--  arch/x86/kernel/cpu/intel.c                        |  23
-rw-r--r--  arch/x86/kernel/cpuid.c                            |   6
-rw-r--r--  arch/x86/kernel/i8253.c                            |   1
-rw-r--r--  arch/x86/kernel/init_task.c                        |   1
-rw-r--r--  arch/x86/kernel/microcode_core.c                   |   1
-rw-r--r--  arch/x86/kernel/msr.c                              |   6
-rw-r--r--  arch/x86/kernel/process.c                          |   2
-rw-r--r--  arch/x86/kernel/stacktrace.c                       |   7
-rw-r--r--  arch/x86/kernel/traps.c                            |   5
-rw-r--r--  arch/x86/kernel/tsc.c                              |   9
-rw-r--r--  arch/x86/kvm/vmx.c                                 |   2
-rw-r--r--  arch/x86/mm/Makefile                               |   2
-rw-r--r--  arch/x86/mm/fault.c                                |  18
-rw-r--r--  arch/x86/mm/init.c                                 |   2
-rw-r--r--  arch/x86/mm/init_32.c                              |   2
-rw-r--r--  arch/x86/mm/init_64.c                              |   4
-rw-r--r--  arch/x86/mm/kmemcheck/Makefile                     |   1
-rw-r--r--  arch/x86/mm/kmemcheck/error.c                      | 228
-rw-r--r--  arch/x86/mm/kmemcheck/error.h                      |  15
-rw-r--r--  arch/x86/mm/kmemcheck/kmemcheck.c                  | 640
-rw-r--r--  arch/x86/mm/kmemcheck/opcode.c                     | 106
-rw-r--r--  arch/x86/mm/kmemcheck/opcode.h                     |   9
-rw-r--r--  arch/x86/mm/kmemcheck/pte.c                        |  22
-rw-r--r--  arch/x86/mm/kmemcheck/pte.h                        |  10
-rw-r--r--  arch/x86/mm/kmemcheck/selftest.c                   |  69
-rw-r--r--  arch/x86/mm/kmemcheck/selftest.h                   |   6
-rw-r--r--  arch/x86/mm/kmemcheck/shadow.c                     | 162
-rw-r--r--  arch/x86/mm/kmemcheck/shadow.h                     |  16
-rw-r--r--  arch/x86/mm/pageattr.c                             |   2
-rw-r--r--  arch/x86/mm/pgtable.c                              |  12
48 files changed, 1637 insertions, 240 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 356d2ec8e2fb..cf42fc305419 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -46,6 +46,7 @@ config X86
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
+	select HAVE_ARCH_KMEMCHECK
 
 config OUTPUT_FORMAT
 	string
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index edbd0ca62067..1b68659c41b4 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -81,6 +81,11 @@ ifdef CONFIG_CC_STACKPROTECTOR
 endif
 endif
 
+# Don't unroll struct assignments with kmemcheck enabled
+ifeq ($(CONFIG_KMEMCHECK),y)
+	KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy)
+endif
+
 # Stackpointer is addressed different for 32 bit and 64 bit x86
 sp-$(CONFIG_X86_32) := esp
 sp-$(CONFIG_X86_64) := rsp
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index f82fdc412c64..b93405b228b4 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -6,6 +6,7 @@
  * Documentation/DMA-API.txt for documentation.
  */
 
+#include <linux/kmemcheck.h>
 #include <linux/scatterlist.h>
 #include <linux/dma-debug.h>
 #include <linux/dma-attrs.h>
@@ -60,6 +61,7 @@ dma_map_single(struct device *hwdev, void *ptr, size_t size,
 	dma_addr_t addr;
 
 	BUG_ON(!valid_dma_direction(dir));
+	kmemcheck_mark_initialized(ptr, size);
 	addr = ops->map_page(hwdev, virt_to_page(ptr),
 			     (unsigned long)ptr & ~PAGE_MASK, size,
 			     dir, NULL);
@@ -87,8 +89,12 @@ dma_map_sg(struct device *hwdev, struct scatterlist *sg,
 {
 	struct dma_map_ops *ops = get_dma_ops(hwdev);
 	int ents;
+	struct scatterlist *s;
+	int i;
 
 	BUG_ON(!valid_dma_direction(dir));
+	for_each_sg(sg, s, nents, i)
+		kmemcheck_mark_initialized(sg_virt(s), s->length);
 	ents = ops->map_sg(hwdev, sg, nents, dir, NULL);
 	debug_dma_map_sg(hwdev, sg, nents, ents, dir);
 
@@ -200,6 +206,7 @@ static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
 	dma_addr_t addr;
 
 	BUG_ON(!valid_dma_direction(dir));
+	kmemcheck_mark_initialized(page_address(page) + offset, size);
 	addr = ops->map_page(dev, page, offset, size, dir, NULL);
 	debug_dma_map_page(dev, page, offset, size, dir, addr, false);
 
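
The three kmemcheck_mark_initialized() hooks above close a recurring false positive: a buffer mapped for DMA is filled by the device, which kmemcheck cannot observe, so its shadow memory would still read "uninitialized" when the CPU later touches the data. Marking the mapped region initialized at map time sidesteps that. A minimal sketch of the scenario, with hypothetical names (dev, dst, PKT_SIZE):

	/*
	 * Sketch only: without the hunk above, kmemcheck would flag the
	 * memcpy(), because the device (not the CPU) wrote into buf and
	 * the shadow bytes were never updated.
	 */
	static void rx_one_packet(struct device *dev, void *dst)
	{
		void *buf = kmalloc(PKT_SIZE, GFP_KERNEL);
		dma_addr_t handle;

		handle = dma_map_single(dev, buf, PKT_SIZE, DMA_FROM_DEVICE);
		/* ... device DMAs a packet into buf and signals completion ... */
		dma_unmap_single(dev, handle, PKT_SIZE, DMA_FROM_DEVICE);

		memcpy(dst, buf, PKT_SIZE);	/* no false positive */
		kfree(buf);
	}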
diff --git a/arch/x86/include/asm/kmap_types.h b/arch/x86/include/asm/kmap_types.h
index 5759c165a5cf..9e00a731a7fb 100644
--- a/arch/x86/include/asm/kmap_types.h
+++ b/arch/x86/include/asm/kmap_types.h
@@ -2,28 +2,11 @@
 #define _ASM_X86_KMAP_TYPES_H
 
 #if defined(CONFIG_X86_32) && defined(CONFIG_DEBUG_HIGHMEM)
-# define D(n) __KM_FENCE_##n ,
-#else
-# define D(n)
+#define __WITH_KM_FENCE
 #endif
 
-enum km_type {
-D(0)	KM_BOUNCE_READ,
-D(1)	KM_SKB_SUNRPC_DATA,
-D(2)	KM_SKB_DATA_SOFTIRQ,
-D(3)	KM_USER0,
-D(4)	KM_USER1,
-D(5)	KM_BIO_SRC_IRQ,
-D(6)	KM_BIO_DST_IRQ,
-D(7)	KM_PTE0,
-D(8)	KM_PTE1,
-D(9)	KM_IRQ0,
-D(10)	KM_IRQ1,
-D(11)	KM_SOFTIRQ0,
-D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
-};
+#include <asm-generic/kmap_types.h>
 
-#undef D
+#undef __WITH_KM_FENCE
 
 #endif /* _ASM_X86_KMAP_TYPES_H */
diff --git a/arch/x86/include/asm/kmemcheck.h b/arch/x86/include/asm/kmemcheck.h
new file mode 100644
index 000000000000..ed01518f297e
--- /dev/null
+++ b/arch/x86/include/asm/kmemcheck.h
@@ -0,0 +1,42 @@
+#ifndef ASM_X86_KMEMCHECK_H
+#define ASM_X86_KMEMCHECK_H
+
+#include <linux/types.h>
+#include <asm/ptrace.h>
+
+#ifdef CONFIG_KMEMCHECK
+bool kmemcheck_active(struct pt_regs *regs);
+
+void kmemcheck_show(struct pt_regs *regs);
+void kmemcheck_hide(struct pt_regs *regs);
+
+bool kmemcheck_fault(struct pt_regs *regs,
+	unsigned long address, unsigned long error_code);
+bool kmemcheck_trap(struct pt_regs *regs);
+#else
+static inline bool kmemcheck_active(struct pt_regs *regs)
+{
+	return false;
+}
+
+static inline void kmemcheck_show(struct pt_regs *regs)
+{
+}
+
+static inline void kmemcheck_hide(struct pt_regs *regs)
+{
+}
+
+static inline bool kmemcheck_fault(struct pt_regs *regs,
+	unsigned long address, unsigned long error_code)
+{
+	return false;
+}
+
+static inline bool kmemcheck_trap(struct pt_regs *regs)
+{
+	return false;
+}
+#endif /* CONFIG_KMEMCHECK */
+
+#endif
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 18ef7ebf2631..3cc06e3fceb8 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -317,6 +317,11 @@ static inline int pte_present(pte_t a)
 	return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
 }
 
+static inline int pte_hidden(pte_t pte)
+{
+	return pte_flags(pte) & _PAGE_HIDDEN;
+}
+
 static inline int pmd_present(pmd_t pmd)
 {
 	return pmd_flags(pmd) & _PAGE_PRESENT;
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 4d258ad76a0f..54cb697f4900 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -18,7 +18,7 @@
 #define _PAGE_BIT_GLOBAL	8	/* Global TLB entry PPro+ */
 #define _PAGE_BIT_UNUSED1	9	/* available for programmer */
 #define _PAGE_BIT_IOMAP		10	/* flag used to indicate IO mapping */
-#define _PAGE_BIT_UNUSED3	11
+#define _PAGE_BIT_HIDDEN	11	/* hidden by kmemcheck */
 #define _PAGE_BIT_PAT_LARGE	12	/* On 2MB or 1GB pages */
 #define _PAGE_BIT_SPECIAL	_PAGE_BIT_UNUSED1
 #define _PAGE_BIT_CPA_TEST	_PAGE_BIT_UNUSED1
@@ -41,13 +41,18 @@
 #define _PAGE_GLOBAL	(_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
 #define _PAGE_UNUSED1	(_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1)
 #define _PAGE_IOMAP	(_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)
-#define _PAGE_UNUSED3	(_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3)
 #define _PAGE_PAT	(_AT(pteval_t, 1) << _PAGE_BIT_PAT)
 #define _PAGE_PAT_LARGE	(_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
 #define _PAGE_SPECIAL	(_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
 #define _PAGE_CPA_TEST	(_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST)
 #define __HAVE_ARCH_PTE_SPECIAL
 
+#ifdef CONFIG_KMEMCHECK
+#define _PAGE_HIDDEN	(_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN)
+#else
+#define _PAGE_HIDDEN	(_AT(pteval_t, 0))
+#endif
+
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 #define _PAGE_NX	(_AT(pteval_t, 1) << _PAGE_BIT_NX)
 #else
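
_PAGE_HIDDEN repurposes software bit 11: kmemcheck clears _PAGE_PRESENT on the pages it tracks and sets this bit, so the fault handler can tell a deliberately hidden page from a genuinely bad access. Roughly how the distinction is made (a sketch; the real check lives in the kmemcheck fault path, and fault_is_kmemcheck_page() is an illustrative name):

	static bool fault_is_kmemcheck_page(unsigned long address)
	{
		unsigned int level;
		pte_t *pte = lookup_address(address, &level);

		/* Hidden pages are non-present but carry _PAGE_HIDDEN. */
		return pte && !pte_present(*pte) && pte_hidden(*pte);
	}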
diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h
index 0e0e3ba827f7..c86f452256de 100644
--- a/arch/x86/include/asm/string_32.h
+++ b/arch/x86/include/asm/string_32.h
@@ -177,10 +177,18 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len)
  *	No 3D Now!
  */
 
+#ifndef CONFIG_KMEMCHECK
 #define memcpy(t, f, n)				\
 	(__builtin_constant_p((n))		\
 	 ? __constant_memcpy((t), (f), (n))	\
 	 : __memcpy((t), (f), (n)))
+#else
+/*
+ * kmemcheck becomes very happy if we use the REP instructions unconditionally,
+ * because it means that we know both memory operands in advance.
+ */
+#define memcpy(t, f, n) __memcpy((t), (f), (n))
+#endif
 
 #endif
 
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 2afe164bf1e6..19e2c468fc2c 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -27,6 +27,7 @@ static __always_inline void *__inline_memcpy(void *to, const void *from, size_t
    function. */
 
 #define __HAVE_ARCH_MEMCPY 1
+#ifndef CONFIG_KMEMCHECK
 #if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4
 extern void *memcpy(void *to, const void *from, size_t len);
 #else
@@ -42,6 +43,13 @@ extern void *__memcpy(void *to, const void *from, size_t len);
 	__ret;							\
 })
 #endif
+#else
+/*
+ * kmemcheck becomes very happy if we use the REP instructions unconditionally,
+ * because it means that we know both memory operands in advance.
+ */
+#define memcpy(dst, src, len) __inline_memcpy((dst), (src), (len))
+#endif
 
 #define __HAVE_ARCH_MEMSET
 void *memset(void *s, int c, size_t n);
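
Both string headers make the same trade: with kmemcheck enabled, every memcpy() funnels into the REP-MOVS-based __memcpy()/__inline_memcpy() rather than GCC's open-coded, unrolled copies. A single REP instruction exposes source, destination and length up front, so kmemcheck can check the whole transfer at once instead of chasing an unpredictable mix of loads and stores. For reference, the 32-bit __memcpy() this routes to is essentially the following (quoted from string_32.h as a sketch):

	static __always_inline void *__memcpy(void *to, const void *from, size_t n)
	{
		int d0, d1, d2;
		asm volatile("rep ; movsl\n\t"	/* copy n/4 dwords... */
			     "movl %4,%%ecx\n\t"
			     "andl $3,%%ecx\n\t"
			     "jz 1f\n\t"
			     "rep ; movsb\n\t"	/* ...then the 0-3 tail bytes */
			     "1:"
			     : "=&c" (d0), "=&D" (d1), "=&S" (d2)
			     : "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from)
			     : "memory");
		return to;
	}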
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 602c769fc98c..b0783520988b 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -154,9 +154,9 @@ struct thread_info {
 
 /* thread information allocation */
 #ifdef CONFIG_DEBUG_STACK_USAGE
-#define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO)
+#define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
 #else
-#define THREAD_FLAGS GFP_KERNEL
+#define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK)
 #endif
 
 #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
diff --git a/arch/x86/include/asm/timex.h b/arch/x86/include/asm/timex.h
index b5c9d45c981f..1375cfc93960 100644
--- a/arch/x86/include/asm/timex.h
+++ b/arch/x86/include/asm/timex.h
@@ -4,9 +4,7 @@
 #include <asm/processor.h>
 #include <asm/tsc.h>
 
-/* The PIT ticks at this frequency (in HZ): */
-#define PIT_TICK_RATE 1193182
-
+/* Assume we use the PIT time source for the clock tick */
 #define CLOCK_TICK_RATE PIT_TICK_RATE
 
 #define ARCH_HAS_READ_CURRENT_TIMER
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h
index 11b3bb86e17b..7fcf6f3dbcc3 100644
--- a/arch/x86/include/asm/xor.h
+++ b/arch/x86/include/asm/xor.h
@@ -1,5 +1,10 @@
+#ifdef CONFIG_KMEMCHECK
+/* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */
+# include <asm-generic/xor.h>
+#else
 #ifdef CONFIG_X86_32
 # include "xor_32.h"
 #else
 # include "xor_64.h"
 #endif
+#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 3ffdcfa9abdf..9fa33886c0d7 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -487,7 +487,6 @@ out:
 static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
 {
 	char *v = c->x86_vendor_id;
-	static int printed;
 	int i;
 
 	for (i = 0; i < X86_VENDOR_NUM; i++) {
@@ -504,13 +503,9 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
 		}
 	}
 
-	if (!printed) {
-		printed++;
-		printk(KERN_ERR
-		       "CPU: vendor_id '%s' unknown, using generic init.\n", v);
-
-		printk(KERN_ERR "CPU: Your system may be unstable.\n");
-	}
+	printk_once(KERN_ERR
+		    "CPU: vendor_id '%s' unknown, using generic init.\n" \
+		    "CPU: Your system may be unstable.\n", v);
 
 	c->x86_vendor = X86_VENDOR_UNKNOWN;
 	this_cpu = &default_cpu;
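
The conversion folds the hand-rolled "printed" flag into printk_once(). For reference, that macro is roughly the following (a sketch of the include/linux/kernel.h definition); the guard variable is static per call site, which is exactly what the removed code did by hand:

	#define printk_once(x...) ({			\
		static bool __print_once;		\
							\
		if (!__print_once) {			\
			__print_once = true;		\
			printk(x);			\
		}					\
	})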
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index cf52215d9eb1..81cbe64ed6b4 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1,3 +1,4 @@
+
 /*
  * (c) 2003-2006 Advanced Micro Devices, Inc.
  * Your use of this code is subject to the terms and conditions of the
@@ -117,20 +118,17 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
 	u32 i = 0;
 
 	if (cpu_family == CPU_HW_PSTATE) {
-		if (data->currpstate == HW_PSTATE_INVALID) {
-			/* read (initial) hw pstate if not yet set */
-			rdmsr(MSR_PSTATE_STATUS, lo, hi);
-			i = lo & HW_PSTATE_MASK;
-
-			/*
-			 * a workaround for family 11h erratum 311 might cause
-			 * an "out-of-range Pstate if the core is in Pstate-0
-			 */
-			if (i >= data->numps)
-				data->currpstate = HW_PSTATE_0;
-			else
-				data->currpstate = i;
-		}
+		rdmsr(MSR_PSTATE_STATUS, lo, hi);
+		i = lo & HW_PSTATE_MASK;
+		data->currpstate = i;
+
+		/*
+		 * a workaround for family 11h erratum 311 might cause
+		 * an "out-of-range Pstate if the core is in Pstate-0
+		 */
+		if ((boot_cpu_data.x86 == 0x11) && (i >= data->numps))
+			data->currpstate = HW_PSTATE_0;
+
 		return 0;
 	}
 	do {
@@ -510,41 +508,34 @@ static int core_voltage_post_transition(struct powernow_k8_data *data,
 	return 0;
 }
 
-static int check_supported_cpu(unsigned int cpu)
+static void check_supported_cpu(void *_rc)
 {
-	cpumask_t oldmask;
 	u32 eax, ebx, ecx, edx;
-	unsigned int rc = 0;
-
-	oldmask = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+	int *rc = _rc;
 
-	if (smp_processor_id() != cpu) {
-		printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu);
-		goto out;
-	}
+	*rc = -ENODEV;
 
 	if (current_cpu_data.x86_vendor != X86_VENDOR_AMD)
-		goto out;
+		return;
 
 	eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
 	if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) &&
 	    ((eax & CPUID_XFAM) < CPUID_XFAM_10H))
-		goto out;
+		return;
 
 	if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) {
 		if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
 		    ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) {
 			printk(KERN_INFO PFX
 			       "Processor cpuid %x not supported\n", eax);
-			goto out;
+			return;
 		}
 
 		eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES);
 		if (eax < CPUID_FREQ_VOLT_CAPABILITIES) {
 			printk(KERN_INFO PFX
 			       "No frequency change capabilities detected\n");
-			goto out;
+			return;
 		}
 
 		cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
@@ -552,21 +543,17 @@ static int check_supported_cpu(unsigned int cpu)
 		    != P_STATE_TRANSITION_CAPABLE) {
 			printk(KERN_INFO PFX
 			       "Power state transitions not supported\n");
-			goto out;
+			return;
 		}
 	} else { /* must be a HW Pstate capable processor */
 		cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
 		if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE)
 			cpu_family = CPU_HW_PSTATE;
 		else
-			goto out;
+			return;
 	}
 
-	rc = 1;
-
-out:
-	set_cpus_allowed_ptr(current, &oldmask);
-	return rc;
+	*rc = 0;
 }
 
 static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst,
@@ -823,13 +810,14 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data,
 	if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
 		return;
 
-	control = data->acpi_data.states[index].control; data->irt = (control
-		>> IRT_SHIFT) & IRT_MASK; data->rvo = (control >>
-		RVO_SHIFT) & RVO_MASK; data->exttype = (control
-		>> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
-	data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK; data->vidmvs = 1
-		<< ((control >> MVS_SHIFT) & MVS_MASK); data->vstable =
-		(control >> VST_SHIFT) & VST_MASK; }
+	control = data->acpi_data.states[index].control;
+	data->irt = (control >> IRT_SHIFT) & IRT_MASK;
+	data->rvo = (control >> RVO_SHIFT) & RVO_MASK;
+	data->exttype = (control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
+	data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK;
+	data->vidmvs = 1 << ((control >> MVS_SHIFT) & MVS_MASK);
+	data->vstable = (control >> VST_SHIFT) & VST_MASK;
+}
 
 static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 {
@@ -1046,6 +1034,19 @@ static int get_transition_latency(struct powernow_k8_data *data)
 		if (cur_latency > max_latency)
 			max_latency = cur_latency;
 	}
+	if (max_latency == 0) {
+		/*
+		 * Fam 11h always returns 0 as transition latency.
+		 * This is intended and means "very fast". While cpufreq core
+		 * and governors currently can handle that gracefully, better
+		 * set it to 1 to avoid problems in the future.
+		 * For all others it's a BIOS bug.
+		 */
+		if (boot_cpu_data.x86 != 0x11)
+			printk(KERN_ERR FW_WARN PFX "Invalid zero transition "
+			       "latency\n");
+		max_latency = 1;
+	}
 	/* value in usecs, needs to be in nanoseconds */
 	return 1000 * max_latency;
 }
@@ -1093,7 +1094,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data,
 	freqs.old = find_khz_freq_from_fid(data->currfid);
 	freqs.new = find_khz_freq_from_fid(fid);
 
-	for_each_cpu_mask_nr(i, *(data->available_cores)) {
+	for_each_cpu(i, data->available_cores) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 	}
@@ -1101,7 +1102,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data,
 	res = transition_fid_vid(data, fid, vid);
 	freqs.new = find_khz_freq_from_fid(data->currfid);
 
-	for_each_cpu_mask_nr(i, *(data->available_cores)) {
+	for_each_cpu(i, data->available_cores) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 	}
@@ -1126,7 +1127,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data,
 			data->currpstate);
 	freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
 
-	for_each_cpu_mask_nr(i, *(data->available_cores)) {
+	for_each_cpu(i, data->available_cores) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 	}
@@ -1134,7 +1135,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data,
 	res = transition_pstate(data, pstate);
 	freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
 
-	for_each_cpu_mask_nr(i, *(data->available_cores)) {
+	for_each_cpu(i, data->available_cores) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 	}
@@ -1235,21 +1236,47 @@ static int powernowk8_verify(struct cpufreq_policy *pol)
 	return cpufreq_frequency_table_verify(pol, data->powernow_table);
 }
 
-static const char ACPI_PSS_BIOS_BUG_MSG[] =
-	KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n"
-	KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n";
+struct init_on_cpu {
+	struct powernow_k8_data *data;
+	int rc;
+};
+
+static void __cpuinit powernowk8_cpu_init_on_cpu(void *_init_on_cpu)
+{
+	struct init_on_cpu *init_on_cpu = _init_on_cpu;
+
+	if (pending_bit_stuck()) {
+		printk(KERN_ERR PFX "failing init, change pending bit set\n");
+		init_on_cpu->rc = -ENODEV;
+		return;
+	}
+
+	if (query_current_values_with_pending_wait(init_on_cpu->data)) {
+		init_on_cpu->rc = -ENODEV;
+		return;
+	}
+
+	if (cpu_family == CPU_OPTERON)
+		fidvid_msr_init();
+
+	init_on_cpu->rc = 0;
+}
 
 /* per CPU init entry point to the driver */
 static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 {
+	static const char ACPI_PSS_BIOS_BUG_MSG[] =
+		KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n"
+		KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n";
 	struct powernow_k8_data *data;
-	cpumask_t oldmask;
+	struct init_on_cpu init_on_cpu;
 	int rc;
 
 	if (!cpu_online(pol->cpu))
 		return -ENODEV;
 
-	if (!check_supported_cpu(pol->cpu))
+	smp_call_function_single(pol->cpu, check_supported_cpu, &rc, 1);
+	if (rc)
 		return -ENODEV;
 
 	data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL);
@@ -1289,27 +1316,12 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 	pol->cpuinfo.transition_latency = get_transition_latency(data);
 
 	/* only run on specific CPU from here on */
-	oldmask = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu));
-
-	if (smp_processor_id() != pol->cpu) {
-		printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
-		goto err_out_unmask;
-	}
-
-	if (pending_bit_stuck()) {
-		printk(KERN_ERR PFX "failing init, change pending bit set\n");
-		goto err_out_unmask;
-	}
-
-	if (query_current_values_with_pending_wait(data))
-		goto err_out_unmask;
-
-	if (cpu_family == CPU_OPTERON)
-		fidvid_msr_init();
-
-	/* run on any CPU again */
-	set_cpus_allowed_ptr(current, &oldmask);
+	init_on_cpu.data = data;
+	smp_call_function_single(data->cpu, powernowk8_cpu_init_on_cpu,
+				 &init_on_cpu, 1);
+	rc = init_on_cpu.rc;
+	if (rc != 0)
+		goto err_out_exit_acpi;
 
 	if (cpu_family == CPU_HW_PSTATE)
 		cpumask_copy(pol->cpus, cpumask_of(pol->cpu));
@@ -1346,8 +1358,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 
 	return 0;
 
-err_out_unmask:
-	set_cpus_allowed_ptr(current, &oldmask);
+err_out_exit_acpi:
 	powernow_k8_cpu_exit_acpi(data);
 
 err_out:
@@ -1372,28 +1383,25 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
 	return 0;
 }
 
+static void query_values_on_cpu(void *_err)
+{
+	int *err = _err;
+	struct powernow_k8_data *data = __get_cpu_var(powernow_data);
+
+	*err = query_current_values_with_pending_wait(data);
+}
+
 static unsigned int powernowk8_get(unsigned int cpu)
 {
-	struct powernow_k8_data *data;
-	cpumask_t oldmask = current->cpus_allowed;
+	struct powernow_k8_data *data = per_cpu(powernow_data, cpu);
 	unsigned int khz = 0;
-	unsigned int first;
-
-	first = cpumask_first(cpu_core_mask(cpu));
-	data = per_cpu(powernow_data, first);
+	int err;
 
 	if (!data)
 		return -EINVAL;
 
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-	if (smp_processor_id() != cpu) {
-		printk(KERN_ERR PFX
-			"limiting to CPU %d failed in powernowk8_get\n", cpu);
-		set_cpus_allowed_ptr(current, &oldmask);
-		return 0;
-	}
-
-	if (query_current_values_with_pending_wait(data))
+	smp_call_function_single(cpu, query_values_on_cpu, &err, true);
+	if (err)
 		goto out;
 
 	if (cpu_family == CPU_HW_PSTATE)
@@ -1404,7 +1412,6 @@ static unsigned int powernowk8_get(unsigned int cpu)
 
 
 out:
-	set_cpus_allowed_ptr(current, &oldmask);
 	return khz;
 }
 
1410 1417
@@ -1430,7 +1437,9 @@ static int __cpuinit powernowk8_init(void)
 	unsigned int i, supported_cpus = 0;
 
 	for_each_online_cpu(i) {
-		if (check_supported_cpu(i))
+		int rc;
+		smp_call_function_single(i, check_supported_cpu, &rc, 1);
+		if (rc == 0)
 			supported_cpus++;
 	}
 
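
The shape repeated throughout this file (and in the speedstep drivers below): instead of migrating the current task onto the target CPU with set_cpus_allowed_ptr() and hoping the migration succeeded, a small callback is sent to that CPU by IPI and the caller waits for it. A minimal sketch of the pattern; struct check_arg, run_check() and check_on_cpu() are illustrative names, not part of the diff:

	struct check_arg {
		int rc;
	};

	/* Runs on the target CPU: local rdmsr()/cpuid() are safe here. */
	static void run_check(void *_arg)
	{
		struct check_arg *arg = _arg;

		arg->rc = 0;
	}

	static int check_on_cpu(int cpu)
	{
		struct check_arg arg = { .rc = -ENODEV };

		/* Final argument 1 = wait for the callback to finish. */
		smp_call_function_single(cpu, run_check, &arg, 1);
		return arg.rc;
	}

This also removes a correctness hole: the old code could silently keep running on the wrong CPU if the affinity change failed, which is why every converted site used to re-check smp_processor_id().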
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index 6c6698feade1..c9c1190b5e1f 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -223,14 +223,3 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned
 
 static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
 static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
-
-#ifdef CONFIG_SMP
-static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[])
-{
-}
-#else
-static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[])
-{
-	cpu_set(0, cpu_sharedcore_mask[0]);
-}
-#endif
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index 55c831ed71ce..8d672ef162ce 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -323,14 +323,8 @@ static unsigned int get_cur_freq(unsigned int cpu)
 {
 	unsigned l, h;
 	unsigned clock_freq;
-	cpumask_t saved_mask;
 
-	saved_mask = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-	if (smp_processor_id() != cpu)
-		return 0;
-
-	rdmsr(MSR_IA32_PERF_STATUS, l, h);
+	rdmsr_on_cpu(cpu, MSR_IA32_PERF_STATUS, &l, &h);
 	clock_freq = extract_clock(l, cpu, 0);
 
 	if (unlikely(clock_freq == 0)) {
@@ -340,11 +334,9 @@ static unsigned int get_cur_freq(unsigned int cpu)
 		 * P-state transition (like TM2). Get the last freq set
 		 * in PERF_CTL.
 		 */
-		rdmsr(MSR_IA32_PERF_CTL, l, h);
+		rdmsr_on_cpu(cpu, MSR_IA32_PERF_CTL, &l, &h);
 		clock_freq = extract_clock(l, cpu, 1);
 	}
-
-	set_cpus_allowed_ptr(current, &saved_mask);
 	return clock_freq;
 }
 
350 342
@@ -467,15 +459,10 @@ static int centrino_target (struct cpufreq_policy *policy,
 	struct cpufreq_freqs	freqs;
 	int			retval = 0;
 	unsigned int		j, k, first_cpu, tmp;
-	cpumask_var_t saved_mask, covered_cpus;
+	cpumask_var_t covered_cpus;
 
-	if (unlikely(!alloc_cpumask_var(&saved_mask, GFP_KERNEL)))
-		return -ENOMEM;
-	if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))) {
-		free_cpumask_var(saved_mask);
+	if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL)))
 		return -ENOMEM;
-	}
-	cpumask_copy(saved_mask, &current->cpus_allowed);
 
 	if (unlikely(per_cpu(centrino_model, cpu) == NULL)) {
 		retval = -ENODEV;
480 if (unlikely(per_cpu(centrino_model, cpu) == NULL)) { 467 if (unlikely(per_cpu(centrino_model, cpu) == NULL)) {
481 retval = -ENODEV; 468 retval = -ENODEV;
@@ -493,7 +480,7 @@ static int centrino_target (struct cpufreq_policy *policy,
 
 	first_cpu = 1;
 	for_each_cpu(j, policy->cpus) {
-		const struct cpumask *mask;
+		int good_cpu;
 
 		/* cpufreq holds the hotplug lock, so we are safe here */
 		if (!cpu_online(j))
@@ -504,32 +491,30 @@ static int centrino_target (struct cpufreq_policy *policy,
 		 * Make sure we are running on CPU that wants to change freq
 		 */
 		if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
-			mask = policy->cpus;
+			good_cpu = cpumask_any_and(policy->cpus,
+						   cpu_online_mask);
 		else
-			mask = cpumask_of(j);
+			good_cpu = j;
 
-		set_cpus_allowed_ptr(current, mask);
-		preempt_disable();
-		if (unlikely(!cpu_isset(smp_processor_id(), *mask))) {
+		if (good_cpu >= nr_cpu_ids) {
 			dprintk("couldn't limit to CPUs in this domain\n");
 			retval = -EAGAIN;
 			if (first_cpu) {
 				/* We haven't started the transition yet. */
-				goto migrate_end;
+				goto out;
 			}
-			preempt_enable();
 			break;
 		}
 
 		msr = per_cpu(centrino_model, cpu)->op_points[newstate].index;
 
 		if (first_cpu) {
-			rdmsr(MSR_IA32_PERF_CTL, oldmsr, h);
+			rdmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, &oldmsr, &h);
 			if (msr == (oldmsr & 0xffff)) {
 				dprintk("no change needed - msr was and needs "
 					"to be %x\n", oldmsr);
 				retval = 0;
-				goto migrate_end;
+				goto out;
 			}
 
 			freqs.old = extract_clock(oldmsr, cpu, 0);
@@ -553,14 +538,11 @@ static int centrino_target (struct cpufreq_policy *policy,
 			oldmsr |= msr;
 		}
 
-		wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
-		if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
-			preempt_enable();
+		wrmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, oldmsr, h);
+		if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
 			break;
-		}
 
-		cpu_set(j, *covered_cpus);
-		preempt_enable();
+		cpumask_set_cpu(j, covered_cpus);
 	}
 
 	for_each_cpu(k, policy->cpus) {
@@ -578,10 +560,8 @@ static int centrino_target (struct cpufreq_policy *policy,
 		 * Best effort undo..
 		 */
 
-		for_each_cpu_mask_nr(j, *covered_cpus) {
-			set_cpus_allowed_ptr(current, &cpumask_of_cpu(j));
-			wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
-		}
+		for_each_cpu(j, covered_cpus)
+			wrmsr_on_cpu(j, MSR_IA32_PERF_CTL, oldmsr, h);
 
 		tmp = freqs.new;
 		freqs.new = freqs.old;
@@ -593,15 +573,9 @@ static int centrino_target (struct cpufreq_policy *policy,
 			cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 		}
 	}
-	set_cpus_allowed_ptr(current, saved_mask);
 	retval = 0;
-	goto out;
 
-migrate_end:
-	preempt_enable();
-	set_cpus_allowed_ptr(current, saved_mask);
 out:
-	free_cpumask_var(saved_mask);
 	free_cpumask_var(covered_cpus);
 	return retval;
 }
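
Where only a single MSR is read or written, the conversion avoids even the IPI boilerplate: rdmsr_on_cpu()/wrmsr_on_cpu() wrap the cross-CPU call internally. A sketch of the shape (poke_perf_ctl() and target are illustrative, not part of the diff):

	static void poke_perf_ctl(unsigned int cpu, u32 target)
	{
		u32 lo, hi;

		rdmsr_on_cpu(cpu, MSR_IA32_PERF_CTL, &lo, &hi);
		if ((lo & 0xffff) != target)
			wrmsr_on_cpu(cpu, MSR_IA32_PERF_CTL, target, hi);
	}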
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 016c1a4fa3fc..6911e91fb4f6 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -89,7 +89,8 @@ static int speedstep_find_register(void)
  * speedstep_set_state - set the SpeedStep state
  * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
  *
- * Tries to change the SpeedStep state.
+ * Tries to change the SpeedStep state. Can be called from
+ * smp_call_function_single.
  */
 static void speedstep_set_state(unsigned int state)
 {
@@ -143,6 +144,11 @@ static void speedstep_set_state(unsigned int state)
 	return;
 }
 
+/* Wrapper for smp_call_function_single. */
+static void _speedstep_set_state(void *_state)
+{
+	speedstep_set_state(*(unsigned int *)_state);
+}
 
 /**
  * speedstep_activate - activate SpeedStep control in the chipset
@@ -226,22 +232,28 @@ static unsigned int speedstep_detect_chipset(void)
 	return 0;
 }
 
-static unsigned int _speedstep_get(const struct cpumask *cpus)
-{
+struct get_freq_data {
 	unsigned int speed;
-	cpumask_t cpus_allowed;
-
-	cpus_allowed = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, cpus);
-	speed = speedstep_get_frequency(speedstep_processor);
-	set_cpus_allowed_ptr(current, &cpus_allowed);
-	dprintk("detected %u kHz as current frequency\n", speed);
-	return speed;
+	unsigned int processor;
+};
+
+static void get_freq_data(void *_data)
+{
+	struct get_freq_data *data = _data;
+
+	data->speed = speedstep_get_frequency(data->processor);
 }
 
 static unsigned int speedstep_get(unsigned int cpu)
 {
-	return _speedstep_get(cpumask_of(cpu));
+	struct get_freq_data data = { .processor = cpu };
+
+	/* You're supposed to ensure CPU is online. */
+	if (smp_call_function_single(cpu, get_freq_data, &data, 1) != 0)
+		BUG();
+
+	dprintk("detected %u kHz as current frequency\n", data.speed);
+	return data.speed;
 }
 
 /**
@@ -257,16 +269,16 @@ static int speedstep_target(struct cpufreq_policy *policy,
 			  unsigned int target_freq,
 			  unsigned int relation)
 {
-	unsigned int newstate = 0;
+	unsigned int newstate = 0, policy_cpu;
 	struct cpufreq_freqs freqs;
-	cpumask_t cpus_allowed;
 	int i;
 
 	if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0],
 				target_freq, relation, &newstate))
 		return -EINVAL;
 
-	freqs.old = _speedstep_get(policy->cpus);
+	policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask);
+	freqs.old = speedstep_get(policy_cpu);
 	freqs.new = speedstep_freqs[newstate].frequency;
 	freqs.cpu = policy->cpu;
 
@@ -276,20 +288,13 @@ static int speedstep_target(struct cpufreq_policy *policy,
 	if (freqs.old == freqs.new)
 		return 0;
 
-	cpus_allowed = current->cpus_allowed;
-
 	for_each_cpu(i, policy->cpus) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 	}
 
-	/* switch to physical CPU where state is to be changed */
-	set_cpus_allowed_ptr(current, policy->cpus);
-
-	speedstep_set_state(newstate);
-
-	/* allow to be run on all CPUs */
-	set_cpus_allowed_ptr(current, &cpus_allowed);
+	smp_call_function_single(policy_cpu, _speedstep_set_state, &newstate,
+				 true);
 
 	for_each_cpu(i, policy->cpus) {
 		freqs.cpu = i;
@@ -312,33 +317,43 @@ static int speedstep_verify(struct cpufreq_policy *policy)
 	return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
 }
 
+struct get_freqs {
+	struct cpufreq_policy *policy;
+	int ret;
+};
+
+static void get_freqs_on_cpu(void *_get_freqs)
+{
+	struct get_freqs *get_freqs = _get_freqs;
+
+	get_freqs->ret =
+		speedstep_get_freqs(speedstep_processor,
+			    &speedstep_freqs[SPEEDSTEP_LOW].frequency,
+			    &speedstep_freqs[SPEEDSTEP_HIGH].frequency,
+			    &get_freqs->policy->cpuinfo.transition_latency,
+			    &speedstep_set_state);
+}
 
 static int speedstep_cpu_init(struct cpufreq_policy *policy)
 {
-	int result = 0;
-	unsigned int speed;
-	cpumask_t cpus_allowed;
+	int result;
+	unsigned int policy_cpu, speed;
+	struct get_freqs gf;
 
 	/* only run on CPU to be set, or on its sibling */
 #ifdef CONFIG_SMP
 	cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu));
 #endif
-
-	cpus_allowed = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, policy->cpus);
+	policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask);
 
 	/* detect low and high frequency and transition latency */
-	result = speedstep_get_freqs(speedstep_processor,
-				     &speedstep_freqs[SPEEDSTEP_LOW].frequency,
-				     &speedstep_freqs[SPEEDSTEP_HIGH].frequency,
-				     &policy->cpuinfo.transition_latency,
-				     &speedstep_set_state);
-	set_cpus_allowed_ptr(current, &cpus_allowed);
-	if (result)
-		return result;
+	gf.policy = policy;
+	smp_call_function_single(policy_cpu, get_freqs_on_cpu, &gf, 1);
+	if (gf.ret)
+		return gf.ret;
 
 	/* get current speed setting */
-	speed = _speedstep_get(policy->cpus);
+	speed = speedstep_get(policy_cpu);
 	if (!speed)
 		return -EIO;
 
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
index 2e3c6862657b..f4c290b8482f 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
@@ -226,6 +226,7 @@ static unsigned int pentium4_get_frequency(void)
 }
 
 
+/* Warning: may get called from smp_call_function_single. */
 unsigned int speedstep_get_frequency(unsigned int processor)
 {
 	switch (processor) {
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index daed39ba2614..3260ab044996 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -86,6 +86,29 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 	 */
 	if (c->x86 == 6 && c->x86_model < 15)
 		clear_cpu_cap(c, X86_FEATURE_PAT);
+
+#ifdef CONFIG_KMEMCHECK
+	/*
+	 * P4s have a "fast strings" feature which causes single-
+	 * stepping REP instructions to only generate a #DB on
+	 * cache-line boundaries.
+	 *
+	 * Ingo Molnar reported a Pentium D (model 6) and a Xeon
+	 * (model 2) with the same problem.
+	 */
+	if (c->x86 == 15) {
+		u64 misc_enable;
+
+		rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
+
+		if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) {
+			printk(KERN_INFO "kmemcheck: Disabling fast string operations\n");
+
+			misc_enable &= ~MSR_IA32_MISC_ENABLE_FAST_STRING;
+			wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
+		}
+	}
+#endif
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 2ac1f0c2beb3..b07af8861244 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -182,6 +182,11 @@ static struct notifier_block __refdata cpuid_class_cpu_notifier =
 	.notifier_call = cpuid_class_cpu_callback,
 };
 
+static char *cpuid_nodename(struct device *dev)
+{
+	return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt));
+}
+
 static int __init cpuid_init(void)
 {
 	int i, err = 0;
@@ -198,6 +203,7 @@ static int __init cpuid_init(void)
 		err = PTR_ERR(cpuid_class);
 		goto out_chrdev;
 	}
+	cpuid_class->nodename = cpuid_nodename;
 	for_each_online_cpu(i) {
 		err = cpuid_device_create(i);
 		if (err != 0)
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index c2e0bb0890d4..5cf36c053ac4 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -7,6 +7,7 @@
 #include <linux/spinlock.h>
 #include <linux/jiffies.h>
 #include <linux/module.h>
+#include <linux/timex.h>
 #include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/io.h>
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index df3bf269beab..270ff83efc11 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -12,7 +12,6 @@
 
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
 
 /*
  * Initial thread structure.
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 9c4461501fcb..9371448290ac 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -236,6 +236,7 @@ static const struct file_operations microcode_fops = {
 static struct miscdevice microcode_dev = {
 	.minor		= MICROCODE_MINOR,
 	.name		= "microcode",
+	.devnode	= "cpu/microcode",
 	.fops		= &microcode_fops,
 };
 
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 3cf3413ec626..98fd6cd4e3a4 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -196,6 +196,11 @@ static struct notifier_block __refdata msr_class_cpu_notifier = {
 	.notifier_call = msr_class_cpu_callback,
 };
 
+static char *msr_nodename(struct device *dev)
+{
+	return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt));
+}
+
 static int __init msr_init(void)
 {
 	int i, err = 0;
@@ -212,6 +217,7 @@ static int __init msr_init(void)
 		err = PTR_ERR(msr_class);
 		goto out_chrdev;
 	}
+	msr_class->nodename = msr_nodename;
 	for_each_online_cpu(i) {
 		err = msr_device_create(i);
 		if (err != 0)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 3bb2be1649bd..994dd6a4a2a0 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -63,7 +63,7 @@ void arch_task_cache_init(void)
 	task_xstate_cachep =
 		kmem_cache_create("task_xstate", xstate_size,
 				  __alignof__(union thread_xstate),
-				  SLAB_PANIC, NULL);
+				  SLAB_PANIC | SLAB_NOTRACK, NULL);
 }
 
 /*
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 4aaf7e48394f..c3eb207181fe 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -77,6 +77,13 @@ void save_stack_trace(struct stack_trace *trace)
 }
 EXPORT_SYMBOL_GPL(save_stack_trace);
 
+void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp)
+{
+	dump_trace(current, NULL, NULL, bp, &save_stack_ops, trace);
+	if (trace->nr_entries < trace->max_entries)
+		trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+
 void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
 	dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace);
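
save_stack_trace_bp() lets a caller capture a backtrace starting from a saved frame pointer instead of the current one; kmemcheck's error collector uses it to record the context of the trapping instruction. Typical use, sketched (record_trace() and the entries[] sizing are illustrative):

	static void record_trace(struct pt_regs *regs)
	{
		static unsigned long entries[32];
		struct stack_trace trace = {
			.entries	= entries,
			.max_entries	= ARRAY_SIZE(entries),
		};

		/* Walk frames from the trapping context, not from here. */
		save_stack_trace_bp(&trace, regs->bp);
	}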
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 1e1e27b7d438..5f935f0d5861 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -45,6 +45,7 @@
 #include <linux/edac.h>
 #endif
 
+#include <asm/kmemcheck.h>
 #include <asm/stacktrace.h>
 #include <asm/processor.h>
 #include <asm/debugreg.h>
@@ -534,6 +535,10 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 
 	get_debugreg(condition, 6);
 
+	/* Catch kmemcheck conditions first of all! */
+	if (condition & DR_STEP && kmemcheck_trap(regs))
+		return;
+
 	/*
 	 * The processor cleared BTF, so don't mark that we need it set.
 	 */
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 3e1c057e98fe..b0597ad02c93 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -9,6 +9,7 @@
 #include <linux/delay.h>
 #include <linux/clocksource.h>
 #include <linux/percpu.h>
+#include <linux/timex.h>
 
 #include <asm/hpet.h>
 #include <asm/timer.h>
@@ -631,17 +632,15 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 			void *data)
 {
 	struct cpufreq_freqs *freq = data;
-	unsigned long *lpj, dummy;
+	unsigned long *lpj;
 
 	if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC))
 		return 0;
 
-	lpj = &dummy;
-	if (!(freq->flags & CPUFREQ_CONST_LOOPS))
+	lpj = &boot_cpu_data.loops_per_jiffy;
 #ifdef CONFIG_SMP
+	if (!(freq->flags & CPUFREQ_CONST_LOOPS))
 		lpj = &cpu_data(freq->cpu).loops_per_jiffy;
-#else
-	lpj = &boot_cpu_data.loops_per_jiffy;
 #endif
 
 	if (!ref_freq) {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 32d6ae8fb60e..e770bf349ec4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1277,7 +1277,7 @@ static struct vmcs *alloc_vmcs_cpu(int cpu)
 	struct page *pages;
 	struct vmcs *vmcs;
 
-	pages = alloc_pages_node(node, GFP_KERNEL, vmcs_config.order);
+	pages = alloc_pages_exact_node(node, GFP_KERNEL, vmcs_config.order);
 	if (!pages)
 		return NULL;
 	vmcs = page_address(pages);
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index fdd30d08ab52..eefdeee8a871 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -10,6 +10,8 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
 
 obj-$(CONFIG_HIGHMEM)		+= highmem_32.o
 
+obj-$(CONFIG_KMEMCHECK)		+= kmemcheck/
+
 obj-$(CONFIG_MMIOTRACE)		+= mmiotrace.o
 mmiotrace-y			:= kmmio.o pf_in.o mmio-mod.o
 obj-$(CONFIG_MMIOTRACE_TEST)	+= testmmiotrace.o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index c6acc6326374..baa0e86adfbc 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -14,6 +14,7 @@
14 14
15#include <asm/traps.h> /* dotraplinkage, ... */ 15#include <asm/traps.h> /* dotraplinkage, ... */
16#include <asm/pgalloc.h> /* pgd_*(), ... */ 16#include <asm/pgalloc.h> /* pgd_*(), ... */
17#include <asm/kmemcheck.h> /* kmemcheck_*(), ... */
17 18
18/* 19/*
19 * Page fault error code bits: 20 * Page fault error code bits:
@@ -956,6 +957,13 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
956 /* Get the faulting address: */ 957 /* Get the faulting address: */
957 address = read_cr2(); 958 address = read_cr2();
958 959
960 /*
961 * Detect and handle instructions that would cause a page fault for
962 * both a tracked kernel page and a userspace page.
963 */
964 if (kmemcheck_active(regs))
965 kmemcheck_hide(regs);
966
959 if (unlikely(kmmio_fault(regs, address))) 967 if (unlikely(kmmio_fault(regs, address)))
960 return; 968 return;
961 969
@@ -973,9 +981,13 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
973 * protection error (error_code & 9) == 0. 981 * protection error (error_code & 9) == 0.
974 */ 982 */
975 if (unlikely(fault_in_kernel_space(address))) { 983 if (unlikely(fault_in_kernel_space(address))) {
976 if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && 984 if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) {
977 vmalloc_fault(address) >= 0) 985 if (vmalloc_fault(address) >= 0)
978 return; 986 return;
987
988 if (kmemcheck_fault(regs, address, error_code))
989 return;
990 }
979 991
980 /* Can handle a stale RO->RW TLB: */ 992 /* Can handle a stale RO->RW TLB: */
981 if (spurious_fault(error_code, address)) 993 if (spurious_fault(error_code, address))
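The fault.c hunk above wires kmemcheck into the #PF path: a fault on a hidden page is answered by revealing the page and single-stepping the faulting instruction, and the #DB trap that follows hides the page again (see kmemcheck_show()/kmemcheck_hide() in kmemcheck.c further down). A minimal user-space model of that handshake, with illustrative names only; trap_flag stands in for X86_EFLAGS_TF:

/*
 * Sketch of the #PF/#DB handshake; all names are illustrative,
 * not taken from the kernel sources.
 */
#include <stdio.h>

enum page_state { HIDDEN, SHOWN };

static enum page_state state = HIDDEN;
static int trap_flag;		/* models regs->flags & X86_EFLAGS_TF */

/* #PF on a tracked page: reveal it and request a single step. */
static void on_page_fault(void)
{
	state = SHOWN;
	trap_flag = 1;
	printf("#PF: page shown, TF set\n");
}

/* #DB after the instruction ran: hide the page again. */
static void on_debug_trap(void)
{
	state = HIDDEN;
	trap_flag = 0;
	printf("#DB: page hidden, TF cleared\n");
}

int main(void)
{
	on_page_fault();	/* instruction touched a hidden page */
	on_debug_trap();	/* instruction completed under TF    */
	return (state == HIDDEN && !trap_flag) ? 0 : 1;
}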
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 34c1bfb64f1c..f53b57e4086f 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -213,7 +213,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
213 if (!after_bootmem) 213 if (!after_bootmem)
214 init_gbpages(); 214 init_gbpages();
215 215
216#ifdef CONFIG_DEBUG_PAGEALLOC 216#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
217 /* 217 /*
218 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. 218 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
219 * This will simplify cpa(), which otherwise needs to support splitting 219 * This will simplify cpa(), which otherwise needs to support splitting
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 9ff3c0816d15..3cd7711bb949 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -111,7 +111,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
111 pte_t *page_table = NULL; 111 pte_t *page_table = NULL;
112 112
113 if (after_bootmem) { 113 if (after_bootmem) {
114#ifdef CONFIG_DEBUG_PAGEALLOC 114#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
115 page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); 115 page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
116#endif 116#endif
117 if (!page_table) 117 if (!page_table)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 52bb9519bb86..9c543290a813 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -104,7 +104,7 @@ static __ref void *spp_getpage(void)
104 void *ptr; 104 void *ptr;
105 105
106 if (after_bootmem) 106 if (after_bootmem)
107 ptr = (void *) get_zeroed_page(GFP_ATOMIC); 107 ptr = (void *) get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK);
108 else 108 else
109 ptr = alloc_bootmem_pages(PAGE_SIZE); 109 ptr = alloc_bootmem_pages(PAGE_SIZE);
110 110
@@ -281,7 +281,7 @@ static __ref void *alloc_low_page(unsigned long *phys)
281 void *adr; 281 void *adr;
282 282
283 if (after_bootmem) { 283 if (after_bootmem) {
284 adr = (void *)get_zeroed_page(GFP_ATOMIC); 284 adr = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK);
285 *phys = __pa(adr); 285 *phys = __pa(adr);
286 286
287 return adr; 287 return adr;
diff --git a/arch/x86/mm/kmemcheck/Makefile b/arch/x86/mm/kmemcheck/Makefile
new file mode 100644
index 000000000000..520b3bce4095
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/Makefile
@@ -0,0 +1 @@
1obj-y := error.o kmemcheck.o opcode.o pte.o selftest.o shadow.o
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c
new file mode 100644
index 000000000000..4901d0dafda6
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/error.c
@@ -0,0 +1,228 @@
1#include <linux/interrupt.h>
2#include <linux/kdebug.h>
3#include <linux/kmemcheck.h>
4#include <linux/kernel.h>
5#include <linux/types.h>
6#include <linux/ptrace.h>
7#include <linux/stacktrace.h>
8#include <linux/string.h>
9
10#include "error.h"
11#include "shadow.h"
12
13enum kmemcheck_error_type {
14 KMEMCHECK_ERROR_INVALID_ACCESS,
15 KMEMCHECK_ERROR_BUG,
16};
17
18#define SHADOW_COPY_SIZE (1 << CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT)
19
20struct kmemcheck_error {
21 enum kmemcheck_error_type type;
22
23 union {
24 /* KMEMCHECK_ERROR_INVALID_ACCESS */
25 struct {
26 /* Kind of access that caused the error */
27 enum kmemcheck_shadow state;
28 /* Address and size of the erroneous read */
29 unsigned long address;
30 unsigned int size;
31 };
32 };
33
34 struct pt_regs regs;
35 struct stack_trace trace;
36 unsigned long trace_entries[32];
37
38 /* We compress it to a char. */
39 unsigned char shadow_copy[SHADOW_COPY_SIZE];
40 unsigned char memory_copy[SHADOW_COPY_SIZE];
41};
42
43/*
44 * Create a ring queue of errors to output. We can't call printk() directly
45 * from the kmemcheck traps, since this may call the console drivers and
46 * result in a recursive fault.
47 */
48static struct kmemcheck_error error_fifo[CONFIG_KMEMCHECK_QUEUE_SIZE];
49static unsigned int error_count;
50static unsigned int error_rd;
51static unsigned int error_wr;
52static unsigned int error_missed_count;
53
54static struct kmemcheck_error *error_next_wr(void)
55{
56 struct kmemcheck_error *e;
57
58 if (error_count == ARRAY_SIZE(error_fifo)) {
59 ++error_missed_count;
60 return NULL;
61 }
62
63 e = &error_fifo[error_wr];
64 if (++error_wr == ARRAY_SIZE(error_fifo))
65 error_wr = 0;
66 ++error_count;
67 return e;
68}
69
70static struct kmemcheck_error *error_next_rd(void)
71{
72 struct kmemcheck_error *e;
73
74 if (error_count == 0)
75 return NULL;
76
77 e = &error_fifo[error_rd];
78 if (++error_rd == ARRAY_SIZE(error_fifo))
79 error_rd = 0;
80 --error_count;
81 return e;
82}
83
84void kmemcheck_error_recall(void)
85{
86 static const char *desc[] = {
87 [KMEMCHECK_SHADOW_UNALLOCATED] = "unallocated",
88 [KMEMCHECK_SHADOW_UNINITIALIZED] = "uninitialized",
89 [KMEMCHECK_SHADOW_INITIALIZED] = "initialized",
90 [KMEMCHECK_SHADOW_FREED] = "freed",
91 };
92
93 static const char short_desc[] = {
94 [KMEMCHECK_SHADOW_UNALLOCATED] = 'a',
95 [KMEMCHECK_SHADOW_UNINITIALIZED] = 'u',
96 [KMEMCHECK_SHADOW_INITIALIZED] = 'i',
97 [KMEMCHECK_SHADOW_FREED] = 'f',
98 };
99
100 struct kmemcheck_error *e;
101 unsigned int i;
102
103 e = error_next_rd();
104 if (!e)
105 return;
106
107 switch (e->type) {
108 case KMEMCHECK_ERROR_INVALID_ACCESS:
109 printk(KERN_ERR "WARNING: kmemcheck: Caught %d-bit read "
110 "from %s memory (%p)\n",
111 8 * e->size, e->state < ARRAY_SIZE(desc) ?
112 desc[e->state] : "(invalid shadow state)",
113 (void *) e->address);
114
115 printk(KERN_INFO);
116 for (i = 0; i < SHADOW_COPY_SIZE; ++i)
117 printk("%02x", e->memory_copy[i]);
118 printk("\n");
119
120 printk(KERN_INFO);
121 for (i = 0; i < SHADOW_COPY_SIZE; ++i) {
122 if (e->shadow_copy[i] < ARRAY_SIZE(short_desc))
123 printk(" %c", short_desc[e->shadow_copy[i]]);
124 else
125 printk(" ?");
126 }
127 printk("\n");
128 printk(KERN_INFO "%*c\n", 2 + 2
129 * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^');
130 break;
131 case KMEMCHECK_ERROR_BUG:
132 printk(KERN_EMERG "ERROR: kmemcheck: Fatal error\n");
133 break;
134 }
135
136 __show_regs(&e->regs, 1);
137 print_stack_trace(&e->trace, 0);
138}
139
140static void do_wakeup(unsigned long data)
141{
142 while (error_count > 0)
143 kmemcheck_error_recall();
144
145 if (error_missed_count > 0) {
146 printk(KERN_WARNING "kmemcheck: Lost %d error reports because "
147 "the queue was too small\n", error_missed_count);
148 error_missed_count = 0;
149 }
150}
151
152static DECLARE_TASKLET(kmemcheck_tasklet, &do_wakeup, 0);
153
154/*
155 * Save the context of an error report.
156 */
157void kmemcheck_error_save(enum kmemcheck_shadow state,
158 unsigned long address, unsigned int size, struct pt_regs *regs)
159{
160 static unsigned long prev_ip;
161
162 struct kmemcheck_error *e;
163 void *shadow_copy;
164 void *memory_copy;
165
166 /* Don't report several adjacent errors from the same EIP. */
167 if (regs->ip == prev_ip)
168 return;
169 prev_ip = regs->ip;
170
171 e = error_next_wr();
172 if (!e)
173 return;
174
175 e->type = KMEMCHECK_ERROR_INVALID_ACCESS;
176
177 e->state = state;
178 e->address = address;
179 e->size = size;
180
181 /* Save regs */
182 memcpy(&e->regs, regs, sizeof(*regs));
183
184 /* Save stack trace */
185 e->trace.nr_entries = 0;
186 e->trace.entries = e->trace_entries;
187 e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
188 e->trace.skip = 0;
189 save_stack_trace_bp(&e->trace, regs->bp);
190
191 /* Round address down to nearest 16 bytes */
192 shadow_copy = kmemcheck_shadow_lookup(address
193 & ~(SHADOW_COPY_SIZE - 1));
194 BUG_ON(!shadow_copy);
195
196 memcpy(e->shadow_copy, shadow_copy, SHADOW_COPY_SIZE);
197
198 kmemcheck_show_addr(address);
199 memory_copy = (void *) (address & ~(SHADOW_COPY_SIZE - 1));
200 memcpy(e->memory_copy, memory_copy, SHADOW_COPY_SIZE);
201 kmemcheck_hide_addr(address);
202
203 tasklet_hi_schedule_first(&kmemcheck_tasklet);
204}
205
206/*
207 * Save the context of a kmemcheck bug.
208 */
209void kmemcheck_error_save_bug(struct pt_regs *regs)
210{
211 struct kmemcheck_error *e;
212
213 e = error_next_wr();
214 if (!e)
215 return;
216
217 e->type = KMEMCHECK_ERROR_BUG;
218
219 memcpy(&e->regs, regs, sizeof(*regs));
220
221 e->trace.nr_entries = 0;
222 e->trace.entries = e->trace_entries;
223 e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
224 e->trace.skip = 1;
225 save_stack_trace(&e->trace);
226
227 tasklet_hi_schedule_first(&kmemcheck_tasklet);
228}
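The comment above error_fifo explains the key constraint in error.c: printk() must not run from the trap context, since the console drivers may fault recursively, so reports are queued and drained later from a tasklet. The queue discipline is a plain fixed-size ring that drops on overflow rather than blocking; a user-space model under those assumptions, with all names illustrative:

/*
 * Minimal model of the error FIFO above: the producer (trap path)
 * never blocks and counts drops; a deferred consumer drains.
 */
#include <stdio.h>

#define QUEUE_SIZE 64

struct error_record { unsigned long address; };

static struct error_record fifo[QUEUE_SIZE];
static unsigned int count, rd, wr, missed;

static struct error_record *next_wr(void)
{
	struct error_record *e;

	if (count == QUEUE_SIZE) {
		++missed;	/* drop; never block in the trap path */
		return NULL;
	}
	e = &fifo[wr];
	if (++wr == QUEUE_SIZE)
		wr = 0;
	++count;
	return e;
}

static struct error_record *next_rd(void)
{
	struct error_record *e;

	if (count == 0)
		return NULL;
	e = &fifo[rd];
	if (++rd == QUEUE_SIZE)
		rd = 0;
	--count;
	return e;
}

int main(void)
{
	struct error_record *e;
	unsigned long i;

	for (i = 0; i < 70; ++i) {	/* overflow on purpose */
		e = next_wr();
		if (e)
			e->address = i;
	}
	while ((e = next_rd()))
		printf("error at %#lx\n", e->address);
	printf("lost %u reports\n", missed);
	return 0;
}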
diff --git a/arch/x86/mm/kmemcheck/error.h b/arch/x86/mm/kmemcheck/error.h
new file mode 100644
index 000000000000..0efc2e8d0a20
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/error.h
@@ -0,0 +1,15 @@
1#ifndef ARCH__X86__MM__KMEMCHECK__ERROR_H
2#define ARCH__X86__MM__KMEMCHECK__ERROR_H
3
4#include <linux/ptrace.h>
5
6#include "shadow.h"
7
8void kmemcheck_error_save(enum kmemcheck_shadow state,
9 unsigned long address, unsigned int size, struct pt_regs *regs);
10
11void kmemcheck_error_save_bug(struct pt_regs *regs);
12
13void kmemcheck_error_recall(void);
14
15#endif
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
new file mode 100644
index 000000000000..2c55ed098654
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -0,0 +1,640 @@
1/**
2 * kmemcheck - a heavyweight memory checker for the linux kernel
3 * Copyright (C) 2007, 2008 Vegard Nossum <vegardno@ifi.uio.no>
4 * (With a lot of help from Ingo Molnar and Pekka Enberg.)
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2) as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/init.h>
12#include <linux/interrupt.h>
13#include <linux/kallsyms.h>
14#include <linux/kernel.h>
15#include <linux/kmemcheck.h>
16#include <linux/mm.h>
17#include <linux/module.h>
18#include <linux/page-flags.h>
19#include <linux/percpu.h>
20#include <linux/ptrace.h>
21#include <linux/string.h>
22#include <linux/types.h>
23
24#include <asm/cacheflush.h>
25#include <asm/kmemcheck.h>
26#include <asm/pgtable.h>
27#include <asm/tlbflush.h>
28
29#include "error.h"
30#include "opcode.h"
31#include "pte.h"
32#include "selftest.h"
33#include "shadow.h"
34
35
36#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT
37# define KMEMCHECK_ENABLED 0
38#endif
39
40#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT
41# define KMEMCHECK_ENABLED 1
42#endif
43
44#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT
45# define KMEMCHECK_ENABLED 2
46#endif
47
48int kmemcheck_enabled = KMEMCHECK_ENABLED;
49
50int __init kmemcheck_init(void)
51{
52#ifdef CONFIG_SMP
53 /*
54 * Limit SMP to use a single CPU. We rely on the fact that this code
55 * runs before SMP is set up.
56 */
57 if (setup_max_cpus > 1) {
58 printk(KERN_INFO
59 "kmemcheck: Limiting number of CPUs to 1.\n");
60 setup_max_cpus = 1;
61 }
62#endif
63
64 if (!kmemcheck_selftest()) {
65 printk(KERN_INFO "kmemcheck: self-tests failed; disabling\n");
66 kmemcheck_enabled = 0;
67 return -EINVAL;
68 }
69
70 printk(KERN_INFO "kmemcheck: Initialized\n");
71 return 0;
72}
73
74early_initcall(kmemcheck_init);
75
76/*
77 * We need to parse the kmemcheck= option before any memory is allocated.
78 */
79static int __init param_kmemcheck(char *str)
80{
81 if (!str)
82 return -EINVAL;
83
84 sscanf(str, "%d", &kmemcheck_enabled);
85 return 0;
86}
87
88early_param("kmemcheck", param_kmemcheck);
89
90int kmemcheck_show_addr(unsigned long address)
91{
92 pte_t *pte;
93
94 pte = kmemcheck_pte_lookup(address);
95 if (!pte)
96 return 0;
97
98 set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
99 __flush_tlb_one(address);
100 return 1;
101}
102
103int kmemcheck_hide_addr(unsigned long address)
104{
105 pte_t *pte;
106
107 pte = kmemcheck_pte_lookup(address);
108 if (!pte)
109 return 0;
110
111 set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
112 __flush_tlb_one(address);
113 return 1;
114}
115
116struct kmemcheck_context {
117 bool busy;
118 int balance;
119
120 /*
121 * There can be at most two memory operands to an instruction, but
122 * each address can cross a page boundary -- so we may need up to
123 * four addresses that must be hidden/revealed for each fault.
124 */
125 unsigned long addr[4];
126 unsigned long n_addrs;
127 unsigned long flags;
128
129 /* Data size of the instruction that caused a fault. */
130 unsigned int size;
131};
132
133static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context);
134
135bool kmemcheck_active(struct pt_regs *regs)
136{
137 struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
138
139 return data->balance > 0;
140}
141
142/* Save an address that needs to be shown/hidden */
143static void kmemcheck_save_addr(unsigned long addr)
144{
145 struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
146
147 BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr));
148 data->addr[data->n_addrs++] = addr;
149}
150
151static unsigned int kmemcheck_show_all(void)
152{
153 struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
154 unsigned int i;
155 unsigned int n;
156
157 n = 0;
158 for (i = 0; i < data->n_addrs; ++i)
159 n += kmemcheck_show_addr(data->addr[i]);
160
161 return n;
162}
163
164static unsigned int kmemcheck_hide_all(void)
165{
166 struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
167 unsigned int i;
168 unsigned int n;
169
170 n = 0;
171 for (i = 0; i < data->n_addrs; ++i)
172 n += kmemcheck_hide_addr(data->addr[i]);
173
174 return n;
175}
176
177/*
178 * Called from the #PF handler.
179 */
180void kmemcheck_show(struct pt_regs *regs)
181{
182 struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
183
184 BUG_ON(!irqs_disabled());
185
186 if (unlikely(data->balance != 0)) {
187 kmemcheck_show_all();
188 kmemcheck_error_save_bug(regs);
189 data->balance = 0;
190 return;
191 }
192
193 /*
194 * None of the addresses actually belonged to kmemcheck. Note that
195 * this is not an error.
196 */
197 if (kmemcheck_show_all() == 0)
198 return;
199
200 ++data->balance;
201
202 /*
203 * The IF needs to be cleared as well, so that the faulting
204 * instruction can run "uninterrupted". Otherwise, we might take
205 * an interrupt and start executing that before we've had a chance
206 * to hide the page again.
207 *
208 * NOTE: In the rare case of multiple faults, we must not override
209 * the original flags:
210 */
211 if (!(regs->flags & X86_EFLAGS_TF))
212 data->flags = regs->flags;
213
214 regs->flags |= X86_EFLAGS_TF;
215 regs->flags &= ~X86_EFLAGS_IF;
216}
217
218/*
219 * Called from the #DB handler.
220 */
221void kmemcheck_hide(struct pt_regs *regs)
222{
223 struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
224 int n;
225
226 BUG_ON(!irqs_disabled());
227
228 if (data->balance == 0)
229 return;
230
231 if (unlikely(data->balance != 1)) {
232 kmemcheck_show_all();
233 kmemcheck_error_save_bug(regs);
234 data->n_addrs = 0;
235 data->balance = 0;
236
237 if (!(data->flags & X86_EFLAGS_TF))
238 regs->flags &= ~X86_EFLAGS_TF;
239 if (data->flags & X86_EFLAGS_IF)
240 regs->flags |= X86_EFLAGS_IF;
241 return;
242 }
243
244 if (kmemcheck_enabled)
245 n = kmemcheck_hide_all();
246 else
247 n = kmemcheck_show_all();
248
249 if (n == 0)
250 return;
251
252 --data->balance;
253
254 data->n_addrs = 0;
255
256 if (!(data->flags & X86_EFLAGS_TF))
257 regs->flags &= ~X86_EFLAGS_TF;
258 if (data->flags & X86_EFLAGS_IF)
259 regs->flags |= X86_EFLAGS_IF;
260}
261
262void kmemcheck_show_pages(struct page *p, unsigned int n)
263{
264 unsigned int i;
265
266 for (i = 0; i < n; ++i) {
267 unsigned long address;
268 pte_t *pte;
269 unsigned int level;
270
271 address = (unsigned long) page_address(&p[i]);
272 pte = lookup_address(address, &level);
273 BUG_ON(!pte);
274 BUG_ON(level != PG_LEVEL_4K);
275
276 set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
277 set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN));
278 __flush_tlb_one(address);
279 }
280}
281
282bool kmemcheck_page_is_tracked(struct page *p)
283{
284 /* This will also check the "hidden" flag of the PTE. */
285 return kmemcheck_pte_lookup((unsigned long) page_address(p));
286}
287
288void kmemcheck_hide_pages(struct page *p, unsigned int n)
289{
290 unsigned int i;
291
292 for (i = 0; i < n; ++i) {
293 unsigned long address;
294 pte_t *pte;
295 unsigned int level;
296
297 address = (unsigned long) page_address(&p[i]);
298 pte = lookup_address(address, &level);
299 BUG_ON(!pte);
300 BUG_ON(level != PG_LEVEL_4K);
301
302 set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
303 set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN));
304 __flush_tlb_one(address);
305 }
306}
307
308/* Access may NOT cross page boundary */
309static void kmemcheck_read_strict(struct pt_regs *regs,
310 unsigned long addr, unsigned int size)
311{
312 void *shadow;
313 enum kmemcheck_shadow status;
314
315 shadow = kmemcheck_shadow_lookup(addr);
316 if (!shadow)
317 return;
318
319 kmemcheck_save_addr(addr);
320 status = kmemcheck_shadow_test(shadow, size);
321 if (status == KMEMCHECK_SHADOW_INITIALIZED)
322 return;
323
324 if (kmemcheck_enabled)
325 kmemcheck_error_save(status, addr, size, regs);
326
327 if (kmemcheck_enabled == 2)
328 kmemcheck_enabled = 0;
329
330 /* Don't warn about it again. */
331 kmemcheck_shadow_set(shadow, size);
332}
333
334/* Access may cross page boundary */
335static void kmemcheck_read(struct pt_regs *regs,
336 unsigned long addr, unsigned int size)
337{
338 unsigned long page = addr & PAGE_MASK;
339 unsigned long next_addr = addr + size - 1;
340 unsigned long next_page = next_addr & PAGE_MASK;
341
342 if (likely(page == next_page)) {
343 kmemcheck_read_strict(regs, addr, size);
344 return;
345 }
346
347 /*
348 * What we do is basically to split the access across the
349 * two pages and handle each part separately. Yes, this means
350 * that we may now see reads that are 3 + 5 bytes, for
351 * example (and if both are uninitialized, there will be two
352 * reports), but it makes the code a lot simpler.
353 */
354 kmemcheck_read_strict(regs, addr, next_page - addr);
355 kmemcheck_read_strict(regs, next_page, next_addr - next_page);
356}
357
358static void kmemcheck_write_strict(struct pt_regs *regs,
359 unsigned long addr, unsigned int size)
360{
361 void *shadow;
362
363 shadow = kmemcheck_shadow_lookup(addr);
364 if (!shadow)
365 return;
366
367 kmemcheck_save_addr(addr);
368 kmemcheck_shadow_set(shadow, size);
369}
370
371static void kmemcheck_write(struct pt_regs *regs,
372 unsigned long addr, unsigned int size)
373{
374 unsigned long page = addr & PAGE_MASK;
375 unsigned long next_addr = addr + size - 1;
376 unsigned long next_page = next_addr & PAGE_MASK;
377
378 if (likely(page == next_page)) {
379 kmemcheck_write_strict(regs, addr, size);
380 return;
381 }
382
383 /* See comment in kmemcheck_read(). */
384 kmemcheck_write_strict(regs, addr, next_page - addr);
385 kmemcheck_write_strict(regs, next_page, next_addr - next_page);
386}
387
388/*
389 * Copying is hard. We have two addresses, each of which may be split across
390 * a page (and each page will have different shadow addresses).
391 */
392static void kmemcheck_copy(struct pt_regs *regs,
393 unsigned long src_addr, unsigned long dst_addr, unsigned int size)
394{
395 uint8_t shadow[8];
396 enum kmemcheck_shadow status;
397
398 unsigned long page;
399 unsigned long next_addr;
400 unsigned long next_page;
401
402 uint8_t *x;
403 unsigned int i;
404 unsigned int n;
405
406 BUG_ON(size > sizeof(shadow));
407
408 page = src_addr & PAGE_MASK;
409 next_addr = src_addr + size - 1;
410 next_page = next_addr & PAGE_MASK;
411
412 if (likely(page == next_page)) {
413 /* Same page */
414 x = kmemcheck_shadow_lookup(src_addr);
415 if (x) {
416 kmemcheck_save_addr(src_addr);
417 for (i = 0; i < size; ++i)
418 shadow[i] = x[i];
419 } else {
420 for (i = 0; i < size; ++i)
421 shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
422 }
423 } else {
424 n = next_page - src_addr;
425 BUG_ON(n > sizeof(shadow));
426
427 /* First page */
428 x = kmemcheck_shadow_lookup(src_addr);
429 if (x) {
430 kmemcheck_save_addr(src_addr);
431 for (i = 0; i < n; ++i)
432 shadow[i] = x[i];
433 } else {
434 /* Not tracked */
435 for (i = 0; i < n; ++i)
436 shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
437 }
438
439 /* Second page */
440 x = kmemcheck_shadow_lookup(next_page);
441 if (x) {
442 kmemcheck_save_addr(next_page);
443 for (i = n; i < size; ++i)
444 shadow[i] = x[i - n];
445 } else {
446 /* Not tracked */
447 for (i = n; i < size; ++i)
448 shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
449 }
450 }
451
452 page = dst_addr & PAGE_MASK;
453 next_addr = dst_addr + size - 1;
454 next_page = next_addr & PAGE_MASK;
455
456 if (likely(page == next_page)) {
457 /* Same page */
458 x = kmemcheck_shadow_lookup(dst_addr);
459 if (x) {
460 kmemcheck_save_addr(dst_addr);
461 for (i = 0; i < size; ++i) {
462 x[i] = shadow[i];
463 shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
464 }
465 }
466 } else {
467 n = next_page - dst_addr;
468 BUG_ON(n > sizeof(shadow));
469
470 /* First page */
471 x = kmemcheck_shadow_lookup(dst_addr);
472 if (x) {
473 kmemcheck_save_addr(dst_addr);
474 for (i = 0; i < n; ++i) {
475 x[i] = shadow[i];
476 shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
477 }
478 }
479
480 /* Second page */
481 x = kmemcheck_shadow_lookup(next_page);
482 if (x) {
483 kmemcheck_save_addr(next_page);
484 for (i = n; i < size; ++i) {
485 x[i - n] = shadow[i];
486 shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
487 }
488 }
489 }
490
491 status = kmemcheck_shadow_test(shadow, size);
492 if (status == KMEMCHECK_SHADOW_INITIALIZED)
493 return;
494
495 if (kmemcheck_enabled)
496 kmemcheck_error_save(status, src_addr, size, regs);
497
498 if (kmemcheck_enabled == 2)
499 kmemcheck_enabled = 0;
500}
501
502enum kmemcheck_method {
503 KMEMCHECK_READ,
504 KMEMCHECK_WRITE,
505};
506
507static void kmemcheck_access(struct pt_regs *regs,
508 unsigned long fallback_address, enum kmemcheck_method fallback_method)
509{
510 const uint8_t *insn;
511 const uint8_t *insn_primary;
512 unsigned int size;
513
514 struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
515
516 /* Recursive fault -- ouch. */
517 if (data->busy) {
518 kmemcheck_show_addr(fallback_address);
519 kmemcheck_error_save_bug(regs);
520 return;
521 }
522
523 data->busy = true;
524
525 insn = (const uint8_t *) regs->ip;
526 insn_primary = kmemcheck_opcode_get_primary(insn);
527
528 kmemcheck_opcode_decode(insn, &size);
529
530 switch (insn_primary[0]) {
531#ifdef CONFIG_KMEMCHECK_BITOPS_OK
532 /* AND, OR, XOR */
533 /*
534 * Unfortunately, these instructions have to be excluded from
535 * our regular checking since they access only some (and not
536 * all) bits. This clears out "bogus" bitfield-access warnings.
537 */
538 case 0x80:
539 case 0x81:
540 case 0x82:
541 case 0x83:
542 switch ((insn_primary[1] >> 3) & 7) {
543 /* OR */
544 case 1:
545 /* AND */
546 case 4:
547 /* XOR */
548 case 6:
549 kmemcheck_write(regs, fallback_address, size);
550 goto out;
551
552 /* ADD */
553 case 0:
554 /* ADC */
555 case 2:
556 /* SBB */
557 case 3:
558 /* SUB */
559 case 5:
560 /* CMP */
561 case 7:
562 break;
563 }
564 break;
565#endif
566
567 /* MOVS, MOVSB, MOVSW, MOVSD */
568 case 0xa4:
569 case 0xa5:
570 /*
571 * These instructions are special because they take two
572 * addresses, but we only get one page fault.
573 */
574 kmemcheck_copy(regs, regs->si, regs->di, size);
575 goto out;
576
577 /* CMPS, CMPSB, CMPSW, CMPSD */
578 case 0xa6:
579 case 0xa7:
580 kmemcheck_read(regs, regs->si, size);
581 kmemcheck_read(regs, regs->di, size);
582 goto out;
583 }
584
585 /*
586 * If the opcode isn't special in any way, we use the data from the
587 * page fault handler to determine the address and type of memory
588 * access.
589 */
590 switch (fallback_method) {
591 case KMEMCHECK_READ:
592 kmemcheck_read(regs, fallback_address, size);
593 goto out;
594 case KMEMCHECK_WRITE:
595 kmemcheck_write(regs, fallback_address, size);
596 goto out;
597 }
598
599out:
600 data->busy = false;
601}
602
603bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
604 unsigned long error_code)
605{
606 pte_t *pte;
607
608 /*
609 * XXX: Is it safe to assume that memory accesses from virtual 86
610 * mode or non-kernel code segments will _never_ access kernel
611 * memory (e.g. tracked pages)? For now, we need this to avoid
612 * invoking kmemcheck for PnP BIOS calls.
613 */
614 if (regs->flags & X86_VM_MASK)
615 return false;
616 if (regs->cs != __KERNEL_CS)
617 return false;
618
619 pte = kmemcheck_pte_lookup(address);
620 if (!pte)
621 return false;
622
623 if (error_code & 2)
624 kmemcheck_access(regs, address, KMEMCHECK_WRITE);
625 else
626 kmemcheck_access(regs, address, KMEMCHECK_READ);
627
628 kmemcheck_show(regs);
629 return true;
630}
631
632bool kmemcheck_trap(struct pt_regs *regs)
633{
634 if (!kmemcheck_active(regs))
635 return false;
636
637 /* We're done. */
638 kmemcheck_hide(regs);
639 return true;
640}
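kmemcheck_read() and kmemcheck_write() above split any access that straddles a page boundary into two per-page accesses, because each page has its own shadow. A user-space sketch of just that arithmetic; check_one() is a hypothetical stand-in for the *_strict() helpers, and the split sizes are derived from the inclusive last-byte address:

/*
 * Sketch of the page-splitting arithmetic used by the read/write
 * paths above (user-space model, illustrative names).
 */
#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

static void check_one(unsigned long addr, unsigned int size)
{
	printf("check %u byte(s) at %#lx\n", size, addr);
}

static void check_access(unsigned long addr, unsigned int size)
{
	unsigned long page = addr & PAGE_MASK;
	unsigned long next_page = (addr + size - 1) & PAGE_MASK;
	unsigned int first;

	if (page == next_page) {	/* common case: one page */
		check_one(addr, size);
		return;
	}
	first = next_page - addr;	/* bytes before the boundary */
	check_one(addr, first);
	check_one(next_page, size - first);
}

int main(void)
{
	check_access(0x1ffd, 8);	/* splits into 3 + 5 bytes */
	return 0;
}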
diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c
new file mode 100644
index 000000000000..63c19e27aa6f
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/opcode.c
@@ -0,0 +1,106 @@
1#include <linux/types.h>
2
3#include "opcode.h"
4
5static bool opcode_is_prefix(uint8_t b)
6{
7 return
8 /* Group 1 */
9 b == 0xf0 || b == 0xf2 || b == 0xf3
10 /* Group 2 */
11 || b == 0x2e || b == 0x36 || b == 0x3e || b == 0x26
12 || b == 0x64 || b == 0x65 || b == 0x2e || b == 0x3e
13 /* Group 3 */
14 || b == 0x66
15 /* Group 4 */
16 || b == 0x67;
17}
18
19#ifdef CONFIG_X86_64
20static bool opcode_is_rex_prefix(uint8_t b)
21{
22 return (b & 0xf0) == 0x40;
23}
24#else
25static bool opcode_is_rex_prefix(uint8_t b)
26{
27 return false;
28}
29#endif
30
31#define REX_W (1 << 3)
32
33/*
34 * This is a VERY crude opcode decoder. We only need to find the size of the
35 * load/store that caused our #PF and this should work for all the opcodes
36 * that we care about. Moreover, the ones who invented this instruction set
37 * should be shot.
38 */
39void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size)
40{
41 /* Default operand size */
42 int operand_size_override = 4;
43
44 /* prefixes */
45 for (; opcode_is_prefix(*op); ++op) {
46 if (*op == 0x66)
47 operand_size_override = 2;
48 }
49
50 /* REX prefix */
51 if (opcode_is_rex_prefix(*op)) {
52 uint8_t rex = *op;
53
54 ++op;
55 if (rex & REX_W) {
56 switch (*op) {
57 case 0x63:
58 *size = 4;
59 return;
60 case 0x0f:
61 ++op;
62
63 switch (*op) {
64 case 0xb6:
65 case 0xbe:
66 *size = 1;
67 return;
68 case 0xb7:
69 case 0xbf:
70 *size = 2;
71 return;
72 }
73
74 break;
75 }
76
77 *size = 8;
78 return;
79 }
80 }
81
82 /* escape opcode */
83 if (*op == 0x0f) {
84 ++op;
85
86 /*
87 * This is move with zero-extend and sign-extend, respectively;
88 * we don't have to think about 0xb6/0xbe, because this is
89 * already handled in the conditional below.
90 */
91 if (*op == 0xb7 || *op == 0xbf)
92 operand_size_override = 2;
93 }
94
95 *size = (*op & 1) ? operand_size_override : 1;
96}
97
98const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op)
99{
100 /* skip prefixes */
101 while (opcode_is_prefix(*op))
102 ++op;
103 if (opcode_is_rex_prefix(*op))
104 ++op;
105 return op;
106}
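For the common MOV-family opcodes, the decoder above reduces to one rule: a 0x66 prefix selects 16-bit operands, REX.W selects 64-bit (x86-64 only), a clear low opcode bit selects a byte operand, and everything else defaults to 32-bit. A condensed user-space restatement for illustration only; it deliberately ignores the 0x0f escape table and the REX.W sub-cases that opcode.c handles:

/*
 * Condensed operand-size rule (sketch, not the kernel decoder).
 */
#include <stdint.h>
#include <stdio.h>

static int is_prefix(uint8_t b)
{
	return b == 0xf0 || b == 0xf2 || b == 0xf3 ||	/* lock/rep  */
	       b == 0x2e || b == 0x36 || b == 0x3e ||	/* segments  */
	       b == 0x26 || b == 0x64 || b == 0x65 ||
	       b == 0x66 || b == 0x67;			/* overrides */
}

static unsigned int operand_size(const uint8_t *op)
{
	unsigned int size = 4;			/* default: 32-bit    */

	for (; is_prefix(*op); ++op)
		if (*op == 0x66)
			size = 2;		/* operand-size override */
	if ((*op & 0xf0) == 0x40 && (*op & 0x08))
		return 8;			/* REX.W, x86-64 only */
	return (*op & 1) ? size : 1;		/* even opcode: byte op */
}

int main(void)
{
	const uint8_t mov32[] = { 0x89, 0x48, 0xf8 };		/* mov %ecx,-8(%eax) */
	const uint8_t mov16[] = { 0x66, 0x89, 0x48, 0xf8 };	/* mov %cx,-8(%eax)  */
	const uint8_t mov8[]  = { 0x88, 0x48, 0xf8 };		/* mov %cl,-8(%eax)  */

	printf("%u %u %u\n", operand_size(mov32),
	       operand_size(mov16), operand_size(mov8));	/* 4 2 1 */
	return 0;
}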
diff --git a/arch/x86/mm/kmemcheck/opcode.h b/arch/x86/mm/kmemcheck/opcode.h
new file mode 100644
index 000000000000..6956aad66b5b
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/opcode.h
@@ -0,0 +1,9 @@
1#ifndef ARCH__X86__MM__KMEMCHECK__OPCODE_H
2#define ARCH__X86__MM__KMEMCHECK__OPCODE_H
3
4#include <linux/types.h>
5
6void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size);
7const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op);
8
9#endif
diff --git a/arch/x86/mm/kmemcheck/pte.c b/arch/x86/mm/kmemcheck/pte.c
new file mode 100644
index 000000000000..4ead26eeaf96
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/pte.c
@@ -0,0 +1,22 @@
1#include <linux/mm.h>
2
3#include <asm/pgtable.h>
4
5#include "pte.h"
6
7pte_t *kmemcheck_pte_lookup(unsigned long address)
8{
9 pte_t *pte;
10 unsigned int level;
11
12 pte = lookup_address(address, &level);
13 if (!pte)
14 return NULL;
15 if (level != PG_LEVEL_4K)
16 return NULL;
17 if (!pte_hidden(*pte))
18 return NULL;
19
20 return pte;
21}
22
diff --git a/arch/x86/mm/kmemcheck/pte.h b/arch/x86/mm/kmemcheck/pte.h
new file mode 100644
index 000000000000..9f5966456492
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/pte.h
@@ -0,0 +1,10 @@
1#ifndef ARCH__X86__MM__KMEMCHECK__PTE_H
2#define ARCH__X86__MM__KMEMCHECK__PTE_H
3
4#include <linux/mm.h>
5
6#include <asm/pgtable.h>
7
8pte_t *kmemcheck_pte_lookup(unsigned long address);
9
10#endif
diff --git a/arch/x86/mm/kmemcheck/selftest.c b/arch/x86/mm/kmemcheck/selftest.c
new file mode 100644
index 000000000000..036efbea8b28
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/selftest.c
@@ -0,0 +1,69 @@
1#include <linux/kernel.h>
2
3#include "opcode.h"
4#include "selftest.h"
5
6struct selftest_opcode {
7 unsigned int expected_size;
8 const uint8_t *insn;
9 const char *desc;
10};
11
12static const struct selftest_opcode selftest_opcodes[] = {
13 /* REP MOVS */
14 {1, "\xf3\xa4", "rep movsb <mem8>, <mem8>"},
15 {4, "\xf3\xa5", "rep movsl <mem32>, <mem32>"},
16
17 /* MOVZX / MOVZXD */
18 {1, "\x66\x0f\xb6\x51\xf8", "movzwq <mem8>, <reg16>"},
19 {1, "\x0f\xb6\x51\xf8", "movzwq <mem8>, <reg32>"},
20
21 /* MOVSX / MOVSXD */
22 {1, "\x66\x0f\xbe\x51\xf8", "movswq <mem8>, <reg16>"},
23 {1, "\x0f\xbe\x51\xf8", "movswq <mem8>, <reg32>"},
24
25#ifdef CONFIG_X86_64
26 /* MOVZX / MOVZXD */
27 {1, "\x49\x0f\xb6\x51\xf8", "movzbq <mem8>, <reg64>"},
28 {2, "\x49\x0f\xb7\x51\xf8", "movzbq <mem16>, <reg64>"},
29
30 /* MOVSX / MOVSXD */
31 {1, "\x49\x0f\xbe\x51\xf8", "movsbq <mem8>, <reg64>"},
32 {2, "\x49\x0f\xbf\x51\xf8", "movsbq <mem16>, <reg64>"},
33 {4, "\x49\x63\x51\xf8", "movslq <mem32>, <reg64>"},
34#endif
35};
36
37static bool selftest_opcode_one(const struct selftest_opcode *op)
38{
39 unsigned size;
40
41 kmemcheck_opcode_decode(op->insn, &size);
42
43 if (size == op->expected_size)
44 return true;
45
46 printk(KERN_WARNING "kmemcheck: opcode %s: expected size %d, got %d\n",
47 op->desc, op->expected_size, size);
48 return false;
49}
50
51static bool selftest_opcodes_all(void)
52{
53 bool pass = true;
54 unsigned int i;
55
56 for (i = 0; i < ARRAY_SIZE(selftest_opcodes); ++i)
57 pass = pass && selftest_opcode_one(&selftest_opcodes[i]);
58
59 return pass;
60}
61
62bool kmemcheck_selftest(void)
63{
64 bool pass = true;
65
66 pass = pass && selftest_opcodes_all();
67
68 return pass;
69}
diff --git a/arch/x86/mm/kmemcheck/selftest.h b/arch/x86/mm/kmemcheck/selftest.h
new file mode 100644
index 000000000000..8fed4fe11f95
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/selftest.h
@@ -0,0 +1,6 @@
1#ifndef ARCH_X86_MM_KMEMCHECK_SELFTEST_H
2#define ARCH_X86_MM_KMEMCHECK_SELFTEST_H
3
4bool kmemcheck_selftest(void);
5
6#endif
diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c
new file mode 100644
index 000000000000..e773b6bd0079
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/shadow.c
@@ -0,0 +1,162 @@
1#include <linux/kmemcheck.h>
2#include <linux/module.h>
3#include <linux/mm.h>
4#include <linux/module.h>
5
6#include <asm/page.h>
7#include <asm/pgtable.h>
8
9#include "pte.h"
10#include "shadow.h"
11
12/*
13 * Return the shadow address for the given address. Returns NULL if the
14 * address is not tracked.
15 *
16 * We need to be extremely careful not to follow any invalid pointers,
17 * because this function can be called for *any* possible address.
18 */
19void *kmemcheck_shadow_lookup(unsigned long address)
20{
21 pte_t *pte;
22 struct page *page;
23
24 if (!virt_addr_valid(address))
25 return NULL;
26
27 pte = kmemcheck_pte_lookup(address);
28 if (!pte)
29 return NULL;
30
31 page = virt_to_page(address);
32 if (!page->shadow)
33 return NULL;
34 return page->shadow + (address & (PAGE_SIZE - 1));
35}
36
37static void mark_shadow(void *address, unsigned int n,
38 enum kmemcheck_shadow status)
39{
40 unsigned long addr = (unsigned long) address;
41 unsigned long last_addr = addr + n - 1;
42 unsigned long page = addr & PAGE_MASK;
43 unsigned long last_page = last_addr & PAGE_MASK;
44 unsigned int first_n;
45 void *shadow;
46
47 /* If the memory range crosses a page boundary, stop there. */
48 if (page == last_page)
49 first_n = n;
50 else
51 first_n = page + PAGE_SIZE - addr;
52
53 shadow = kmemcheck_shadow_lookup(addr);
54 if (shadow)
55 memset(shadow, status, first_n);
56
57 addr += first_n;
58 n -= first_n;
59
60 /* Do full-page memset()s. */
61 while (n >= PAGE_SIZE) {
62 shadow = kmemcheck_shadow_lookup(addr);
63 if (shadow)
64 memset(shadow, status, PAGE_SIZE);
65
66 addr += PAGE_SIZE;
67 n -= PAGE_SIZE;
68 }
69
70 /* Do the remaining page, if any. */
71 if (n > 0) {
72 shadow = kmemcheck_shadow_lookup(addr);
73 if (shadow)
74 memset(shadow, status, n);
75 }
76}
77
78void kmemcheck_mark_unallocated(void *address, unsigned int n)
79{
80 mark_shadow(address, n, KMEMCHECK_SHADOW_UNALLOCATED);
81}
82
83void kmemcheck_mark_uninitialized(void *address, unsigned int n)
84{
85 mark_shadow(address, n, KMEMCHECK_SHADOW_UNINITIALIZED);
86}
87
88/*
89 * Fill the shadow memory of the given address such that the memory at that
90 * address is marked as being initialized.
91 */
92void kmemcheck_mark_initialized(void *address, unsigned int n)
93{
94 mark_shadow(address, n, KMEMCHECK_SHADOW_INITIALIZED);
95}
96EXPORT_SYMBOL_GPL(kmemcheck_mark_initialized);
97
98void kmemcheck_mark_freed(void *address, unsigned int n)
99{
100 mark_shadow(address, n, KMEMCHECK_SHADOW_FREED);
101}
102
103void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n)
104{
105 unsigned int i;
106
107 for (i = 0; i < n; ++i)
108 kmemcheck_mark_unallocated(page_address(&p[i]), PAGE_SIZE);
109}
110
111void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n)
112{
113 unsigned int i;
114
115 for (i = 0; i < n; ++i)
116 kmemcheck_mark_uninitialized(page_address(&p[i]), PAGE_SIZE);
117}
118
119void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n)
120{
121 unsigned int i;
122
123 for (i = 0; i < n; ++i)
124 kmemcheck_mark_initialized(page_address(&p[i]), PAGE_SIZE);
125}
126
127enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size)
128{
129 uint8_t *x;
130 unsigned int i;
131
132 x = shadow;
133
134#ifdef CONFIG_KMEMCHECK_PARTIAL_OK
135 /*
136 * Make sure _some_ bytes are initialized. Gcc frequently generates
137 * code to access neighboring bytes.
138 */
139 for (i = 0; i < size; ++i) {
140 if (x[i] == KMEMCHECK_SHADOW_INITIALIZED)
141 return x[i];
142 }
143#else
144 /* All bytes must be initialized. */
145 for (i = 0; i < size; ++i) {
146 if (x[i] != KMEMCHECK_SHADOW_INITIALIZED)
147 return x[i];
148 }
149#endif
150
151 return x[0];
152}
153
154void kmemcheck_shadow_set(void *shadow, unsigned int size)
155{
156 uint8_t *x;
157 unsigned int i;
158
159 x = shadow;
160 for (i = 0; i < size; ++i)
161 x[i] = KMEMCHECK_SHADOW_INITIALIZED;
162}
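mark_shadow() above cannot memset() across page boundaries, since each page's shadow lives behind its own page->shadow pointer; the range is therefore cut into a head piece up to the first boundary, whole pages, and a partial tail. A user-space model of that chunking arithmetic, with illustrative names:

/*
 * Sketch of mark_shadow()'s head/whole-page/tail chunking.
 */
#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

static void mark_range(unsigned long addr, unsigned long n)
{
	unsigned long last = addr + n - 1;
	unsigned long first_n;

	if ((addr & PAGE_MASK) == (last & PAGE_MASK))
		first_n = n;		/* fits in one page */
	else
		first_n = (addr & PAGE_MASK) + PAGE_SIZE - addr;

	printf("head: %#lx, %lu byte(s)\n", addr, first_n);
	addr += first_n;
	n -= first_n;

	while (n >= PAGE_SIZE) {	/* full-page memset()s */
		printf("page: %#lx\n", addr);
		addr += PAGE_SIZE;
		n -= PAGE_SIZE;
	}
	if (n)				/* partial tail page */
		printf("tail: %#lx, %lu byte(s)\n", addr, n);
}

int main(void)
{
	mark_range(0x1ff0, 2 * PAGE_SIZE);	/* head + page + tail */
	return 0;
}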
diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h
new file mode 100644
index 000000000000..af46d9ab9d86
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/shadow.h
@@ -0,0 +1,16 @@
1#ifndef ARCH__X86__MM__KMEMCHECK__SHADOW_H
2#define ARCH__X86__MM__KMEMCHECK__SHADOW_H
3
4enum kmemcheck_shadow {
5 KMEMCHECK_SHADOW_UNALLOCATED,
6 KMEMCHECK_SHADOW_UNINITIALIZED,
7 KMEMCHECK_SHADOW_INITIALIZED,
8 KMEMCHECK_SHADOW_FREED,
9};
10
11void *kmemcheck_shadow_lookup(unsigned long address);
12
13enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size);
14void kmemcheck_shadow_set(void *shadow, unsigned int size);
15
16#endif
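kmemcheck_shadow_test() in shadow.c interprets these states under one of two policies: with CONFIG_KMEMCHECK_PARTIAL_OK a single initialized byte lets the access pass (gcc frequently generates loads that touch neighboring bytes), otherwise every byte must be initialized. A user-space sketch of the two policies side by side; it simplifies the return value to a boolean, whereas the kernel returns the offending shadow state:

/*
 * Sketch of the two shadow-test policies; partial_ok models
 * CONFIG_KMEMCHECK_PARTIAL_OK.
 */
#include <stdbool.h>
#include <stdio.h>

enum kmemcheck_shadow {
	KMEMCHECK_SHADOW_UNALLOCATED,
	KMEMCHECK_SHADOW_UNINITIALIZED,
	KMEMCHECK_SHADOW_INITIALIZED,
	KMEMCHECK_SHADOW_FREED,
};

static bool access_ok(const unsigned char *shadow, unsigned int size,
		      bool partial_ok)
{
	unsigned int i;

	for (i = 0; i < size; ++i) {
		bool init = shadow[i] == KMEMCHECK_SHADOW_INITIALIZED;

		if (partial_ok && init)
			return true;	/* one good byte is enough  */
		if (!partial_ok && !init)
			return false;	/* one bad byte is fatal    */
	}
	return !partial_ok;		/* strict: all bytes passed */
}

int main(void)
{
	const unsigned char mixed[4] = {
		KMEMCHECK_SHADOW_INITIALIZED,
		KMEMCHECK_SHADOW_UNINITIALIZED,
		KMEMCHECK_SHADOW_INITIALIZED,
		KMEMCHECK_SHADOW_UNINITIALIZED,
	};

	printf("partial: %d, strict: %d\n",
	       access_ok(mixed, 4, true),	/* 1 */
	       access_ok(mixed, 4, false));	/* 0 */
	return 0;
}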
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 6ce9518fe2ac..3cfe9ced8a4c 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -470,7 +470,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
470 470
471 if (!debug_pagealloc) 471 if (!debug_pagealloc)
472 spin_unlock(&cpa_lock); 472 spin_unlock(&cpa_lock);
473 base = alloc_pages(GFP_KERNEL, 0); 473 base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
474 if (!debug_pagealloc) 474 if (!debug_pagealloc)
475 spin_lock(&cpa_lock); 475 spin_lock(&cpa_lock);
476 if (!base) 476 if (!base)
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 7aa03a5389f5..8e43bdd45456 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -4,9 +4,11 @@
4#include <asm/tlb.h> 4#include <asm/tlb.h>
5#include <asm/fixmap.h> 5#include <asm/fixmap.h>
6 6
7#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
8
7pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) 9pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
8{ 10{
9 return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); 11 return (pte_t *)__get_free_page(PGALLOC_GFP);
10} 12}
11 13
12pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) 14pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
@@ -14,9 +16,9 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
14 struct page *pte; 16 struct page *pte;
15 17
16#ifdef CONFIG_HIGHPTE 18#ifdef CONFIG_HIGHPTE
17 pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0); 19 pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0);
18#else 20#else
19 pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); 21 pte = alloc_pages(PGALLOC_GFP, 0);
20#endif 22#endif
21 if (pte) 23 if (pte)
22 pgtable_page_ctor(pte); 24 pgtable_page_ctor(pte);
@@ -161,7 +163,7 @@ static int preallocate_pmds(pmd_t *pmds[])
161 bool failed = false; 163 bool failed = false;
162 164
163 for(i = 0; i < PREALLOCATED_PMDS; i++) { 165 for(i = 0; i < PREALLOCATED_PMDS; i++) {
164 pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); 166 pmd_t *pmd = (pmd_t *)__get_free_page(PGALLOC_GFP);
165 if (pmd == NULL) 167 if (pmd == NULL)
166 failed = true; 168 failed = true;
167 pmds[i] = pmd; 169 pmds[i] = pmd;
@@ -228,7 +230,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
228 pmd_t *pmds[PREALLOCATED_PMDS]; 230 pmd_t *pmds[PREALLOCATED_PMDS];
229 unsigned long flags; 231 unsigned long flags;
230 232
231 pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); 233 pgd = (pgd_t *)__get_free_page(PGALLOC_GFP);
232 234
233 if (pgd == NULL) 235 if (pgd == NULL)
234 goto out; 236 goto out;