aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig.cpu31
-rw-r--r--arch/x86/boot/compressed/misc.c2
-rw-r--r--arch/x86/boot/compressed/relocs.c2
-rw-r--r--arch/x86/boot/cpucheck.c8
-rw-r--r--arch/x86/kernel/alternative.c36
-rw-r--r--arch/x86/kernel/amd_iommu.c22
-rw-r--r--arch/x86/kernel/apm_32.c3
-rw-r--r--arch/x86/kernel/asm-offsets_64.c2
-rw-r--r--arch/x86/kernel/cpu/amd.c9
-rw-r--r--arch/x86/kernel/cpu/centaur.c11
-rw-r--r--arch/x86/kernel/cpu/common.c18
-rw-r--r--arch/x86/kernel/cpu/common_64.c74
-rw-r--r--arch/x86/kernel/cpu/cyrix.c32
-rw-r--r--arch/x86/kernel/cpu/feature_names.c3
-rw-r--r--arch/x86/kernel/cpu/intel.c3
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c4
-rw-r--r--arch/x86/kernel/ds.c954
-rw-r--r--arch/x86/kernel/e820.c2
-rw-r--r--arch/x86/kernel/hpet.c19
-rw-r--r--arch/x86/kernel/io_delay.c8
-rw-r--r--arch/x86/kernel/ioport.c1
-rw-r--r--arch/x86/kernel/ipi.c3
-rw-r--r--arch/x86/kernel/kdebugfs.c1
-rw-r--r--arch/x86/kernel/kgdb.c43
-rw-r--r--arch/x86/kernel/ldt.c1
-rw-r--r--arch/x86/kernel/process.c17
-rw-r--r--arch/x86/kernel/process_32.c54
-rw-r--r--arch/x86/kernel/process_64.c41
-rw-r--r--arch/x86/kernel/ptrace.c446
-rw-r--r--arch/x86/kernel/setup.c5
-rw-r--r--arch/x86/kernel/signal_32.c1
-rw-r--r--arch/x86/kernel/signal_64.c1
-rw-r--r--arch/x86/kernel/smpboot.c4
-rw-r--r--arch/x86/kernel/sys_i386_32.c2
-rw-r--r--arch/x86/kernel/sys_x86_64.c1
-rw-r--r--arch/x86/kernel/syscall_64.c4
-rw-r--r--arch/x86/kernel/time_32.c1
-rw-r--r--arch/x86/kernel/tls.c1
-rw-r--r--arch/x86/kernel/vm86_32.c1
-rw-r--r--arch/x86/kernel/vmi_32.c2
-rw-r--r--arch/x86/kernel/vsmp_64.c2
-rw-r--r--arch/x86/kvm/mmu.c4
-rw-r--r--arch/x86/kvm/svm.c12
-rw-r--r--arch/x86/kvm/vmx.c3
-rw-r--r--arch/x86/kvm/vmx.h2
-rw-r--r--arch/x86/mach-default/setup.c4
-rw-r--r--arch/x86/mm/fault.c3
-rw-r--r--arch/x86/mm/init_32.c5
-rw-r--r--arch/x86/mm/ioremap.c4
-rw-r--r--arch/x86/oprofile/nmi_int.c4
-rw-r--r--arch/x86/xen/enlighten.c2
-rw-r--r--arch/x86/xen/setup.c2
52 files changed, 1330 insertions, 590 deletions
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 2c518fbc52ec..60a85768cfcb 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -382,14 +382,17 @@ config X86_OOSTORE
382# P6_NOPs are a relatively minor optimization that require a family >= 382# P6_NOPs are a relatively minor optimization that require a family >=
383# 6 processor, except that it is broken on certain VIA chips. 383# 6 processor, except that it is broken on certain VIA chips.
384# Furthermore, AMD chips prefer a totally different sequence of NOPs 384# Furthermore, AMD chips prefer a totally different sequence of NOPs
385# (which work on all CPUs). As a result, disallow these if we're 385# (which work on all CPUs). In addition, it looks like Virtual PC
386# compiling X86_GENERIC but not X86_64 (these NOPs do work on all 386# does not understand them.
387# x86-64 capable chips); the list of processors in the right-hand clause 387#
388# are the cores that benefit from this optimization. 388# As a result, disallow these if we're not compiling for X86_64 (these
389# NOPs do work on all x86-64 capable chips); the list of processors in
390# the right-hand clause are the cores that benefit from this optimization.
389# 391#
390config X86_P6_NOP 392config X86_P6_NOP
391 def_bool y 393 def_bool y
392 depends on (X86_64 || !X86_GENERIC) && (M686 || MPENTIUMII || MPENTIUMIII || MPENTIUMM || MCORE2 || MPENTIUM4 || MPSC) 394 depends on X86_64
395 depends on (MCORE2 || MPENTIUM4 || MPSC)
393 396
394config X86_TSC 397config X86_TSC
395 def_bool y 398 def_bool y
@@ -415,3 +418,21 @@ config X86_MINIMUM_CPU_FAMILY
415config X86_DEBUGCTLMSR 418config X86_DEBUGCTLMSR
416 def_bool y 419 def_bool y
417 depends on !(MK6 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) 420 depends on !(MK6 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386)
421
422config X86_DS
423 bool "Debug Store support"
424 default y
425 help
426 Add support for Debug Store.
427 This allows the kernel to provide a memory buffer to the hardware
428 to store various profiling and tracing events.
429
430config X86_PTRACE_BTS
431 bool "ptrace interface to Branch Trace Store"
432 default y
433 depends on (X86_DS && X86_DEBUGCTLMSR)
434 help
435 Add a ptrace interface to allow collecting an execution trace
436 of the traced task.
437 This collects control flow changes in a (cyclic) buffer and allows
438 debuggers to fill in the gaps and show an execution trace of the debuggee.
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 9fea73706479..aaf5a2131efc 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -16,7 +16,7 @@
16 */ 16 */
17#undef CONFIG_PARAVIRT 17#undef CONFIG_PARAVIRT
18#ifdef CONFIG_X86_32 18#ifdef CONFIG_X86_32
19#define _ASM_DESC_H_ 1 19#define ASM_X86__DESC_H 1
20#endif 20#endif
21 21
22#ifdef CONFIG_X86_64 22#ifdef CONFIG_X86_64
diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c
index a1310c52fc0c..857e492c571e 100644
--- a/arch/x86/boot/compressed/relocs.c
+++ b/arch/x86/boot/compressed/relocs.c
@@ -492,7 +492,7 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym))
492 continue; 492 continue;
493 } 493 }
494 sh_symtab = sec_symtab->symtab; 494 sh_symtab = sec_symtab->symtab;
495 sym_strtab = sec->link->strtab; 495 sym_strtab = sec_symtab->link->strtab;
496 for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) { 496 for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) {
497 Elf32_Rel *rel; 497 Elf32_Rel *rel;
498 Elf32_Sym *sym; 498 Elf32_Sym *sym;
diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c
index 4b9ae7c56748..4d3ff037201f 100644
--- a/arch/x86/boot/cpucheck.c
+++ b/arch/x86/boot/cpucheck.c
@@ -38,12 +38,12 @@ static const u32 req_flags[NCAPINTS] =
38{ 38{
39 REQUIRED_MASK0, 39 REQUIRED_MASK0,
40 REQUIRED_MASK1, 40 REQUIRED_MASK1,
41 REQUIRED_MASK2, 41 0, /* REQUIRED_MASK2 not implemented in this file */
42 REQUIRED_MASK3, 42 0, /* REQUIRED_MASK3 not implemented in this file */
43 REQUIRED_MASK4, 43 REQUIRED_MASK4,
44 REQUIRED_MASK5, 44 0, /* REQUIRED_MASK5 not implemented in this file */
45 REQUIRED_MASK6, 45 REQUIRED_MASK6,
46 REQUIRED_MASK7, 46 0, /* REQUIRED_MASK7 not implemented in this file */
47}; 47};
48 48
49#define A32(a, b, c, d) (((d) << 24)+((c) << 16)+((b) << 8)+(a)) 49#define A32(a, b, c, d) (((d) << 24)+((c) << 16)+((b) << 8)+(a))
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 2763cb37b553..65a0c1b48696 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -145,35 +145,25 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
145extern char __vsyscall_0; 145extern char __vsyscall_0;
146const unsigned char *const *find_nop_table(void) 146const unsigned char *const *find_nop_table(void)
147{ 147{
148 return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || 148 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
149 boot_cpu_data.x86 < 6 ? k8_nops : p6_nops; 149 boot_cpu_has(X86_FEATURE_NOPL))
150 return p6_nops;
151 else
152 return k8_nops;
150} 153}
151 154
152#else /* CONFIG_X86_64 */ 155#else /* CONFIG_X86_64 */
153 156
154static const struct nop {
155 int cpuid;
156 const unsigned char *const *noptable;
157} noptypes[] = {
158 { X86_FEATURE_K8, k8_nops },
159 { X86_FEATURE_K7, k7_nops },
160 { X86_FEATURE_P4, p6_nops },
161 { X86_FEATURE_P3, p6_nops },
162 { -1, NULL }
163};
164
165const unsigned char *const *find_nop_table(void) 157const unsigned char *const *find_nop_table(void)
166{ 158{
167 const unsigned char *const *noptable = intel_nops; 159 if (boot_cpu_has(X86_FEATURE_K8))
168 int i; 160 return k8_nops;
169 161 else if (boot_cpu_has(X86_FEATURE_K7))
170 for (i = 0; noptypes[i].cpuid >= 0; i++) { 162 return k7_nops;
171 if (boot_cpu_has(noptypes[i].cpuid)) { 163 else if (boot_cpu_has(X86_FEATURE_NOPL))
172 noptable = noptypes[i].noptable; 164 return p6_nops;
173 break; 165 else
174 } 166 return intel_nops;
175 }
176 return noptable;
177} 167}
178 168
179#endif /* CONFIG_X86_64 */ 169#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 69b4d060b21c..042fdc27bc92 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -101,10 +101,10 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
101 */ 101 */
102static int iommu_completion_wait(struct amd_iommu *iommu) 102static int iommu_completion_wait(struct amd_iommu *iommu)
103{ 103{
104 int ret, ready = 0; 104 int ret = 0, ready = 0;
105 unsigned status = 0; 105 unsigned status = 0;
106 struct iommu_cmd cmd; 106 struct iommu_cmd cmd;
107 unsigned long i = 0; 107 unsigned long flags, i = 0;
108 108
109 memset(&cmd, 0, sizeof(cmd)); 109 memset(&cmd, 0, sizeof(cmd));
110 cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; 110 cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
@@ -112,10 +112,12 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
112 112
113 iommu->need_sync = 0; 113 iommu->need_sync = 0;
114 114
115 ret = iommu_queue_command(iommu, &cmd); 115 spin_lock_irqsave(&iommu->lock, flags);
116
117 ret = __iommu_queue_command(iommu, &cmd);
116 118
117 if (ret) 119 if (ret)
118 return ret; 120 goto out;
119 121
120 while (!ready && (i < EXIT_LOOP_COUNT)) { 122 while (!ready && (i < EXIT_LOOP_COUNT)) {
121 ++i; 123 ++i;
@@ -130,6 +132,8 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
130 132
131 if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit())) 133 if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit()))
132 printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n"); 134 printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n");
135out:
136 spin_unlock_irqrestore(&iommu->lock, flags);
133 137
134 return 0; 138 return 0;
135} 139}
@@ -140,6 +144,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
140static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) 144static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
141{ 145{
142 struct iommu_cmd cmd; 146 struct iommu_cmd cmd;
147 int ret;
143 148
144 BUG_ON(iommu == NULL); 149 BUG_ON(iommu == NULL);
145 150
@@ -147,9 +152,11 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
147 CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); 152 CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);
148 cmd.data[0] = devid; 153 cmd.data[0] = devid;
149 154
155 ret = iommu_queue_command(iommu, &cmd);
156
150 iommu->need_sync = 1; 157 iommu->need_sync = 1;
151 158
152 return iommu_queue_command(iommu, &cmd); 159 return ret;
153} 160}
154 161
155/* 162/*
@@ -159,6 +166,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
159 u64 address, u16 domid, int pde, int s) 166 u64 address, u16 domid, int pde, int s)
160{ 167{
161 struct iommu_cmd cmd; 168 struct iommu_cmd cmd;
169 int ret;
162 170
163 memset(&cmd, 0, sizeof(cmd)); 171 memset(&cmd, 0, sizeof(cmd));
164 address &= PAGE_MASK; 172 address &= PAGE_MASK;
@@ -171,9 +179,11 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
171 if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ 179 if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
172 cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; 180 cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
173 181
182 ret = iommu_queue_command(iommu, &cmd);
183
174 iommu->need_sync = 1; 184 iommu->need_sync = 1;
175 185
176 return iommu_queue_command(iommu, &cmd); 186 return ret;
177} 187}
178 188
179/* 189/*
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 9ee24e6bc4b0..732d1f4e10ee 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -234,6 +234,7 @@
234#include <asm/uaccess.h> 234#include <asm/uaccess.h>
235#include <asm/desc.h> 235#include <asm/desc.h>
236#include <asm/i8253.h> 236#include <asm/i8253.h>
237#include <asm/olpc.h>
237#include <asm/paravirt.h> 238#include <asm/paravirt.h>
238#include <asm/reboot.h> 239#include <asm/reboot.h>
239 240
@@ -2217,7 +2218,7 @@ static int __init apm_init(void)
2217 2218
2218 dmi_check_system(apm_dmi_table); 2219 dmi_check_system(apm_dmi_table);
2219 2220
2220 if (apm_info.bios.version == 0 || paravirt_enabled()) { 2221 if (apm_info.bios.version == 0 || paravirt_enabled() || machine_is_olpc()) {
2221 printk(KERN_INFO "apm: BIOS not found.\n"); 2222 printk(KERN_INFO "apm: BIOS not found.\n");
2222 return -ENODEV; 2223 return -ENODEV;
2223 } 2224 }
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index aa89387006fe..505543a75a56 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -22,7 +22,7 @@
22 22
23#define __NO_STUBS 1 23#define __NO_STUBS 1
24#undef __SYSCALL 24#undef __SYSCALL
25#undef _ASM_X86_64_UNISTD_H_ 25#undef ASM_X86__UNISTD_64_H
26#define __SYSCALL(nr, sym) [nr] = 1, 26#define __SYSCALL(nr, sym) [nr] = 1,
27static char syscalls[] = { 27static char syscalls[] = {
28#include <asm/unistd.h> 28#include <asm/unistd.h>
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index cae9cabc3031..18514ed26104 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -31,6 +31,11 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
31 if (c->x86_power & (1<<8)) 31 if (c->x86_power & (1<<8))
32 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 32 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
33 } 33 }
34
35 /* Set MTRR capability flag if appropriate */
36 if (c->x86_model == 13 || c->x86_model == 9 ||
37 (c->x86_model == 8 && c->x86_mask >= 8))
38 set_cpu_cap(c, X86_FEATURE_K6_MTRR);
34} 39}
35 40
36static void __cpuinit init_amd(struct cpuinfo_x86 *c) 41static void __cpuinit init_amd(struct cpuinfo_x86 *c)
@@ -166,10 +171,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
166 mbytes); 171 mbytes);
167 } 172 }
168 173
169 /* Set MTRR capability flag if appropriate */
170 if (c->x86_model == 13 || c->x86_model == 9 ||
171 (c->x86_model == 8 && c->x86_mask >= 8))
172 set_cpu_cap(c, X86_FEATURE_K6_MTRR);
173 break; 174 break;
174 } 175 }
175 176
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index e0f45edd6a55..a0534c04d38a 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -314,6 +314,16 @@ enum {
314 EAMD3D = 1<<20, 314 EAMD3D = 1<<20,
315}; 315};
316 316
317static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
318{
319 switch (c->x86) {
320 case 5:
321 /* Emulate MTRRs using Centaur's MCR. */
322 set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
323 break;
324 }
325}
326
317static void __cpuinit init_centaur(struct cpuinfo_x86 *c) 327static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
318{ 328{
319 329
@@ -462,6 +472,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
462static struct cpu_dev centaur_cpu_dev __cpuinitdata = { 472static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
463 .c_vendor = "Centaur", 473 .c_vendor = "Centaur",
464 .c_ident = { "CentaurHauls" }, 474 .c_ident = { "CentaurHauls" },
475 .c_early_init = early_init_centaur,
465 .c_init = init_centaur, 476 .c_init = init_centaur,
466 .c_size_cache = centaur_size_cache, 477 .c_size_cache = centaur_size_cache,
467}; 478};
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 80ab20d4fa39..4e456bd955bb 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -13,6 +13,7 @@
13#include <asm/mtrr.h> 13#include <asm/mtrr.h>
14#include <asm/mce.h> 14#include <asm/mce.h>
15#include <asm/pat.h> 15#include <asm/pat.h>
16#include <asm/asm.h>
16#ifdef CONFIG_X86_LOCAL_APIC 17#ifdef CONFIG_X86_LOCAL_APIC
17#include <asm/mpspec.h> 18#include <asm/mpspec.h>
18#include <asm/apic.h> 19#include <asm/apic.h>
@@ -334,11 +335,24 @@ static void __init early_cpu_detect(void)
334 335
335 get_cpu_vendor(c, 1); 336 get_cpu_vendor(c, 1);
336 337
338 early_get_cap(c);
339
337 if (c->x86_vendor != X86_VENDOR_UNKNOWN && 340 if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
338 cpu_devs[c->x86_vendor]->c_early_init) 341 cpu_devs[c->x86_vendor]->c_early_init)
339 cpu_devs[c->x86_vendor]->c_early_init(c); 342 cpu_devs[c->x86_vendor]->c_early_init(c);
343}
340 344
341 early_get_cap(c); 345/*
346 * The NOPL instruction is supposed to exist on all CPUs with
347 * family >= 6; unfortunately, that's not true in practice because
348 * of early VIA chips and (more importantly) broken virtualizers that
349 * are not easy to detect. In the latter case it doesn't even *fail*
350 * reliably, so probing for it doesn't even work. Disable it completely
351 * unless we can find a reliable way to detect all the broken cases.
352 */
353static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
354{
355 clear_cpu_cap(c, X86_FEATURE_NOPL);
342} 356}
343 357
344static void __cpuinit generic_identify(struct cpuinfo_x86 *c) 358static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
@@ -395,8 +409,8 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
395 } 409 }
396 410
397 init_scattered_cpuid_features(c); 411 init_scattered_cpuid_features(c);
412 detect_nopl(c);
398 } 413 }
399
400} 414}
401 415
402static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) 416static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index dd6e3f15017e..a11f5d4477cd 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -18,6 +18,7 @@
18#include <asm/mtrr.h> 18#include <asm/mtrr.h>
19#include <asm/mce.h> 19#include <asm/mce.h>
20#include <asm/pat.h> 20#include <asm/pat.h>
21#include <asm/asm.h>
21#include <asm/numa.h> 22#include <asm/numa.h>
22#ifdef CONFIG_X86_LOCAL_APIC 23#ifdef CONFIG_X86_LOCAL_APIC
23#include <asm/mpspec.h> 24#include <asm/mpspec.h>
@@ -215,6 +216,39 @@ static void __init early_cpu_support_print(void)
215 } 216 }
216} 217}
217 218
219/*
220 * The NOPL instruction is supposed to exist on all CPUs with
221 * family >= 6, unfortunately, that's not true in practice because
222 * of early VIA chips and (more importantly) broken virtualizers that
223 * are not easy to detect. Hence, probe for it based on first
224 * principles.
225 *
226 * Note: no 64-bit chip is known to lack these, but put the code here
227 * for consistency with 32 bits, and to make it utterly trivial to
228 * diagnose the problem should it ever surface.
229 */
230static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
231{
232 const u32 nopl_signature = 0x888c53b1; /* Random number */
233 u32 has_nopl = nopl_signature;
234
235 clear_cpu_cap(c, X86_FEATURE_NOPL);
236 if (c->x86 >= 6) {
237 asm volatile("\n"
238 "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */
239 "2:\n"
240 " .section .fixup,\"ax\"\n"
241 "3: xor %0,%0\n"
242 " jmp 2b\n"
243 " .previous\n"
244 _ASM_EXTABLE(1b,3b)
245 : "+a" (has_nopl));
246
247 if (has_nopl == nopl_signature)
248 set_cpu_cap(c, X86_FEATURE_NOPL);
249 }
250}
251
218static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); 252static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
219 253
220void __init early_cpu_init(void) 254void __init early_cpu_init(void)
@@ -313,6 +347,8 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
313 c->x86_phys_bits = eax & 0xff; 347 c->x86_phys_bits = eax & 0xff;
314 } 348 }
315 349
350 detect_nopl(c);
351
316 if (c->x86_vendor != X86_VENDOR_UNKNOWN && 352 if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
317 cpu_devs[c->x86_vendor]->c_early_init) 353 cpu_devs[c->x86_vendor]->c_early_init)
318 cpu_devs[c->x86_vendor]->c_early_init(c); 354 cpu_devs[c->x86_vendor]->c_early_init(c);
@@ -493,17 +529,20 @@ void pda_init(int cpu)
493 /* others are initialized in smpboot.c */ 529 /* others are initialized in smpboot.c */
494 pda->pcurrent = &init_task; 530 pda->pcurrent = &init_task;
495 pda->irqstackptr = boot_cpu_stack; 531 pda->irqstackptr = boot_cpu_stack;
532 pda->irqstackptr += IRQSTACKSIZE - 64;
496 } else { 533 } else {
497 pda->irqstackptr = (char *) 534 if (!pda->irqstackptr) {
498 __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); 535 pda->irqstackptr = (char *)
499 if (!pda->irqstackptr) 536 __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
500 panic("cannot allocate irqstack for cpu %d", cpu); 537 if (!pda->irqstackptr)
538 panic("cannot allocate irqstack for cpu %d",
539 cpu);
540 pda->irqstackptr += IRQSTACKSIZE - 64;
541 }
501 542
502 if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) 543 if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
503 pda->nodenumber = cpu_to_node(cpu); 544 pda->nodenumber = cpu_to_node(cpu);
504 } 545 }
505
506 pda->irqstackptr += IRQSTACKSIZE-64;
507} 546}
508 547
509char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + 548char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
@@ -601,19 +640,22 @@ void __cpuinit cpu_init(void)
601 /* 640 /*
602 * set up and load the per-CPU TSS 641 * set up and load the per-CPU TSS
603 */ 642 */
604 for (v = 0; v < N_EXCEPTION_STACKS; v++) { 643 if (!orig_ist->ist[0]) {
605 static const unsigned int order[N_EXCEPTION_STACKS] = { 644 static const unsigned int order[N_EXCEPTION_STACKS] = {
606 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, 645 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
607 [DEBUG_STACK - 1] = DEBUG_STACK_ORDER 646 [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
608 }; 647 };
609 if (cpu) { 648 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
610 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); 649 if (cpu) {
611 if (!estacks) 650 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
612 panic("Cannot allocate exception stack %ld %d\n", 651 if (!estacks)
613 v, cpu); 652 panic("Cannot allocate exception "
653 "stack %ld %d\n", v, cpu);
654 }
655 estacks += PAGE_SIZE << order[v];
656 orig_ist->ist[v] = t->x86_tss.ist[v] =
657 (unsigned long)estacks;
614 } 658 }
615 estacks += PAGE_SIZE << order[v];
616 orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks;
617 } 659 }
618 660
619 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); 661 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index e710a21bb6e8..898a5a2002ed 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -15,13 +15,11 @@
15/* 15/*
16 * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU 16 * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU
17 */ 17 */
18static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) 18static void __cpuinit __do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
19{ 19{
20 unsigned char ccr2, ccr3; 20 unsigned char ccr2, ccr3;
21 unsigned long flags;
22 21
23 /* we test for DEVID by checking whether CCR3 is writable */ 22 /* we test for DEVID by checking whether CCR3 is writable */
24 local_irq_save(flags);
25 ccr3 = getCx86(CX86_CCR3); 23 ccr3 = getCx86(CX86_CCR3);
26 setCx86(CX86_CCR3, ccr3 ^ 0x80); 24 setCx86(CX86_CCR3, ccr3 ^ 0x80);
27 getCx86(0xc0); /* dummy to change bus */ 25 getCx86(0xc0); /* dummy to change bus */
@@ -44,9 +42,16 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
44 *dir0 = getCx86(CX86_DIR0); 42 *dir0 = getCx86(CX86_DIR0);
45 *dir1 = getCx86(CX86_DIR1); 43 *dir1 = getCx86(CX86_DIR1);
46 } 44 }
47 local_irq_restore(flags);
48} 45}
49 46
47static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
48{
49 unsigned long flags;
50
51 local_irq_save(flags);
52 __do_cyrix_devid(dir0, dir1);
53 local_irq_restore(flags);
54}
50/* 55/*
51 * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in 56 * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in
52 * order to identify the Cyrix CPU model after we're out of setup.c 57 * order to identify the Cyrix CPU model after we're out of setup.c
@@ -161,6 +166,24 @@ static void __cpuinit geode_configure(void)
161 local_irq_restore(flags); 166 local_irq_restore(flags);
162} 167}
163 168
169static void __cpuinit early_init_cyrix(struct cpuinfo_x86 *c)
170{
171 unsigned char dir0, dir0_msn, dir1 = 0;
172
173 __do_cyrix_devid(&dir0, &dir1);
174 dir0_msn = dir0 >> 4; /* identifies CPU "family" */
175
176 switch (dir0_msn) {
177 case 3: /* 6x86/6x86L */
178 /* Emulate MTRRs using Cyrix's ARRs. */
179 set_cpu_cap(c, X86_FEATURE_CYRIX_ARR);
180 break;
181 case 5: /* 6x86MX/M II */
182 /* Emulate MTRRs using Cyrix's ARRs. */
183 set_cpu_cap(c, X86_FEATURE_CYRIX_ARR);
184 break;
185 }
186}
164 187
165static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) 188static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
166{ 189{
@@ -416,6 +439,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c)
416static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { 439static struct cpu_dev cyrix_cpu_dev __cpuinitdata = {
417 .c_vendor = "Cyrix", 440 .c_vendor = "Cyrix",
418 .c_ident = { "CyrixInstead" }, 441 .c_ident = { "CyrixInstead" },
442 .c_early_init = early_init_cyrix,
419 .c_init = init_cyrix, 443 .c_init = init_cyrix,
420 .c_identify = cyrix_identify, 444 .c_identify = cyrix_identify,
421}; 445};
diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c
index e43ad4ad4cba..c9017799497c 100644
--- a/arch/x86/kernel/cpu/feature_names.c
+++ b/arch/x86/kernel/cpu/feature_names.c
@@ -39,7 +39,8 @@ const char * const x86_cap_flags[NCAPINTS*32] = {
39 NULL, NULL, NULL, NULL, 39 NULL, NULL, NULL, NULL,
40 "constant_tsc", "up", NULL, "arch_perfmon", 40 "constant_tsc", "up", NULL, "arch_perfmon",
41 "pebs", "bts", NULL, NULL, 41 "pebs", "bts", NULL, NULL,
42 "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL, 42 "rep_good", NULL, NULL, NULL,
43 "nopl", NULL, NULL, NULL,
43 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 44 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
44 45
45 /* Intel-defined (#2) */ 46 /* Intel-defined (#2) */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index b75f2569b8f8..f113ef4595f6 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -222,10 +222,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
222 set_cpu_cap(c, X86_FEATURE_BTS); 222 set_cpu_cap(c, X86_FEATURE_BTS);
223 if (!(l1 & (1<<12))) 223 if (!(l1 & (1<<12)))
224 set_cpu_cap(c, X86_FEATURE_PEBS); 224 set_cpu_cap(c, X86_FEATURE_PEBS);
225 ds_init_intel(c);
225 } 226 }
226 227
227 if (cpu_has_bts) 228 if (cpu_has_bts)
228 ds_init_intel(c); 229 ptrace_bts_init_intel(c);
229 230
230 /* 231 /*
231 * See if we have a good local APIC by checking for buggy Pentia, 232 * See if we have a good local APIC by checking for buggy Pentia,
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index b117d7f8a564..5df16d818371 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -729,7 +729,7 @@ struct var_mtrr_range_state {
729 mtrr_type type; 729 mtrr_type type;
730}; 730};
731 731
732struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; 732static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
733static int __initdata debug_print; 733static int __initdata debug_print;
734 734
735static int __init 735static int __init
@@ -834,7 +834,7 @@ static int __init enable_mtrr_cleanup_setup(char *str)
834 enable_mtrr_cleanup = 1; 834 enable_mtrr_cleanup = 1;
835 return 0; 835 return 0;
836} 836}
837early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup); 837early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
838 838
839struct var_mtrr_state { 839struct var_mtrr_state {
840 unsigned long range_startk; 840 unsigned long range_startk;
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 11c11b8ec48d..2b69994fd3a8 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -2,26 +2,49 @@
2 * Debug Store support 2 * Debug Store support
3 * 3 *
4 * This provides a low-level interface to the hardware's Debug Store 4 * This provides a low-level interface to the hardware's Debug Store
5 * feature that is used for last branch recording (LBR) and 5 * feature that is used for branch trace store (BTS) and
6 * precise-event based sampling (PEBS). 6 * precise-event based sampling (PEBS).
7 * 7 *
8 * Different architectures use a different DS layout/pointer size. 8 * It manages:
9 * The below functions therefore work on a void*. 9 * - per-thread and per-cpu allocation of BTS and PEBS
10 * - buffer memory allocation (optional)
11 * - buffer overflow handling
12 * - buffer access
10 * 13 *
14 * It assumes:
15 * - get_task_struct on all parameter tasks
16 * - current is allowed to trace parameter tasks
11 * 17 *
12 * Since there is no user for PEBS, yet, only LBR (or branch
13 * trace store, BTS) is supported.
14 * 18 *
15 * 19 * Copyright (C) 2007-2008 Intel Corporation.
16 * Copyright (C) 2007 Intel Corporation. 20 * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
17 * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
18 */ 21 */
19 22
23
24#ifdef CONFIG_X86_DS
25
20#include <asm/ds.h> 26#include <asm/ds.h>
21 27
22#include <linux/errno.h> 28#include <linux/errno.h>
23#include <linux/string.h> 29#include <linux/string.h>
24#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/sched.h>
32#include <linux/mm.h>
33
34
35/*
36 * The configuration for a particular DS hardware implementation.
37 */
38struct ds_configuration {
39 /* the size of the DS structure in bytes */
40 unsigned char sizeof_ds;
41 /* the size of one pointer-typed field in the DS structure in bytes;
42 this covers the first 8 fields related to buffer management. */
43 unsigned char sizeof_field;
44 /* the size of a BTS/PEBS record in bytes */
45 unsigned char sizeof_rec[2];
46};
47static struct ds_configuration ds_cfg;
25 48
26 49
27/* 50/*
@@ -44,378 +67,747 @@
44 * (interrupt occurs when write pointer passes interrupt pointer) 67 * (interrupt occurs when write pointer passes interrupt pointer)
45 * - value to which counter is reset following counter overflow 68 * - value to which counter is reset following counter overflow
46 * 69 *
47 * On later architectures, the last branch recording hardware uses 70 * Later architectures use 64bit pointers throughout, whereas earlier
48 * 64bit pointers even in 32bit mode. 71 * architectures use 32bit pointers in 32bit mode.
49 *
50 *
51 * Branch Trace Store (BTS) records store information about control
52 * flow changes. They at least provide the following information:
53 * - source linear address
54 * - destination linear address
55 * 72 *
56 * Netburst supported a predicated bit that had been dropped in later
57 * architectures. We do not support it.
58 * 73 *
74 * We compute the base address for the first 8 fields based on:
75 * - the field size stored in the DS configuration
76 * - the relative field position
77 * - an offset giving the start of the respective region
59 * 78 *
60 * In order to abstract from the actual DS and BTS layout, we describe 79 * This offset is further used to index various arrays holding
61 * the access to the relevant fields. 80 * information for BTS and PEBS at the respective index.
62 * Thanks to Andi Kleen for proposing this design.
63 * 81 *
64 * The implementation, however, is not as general as it might seem. In 82 * On later 32bit processors, we only access the lower 32bit of the
65 * order to stay somewhat simple and efficient, we assume an 83 * 64bit pointer fields. The upper halves will be zeroed out.
66 * underlying unsigned type (mostly a pointer type) and we expect the
67 * field to be at least as big as that type.
68 */ 84 */
69 85
70/* 86enum ds_field {
71 * A special from_ip address to indicate that the BTS record is an 87 ds_buffer_base = 0,
72 * info record that needs to be interpreted or skipped. 88 ds_index,
73 */ 89 ds_absolute_maximum,
74#define BTS_ESCAPE_ADDRESS (-1) 90 ds_interrupt_threshold,
91};
75 92
76/* 93enum ds_qualifier {
77 * A field access descriptor 94 ds_bts = 0,
78 */ 95 ds_pebs
79struct access_desc {
80 unsigned char offset;
81 unsigned char size;
82}; 96};
83 97
98static inline unsigned long ds_get(const unsigned char *base,
99 enum ds_qualifier qual, enum ds_field field)
100{
101 base += (ds_cfg.sizeof_field * (field + (4 * qual)));
102 return *(unsigned long *)base;
103}
104
105static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
106 enum ds_field field, unsigned long value)
107{
108 base += (ds_cfg.sizeof_field * (field + (4 * qual)));
109 (*(unsigned long *)base) = value;
110}
111
112
84/* 113/*
85 * The configuration for a particular DS/BTS hardware implementation. 114 * Locking is done only for allocating BTS or PEBS resources and for
115 * guarding context and buffer memory allocation.
116 *
117 * Most functions require the current task to own the ds context part
118 * they are going to access. All the locking is done when validating
119 * access to the context.
86 */ 120 */
87struct ds_configuration { 121static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
88 /* the DS configuration */
89 unsigned char sizeof_ds;
90 struct access_desc bts_buffer_base;
91 struct access_desc bts_index;
92 struct access_desc bts_absolute_maximum;
93 struct access_desc bts_interrupt_threshold;
94 /* the BTS configuration */
95 unsigned char sizeof_bts;
96 struct access_desc from_ip;
97 struct access_desc to_ip;
98 /* BTS variants used to store additional information like
99 timestamps */
100 struct access_desc info_type;
101 struct access_desc info_data;
102 unsigned long debugctl_mask;
103};
104 122
105/* 123/*
106 * The global configuration used by the below accessor functions 124 * Validate that the current task is allowed to access the BTS/PEBS
125 * buffer of the parameter task.
126 *
127 * Returns 0, if access is granted; -Eerrno, otherwise.
107 */ 128 */
108static struct ds_configuration ds_cfg; 129static inline int ds_validate_access(struct ds_context *context,
130 enum ds_qualifier qual)
131{
132 if (!context)
133 return -EPERM;
134
135 if (context->owner[qual] == current)
136 return 0;
137
138 return -EPERM;
139}
140
109 141
110/* 142/*
111 * Accessor functions for some DS and BTS fields using the above 143 * We either support (system-wide) per-cpu or per-thread allocation.
112 * global ptrace_bts_cfg. 144 * We distinguish the two based on the task_struct pointer, where a
145 * NULL pointer indicates per-cpu allocation for the current cpu.
146 *
147 * Allocations are use-counted. As soon as resources are allocated,
148 * further allocations must be of the same type (per-cpu or
149 * per-thread). We model this by counting allocations (i.e. the number
150 * of tracers of a certain type) for one type negatively:
151 * =0 no tracers
152 * >0 number of per-thread tracers
153 * <0 number of per-cpu tracers
154 *
155 * The below functions to get and put tracers and to check the
156 * allocation type require the ds_lock to be held by the caller.
157 *
158 * The tracer count essentially gives the number of ds contexts for a certain
159 * type of allocation.
113 */ 160 */
114static inline unsigned long get_bts_buffer_base(char *base) 161static long tracers;
162
163static inline void get_tracer(struct task_struct *task)
115{ 164{
116 return *(unsigned long *)(base + ds_cfg.bts_buffer_base.offset); 165 tracers += (task ? 1 : -1);
117} 166}
118static inline void set_bts_buffer_base(char *base, unsigned long value) 167
168static inline void put_tracer(struct task_struct *task)
119{ 169{
120 (*(unsigned long *)(base + ds_cfg.bts_buffer_base.offset)) = value; 170 tracers -= (task ? 1 : -1);
121} 171}
122static inline unsigned long get_bts_index(char *base) 172
173static inline int check_tracer(struct task_struct *task)
123{ 174{
124 return *(unsigned long *)(base + ds_cfg.bts_index.offset); 175 return (task ? (tracers >= 0) : (tracers <= 0));
125} 176}
126static inline void set_bts_index(char *base, unsigned long value) 177
178
179/*
180 * The DS context is either attached to a thread or to a cpu:
181 * - in the former case, the thread_struct contains a pointer to the
182 * attached context.
183 * - in the latter case, we use a static array of per-cpu context
184 * pointers.
185 *
186 * Contexts are use-counted. They are allocated on first access and
187 * deallocated when the last user puts the context.
188 *
189 * We distinguish between an allocating and a non-allocating get of a
190 * context:
191 * - the allocating get is used for requesting BTS/PEBS resources. It
192 * requires the caller to hold the global ds_lock.
193 * - the non-allocating get is used for all other cases. A
194 * non-existing context indicates an error. It acquires and releases
195 * the ds_lock itself for obtaining the context.
196 *
197 * A context and its DS configuration are allocated and deallocated
198 * together. A context always has a DS configuration of the
199 * appropriate size.
200 */
201static DEFINE_PER_CPU(struct ds_context *, system_context);
202
203#define this_system_context per_cpu(system_context, smp_processor_id())
204
205/*
206 * Returns the pointer to the parameter task's context or to the
207 * system-wide context, if task is NULL.
208 *
209 * Increases the use count of the returned context, if not NULL.
210 */
211static inline struct ds_context *ds_get_context(struct task_struct *task)
127{ 212{
128 (*(unsigned long *)(base + ds_cfg.bts_index.offset)) = value; 213 struct ds_context *context;
214
215 spin_lock(&ds_lock);
216
217 context = (task ? task->thread.ds_ctx : this_system_context);
218 if (context)
219 context->count++;
220
221 spin_unlock(&ds_lock);
222
223 return context;
129} 224}
130static inline unsigned long get_bts_absolute_maximum(char *base) 225
226/*
227 * Same as ds_get_context, but allocates the context and its DS
228 * structure, if necessary; returns NULL, if out of memory.
229 *
230 * pre: requires ds_lock to be held
231 */
232static inline struct ds_context *ds_alloc_context(struct task_struct *task)
131{ 233{
132 return *(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset); 234 struct ds_context **p_context =
235 (task ? &task->thread.ds_ctx : &this_system_context);
236 struct ds_context *context = *p_context;
237
238 if (!context) {
239 context = kzalloc(sizeof(*context), GFP_KERNEL);
240
241 if (!context)
242 return NULL;
243
244 context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
245 if (!context->ds) {
246 kfree(context);
247 return NULL;
248 }
249
250 *p_context = context;
251
252 context->this = p_context;
253 context->task = task;
254
255 if (task)
256 set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
257
258 if (!task || (task == current))
259 wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0);
260
261 get_tracer(task);
262 }
263
264 context->count++;
265
266 return context;
133} 267}
134static inline void set_bts_absolute_maximum(char *base, unsigned long value) 268
269/*
270 * Decreases the use count of the parameter context, if not NULL.
271 * Deallocates the context, if the use count reaches zero.
272 */
273static inline void ds_put_context(struct ds_context *context)
135{ 274{
136 (*(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset)) = value; 275 if (!context)
276 return;
277
278 spin_lock(&ds_lock);
279
280 if (--context->count)
281 goto out;
282
283 *(context->this) = NULL;
284
285 if (context->task)
286 clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
287
288 if (!context->task || (context->task == current))
289 wrmsrl(MSR_IA32_DS_AREA, 0);
290
291 put_tracer(context->task);
292
293 /* free any leftover buffers from tracers that did not
294 * deallocate them properly. */
295 kfree(context->buffer[ds_bts]);
296 kfree(context->buffer[ds_pebs]);
297 kfree(context->ds);
298 kfree(context);
299 out:
300 spin_unlock(&ds_lock);
137} 301}
138static inline unsigned long get_bts_interrupt_threshold(char *base) 302
303
304/*
305 * Handle a buffer overflow
306 *
307 * task: the task whose buffers are overflowing;
308 * NULL for a buffer overflow on the current cpu
309 * context: the ds context
310 * qual: the buffer type
311 */
312static void ds_overflow(struct task_struct *task, struct ds_context *context,
313 enum ds_qualifier qual)
139{ 314{
140 return *(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset); 315 if (!context)
316 return;
317
318 if (context->callback[qual])
319 (*context->callback[qual])(task);
320
321 /* todo: do some more overflow handling */
141} 322}
142static inline void set_bts_interrupt_threshold(char *base, unsigned long value) 323
324
325/*
326 * Allocate a non-pageable buffer of the parameter size.
327 * Checks the memory and the locked memory rlimit.
328 *
329 * Returns the buffer, if successful;
330 * NULL, if out of memory or rlimit exceeded.
331 *
332 * size: the requested buffer size in bytes
333 * pages (out): if not NULL, contains the number of pages reserved
334 */
335static inline void *ds_allocate_buffer(size_t size, unsigned int *pages)
143{ 336{
144 (*(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset)) = value; 337 unsigned long rlim, vm, pgsz;
338 void *buffer;
339
340 pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
341
342 rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
343 vm = current->mm->total_vm + pgsz;
344 if (rlim < vm)
345 return NULL;
346
347 rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
348 vm = current->mm->locked_vm + pgsz;
349 if (rlim < vm)
350 return NULL;
351
352 buffer = kzalloc(size, GFP_KERNEL);
353 if (!buffer)
354 return NULL;
355
356 current->mm->total_vm += pgsz;
357 current->mm->locked_vm += pgsz;
358
359 if (pages)
360 *pages = pgsz;
361
362 return buffer;
145} 363}
146static inline unsigned long get_from_ip(char *base) 364
365static int ds_request(struct task_struct *task, void *base, size_t size,
366 ds_ovfl_callback_t ovfl, enum ds_qualifier qual)
147{ 367{
148 return *(unsigned long *)(base + ds_cfg.from_ip.offset); 368 struct ds_context *context;
369 unsigned long buffer, adj;
370 const unsigned long alignment = (1 << 3);
371 int error = 0;
372
373 if (!ds_cfg.sizeof_ds)
374 return -EOPNOTSUPP;
375
376 /* we require some space to do alignment adjustments below */
377 if (size < (alignment + ds_cfg.sizeof_rec[qual]))
378 return -EINVAL;
379
380 /* buffer overflow notification is not yet implemented */
381 if (ovfl)
382 return -EOPNOTSUPP;
383
384
385 spin_lock(&ds_lock);
386
387 if (!check_tracer(task))
388 return -EPERM;
389
390 error = -ENOMEM;
391 context = ds_alloc_context(task);
392 if (!context)
393 goto out_unlock;
394
395 error = -EALREADY;
396 if (context->owner[qual] == current)
397 goto out_unlock;
398 error = -EPERM;
399 if (context->owner[qual] != NULL)
400 goto out_unlock;
401 context->owner[qual] = current;
402
403 spin_unlock(&ds_lock);
404
405
406 error = -ENOMEM;
407 if (!base) {
408 base = ds_allocate_buffer(size, &context->pages[qual]);
409 if (!base)
410 goto out_release;
411
412 context->buffer[qual] = base;
413 }
414 error = 0;
415
416 context->callback[qual] = ovfl;
417
418 /* adjust the buffer address and size to meet alignment
419 * constraints:
420 * - buffer is double-word aligned
421 * - size is multiple of record size
422 *
423 * We checked the size at the very beginning; we have enough
424 * space to do the adjustment.
425 */
426 buffer = (unsigned long)base;
427
428 adj = ALIGN(buffer, alignment) - buffer;
429 buffer += adj;
430 size -= adj;
431
432 size /= ds_cfg.sizeof_rec[qual];
433 size *= ds_cfg.sizeof_rec[qual];
434
435 ds_set(context->ds, qual, ds_buffer_base, buffer);
436 ds_set(context->ds, qual, ds_index, buffer);
437 ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
438
439 if (ovfl) {
440 /* todo: select a suitable interrupt threshold */
441 } else
442 ds_set(context->ds, qual,
443 ds_interrupt_threshold, buffer + size + 1);
444
445 /* we keep the context until ds_release */
446 return error;
447
448 out_release:
449 context->owner[qual] = NULL;
450 ds_put_context(context);
451 return error;
452
453 out_unlock:
454 spin_unlock(&ds_lock);
455 ds_put_context(context);
456 return error;
149} 457}
150static inline void set_from_ip(char *base, unsigned long value) 458
459int ds_request_bts(struct task_struct *task, void *base, size_t size,
460 ds_ovfl_callback_t ovfl)
151{ 461{
152 (*(unsigned long *)(base + ds_cfg.from_ip.offset)) = value; 462 return ds_request(task, base, size, ovfl, ds_bts);
153} 463}
154static inline unsigned long get_to_ip(char *base) 464
465int ds_request_pebs(struct task_struct *task, void *base, size_t size,
466 ds_ovfl_callback_t ovfl)
155{ 467{
156 return *(unsigned long *)(base + ds_cfg.to_ip.offset); 468 return ds_request(task, base, size, ovfl, ds_pebs);
157} 469}
158static inline void set_to_ip(char *base, unsigned long value) 470
471static int ds_release(struct task_struct *task, enum ds_qualifier qual)
159{ 472{
160 (*(unsigned long *)(base + ds_cfg.to_ip.offset)) = value; 473 struct ds_context *context;
474 int error;
475
476 context = ds_get_context(task);
477 error = ds_validate_access(context, qual);
478 if (error < 0)
479 goto out;
480
481 kfree(context->buffer[qual]);
482 context->buffer[qual] = NULL;
483
484 current->mm->total_vm -= context->pages[qual];
485 current->mm->locked_vm -= context->pages[qual];
486 context->pages[qual] = 0;
487 context->owner[qual] = NULL;
488
489 /*
490 * we put the context twice:
491 * once for the ds_get_context
492 * once for the corresponding ds_request
493 */
494 ds_put_context(context);
495 out:
496 ds_put_context(context);
497 return error;
161} 498}
162static inline unsigned char get_info_type(char *base) 499
500int ds_release_bts(struct task_struct *task)
163{ 501{
164 return *(unsigned char *)(base + ds_cfg.info_type.offset); 502 return ds_release(task, ds_bts);
165} 503}
166static inline void set_info_type(char *base, unsigned char value) 504
505int ds_release_pebs(struct task_struct *task)
167{ 506{
168 (*(unsigned char *)(base + ds_cfg.info_type.offset)) = value; 507 return ds_release(task, ds_pebs);
169} 508}
170static inline unsigned long get_info_data(char *base) 509
510static int ds_get_index(struct task_struct *task, size_t *pos,
511 enum ds_qualifier qual)
171{ 512{
172 return *(unsigned long *)(base + ds_cfg.info_data.offset); 513 struct ds_context *context;
514 unsigned long base, index;
515 int error;
516
517 context = ds_get_context(task);
518 error = ds_validate_access(context, qual);
519 if (error < 0)
520 goto out;
521
522 base = ds_get(context->ds, qual, ds_buffer_base);
523 index = ds_get(context->ds, qual, ds_index);
524
525 error = ((index - base) / ds_cfg.sizeof_rec[qual]);
526 if (pos)
527 *pos = error;
528 out:
529 ds_put_context(context);
530 return error;
173} 531}
174static inline void set_info_data(char *base, unsigned long value) 532
533int ds_get_bts_index(struct task_struct *task, size_t *pos)
175{ 534{
176 (*(unsigned long *)(base + ds_cfg.info_data.offset)) = value; 535 return ds_get_index(task, pos, ds_bts);
177} 536}
178 537
538int ds_get_pebs_index(struct task_struct *task, size_t *pos)
539{
540 return ds_get_index(task, pos, ds_pebs);
541}
179 542
180int ds_allocate(void **dsp, size_t bts_size_in_bytes) 543static int ds_get_end(struct task_struct *task, size_t *pos,
544 enum ds_qualifier qual)
181{ 545{
182 size_t bts_size_in_records; 546 struct ds_context *context;
183 unsigned long bts; 547 unsigned long base, end;
184 void *ds; 548 int error;
549
550 context = ds_get_context(task);
551 error = ds_validate_access(context, qual);
552 if (error < 0)
553 goto out;
554
555 base = ds_get(context->ds, qual, ds_buffer_base);
556 end = ds_get(context->ds, qual, ds_absolute_maximum);
557
558 error = ((end - base) / ds_cfg.sizeof_rec[qual]);
559 if (pos)
560 *pos = error;
561 out:
562 ds_put_context(context);
563 return error;
564}
185 565
186 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) 566int ds_get_bts_end(struct task_struct *task, size_t *pos)
187 return -EOPNOTSUPP; 567{
568 return ds_get_end(task, pos, ds_bts);
569}
188 570
189 if (bts_size_in_bytes < 0) 571int ds_get_pebs_end(struct task_struct *task, size_t *pos)
190 return -EINVAL; 572{
573 return ds_get_end(task, pos, ds_pebs);
574}
191 575
192 bts_size_in_records = 576static int ds_access(struct task_struct *task, size_t index,
193 bts_size_in_bytes / ds_cfg.sizeof_bts; 577 const void **record, enum ds_qualifier qual)
194 bts_size_in_bytes = 578{
195 bts_size_in_records * ds_cfg.sizeof_bts; 579 struct ds_context *context;
580 unsigned long base, idx;
581 int error;
196 582
197 if (bts_size_in_bytes <= 0) 583 if (!record)
198 return -EINVAL; 584 return -EINVAL;
199 585
200 bts = (unsigned long)kzalloc(bts_size_in_bytes, GFP_KERNEL); 586 context = ds_get_context(task);
201 587 error = ds_validate_access(context, qual);
202 if (!bts) 588 if (error < 0)
203 return -ENOMEM; 589 goto out;
204 590
205 ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); 591 base = ds_get(context->ds, qual, ds_buffer_base);
592 idx = base + (index * ds_cfg.sizeof_rec[qual]);
206 593
207 if (!ds) { 594 error = -EINVAL;
208 kfree((void *)bts); 595 if (idx > ds_get(context->ds, qual, ds_absolute_maximum))
209 return -ENOMEM; 596 goto out;
210 }
211
212 set_bts_buffer_base(ds, bts);
213 set_bts_index(ds, bts);
214 set_bts_absolute_maximum(ds, bts + bts_size_in_bytes);
215 set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1);
216 597
217 *dsp = ds; 598 *record = (const void *)idx;
218 return 0; 599 error = ds_cfg.sizeof_rec[qual];
600 out:
601 ds_put_context(context);
602 return error;
219} 603}
220 604
221int ds_free(void **dsp) 605int ds_access_bts(struct task_struct *task, size_t index, const void **record)
222{ 606{
223 if (*dsp) { 607 return ds_access(task, index, record, ds_bts);
224 kfree((void *)get_bts_buffer_base(*dsp));
225 kfree(*dsp);
226 *dsp = NULL;
227 }
228 return 0;
229} 608}
230 609
231int ds_get_bts_size(void *ds) 610int ds_access_pebs(struct task_struct *task, size_t index, const void **record)
232{ 611{
233 int size_in_bytes; 612 return ds_access(task, index, record, ds_pebs);
234
235 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
236 return -EOPNOTSUPP;
237
238 if (!ds)
239 return 0;
240
241 size_in_bytes =
242 get_bts_absolute_maximum(ds) -
243 get_bts_buffer_base(ds);
244 return size_in_bytes;
245} 613}
246 614
247int ds_get_bts_end(void *ds) 615static int ds_write(struct task_struct *task, const void *record, size_t size,
616 enum ds_qualifier qual, int force)
248{ 617{
249 int size_in_bytes = ds_get_bts_size(ds); 618 struct ds_context *context;
250 619 int error;
251 if (size_in_bytes <= 0)
252 return size_in_bytes;
253 620
254 return size_in_bytes / ds_cfg.sizeof_bts; 621 if (!record)
255} 622 return -EINVAL;
256 623
257int ds_get_bts_index(void *ds) 624 error = -EPERM;
258{ 625 context = ds_get_context(task);
259 int index_offset_in_bytes; 626 if (!context)
627 goto out;
260 628
261 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) 629 if (!force) {
262 return -EOPNOTSUPP; 630 error = ds_validate_access(context, qual);
631 if (error < 0)
632 goto out;
633 }
263 634
264 index_offset_in_bytes = 635 error = 0;
265 get_bts_index(ds) - 636 while (size) {
266 get_bts_buffer_base(ds); 637 unsigned long base, index, end, write_end, int_th;
638 unsigned long write_size, adj_write_size;
639
640 /*
641 * write as much as possible without producing an
642 * overflow interrupt.
643 *
644 * interrupt_threshold must either be
645 * - bigger than absolute_maximum or
646 * - point to a record between buffer_base and absolute_maximum
647 *
648 * index points to a valid record.
649 */
650 base = ds_get(context->ds, qual, ds_buffer_base);
651 index = ds_get(context->ds, qual, ds_index);
652 end = ds_get(context->ds, qual, ds_absolute_maximum);
653 int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
654
655 write_end = min(end, int_th);
656
657 /* if we are already beyond the interrupt threshold,
658 * we fill the entire buffer */
659 if (write_end <= index)
660 write_end = end;
661
662 if (write_end <= index)
663 goto out;
664
665 write_size = min((unsigned long) size, write_end - index);
666 memcpy((void *)index, record, write_size);
667
668 record = (const char *)record + write_size;
669 size -= write_size;
670 error += write_size;
671
672 adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
673 adj_write_size *= ds_cfg.sizeof_rec[qual];
674
675 /* zero out trailing bytes */
676 memset((char *)index + write_size, 0,
677 adj_write_size - write_size);
678 index += adj_write_size;
679
680 if (index >= end)
681 index = base;
682 ds_set(context->ds, qual, ds_index, index);
683
684 if (index >= int_th)
685 ds_overflow(task, context, qual);
686 }
267 687
268 return index_offset_in_bytes / ds_cfg.sizeof_bts; 688 out:
689 ds_put_context(context);
690 return error;
269} 691}
270 692
271int ds_set_overflow(void *ds, int method) 693int ds_write_bts(struct task_struct *task, const void *record, size_t size)
272{ 694{
273 switch (method) { 695 return ds_write(task, record, size, ds_bts, /* force = */ 0);
274 case DS_O_SIGNAL:
275 return -EOPNOTSUPP;
276 case DS_O_WRAP:
277 return 0;
278 default:
279 return -EINVAL;
280 }
281} 696}
282 697
283int ds_get_overflow(void *ds) 698int ds_write_pebs(struct task_struct *task, const void *record, size_t size)
284{ 699{
285 return DS_O_WRAP; 700 return ds_write(task, record, size, ds_pebs, /* force = */ 0);
286} 701}
287 702
288int ds_clear(void *ds) 703int ds_unchecked_write_bts(struct task_struct *task,
704 const void *record, size_t size)
289{ 705{
290 int bts_size = ds_get_bts_size(ds); 706 return ds_write(task, record, size, ds_bts, /* force = */ 1);
291 unsigned long bts_base;
292
293 if (bts_size <= 0)
294 return bts_size;
295
296 bts_base = get_bts_buffer_base(ds);
297 memset((void *)bts_base, 0, bts_size);
298
299 set_bts_index(ds, bts_base);
300 return 0;
301} 707}
302 708
303int ds_read_bts(void *ds, int index, struct bts_struct *out) 709int ds_unchecked_write_pebs(struct task_struct *task,
710 const void *record, size_t size)
304{ 711{
305 void *bts; 712 return ds_write(task, record, size, ds_pebs, /* force = */ 1);
713}
306 714
307 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) 715static int ds_reset_or_clear(struct task_struct *task,
308 return -EOPNOTSUPP; 716 enum ds_qualifier qual, int clear)
717{
718 struct ds_context *context;
719 unsigned long base, end;
720 int error;
309 721
310 if (index < 0) 722 context = ds_get_context(task);
311 return -EINVAL; 723 error = ds_validate_access(context, qual);
724 if (error < 0)
725 goto out;
312 726
313 if (index >= ds_get_bts_size(ds)) 727 base = ds_get(context->ds, qual, ds_buffer_base);
314 return -EINVAL; 728 end = ds_get(context->ds, qual, ds_absolute_maximum);
315 729
316 bts = (void *)(get_bts_buffer_base(ds) + (index * ds_cfg.sizeof_bts)); 730 if (clear)
731 memset((void *)base, 0, end - base);
317 732
318 memset(out, 0, sizeof(*out)); 733 ds_set(context->ds, qual, ds_index, base);
319 if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) {
320 out->qualifier = get_info_type(bts);
321 out->variant.jiffies = get_info_data(bts);
322 } else {
323 out->qualifier = BTS_BRANCH;
324 out->variant.lbr.from_ip = get_from_ip(bts);
325 out->variant.lbr.to_ip = get_to_ip(bts);
326 }
327 734
328 return sizeof(*out);; 735 error = 0;
736 out:
737 ds_put_context(context);
738 return error;
329} 739}
330 740
331int ds_write_bts(void *ds, const struct bts_struct *in) 741int ds_reset_bts(struct task_struct *task)
332{ 742{
333 unsigned long bts; 743 return ds_reset_or_clear(task, ds_bts, /* clear = */ 0);
334 744}
335 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
336 return -EOPNOTSUPP;
337
338 if (ds_get_bts_size(ds) <= 0)
339 return -ENXIO;
340 745
341 bts = get_bts_index(ds); 746int ds_reset_pebs(struct task_struct *task)
747{
748 return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0);
749}
342 750
343 memset((void *)bts, 0, ds_cfg.sizeof_bts); 751int ds_clear_bts(struct task_struct *task)
344 switch (in->qualifier) { 752{
345 case BTS_INVALID: 753 return ds_reset_or_clear(task, ds_bts, /* clear = */ 1);
346 break; 754}
347 755
348 case BTS_BRANCH: 756int ds_clear_pebs(struct task_struct *task)
349 set_from_ip((void *)bts, in->variant.lbr.from_ip); 757{
350 set_to_ip((void *)bts, in->variant.lbr.to_ip); 758 return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1);
351 break; 759}
352 760
353 case BTS_TASK_ARRIVES: 761int ds_get_pebs_reset(struct task_struct *task, u64 *value)
354 case BTS_TASK_DEPARTS: 762{
355 set_from_ip((void *)bts, BTS_ESCAPE_ADDRESS); 763 struct ds_context *context;
356 set_info_type((void *)bts, in->qualifier); 764 int error;
357 set_info_data((void *)bts, in->variant.jiffies);
358 break;
359 765
360 default: 766 if (!value)
361 return -EINVAL; 767 return -EINVAL;
362 }
363 768
364 bts = bts + ds_cfg.sizeof_bts; 769 context = ds_get_context(task);
365 if (bts >= get_bts_absolute_maximum(ds)) 770 error = ds_validate_access(context, ds_pebs);
366 bts = get_bts_buffer_base(ds); 771 if (error < 0)
367 set_bts_index(ds, bts); 772 goto out;
368 773
369 return ds_cfg.sizeof_bts; 774 *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8));
775
776 error = 0;
777 out:
778 ds_put_context(context);
779 return error;
370} 780}
371 781
372unsigned long ds_debugctl_mask(void) 782int ds_set_pebs_reset(struct task_struct *task, u64 value)
373{ 783{
374 return ds_cfg.debugctl_mask; 784 struct ds_context *context;
375} 785 int error;
376 786
377#ifdef __i386__ 787 context = ds_get_context(task);
378static const struct ds_configuration ds_cfg_netburst = { 788 error = ds_validate_access(context, ds_pebs);
379 .sizeof_ds = 9 * 4, 789 if (error < 0)
380 .bts_buffer_base = { 0, 4 }, 790 goto out;
381 .bts_index = { 4, 4 },
382 .bts_absolute_maximum = { 8, 4 },
383 .bts_interrupt_threshold = { 12, 4 },
384 .sizeof_bts = 3 * 4,
385 .from_ip = { 0, 4 },
386 .to_ip = { 4, 4 },
387 .info_type = { 4, 1 },
388 .info_data = { 8, 4 },
389 .debugctl_mask = (1<<2)|(1<<3)
390};
391 791
392static const struct ds_configuration ds_cfg_pentium_m = { 792 *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value;
393 .sizeof_ds = 9 * 4, 793
394 .bts_buffer_base = { 0, 4 }, 794 error = 0;
395 .bts_index = { 4, 4 }, 795 out:
396 .bts_absolute_maximum = { 8, 4 }, 796 ds_put_context(context);
397 .bts_interrupt_threshold = { 12, 4 }, 797 return error;
398 .sizeof_bts = 3 * 4, 798}
399 .from_ip = { 0, 4 }, 799
400 .to_ip = { 4, 4 }, 800static const struct ds_configuration ds_cfg_var = {
401 .info_type = { 4, 1 }, 801 .sizeof_ds = sizeof(long) * 12,
402 .info_data = { 8, 4 }, 802 .sizeof_field = sizeof(long),
403 .debugctl_mask = (1<<6)|(1<<7) 803 .sizeof_rec[ds_bts] = sizeof(long) * 3,
804 .sizeof_rec[ds_pebs] = sizeof(long) * 10
404}; 805};
405#endif /* _i386_ */ 806static const struct ds_configuration ds_cfg_64 = {
406 807 .sizeof_ds = 8 * 12,
407static const struct ds_configuration ds_cfg_core2 = { 808 .sizeof_field = 8,
408 .sizeof_ds = 9 * 8, 809 .sizeof_rec[ds_bts] = 8 * 3,
409 .bts_buffer_base = { 0, 8 }, 810 .sizeof_rec[ds_pebs] = 8 * 10
410 .bts_index = { 8, 8 },
411 .bts_absolute_maximum = { 16, 8 },
412 .bts_interrupt_threshold = { 24, 8 },
413 .sizeof_bts = 3 * 8,
414 .from_ip = { 0, 8 },
415 .to_ip = { 8, 8 },
416 .info_type = { 8, 1 },
417 .info_data = { 16, 8 },
418 .debugctl_mask = (1<<6)|(1<<7)|(1<<9)
419}; 811};
420 812
421static inline void 813static inline void
@@ -429,14 +821,13 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
429 switch (c->x86) { 821 switch (c->x86) {
430 case 0x6: 822 case 0x6:
431 switch (c->x86_model) { 823 switch (c->x86_model) {
432#ifdef __i386__
433 case 0xD: 824 case 0xD:
434 case 0xE: /* Pentium M */ 825 case 0xE: /* Pentium M */
435 ds_configure(&ds_cfg_pentium_m); 826 ds_configure(&ds_cfg_var);
436 break; 827 break;
437#endif /* _i386_ */
438 case 0xF: /* Core2 */ 828 case 0xF: /* Core2 */
439 ds_configure(&ds_cfg_core2); 829 case 0x1C: /* Atom */
830 ds_configure(&ds_cfg_64);
440 break; 831 break;
441 default: 832 default:
442 /* sorry, don't know about them */ 833 /* sorry, don't know about them */
@@ -445,13 +836,11 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
445 break; 836 break;
446 case 0xF: 837 case 0xF:
447 switch (c->x86_model) { 838 switch (c->x86_model) {
448#ifdef __i386__
449 case 0x0: 839 case 0x0:
450 case 0x1: 840 case 0x1:
451 case 0x2: /* Netburst */ 841 case 0x2: /* Netburst */
452 ds_configure(&ds_cfg_netburst); 842 ds_configure(&ds_cfg_var);
453 break; 843 break;
454#endif /* _i386_ */
455 default: 844 default:
456 /* sorry, don't know about them */ 845 /* sorry, don't know about them */
457 break; 846 break;
@@ -462,3 +851,14 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
462 break; 851 break;
463 } 852 }
464} 853}
854
855void ds_free(struct ds_context *context)
856{
857 /* This is called when the task owning the parameter context
858 * is dying. There should not be any user of that context left
859 * to disturb us, anymore. */
860 unsigned long leftovers = context->count;
861 while (leftovers--)
862 ds_put_context(context);
863}
864#endif /* CONFIG_X86_DS */
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 9af89078f7bb..66e48aa2dd1b 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1203,7 +1203,7 @@ static int __init parse_memmap_opt(char *p)
1203 if (!p) 1203 if (!p)
1204 return -EINVAL; 1204 return -EINVAL;
1205 1205
1206 if (!strcmp(p, "exactmap")) { 1206 if (!strncmp(p, "exactmap", 8)) {
1207#ifdef CONFIG_CRASH_DUMP 1207#ifdef CONFIG_CRASH_DUMP
1208 /* 1208 /*
1209 * If we are doing a crash dump, we still need to know 1209 * If we are doing a crash dump, we still need to know
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 59fd3b6b1303..73deaffadd03 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -210,8 +210,8 @@ static void hpet_legacy_clockevent_register(void)
210 /* Calculate the min / max delta */ 210 /* Calculate the min / max delta */
211 hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, 211 hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
212 &hpet_clockevent); 212 &hpet_clockevent);
213 hpet_clockevent.min_delta_ns = clockevent_delta2ns(0x30, 213 /* 5 usec minimum reprogramming delta. */
214 &hpet_clockevent); 214 hpet_clockevent.min_delta_ns = 5000;
215 215
216 /* 216 /*
217 * Start hpet with the boot cpu mask and make it 217 * Start hpet with the boot cpu mask and make it
@@ -270,15 +270,22 @@ static void hpet_legacy_set_mode(enum clock_event_mode mode,
270} 270}
271 271
272static int hpet_legacy_next_event(unsigned long delta, 272static int hpet_legacy_next_event(unsigned long delta,
273 struct clock_event_device *evt) 273 struct clock_event_device *evt)
274{ 274{
275 unsigned long cnt; 275 u32 cnt;
276 276
277 cnt = hpet_readl(HPET_COUNTER); 277 cnt = hpet_readl(HPET_COUNTER);
278 cnt += delta; 278 cnt += (u32) delta;
279 hpet_writel(cnt, HPET_T0_CMP); 279 hpet_writel(cnt, HPET_T0_CMP);
280 280
281 return ((long)(hpet_readl(HPET_COUNTER) - cnt ) > 0) ? -ETIME : 0; 281 /*
282 * We need to read back the CMP register to make sure that
283 * what we wrote hit the chip before we compare it to the
284 * counter.
285 */
286 WARN_ON((u32)hpet_readl(HPET_T0_CMP) != cnt);
287
288 return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
282} 289}
283 290
284/* 291/*
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
index 1c3a66a67f83..720d2607aacb 100644
--- a/arch/x86/kernel/io_delay.c
+++ b/arch/x86/kernel/io_delay.c
@@ -92,6 +92,14 @@ static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = {
92 DMI_MATCH(DMI_BOARD_NAME, "30BF") 92 DMI_MATCH(DMI_BOARD_NAME, "30BF")
93 } 93 }
94 }, 94 },
95 {
96 .callback = dmi_io_delay_0xed_port,
97 .ident = "Presario F700",
98 .matches = {
99 DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
100 DMI_MATCH(DMI_BOARD_NAME, "30D3")
101 }
102 },
95 { } 103 { }
96}; 104};
97 105
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 50e5e4a31c85..191914302744 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -14,6 +14,7 @@
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/thread_info.h> 15#include <linux/thread_info.h>
16#include <linux/syscalls.h> 16#include <linux/syscalls.h>
17#include <asm/syscalls.h>
17 18
18/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ 19/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
19static void set_bitmap(unsigned long *bitmap, unsigned int base, 20static void set_bitmap(unsigned long *bitmap, unsigned int base,
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
index 3f7537b669d3..f1c688e46f35 100644
--- a/arch/x86/kernel/ipi.c
+++ b/arch/x86/kernel/ipi.c
@@ -20,6 +20,8 @@
20 20
21#ifdef CONFIG_X86_32 21#ifdef CONFIG_X86_32
22#include <mach_apic.h> 22#include <mach_apic.h>
23#include <mach_ipi.h>
24
23/* 25/*
24 * the following functions deal with sending IPIs between CPUs. 26 * the following functions deal with sending IPIs between CPUs.
25 * 27 *
@@ -147,7 +149,6 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector)
147} 149}
148 150
149/* must come after the send_IPI functions above for inlining */ 151/* must come after the send_IPI functions above for inlining */
150#include <mach_ipi.h>
151static int convert_apicid_to_cpu(int apic_id) 152static int convert_apicid_to_cpu(int apic_id)
152{ 153{
153 int i; 154 int i;
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index f2d43bc75514..ff7d3b0124f1 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -139,6 +139,7 @@ static int __init create_setup_data_nodes(struct dentry *parent)
139 if (PageHighMem(pg)) { 139 if (PageHighMem(pg)) {
140 data = ioremap_cache(pa_data, sizeof(*data)); 140 data = ioremap_cache(pa_data, sizeof(*data));
141 if (!data) { 141 if (!data) {
142 kfree(node);
142 error = -ENXIO; 143 error = -ENXIO;
143 goto err_dir; 144 goto err_dir;
144 } 145 }
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index f47f0eb886b8..8282a2139681 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -69,6 +69,9 @@ static int gdb_x86vector = -1;
69 */ 69 */
70void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) 70void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
71{ 71{
72#ifndef CONFIG_X86_32
73 u32 *gdb_regs32 = (u32 *)gdb_regs;
74#endif
72 gdb_regs[GDB_AX] = regs->ax; 75 gdb_regs[GDB_AX] = regs->ax;
73 gdb_regs[GDB_BX] = regs->bx; 76 gdb_regs[GDB_BX] = regs->bx;
74 gdb_regs[GDB_CX] = regs->cx; 77 gdb_regs[GDB_CX] = regs->cx;
@@ -76,9 +79,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
76 gdb_regs[GDB_SI] = regs->si; 79 gdb_regs[GDB_SI] = regs->si;
77 gdb_regs[GDB_DI] = regs->di; 80 gdb_regs[GDB_DI] = regs->di;
78 gdb_regs[GDB_BP] = regs->bp; 81 gdb_regs[GDB_BP] = regs->bp;
79 gdb_regs[GDB_PS] = regs->flags;
80 gdb_regs[GDB_PC] = regs->ip; 82 gdb_regs[GDB_PC] = regs->ip;
81#ifdef CONFIG_X86_32 83#ifdef CONFIG_X86_32
84 gdb_regs[GDB_PS] = regs->flags;
82 gdb_regs[GDB_DS] = regs->ds; 85 gdb_regs[GDB_DS] = regs->ds;
83 gdb_regs[GDB_ES] = regs->es; 86 gdb_regs[GDB_ES] = regs->es;
84 gdb_regs[GDB_CS] = regs->cs; 87 gdb_regs[GDB_CS] = regs->cs;
@@ -94,6 +97,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
94 gdb_regs[GDB_R13] = regs->r13; 97 gdb_regs[GDB_R13] = regs->r13;
95 gdb_regs[GDB_R14] = regs->r14; 98 gdb_regs[GDB_R14] = regs->r14;
96 gdb_regs[GDB_R15] = regs->r15; 99 gdb_regs[GDB_R15] = regs->r15;
100 gdb_regs32[GDB_PS] = regs->flags;
101 gdb_regs32[GDB_CS] = regs->cs;
102 gdb_regs32[GDB_SS] = regs->ss;
97#endif 103#endif
98 gdb_regs[GDB_SP] = regs->sp; 104 gdb_regs[GDB_SP] = regs->sp;
99} 105}
@@ -112,6 +118,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
112 */ 118 */
113void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) 119void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
114{ 120{
121#ifndef CONFIG_X86_32
122 u32 *gdb_regs32 = (u32 *)gdb_regs;
123#endif
115 gdb_regs[GDB_AX] = 0; 124 gdb_regs[GDB_AX] = 0;
116 gdb_regs[GDB_BX] = 0; 125 gdb_regs[GDB_BX] = 0;
117 gdb_regs[GDB_CX] = 0; 126 gdb_regs[GDB_CX] = 0;
@@ -129,8 +138,10 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
129 gdb_regs[GDB_FS] = 0xFFFF; 138 gdb_regs[GDB_FS] = 0xFFFF;
130 gdb_regs[GDB_GS] = 0xFFFF; 139 gdb_regs[GDB_GS] = 0xFFFF;
131#else 140#else
132 gdb_regs[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); 141 gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8);
133 gdb_regs[GDB_PC] = 0; 142 gdb_regs32[GDB_CS] = __KERNEL_CS;
143 gdb_regs32[GDB_SS] = __KERNEL_DS;
144 gdb_regs[GDB_PC] = p->thread.ip;
134 gdb_regs[GDB_R8] = 0; 145 gdb_regs[GDB_R8] = 0;
135 gdb_regs[GDB_R9] = 0; 146 gdb_regs[GDB_R9] = 0;
136 gdb_regs[GDB_R10] = 0; 147 gdb_regs[GDB_R10] = 0;
@@ -153,6 +164,9 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
153 */ 164 */
154void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) 165void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
155{ 166{
167#ifndef CONFIG_X86_32
168 u32 *gdb_regs32 = (u32 *)gdb_regs;
169#endif
156 regs->ax = gdb_regs[GDB_AX]; 170 regs->ax = gdb_regs[GDB_AX];
157 regs->bx = gdb_regs[GDB_BX]; 171 regs->bx = gdb_regs[GDB_BX];
158 regs->cx = gdb_regs[GDB_CX]; 172 regs->cx = gdb_regs[GDB_CX];
@@ -160,9 +174,9 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
160 regs->si = gdb_regs[GDB_SI]; 174 regs->si = gdb_regs[GDB_SI];
161 regs->di = gdb_regs[GDB_DI]; 175 regs->di = gdb_regs[GDB_DI];
162 regs->bp = gdb_regs[GDB_BP]; 176 regs->bp = gdb_regs[GDB_BP];
163 regs->flags = gdb_regs[GDB_PS];
164 regs->ip = gdb_regs[GDB_PC]; 177 regs->ip = gdb_regs[GDB_PC];
165#ifdef CONFIG_X86_32 178#ifdef CONFIG_X86_32
179 regs->flags = gdb_regs[GDB_PS];
166 regs->ds = gdb_regs[GDB_DS]; 180 regs->ds = gdb_regs[GDB_DS];
167 regs->es = gdb_regs[GDB_ES]; 181 regs->es = gdb_regs[GDB_ES];
168 regs->cs = gdb_regs[GDB_CS]; 182 regs->cs = gdb_regs[GDB_CS];
@@ -175,6 +189,9 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
175 regs->r13 = gdb_regs[GDB_R13]; 189 regs->r13 = gdb_regs[GDB_R13];
176 regs->r14 = gdb_regs[GDB_R14]; 190 regs->r14 = gdb_regs[GDB_R14];
177 regs->r15 = gdb_regs[GDB_R15]; 191 regs->r15 = gdb_regs[GDB_R15];
192 regs->flags = gdb_regs32[GDB_PS];
193 regs->cs = gdb_regs32[GDB_CS];
194 regs->ss = gdb_regs32[GDB_SS];
178#endif 195#endif
179} 196}
180 197
@@ -378,10 +395,8 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
378 if (remcomInBuffer[0] == 's') { 395 if (remcomInBuffer[0] == 's') {
379 linux_regs->flags |= X86_EFLAGS_TF; 396 linux_regs->flags |= X86_EFLAGS_TF;
380 kgdb_single_step = 1; 397 kgdb_single_step = 1;
381 if (kgdb_contthread) { 398 atomic_set(&kgdb_cpu_doing_single_step,
382 atomic_set(&kgdb_cpu_doing_single_step, 399 raw_smp_processor_id());
383 raw_smp_processor_id());
384 }
385 } 400 }
386 401
387 get_debugreg(dr6, 6); 402 get_debugreg(dr6, 6);
@@ -466,9 +481,15 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
466 481
467 case DIE_DEBUG: 482 case DIE_DEBUG:
468 if (atomic_read(&kgdb_cpu_doing_single_step) == 483 if (atomic_read(&kgdb_cpu_doing_single_step) ==
469 raw_smp_processor_id() && 484 raw_smp_processor_id()) {
470 user_mode(regs)) 485 if (user_mode(regs))
471 return single_step_cont(regs, args); 486 return single_step_cont(regs, args);
487 break;
488 } else if (test_thread_flag(TIF_SINGLESTEP))
489 /* This means a user thread is single stepping
490 * a system call which should be ignored
491 */
492 return NOTIFY_DONE;
472 /* fall through */ 493 /* fall through */
473 default: 494 default:
474 if (user_mode(regs)) 495 if (user_mode(regs))
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index b68e21f06f4f..0ed5f939b905 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -18,6 +18,7 @@
18#include <asm/ldt.h> 18#include <asm/ldt.h>
19#include <asm/desc.h> 19#include <asm/desc.h>
20#include <asm/mmu_context.h> 20#include <asm/mmu_context.h>
21#include <asm/syscalls.h>
21 22
22#ifdef CONFIG_SMP 23#ifdef CONFIG_SMP
23static void flush_ldt(void *current_mm) 24static void flush_ldt(void *current_mm)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 7fc4d5b0a6a0..876e91890777 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -246,6 +246,14 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
246 return 1; 246 return 1;
247} 247}
248 248
249static cpumask_t c1e_mask = CPU_MASK_NONE;
250static int c1e_detected;
251
252void c1e_remove_cpu(int cpu)
253{
254 cpu_clear(cpu, c1e_mask);
255}
256
249/* 257/*
250 * C1E aware idle routine. We check for C1E active in the interrupt 258 * C1E aware idle routine. We check for C1E active in the interrupt
251 * pending message MSR. If we detect C1E, then we handle it the same 259 * pending message MSR. If we detect C1E, then we handle it the same
@@ -253,9 +261,6 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
253 */ 261 */
254static void c1e_idle(void) 262static void c1e_idle(void)
255{ 263{
256 static cpumask_t c1e_mask = CPU_MASK_NONE;
257 static int c1e_detected;
258
259 if (need_resched()) 264 if (need_resched())
260 return; 265 return;
261 266
@@ -265,8 +270,10 @@ static void c1e_idle(void)
265 rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); 270 rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
266 if (lo & K8_INTP_C1E_ACTIVE_MASK) { 271 if (lo & K8_INTP_C1E_ACTIVE_MASK) {
267 c1e_detected = 1; 272 c1e_detected = 1;
268 mark_tsc_unstable("TSC halt in C1E"); 273 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
269 printk(KERN_INFO "System has C1E enabled\n"); 274 mark_tsc_unstable("TSC halt in AMD C1E");
275 printk(KERN_INFO "System has AMD C1E enabled\n");
276 set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E);
270 } 277 }
271 } 278 }
272 279
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 3b7a1ddcc0bc..c8609dea443f 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -55,6 +55,9 @@
55#include <asm/tlbflush.h> 55#include <asm/tlbflush.h>
56#include <asm/cpu.h> 56#include <asm/cpu.h>
57#include <asm/kdebug.h> 57#include <asm/kdebug.h>
58#include <asm/idle.h>
59#include <asm/syscalls.h>
60#include <asm/smp.h>
58 61
59asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); 62asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
60 63
@@ -88,6 +91,7 @@ static void cpu_exit_clear(void)
88 cpu_clear(cpu, cpu_callin_map); 91 cpu_clear(cpu, cpu_callin_map);
89 92
90 numa_remove_cpu(cpu); 93 numa_remove_cpu(cpu);
94 c1e_remove_cpu(cpu);
91} 95}
92 96
93/* We don't actually take CPU down, just spin without interrupts. */ 97/* We don't actually take CPU down, just spin without interrupts. */
@@ -275,6 +279,14 @@ void exit_thread(void)
275 tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; 279 tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
276 put_cpu(); 280 put_cpu();
277 } 281 }
282#ifdef CONFIG_X86_DS
283 /* Free any DS contexts that have not been properly released. */
284 if (unlikely(current->thread.ds_ctx)) {
285 /* we clear debugctl to make sure DS is not used. */
286 update_debugctlmsr(0);
287 ds_free(current->thread.ds_ctx);
288 }
289#endif /* CONFIG_X86_DS */
278} 290}
279 291
280void flush_thread(void) 292void flush_thread(void)
@@ -436,6 +448,35 @@ int set_tsc_mode(unsigned int val)
436 return 0; 448 return 0;
437} 449}
438 450
451#ifdef CONFIG_X86_DS
452static int update_debugctl(struct thread_struct *prev,
453 struct thread_struct *next, unsigned long debugctl)
454{
455 unsigned long ds_prev = 0;
456 unsigned long ds_next = 0;
457
458 if (prev->ds_ctx)
459 ds_prev = (unsigned long)prev->ds_ctx->ds;
460 if (next->ds_ctx)
461 ds_next = (unsigned long)next->ds_ctx->ds;
462
463 if (ds_next != ds_prev) {
464 /* we clear debugctl to make sure DS
465 * is not in use when we change it */
466 debugctl = 0;
467 update_debugctlmsr(0);
468 wrmsr(MSR_IA32_DS_AREA, ds_next, 0);
469 }
470 return debugctl;
471}
472#else
473static int update_debugctl(struct thread_struct *prev,
474 struct thread_struct *next, unsigned long debugctl)
475{
476 return debugctl;
477}
478#endif /* CONFIG_X86_DS */
479
439static noinline void 480static noinline void
440__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, 481__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
441 struct tss_struct *tss) 482 struct tss_struct *tss)
@@ -446,14 +487,7 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
446 prev = &prev_p->thread; 487 prev = &prev_p->thread;
447 next = &next_p->thread; 488 next = &next_p->thread;
448 489
449 debugctl = prev->debugctlmsr; 490 debugctl = update_debugctl(prev, next, prev->debugctlmsr);
450 if (next->ds_area_msr != prev->ds_area_msr) {
451 /* we clear debugctl to make sure DS
452 * is not in use when we change it */
453 debugctl = 0;
454 update_debugctlmsr(0);
455 wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0);
456 }
457 491
458 if (next->debugctlmsr != debugctl) 492 if (next->debugctlmsr != debugctl)
459 update_debugctlmsr(next->debugctlmsr); 493 update_debugctlmsr(next->debugctlmsr);
@@ -477,13 +511,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
477 hard_enable_TSC(); 511 hard_enable_TSC();
478 } 512 }
479 513
480#ifdef X86_BTS 514#ifdef CONFIG_X86_PTRACE_BTS
481 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) 515 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
482 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); 516 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
483 517
484 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) 518 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
485 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); 519 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
486#endif 520#endif /* CONFIG_X86_PTRACE_BTS */
487 521
488 522
489 if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { 523 if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 71553b664e2a..79e3e173ab40 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -51,6 +51,7 @@
51#include <asm/proto.h> 51#include <asm/proto.h>
52#include <asm/ia32.h> 52#include <asm/ia32.h>
53#include <asm/idle.h> 53#include <asm/idle.h>
54#include <asm/syscalls.h>
54 55
55asmlinkage extern void ret_from_fork(void); 56asmlinkage extern void ret_from_fork(void);
56 57
@@ -93,6 +94,8 @@ DECLARE_PER_CPU(int, cpu_state);
93static inline void play_dead(void) 94static inline void play_dead(void)
94{ 95{
95 idle_task_exit(); 96 idle_task_exit();
97 c1e_remove_cpu(raw_smp_processor_id());
98
96 mb(); 99 mb();
97 /* Ack it */ 100 /* Ack it */
98 __get_cpu_var(cpu_state) = CPU_DEAD; 101 __get_cpu_var(cpu_state) = CPU_DEAD;
@@ -238,6 +241,14 @@ void exit_thread(void)
238 t->io_bitmap_max = 0; 241 t->io_bitmap_max = 0;
239 put_cpu(); 242 put_cpu();
240 } 243 }
244#ifdef CONFIG_X86_DS
245 /* Free any DS contexts that have not been properly released. */
246 if (unlikely(t->ds_ctx)) {
247 /* we clear debugctl to make sure DS is not used. */
248 update_debugctlmsr(0);
249 ds_free(t->ds_ctx);
250 }
251#endif /* CONFIG_X86_DS */
241} 252}
242 253
243void flush_thread(void) 254void flush_thread(void)
@@ -471,13 +482,27 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
471 next = &next_p->thread; 482 next = &next_p->thread;
472 483
473 debugctl = prev->debugctlmsr; 484 debugctl = prev->debugctlmsr;
474 if (next->ds_area_msr != prev->ds_area_msr) { 485
475 /* we clear debugctl to make sure DS 486#ifdef CONFIG_X86_DS
476 * is not in use when we change it */ 487 {
477 debugctl = 0; 488 unsigned long ds_prev = 0, ds_next = 0;
478 update_debugctlmsr(0); 489
479 wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr); 490 if (prev->ds_ctx)
491 ds_prev = (unsigned long)prev->ds_ctx->ds;
492 if (next->ds_ctx)
493 ds_next = (unsigned long)next->ds_ctx->ds;
494
495 if (ds_next != ds_prev) {
496 /*
497 * We clear debugctl to make sure DS
498 * is not in use when we change it:
499 */
500 debugctl = 0;
501 update_debugctlmsr(0);
502 wrmsrl(MSR_IA32_DS_AREA, ds_next);
503 }
480 } 504 }
505#endif /* CONFIG_X86_DS */
481 506
482 if (next->debugctlmsr != debugctl) 507 if (next->debugctlmsr != debugctl)
483 update_debugctlmsr(next->debugctlmsr); 508 update_debugctlmsr(next->debugctlmsr);
@@ -515,13 +540,13 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
515 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); 540 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
516 } 541 }
517 542
518#ifdef X86_BTS 543#ifdef CONFIG_X86_PTRACE_BTS
519 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) 544 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
520 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); 545 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
521 546
522 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) 547 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
523 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); 548 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
524#endif 549#endif /* CONFIG_X86_PTRACE_BTS */
525} 550}
526 551
527/* 552/*
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 19a7d2c40560..e375b658efc3 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -70,7 +70,7 @@ static inline bool invalid_selector(u16 value)
70 70
71#define FLAG_MASK FLAG_MASK_32 71#define FLAG_MASK FLAG_MASK_32
72 72
73static long *pt_regs_access(struct pt_regs *regs, unsigned long regno) 73static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
74{ 74{
75 BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); 75 BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
76 regno >>= 2; 76 regno >>= 2;
@@ -555,45 +555,115 @@ static int ptrace_set_debugreg(struct task_struct *child,
555 return 0; 555 return 0;
556} 556}
557 557
558#ifdef X86_BTS 558#ifdef CONFIG_X86_PTRACE_BTS
559/*
560 * The configuration for a particular BTS hardware implementation.
561 */
562struct bts_configuration {
563 /* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */
564 unsigned char sizeof_bts;
565 /* the size of a field in the BTS record in bytes */
566 unsigned char sizeof_field;
567 /* a bitmask to enable/disable BTS in DEBUGCTL MSR */
568 unsigned long debugctl_mask;
569};
570static struct bts_configuration bts_cfg;
571
572#define BTS_MAX_RECORD_SIZE (8 * 3)
573
574
575/*
576 * Branch Trace Store (BTS) uses the following format. Different
577 * architectures vary in the size of those fields.
578 * - source linear address
579 * - destination linear address
580 * - flags
581 *
582 * Later architectures use 64bit pointers throughout, whereas earlier
583 * architectures use 32bit pointers in 32bit mode.
584 *
585 * We compute the base address for the first 8 fields based on:
586 * - the field size stored in the DS configuration
587 * - the relative field position
588 *
589 * In order to store additional information in the BTS buffer, we use
590 * a special source address to indicate that the record requires
591 * special interpretation.
592 *
593 * Netburst indicated via a bit in the flags field whether the branch
594 * was predicted; this is ignored.
595 */
596
597enum bts_field {
598 bts_from = 0,
599 bts_to,
600 bts_flags,
601
602 bts_escape = (unsigned long)-1,
603 bts_qual = bts_to,
604 bts_jiffies = bts_flags
605};
559 606
560static int ptrace_bts_get_size(struct task_struct *child) 607static inline unsigned long bts_get(const char *base, enum bts_field field)
561{ 608{
562 if (!child->thread.ds_area_msr) 609 base += (bts_cfg.sizeof_field * field);
563 return -ENXIO; 610 return *(unsigned long *)base;
611}
612
613static inline void bts_set(char *base, enum bts_field field, unsigned long val)
614{
615 base += (bts_cfg.sizeof_field * field);;
616 (*(unsigned long *)base) = val;
617}
564 618
565 return ds_get_bts_index((void *)child->thread.ds_area_msr); 619/*
620 * Translate a BTS record from the raw format into the bts_struct format
621 *
622 * out (out): bts_struct interpretation
623 * raw: raw BTS record
624 */
625static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw)
626{
627 memset(out, 0, sizeof(*out));
628 if (bts_get(raw, bts_from) == bts_escape) {
629 out->qualifier = bts_get(raw, bts_qual);
630 out->variant.jiffies = bts_get(raw, bts_jiffies);
631 } else {
632 out->qualifier = BTS_BRANCH;
633 out->variant.lbr.from_ip = bts_get(raw, bts_from);
634 out->variant.lbr.to_ip = bts_get(raw, bts_to);
635 }
566} 636}
567 637
568static int ptrace_bts_read_record(struct task_struct *child, 638static int ptrace_bts_read_record(struct task_struct *child, size_t index,
569 long index,
570 struct bts_struct __user *out) 639 struct bts_struct __user *out)
571{ 640{
572 struct bts_struct ret; 641 struct bts_struct ret;
573 int retval; 642 const void *bts_record;
574 int bts_end; 643 size_t bts_index, bts_end;
575 int bts_index; 644 int error;
576 645
577 if (!child->thread.ds_area_msr) 646 error = ds_get_bts_end(child, &bts_end);
578 return -ENXIO; 647 if (error < 0)
648 return error;
579 649
580 if (index < 0)
581 return -EINVAL;
582
583 bts_end = ds_get_bts_end((void *)child->thread.ds_area_msr);
584 if (bts_end <= index) 650 if (bts_end <= index)
585 return -EINVAL; 651 return -EINVAL;
586 652
653 error = ds_get_bts_index(child, &bts_index);
654 if (error < 0)
655 return error;
656
587 /* translate the ptrace bts index into the ds bts index */ 657 /* translate the ptrace bts index into the ds bts index */
588 bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr); 658 bts_index += bts_end - (index + 1);
589 bts_index -= (index + 1); 659 if (bts_end <= bts_index)
590 if (bts_index < 0) 660 bts_index -= bts_end;
591 bts_index += bts_end; 661
662 error = ds_access_bts(child, bts_index, &bts_record);
663 if (error < 0)
664 return error;
592 665
593 retval = ds_read_bts((void *)child->thread.ds_area_msr, 666 ptrace_bts_translate_record(&ret, bts_record);
594 bts_index, &ret);
595 if (retval < 0)
596 return retval;
597 667
598 if (copy_to_user(out, &ret, sizeof(ret))) 668 if (copy_to_user(out, &ret, sizeof(ret)))
599 return -EFAULT; 669 return -EFAULT;
@@ -601,101 +671,106 @@ static int ptrace_bts_read_record(struct task_struct *child,
601 return sizeof(ret); 671 return sizeof(ret);
602} 672}
603 673
604static int ptrace_bts_clear(struct task_struct *child)
605{
606 if (!child->thread.ds_area_msr)
607 return -ENXIO;
608
609 return ds_clear((void *)child->thread.ds_area_msr);
610}
611
612static int ptrace_bts_drain(struct task_struct *child, 674static int ptrace_bts_drain(struct task_struct *child,
613 long size, 675 long size,
614 struct bts_struct __user *out) 676 struct bts_struct __user *out)
615{ 677{
616 int end, i; 678 struct bts_struct ret;
617 void *ds = (void *)child->thread.ds_area_msr; 679 const unsigned char *raw;
618 680 size_t end, i;
619 if (!ds) 681 int error;
620 return -ENXIO;
621 682
622 end = ds_get_bts_index(ds); 683 error = ds_get_bts_index(child, &end);
623 if (end <= 0) 684 if (error < 0)
624 return end; 685 return error;
625 686
626 if (size < (end * sizeof(struct bts_struct))) 687 if (size < (end * sizeof(struct bts_struct)))
627 return -EIO; 688 return -EIO;
628 689
629 for (i = 0; i < end; i++, out++) { 690 error = ds_access_bts(child, 0, (const void **)&raw);
630 struct bts_struct ret; 691 if (error < 0)
631 int retval; 692 return error;
632 693
633 retval = ds_read_bts(ds, i, &ret); 694 for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) {
634 if (retval < 0) 695 ptrace_bts_translate_record(&ret, raw);
635 return retval;
636 696
637 if (copy_to_user(out, &ret, sizeof(ret))) 697 if (copy_to_user(out, &ret, sizeof(ret)))
638 return -EFAULT; 698 return -EFAULT;
639 } 699 }
640 700
641 ds_clear(ds); 701 error = ds_clear_bts(child);
702 if (error < 0)
703 return error;
642 704
643 return end; 705 return end;
644} 706}
645 707
708static void ptrace_bts_ovfl(struct task_struct *child)
709{
710 send_sig(child->thread.bts_ovfl_signal, child, 0);
711}
712
646static int ptrace_bts_config(struct task_struct *child, 713static int ptrace_bts_config(struct task_struct *child,
647 long cfg_size, 714 long cfg_size,
648 const struct ptrace_bts_config __user *ucfg) 715 const struct ptrace_bts_config __user *ucfg)
649{ 716{
650 struct ptrace_bts_config cfg; 717 struct ptrace_bts_config cfg;
651 int bts_size, ret = 0; 718 int error = 0;
652 void *ds; 719
720 error = -EOPNOTSUPP;
721 if (!bts_cfg.sizeof_bts)
722 goto errout;
653 723
724 error = -EIO;
654 if (cfg_size < sizeof(cfg)) 725 if (cfg_size < sizeof(cfg))
655 return -EIO; 726 goto errout;
656 727
728 error = -EFAULT;
657 if (copy_from_user(&cfg, ucfg, sizeof(cfg))) 729 if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
658 return -EFAULT; 730 goto errout;
659 731
660 if ((int)cfg.size < 0) 732 error = -EINVAL;
661 return -EINVAL; 733 if ((cfg.flags & PTRACE_BTS_O_SIGNAL) &&
734 !(cfg.flags & PTRACE_BTS_O_ALLOC))
735 goto errout;
662 736
663 bts_size = 0; 737 if (cfg.flags & PTRACE_BTS_O_ALLOC) {
664 ds = (void *)child->thread.ds_area_msr; 738 ds_ovfl_callback_t ovfl = NULL;
665 if (ds) { 739 unsigned int sig = 0;
666 bts_size = ds_get_bts_size(ds); 740
667 if (bts_size < 0) 741 /* we ignore the error in case we were not tracing child */
668 return bts_size; 742 (void)ds_release_bts(child);
669 } 743
670 cfg.size = PAGE_ALIGN(cfg.size); 744 if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
745 if (!cfg.signal)
746 goto errout;
747
748 sig = cfg.signal;
749 ovfl = ptrace_bts_ovfl;
750 }
671 751
672 if (bts_size != cfg.size) { 752 error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl);
673 ret = ptrace_bts_realloc(child, cfg.size, 753 if (error < 0)
674 cfg.flags & PTRACE_BTS_O_CUT_SIZE);
675 if (ret < 0)
676 goto errout; 754 goto errout;
677 755
678 ds = (void *)child->thread.ds_area_msr; 756 child->thread.bts_ovfl_signal = sig;
679 } 757 }
680 758
681 if (cfg.flags & PTRACE_BTS_O_SIGNAL) 759 error = -EINVAL;
682 ret = ds_set_overflow(ds, DS_O_SIGNAL); 760 if (!child->thread.ds_ctx && cfg.flags)
683 else
684 ret = ds_set_overflow(ds, DS_O_WRAP);
685 if (ret < 0)
686 goto errout; 761 goto errout;
687 762
688 if (cfg.flags & PTRACE_BTS_O_TRACE) 763 if (cfg.flags & PTRACE_BTS_O_TRACE)
689 child->thread.debugctlmsr |= ds_debugctl_mask(); 764 child->thread.debugctlmsr |= bts_cfg.debugctl_mask;
690 else 765 else
691 child->thread.debugctlmsr &= ~ds_debugctl_mask(); 766 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
692 767
693 if (cfg.flags & PTRACE_BTS_O_SCHED) 768 if (cfg.flags & PTRACE_BTS_O_SCHED)
694 set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 769 set_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
695 else 770 else
696 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 771 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
697 772
698 ret = sizeof(cfg); 773 error = sizeof(cfg);
699 774
700out: 775out:
701 if (child->thread.debugctlmsr) 776 if (child->thread.debugctlmsr)
@@ -703,10 +778,10 @@ out:
703 else 778 else
704 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); 779 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
705 780
706 return ret; 781 return error;
707 782
708errout: 783errout:
709 child->thread.debugctlmsr &= ~ds_debugctl_mask(); 784 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
710 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 785 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
711 goto out; 786 goto out;
712} 787}
@@ -715,29 +790,40 @@ static int ptrace_bts_status(struct task_struct *child,
715 long cfg_size, 790 long cfg_size,
716 struct ptrace_bts_config __user *ucfg) 791 struct ptrace_bts_config __user *ucfg)
717{ 792{
718 void *ds = (void *)child->thread.ds_area_msr;
719 struct ptrace_bts_config cfg; 793 struct ptrace_bts_config cfg;
794 size_t end;
795 const void *base, *max;
796 int error;
720 797
721 if (cfg_size < sizeof(cfg)) 798 if (cfg_size < sizeof(cfg))
722 return -EIO; 799 return -EIO;
723 800
724 memset(&cfg, 0, sizeof(cfg)); 801 error = ds_get_bts_end(child, &end);
802 if (error < 0)
803 return error;
725 804
726 if (ds) { 805 error = ds_access_bts(child, /* index = */ 0, &base);
727 cfg.size = ds_get_bts_size(ds); 806 if (error < 0)
807 return error;
728 808
729 if (ds_get_overflow(ds) == DS_O_SIGNAL) 809 error = ds_access_bts(child, /* index = */ end, &max);
730 cfg.flags |= PTRACE_BTS_O_SIGNAL; 810 if (error < 0)
811 return error;
731 812
732 if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && 813 memset(&cfg, 0, sizeof(cfg));
733 child->thread.debugctlmsr & ds_debugctl_mask()) 814 cfg.size = (max - base);
734 cfg.flags |= PTRACE_BTS_O_TRACE; 815 cfg.signal = child->thread.bts_ovfl_signal;
816 cfg.bts_size = sizeof(struct bts_struct);
735 817
736 if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) 818 if (cfg.signal)
737 cfg.flags |= PTRACE_BTS_O_SCHED; 819 cfg.flags |= PTRACE_BTS_O_SIGNAL;
738 }
739 820
740 cfg.bts_size = sizeof(struct bts_struct); 821 if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) &&
822 child->thread.debugctlmsr & bts_cfg.debugctl_mask)
823 cfg.flags |= PTRACE_BTS_O_TRACE;
824
825 if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS))
826 cfg.flags |= PTRACE_BTS_O_SCHED;
741 827
742 if (copy_to_user(ucfg, &cfg, sizeof(cfg))) 828 if (copy_to_user(ucfg, &cfg, sizeof(cfg)))
743 return -EFAULT; 829 return -EFAULT;
@@ -745,89 +831,38 @@ static int ptrace_bts_status(struct task_struct *child,
745 return sizeof(cfg); 831 return sizeof(cfg);
746} 832}
747 833
748
749static int ptrace_bts_write_record(struct task_struct *child, 834static int ptrace_bts_write_record(struct task_struct *child,
750 const struct bts_struct *in) 835 const struct bts_struct *in)
751{ 836{
752 int retval; 837 unsigned char bts_record[BTS_MAX_RECORD_SIZE];
753 838
754 if (!child->thread.ds_area_msr) 839 BUG_ON(BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts);
755 return -ENXIO;
756 840
757 retval = ds_write_bts((void *)child->thread.ds_area_msr, in); 841 memset(bts_record, 0, bts_cfg.sizeof_bts);
758 if (retval) 842 switch (in->qualifier) {
759 return retval; 843 case BTS_INVALID:
844 break;
760 845
761 return sizeof(*in); 846 case BTS_BRANCH:
762} 847 bts_set(bts_record, bts_from, in->variant.lbr.from_ip);
848 bts_set(bts_record, bts_to, in->variant.lbr.to_ip);
849 break;
763 850
764static int ptrace_bts_realloc(struct task_struct *child, 851 case BTS_TASK_ARRIVES:
765 int size, int reduce_size) 852 case BTS_TASK_DEPARTS:
766{ 853 bts_set(bts_record, bts_from, bts_escape);
767 unsigned long rlim, vm; 854 bts_set(bts_record, bts_qual, in->qualifier);
768 int ret, old_size; 855 bts_set(bts_record, bts_jiffies, in->variant.jiffies);
856 break;
769 857
770 if (size < 0) 858 default:
771 return -EINVAL; 859 return -EINVAL;
772
773 old_size = ds_get_bts_size((void *)child->thread.ds_area_msr);
774 if (old_size < 0)
775 return old_size;
776
777 ret = ds_free((void **)&child->thread.ds_area_msr);
778 if (ret < 0)
779 goto out;
780
781 size >>= PAGE_SHIFT;
782 old_size >>= PAGE_SHIFT;
783
784 current->mm->total_vm -= old_size;
785 current->mm->locked_vm -= old_size;
786
787 if (size == 0)
788 goto out;
789
790 rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
791 vm = current->mm->total_vm + size;
792 if (rlim < vm) {
793 ret = -ENOMEM;
794
795 if (!reduce_size)
796 goto out;
797
798 size = rlim - current->mm->total_vm;
799 if (size <= 0)
800 goto out;
801 } 860 }
802 861
803 rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; 862 /* The writing task will be the switched-to task on a context
804 vm = current->mm->locked_vm + size; 863 * switch. It needs to write into the switched-from task's BTS
805 if (rlim < vm) { 864 * buffer. */
806 ret = -ENOMEM; 865 return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts);
807
808 if (!reduce_size)
809 goto out;
810
811 size = rlim - current->mm->locked_vm;
812 if (size <= 0)
813 goto out;
814 }
815
816 ret = ds_allocate((void **)&child->thread.ds_area_msr,
817 size << PAGE_SHIFT);
818 if (ret < 0)
819 goto out;
820
821 current->mm->total_vm += size;
822 current->mm->locked_vm += size;
823
824out:
825 if (child->thread.ds_area_msr)
826 set_tsk_thread_flag(child, TIF_DS_AREA_MSR);
827 else
828 clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
829
830 return ret;
831} 866}
832 867
833void ptrace_bts_take_timestamp(struct task_struct *tsk, 868void ptrace_bts_take_timestamp(struct task_struct *tsk,
@@ -840,7 +875,66 @@ void ptrace_bts_take_timestamp(struct task_struct *tsk,
840 875
841 ptrace_bts_write_record(tsk, &rec); 876 ptrace_bts_write_record(tsk, &rec);
842} 877}
843#endif /* X86_BTS */ 878
879static const struct bts_configuration bts_cfg_netburst = {
880 .sizeof_bts = sizeof(long) * 3,
881 .sizeof_field = sizeof(long),
882 .debugctl_mask = (1<<2)|(1<<3)|(1<<5)
883};
884
885static const struct bts_configuration bts_cfg_pentium_m = {
886 .sizeof_bts = sizeof(long) * 3,
887 .sizeof_field = sizeof(long),
888 .debugctl_mask = (1<<6)|(1<<7)
889};
890
891static const struct bts_configuration bts_cfg_core2 = {
892 .sizeof_bts = 8 * 3,
893 .sizeof_field = 8,
894 .debugctl_mask = (1<<6)|(1<<7)|(1<<9)
895};
896
897static inline void bts_configure(const struct bts_configuration *cfg)
898{
899 bts_cfg = *cfg;
900}
901
902void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c)
903{
904 switch (c->x86) {
905 case 0x6:
906 switch (c->x86_model) {
907 case 0xD:
908 case 0xE: /* Pentium M */
909 bts_configure(&bts_cfg_pentium_m);
910 break;
911 case 0xF: /* Core2 */
912 case 0x1C: /* Atom */
913 bts_configure(&bts_cfg_core2);
914 break;
915 default:
916 /* sorry, don't know about them */
917 break;
918 }
919 break;
920 case 0xF:
921 switch (c->x86_model) {
922 case 0x0:
923 case 0x1:
924 case 0x2: /* Netburst */
925 bts_configure(&bts_cfg_netburst);
926 break;
927 default:
928 /* sorry, don't know about them */
929 break;
930 }
931 break;
932 default:
933 /* sorry, don't know about them */
934 break;
935 }
936}
937#endif /* CONFIG_X86_PTRACE_BTS */
844 938
845/* 939/*
846 * Called by kernel/ptrace.c when detaching.. 940 * Called by kernel/ptrace.c when detaching..
@@ -853,15 +947,15 @@ void ptrace_disable(struct task_struct *child)
853#ifdef TIF_SYSCALL_EMU 947#ifdef TIF_SYSCALL_EMU
854 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); 948 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
855#endif 949#endif
856 if (child->thread.ds_area_msr) { 950#ifdef CONFIG_X86_PTRACE_BTS
857#ifdef X86_BTS 951 (void)ds_release_bts(child);
858 ptrace_bts_realloc(child, 0, 0); 952
859#endif 953 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
860 child->thread.debugctlmsr &= ~ds_debugctl_mask(); 954 if (!child->thread.debugctlmsr)
861 if (!child->thread.debugctlmsr) 955 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
862 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); 956
863 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 957 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
864 } 958#endif /* CONFIG_X86_PTRACE_BTS */
865} 959}
866 960
867#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION 961#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@@ -981,7 +1075,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
981 /* 1075 /*
982 * These bits need more cooking - not enabled yet: 1076 * These bits need more cooking - not enabled yet:
983 */ 1077 */
984#ifdef X86_BTS 1078#ifdef CONFIG_X86_PTRACE_BTS
985 case PTRACE_BTS_CONFIG: 1079 case PTRACE_BTS_CONFIG:
986 ret = ptrace_bts_config 1080 ret = ptrace_bts_config
987 (child, data, (struct ptrace_bts_config __user *)addr); 1081 (child, data, (struct ptrace_bts_config __user *)addr);
@@ -993,7 +1087,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
993 break; 1087 break;
994 1088
995 case PTRACE_BTS_SIZE: 1089 case PTRACE_BTS_SIZE:
996 ret = ptrace_bts_get_size(child); 1090 ret = ds_get_bts_index(child, /* pos = */ NULL);
997 break; 1091 break;
998 1092
999 case PTRACE_BTS_GET: 1093 case PTRACE_BTS_GET:
@@ -1002,14 +1096,14 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
1002 break; 1096 break;
1003 1097
1004 case PTRACE_BTS_CLEAR: 1098 case PTRACE_BTS_CLEAR:
1005 ret = ptrace_bts_clear(child); 1099 ret = ds_clear_bts(child);
1006 break; 1100 break;
1007 1101
1008 case PTRACE_BTS_DRAIN: 1102 case PTRACE_BTS_DRAIN:
1009 ret = ptrace_bts_drain 1103 ret = ptrace_bts_drain
1010 (child, data, (struct bts_struct __user *) addr); 1104 (child, data, (struct bts_struct __user *) addr);
1011 break; 1105 break;
1012#endif 1106#endif /* CONFIG_X86_PTRACE_BTS */
1013 1107
1014 default: 1108 default:
1015 ret = ptrace_request(child, request, addr, data); 1109 ret = ptrace_request(child, request, addr, data);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 362d4e7f2d38..9838f2539dfc 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -670,6 +670,10 @@ void __init setup_arch(char **cmdline_p)
670 670
671 parse_early_param(); 671 parse_early_param();
672 672
673#ifdef CONFIG_X86_64
674 check_efer();
675#endif
676
673#if defined(CONFIG_VMI) && defined(CONFIG_X86_32) 677#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
674 /* 678 /*
675 * Must be before kernel pagetables are setup 679 * Must be before kernel pagetables are setup
@@ -738,7 +742,6 @@ void __init setup_arch(char **cmdline_p)
738#else 742#else
739 num_physpages = max_pfn; 743 num_physpages = max_pfn;
740 744
741 check_efer();
742 745
743 /* How many end-of-memory variables you have, grandma! */ 746 /* How many end-of-memory variables you have, grandma! */
744 /* need this before calling reserve_initrd */ 747 /* need this before calling reserve_initrd */
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 4445d26efd47..2a2435d3037d 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -27,6 +27,7 @@
27#include <asm/uaccess.h> 27#include <asm/uaccess.h>
28#include <asm/i387.h> 28#include <asm/i387.h>
29#include <asm/vdso.h> 29#include <asm/vdso.h>
30#include <asm/syscalls.h>
30 31
31#include "sigframe.h" 32#include "sigframe.h"
32 33
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 1e1933892b4f..4d32487805ef 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -28,6 +28,7 @@
28#include <asm/ia32_unistd.h> 28#include <asm/ia32_unistd.h>
29#include <asm/mce.h> 29#include <asm/mce.h>
30#include <asm/syscall.h> 30#include <asm/syscall.h>
31#include <asm/syscalls.h>
31#include "sigframe.h" 32#include "sigframe.h"
32 33
33#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) 34#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7985c5b3f916..a66c93550a0d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -88,7 +88,7 @@ static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
88#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x)) 88#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x))
89#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p)) 89#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p))
90#else 90#else
91struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; 91static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
92#define get_idle_for_cpu(x) (idle_thread_array[(x)]) 92#define get_idle_for_cpu(x) (idle_thread_array[(x)])
93#define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p)) 93#define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p))
94#endif 94#endif
@@ -129,7 +129,7 @@ static int boot_cpu_logical_apicid;
129static cpumask_t cpu_sibling_setup_map; 129static cpumask_t cpu_sibling_setup_map;
130 130
131/* Set if we find a B stepping CPU */ 131/* Set if we find a B stepping CPU */
132int __cpuinitdata smp_b_stepping; 132static int __cpuinitdata smp_b_stepping;
133 133
134#if defined(CONFIG_NUMA) && defined(CONFIG_X86_32) 134#if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
135 135
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index 7066cb855a60..1884a8d12bfa 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -22,6 +22,8 @@
22#include <linux/uaccess.h> 22#include <linux/uaccess.h>
23#include <linux/unistd.h> 23#include <linux/unistd.h>
24 24
25#include <asm/syscalls.h>
26
25asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, 27asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
26 unsigned long prot, unsigned long flags, 28 unsigned long prot, unsigned long flags,
27 unsigned long fd, unsigned long pgoff) 29 unsigned long fd, unsigned long pgoff)
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 3b360ef33817..c9288c883e20 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -16,6 +16,7 @@
16 16
17#include <asm/uaccess.h> 17#include <asm/uaccess.h>
18#include <asm/ia32.h> 18#include <asm/ia32.h>
19#include <asm/syscalls.h>
19 20
20asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, 21asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags,
21 unsigned long fd, unsigned long off) 22 unsigned long fd, unsigned long off)
diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c
index 170d43c17487..3d1be4f0fac5 100644
--- a/arch/x86/kernel/syscall_64.c
+++ b/arch/x86/kernel/syscall_64.c
@@ -8,12 +8,12 @@
8#define __NO_STUBS 8#define __NO_STUBS
9 9
10#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; 10#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ;
11#undef _ASM_X86_64_UNISTD_H_ 11#undef ASM_X86__UNISTD_64_H
12#include <asm/unistd_64.h> 12#include <asm/unistd_64.h>
13 13
14#undef __SYSCALL 14#undef __SYSCALL
15#define __SYSCALL(nr, sym) [nr] = sym, 15#define __SYSCALL(nr, sym) [nr] = sym,
16#undef _ASM_X86_64_UNISTD_H_ 16#undef ASM_X86__UNISTD_64_H
17 17
18typedef void (*sys_call_ptr_t)(void); 18typedef void (*sys_call_ptr_t)(void);
19 19
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index ffe3c664afc0..bbecf8b6bf96 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -36,6 +36,7 @@
36#include <asm/arch_hooks.h> 36#include <asm/arch_hooks.h>
37#include <asm/hpet.h> 37#include <asm/hpet.h>
38#include <asm/time.h> 38#include <asm/time.h>
39#include <asm/timer.h>
39 40
40#include "do_timer.h" 41#include "do_timer.h"
41 42
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index ab6bf375a307..6bb7b8579e70 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -10,6 +10,7 @@
10#include <asm/ldt.h> 10#include <asm/ldt.h>
11#include <asm/processor.h> 11#include <asm/processor.h>
12#include <asm/proto.h> 12#include <asm/proto.h>
13#include <asm/syscalls.h>
13 14
14#include "tls.h" 15#include "tls.h"
15 16
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 38f566fa27d2..4eeb5cf9720d 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -46,6 +46,7 @@
46#include <asm/io.h> 46#include <asm/io.h>
47#include <asm/tlbflush.h> 47#include <asm/tlbflush.h>
48#include <asm/irq.h> 48#include <asm/irq.h>
49#include <asm/syscalls.h>
49 50
50/* 51/*
51 * Known problems: 52 * Known problems:
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 6ca515d6db54..edfb09f30479 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -235,7 +235,7 @@ static void vmi_write_ldt_entry(struct desc_struct *dt, int entry,
235 const void *desc) 235 const void *desc)
236{ 236{
237 u32 *ldt_entry = (u32 *)desc; 237 u32 *ldt_entry = (u32 *)desc;
238 vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[1]); 238 vmi_ops.write_ldt_entry(dt, entry, ldt_entry[0], ldt_entry[1]);
239} 239}
240 240
241static void vmi_load_sp0(struct tss_struct *tss, 241static void vmi_load_sp0(struct tss_struct *tss,
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index 0c029e8959c7..7766d36983fc 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -61,7 +61,7 @@ static void vsmp_irq_enable(void)
61 native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); 61 native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC));
62} 62}
63 63
64static unsigned __init vsmp_patch(u8 type, u16 clobbers, void *ibuf, 64static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf,
65 unsigned long addr, unsigned len) 65 unsigned long addr, unsigned len)
66{ 66{
67 switch (type) { 67 switch (type) {
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 0bfe2bd305eb..3da2508eb22a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -711,6 +711,10 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
711 u64 *spte; 711 u64 *spte;
712 int young = 0; 712 int young = 0;
713 713
714 /* always return old for EPT */
715 if (!shadow_accessed_mask)
716 return 0;
717
714 spte = rmap_next(kvm, rmapp, NULL); 718 spte = rmap_next(kvm, rmapp, NULL);
715 while (spte) { 719 while (spte) {
716 int _young; 720 int _young;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e2ee264740c7..8233b86c778c 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -62,6 +62,7 @@ static int npt = 1;
62module_param(npt, int, S_IRUGO); 62module_param(npt, int, S_IRUGO);
63 63
64static void kvm_reput_irq(struct vcpu_svm *svm); 64static void kvm_reput_irq(struct vcpu_svm *svm);
65static void svm_flush_tlb(struct kvm_vcpu *vcpu);
65 66
66static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) 67static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
67{ 68{
@@ -878,6 +879,10 @@ set:
878static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 879static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
879{ 880{
880 unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE; 881 unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE;
882 unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
883
884 if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
885 force_new_asid(vcpu);
881 886
882 vcpu->arch.cr4 = cr4; 887 vcpu->arch.cr4 = cr4;
883 if (!npt_enabled) 888 if (!npt_enabled)
@@ -1027,6 +1032,13 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1027 KVMTRACE_3D(TDP_FAULT, &svm->vcpu, error_code, 1032 KVMTRACE_3D(TDP_FAULT, &svm->vcpu, error_code,
1028 (u32)fault_address, (u32)(fault_address >> 32), 1033 (u32)fault_address, (u32)(fault_address >> 32),
1029 handler); 1034 handler);
1035 /*
1036 * FIXME: This shouldn't be necessary here, but there is a flush
1037 * missing in the MMU code. Until we find this bug, flush the
1038 * complete TLB here on an NPF
1039 */
1040 if (npt_enabled)
1041 svm_flush_tlb(&svm->vcpu);
1030 1042
1031 if (event_injection) 1043 if (event_injection)
1032 kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); 1044 kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2a69773e3b26..7041cc52b562 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3301,8 +3301,7 @@ static int __init vmx_init(void)
3301 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | 3301 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
3302 VMX_EPT_WRITABLE_MASK | 3302 VMX_EPT_WRITABLE_MASK |
3303 VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); 3303 VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
3304 kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK, 3304 kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
3305 VMX_EPT_FAKE_DIRTY_MASK, 0ull,
3306 VMX_EPT_EXECUTABLE_MASK); 3305 VMX_EPT_EXECUTABLE_MASK);
3307 kvm_enable_tdp(); 3306 kvm_enable_tdp();
3308 } else 3307 } else
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index 425a13436b3f..23e8373507ad 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -370,8 +370,6 @@ enum vmcs_field {
370#define VMX_EPT_READABLE_MASK 0x1ull 370#define VMX_EPT_READABLE_MASK 0x1ull
371#define VMX_EPT_WRITABLE_MASK 0x2ull 371#define VMX_EPT_WRITABLE_MASK 0x2ull
372#define VMX_EPT_EXECUTABLE_MASK 0x4ull 372#define VMX_EPT_EXECUTABLE_MASK 0x4ull
373#define VMX_EPT_FAKE_ACCESSED_MASK (1ull << 62)
374#define VMX_EPT_FAKE_DIRTY_MASK (1ull << 63)
375 373
376#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul 374#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
377 375
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c
index 3d317836be9e..3f2cf11f201a 100644
--- a/arch/x86/mach-default/setup.c
+++ b/arch/x86/mach-default/setup.c
@@ -10,13 +10,15 @@
10#include <asm/e820.h> 10#include <asm/e820.h>
11#include <asm/setup.h> 11#include <asm/setup.h>
12 12
13#include <mach_ipi.h>
14
13#ifdef CONFIG_HOTPLUG_CPU 15#ifdef CONFIG_HOTPLUG_CPU
14#define DEFAULT_SEND_IPI (1) 16#define DEFAULT_SEND_IPI (1)
15#else 17#else
16#define DEFAULT_SEND_IPI (0) 18#define DEFAULT_SEND_IPI (0)
17#endif 19#endif
18 20
19int no_broadcast=DEFAULT_SEND_IPI; 21int no_broadcast = DEFAULT_SEND_IPI;
20 22
21/** 23/**
22 * pre_intr_init_hook - initialisation prior to setting up interrupt vectors 24 * pre_intr_init_hook - initialisation prior to setting up interrupt vectors
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 455f3fe67b42..8f92cac4e6db 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -35,6 +35,7 @@
35#include <asm/tlbflush.h> 35#include <asm/tlbflush.h>
36#include <asm/proto.h> 36#include <asm/proto.h>
37#include <asm-generic/sections.h> 37#include <asm-generic/sections.h>
38#include <asm/traps.h>
38 39
39/* 40/*
40 * Page fault error code bits 41 * Page fault error code bits
@@ -357,8 +358,6 @@ static int is_errata100(struct pt_regs *regs, unsigned long address)
357 return 0; 358 return 0;
358} 359}
359 360
360void do_invalid_op(struct pt_regs *, unsigned long);
361
362static int is_f00f_bug(struct pt_regs *regs, unsigned long address) 361static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
363{ 362{
364#ifdef CONFIG_X86_F00F_BUG 363#ifdef CONFIG_X86_F00F_BUG
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index d37f29376b0c..6b9a9358b330 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -47,6 +47,7 @@
47#include <asm/paravirt.h> 47#include <asm/paravirt.h>
48#include <asm/setup.h> 48#include <asm/setup.h>
49#include <asm/cacheflush.h> 49#include <asm/cacheflush.h>
50#include <asm/smp.h>
50 51
51unsigned int __VMALLOC_RESERVE = 128 << 20; 52unsigned int __VMALLOC_RESERVE = 128 << 20;
52 53
@@ -458,11 +459,7 @@ static void __init pagetable_init(void)
458{ 459{
459 pgd_t *pgd_base = swapper_pg_dir; 460 pgd_t *pgd_base = swapper_pg_dir;
460 461
461 paravirt_pagetable_setup_start(pgd_base);
462
463 permanent_kmaps_init(pgd_base); 462 permanent_kmaps_init(pgd_base);
464
465 paravirt_pagetable_setup_done(pgd_base);
466} 463}
467 464
468#ifdef CONFIG_ACPI_SLEEP 465#ifdef CONFIG_ACPI_SLEEP
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index d4b6e6a29ae3..cac6da54203b 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -421,7 +421,7 @@ void unxlate_dev_mem_ptr(unsigned long phys, void *addr)
421 return; 421 return;
422} 422}
423 423
424int __initdata early_ioremap_debug; 424static int __initdata early_ioremap_debug;
425 425
426static int __init early_ioremap_debug_setup(char *str) 426static int __init early_ioremap_debug_setup(char *str)
427{ 427{
@@ -547,7 +547,7 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx)
547} 547}
548 548
549 549
550int __initdata early_ioremap_nested; 550static int __initdata early_ioremap_nested;
551 551
552static int __init check_early_ioremap_leak(void) 552static int __init check_early_ioremap_leak(void)
553{ 553{
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 0227694f7dab..8a5f1614a3d5 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -295,10 +295,12 @@ static void nmi_cpu_shutdown(void *dummy)
295 295
296static void nmi_shutdown(void) 296static void nmi_shutdown(void)
297{ 297{
298 struct op_msrs *msrs = &get_cpu_var(cpu_msrs); 298 struct op_msrs *msrs;
299
299 nmi_enabled = 0; 300 nmi_enabled = 0;
300 on_each_cpu(nmi_cpu_shutdown, NULL, 1); 301 on_each_cpu(nmi_cpu_shutdown, NULL, 1);
301 unregister_die_notifier(&profile_exceptions_nb); 302 unregister_die_notifier(&profile_exceptions_nb);
303 msrs = &get_cpu_var(cpu_msrs);
302 model->shutdown(msrs); 304 model->shutdown(msrs);
303 free_msrs(); 305 free_msrs();
304 put_cpu_var(cpu_msrs); 306 put_cpu_var(cpu_msrs);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 9ff6e3cbf08f..a4e201b47f64 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1324,7 +1324,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
1324 .ptep_modify_prot_commit = __ptep_modify_prot_commit, 1324 .ptep_modify_prot_commit = __ptep_modify_prot_commit,
1325 1325
1326 .pte_val = xen_pte_val, 1326 .pte_val = xen_pte_val,
1327 .pte_flags = native_pte_val, 1327 .pte_flags = native_pte_flags,
1328 .pgd_val = xen_pgd_val, 1328 .pgd_val = xen_pgd_val,
1329 1329
1330 .make_pte = xen_make_pte, 1330 .make_pte = xen_make_pte,
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index b6acc3a0af46..d67901083888 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -42,7 +42,7 @@ char * __init xen_memory_setup(void)
42 42
43 e820.nr_map = 0; 43 e820.nr_map = 0;
44 44
45 e820_add_region(0, PFN_PHYS(max_pfn), E820_RAM); 45 e820_add_region(0, PFN_PHYS((u64)max_pfn), E820_RAM);
46 46
47 /* 47 /*
48 * Even though this is normal, usable memory under Xen, reserve 48 * Even though this is normal, usable memory under Xen, reserve