aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/acpi/boot.c16
-rw-r--r--arch/x86/kernel/apic_64.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/longrun.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c15
-rw-r--r--arch/x86/kernel/entry_32.S1
-rw-r--r--arch/x86/kernel/geode_32.c5
-rw-r--r--arch/x86/kernel/head_32.S2
-rw-r--r--arch/x86/kernel/i387.c44
-rw-r--r--arch/x86/kernel/init_task.c1
-rw-r--r--arch/x86/kernel/io_apic_32.c12
-rw-r--r--arch/x86/kernel/kvmclock.c93
-rw-r--r--arch/x86/kernel/mfgpt_32.c2
-rw-r--r--arch/x86/kernel/nmi_32.c9
-rw-r--r--arch/x86/kernel/pci-dma.c14
-rw-r--r--arch/x86/kernel/pci-gart_64.c31
-rw-r--r--arch/x86/kernel/process_32.c6
-rw-r--r--arch/x86/kernel/process_64.c6
-rw-r--r--arch/x86/kernel/pvclock.c141
-rw-r--r--arch/x86/kernel/rtc.c34
-rw-r--r--arch/x86/kernel/setup_32.c10
-rw-r--r--arch/x86/kernel/smpboot.c5
-rw-r--r--arch/x86/kernel/traps_32.c1
-rw-r--r--arch/x86/kernel/tsc_32.c23
-rw-r--r--arch/x86/kernel/tsc_64.c5
25 files changed, 360 insertions, 121 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5e618c3b4720..77807d4769c9 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -82,6 +82,7 @@ obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
82obj-$(CONFIG_KVM_GUEST) += kvm.o 82obj-$(CONFIG_KVM_GUEST) += kvm.o
83obj-$(CONFIG_KVM_CLOCK) += kvmclock.o 83obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
84obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o 84obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
85obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
85 86
86obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o 87obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
87 88
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index c49ebcc6c41e..33c5216fd3e1 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -242,12 +242,19 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
242 242
243static void __cpuinit acpi_register_lapic(int id, u8 enabled) 243static void __cpuinit acpi_register_lapic(int id, u8 enabled)
244{ 244{
245 unsigned int ver = 0;
246
245 if (!enabled) { 247 if (!enabled) {
246 ++disabled_cpus; 248 ++disabled_cpus;
247 return; 249 return;
248 } 250 }
249 251
250 generic_processor_info(id, 0); 252#ifdef CONFIG_X86_32
253 if (boot_cpu_physical_apicid != -1U)
254 ver = apic_version[boot_cpu_physical_apicid];
255#endif
256
257 generic_processor_info(id, ver);
251} 258}
252 259
253static int __init 260static int __init
@@ -767,8 +774,13 @@ static void __init acpi_register_lapic_address(unsigned long address)
767 mp_lapic_addr = address; 774 mp_lapic_addr = address;
768 775
769 set_fixmap_nocache(FIX_APIC_BASE, address); 776 set_fixmap_nocache(FIX_APIC_BASE, address);
770 if (boot_cpu_physical_apicid == -1U) 777 if (boot_cpu_physical_apicid == -1U) {
771 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 778 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
779#ifdef CONFIG_X86_32
780 apic_version[boot_cpu_physical_apicid] =
781 GET_APIC_VERSION(apic_read(APIC_LVR));
782#endif
783 }
772} 784}
773 785
774static int __init early_acpi_parse_madt_lapic_addr_ovr(void) 786static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 5910020c3f24..0633cfd0dc29 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -534,7 +534,7 @@ int setup_profiling_timer(unsigned int multiplier)
534 */ 534 */
535void clear_local_APIC(void) 535void clear_local_APIC(void)
536{ 536{
537 int maxlvt = lapic_get_maxlvt(); 537 int maxlvt;
538 u32 v; 538 u32 v;
539 539
540 /* APIC hasn't been mapped yet */ 540 /* APIC hasn't been mapped yet */
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c
index af4a867a097c..777a7ff075de 100644
--- a/arch/x86/kernel/cpu/cpufreq/longrun.c
+++ b/arch/x86/kernel/cpu/cpufreq/longrun.c
@@ -245,7 +245,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq,
245 if ((ecx > 95) || (ecx == 0) || (eax < ebx)) 245 if ((ecx > 95) || (ecx == 0) || (eax < ebx))
246 return -EIO; 246 return -EIO;
247 247
248 edx = (eax - ebx) / (100 - ecx); 248 edx = ((eax - ebx) * 100) / (100 - ecx);
249 *low_freq = edx * 1000; /* back to kHz */ 249 *low_freq = edx * 1000; /* back to kHz */
250 250
251 dprintk("low frequency is %u kHz\n", *low_freq); 251 dprintk("low frequency is %u kHz\n", *low_freq);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 46d4034d9f37..206791eb46e3 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1127,12 +1127,23 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1127 * an UP version, and is deprecated by AMD. 1127 * an UP version, and is deprecated by AMD.
1128 */ 1128 */
1129 if (num_online_cpus() != 1) { 1129 if (num_online_cpus() != 1) {
1130 printk(KERN_ERR PFX "MP systems not supported by PSB BIOS structure\n"); 1130#ifndef CONFIG_ACPI_PROCESSOR
1131 printk(KERN_ERR PFX "ACPI Processor support is required "
1132 "for SMP systems but is absent. Please load the "
1133 "ACPI Processor module before starting this "
1134 "driver.\n");
1135#else
1136 printk(KERN_ERR PFX "Your BIOS does not provide ACPI "
1137 "_PSS objects in a way that Linux understands. "
1138 "Please report this to the Linux ACPI maintainers"
1139 " and complain to your BIOS vendor.\n");
1140#endif
1131 kfree(data); 1141 kfree(data);
1132 return -ENODEV; 1142 return -ENODEV;
1133 } 1143 }
1134 if (pol->cpu != 0) { 1144 if (pol->cpu != 0) {
1135 printk(KERN_ERR PFX "No _PSS objects for CPU other than CPU0\n"); 1145 printk(KERN_ERR PFX "No ACPI _PSS objects for CPU other than "
1146 "CPU0. Complain to your BIOS vendor.\n");
1136 kfree(data); 1147 kfree(data);
1137 return -ENODEV; 1148 return -ENODEV;
1138 } 1149 }
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 2a609dc3271c..c778e4fa55a2 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -248,6 +248,7 @@ ENTRY(resume_userspace)
248 DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt 248 DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
249 # setting need_resched or sigpending 249 # setting need_resched or sigpending
250 # between sampling and the iret 250 # between sampling and the iret
251 TRACE_IRQS_OFF
251 movl TI_flags(%ebp), %ecx 252 movl TI_flags(%ebp), %ecx
252 andl $_TIF_WORK_MASK, %ecx # is there any work to be done on 253 andl $_TIF_WORK_MASK, %ecx # is there any work to be done on
253 # int/exception return? 254 # int/exception return?
diff --git a/arch/x86/kernel/geode_32.c b/arch/x86/kernel/geode_32.c
index e8edd63ab000..9b08e852fd1a 100644
--- a/arch/x86/kernel/geode_32.c
+++ b/arch/x86/kernel/geode_32.c
@@ -166,6 +166,8 @@ int geode_has_vsa2(void)
166 static int has_vsa2 = -1; 166 static int has_vsa2 = -1;
167 167
168 if (has_vsa2 == -1) { 168 if (has_vsa2 == -1) {
169 u16 val;
170
169 /* 171 /*
170 * The VSA has virtual registers that we can query for a 172 * The VSA has virtual registers that we can query for a
171 * signature. 173 * signature.
@@ -173,7 +175,8 @@ int geode_has_vsa2(void)
173 outw(VSA_VR_UNLOCK, VSA_VRC_INDEX); 175 outw(VSA_VR_UNLOCK, VSA_VRC_INDEX);
174 outw(VSA_VR_SIGNATURE, VSA_VRC_INDEX); 176 outw(VSA_VR_SIGNATURE, VSA_VRC_INDEX);
175 177
176 has_vsa2 = (inw(VSA_VRC_DATA) == VSA_SIG); 178 val = inw(VSA_VRC_DATA);
179 has_vsa2 = (val == AMD_VSA_SIG || val == GSW_VSA_SIG);
177 } 180 }
178 181
179 return has_vsa2; 182 return has_vsa2;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index b2cc73768a9d..f7357cc0162c 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -189,7 +189,7 @@ default_entry:
189 * this stage. 189 * this stage.
190 */ 190 */
191 191
192#define KPMDS ((0x100000000-__PAGE_OFFSET) >> 30) /* Number of kernel PMDs */ 192#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
193 193
194 xorl %ebx,%ebx /* %ebx is kept at zero */ 194 xorl %ebx,%ebx /* %ebx is kept at zero */
195 195
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index e03cc952f233..eb9ddd8efb82 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -56,6 +56,11 @@ void __cpuinit mxcsr_feature_mask_init(void)
56 56
57void __init init_thread_xstate(void) 57void __init init_thread_xstate(void)
58{ 58{
59 if (!HAVE_HWFP) {
60 xstate_size = sizeof(struct i387_soft_struct);
61 return;
62 }
63
59 if (cpu_has_fxsr) 64 if (cpu_has_fxsr)
60 xstate_size = sizeof(struct i387_fxsave_struct); 65 xstate_size = sizeof(struct i387_fxsave_struct);
61#ifdef CONFIG_X86_32 66#ifdef CONFIG_X86_32
@@ -94,7 +99,7 @@ void __cpuinit fpu_init(void)
94int init_fpu(struct task_struct *tsk) 99int init_fpu(struct task_struct *tsk)
95{ 100{
96 if (tsk_used_math(tsk)) { 101 if (tsk_used_math(tsk)) {
97 if (tsk == current) 102 if (HAVE_HWFP && tsk == current)
98 unlazy_fpu(tsk); 103 unlazy_fpu(tsk);
99 return 0; 104 return 0;
100 } 105 }
@@ -109,6 +114,15 @@ int init_fpu(struct task_struct *tsk)
109 return -ENOMEM; 114 return -ENOMEM;
110 } 115 }
111 116
117#ifdef CONFIG_X86_32
118 if (!HAVE_HWFP) {
119 memset(tsk->thread.xstate, 0, xstate_size);
120 finit();
121 set_stopped_child_used_math(tsk);
122 return 0;
123 }
124#endif
125
112 if (cpu_has_fxsr) { 126 if (cpu_has_fxsr) {
113 struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; 127 struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
114 128
@@ -330,13 +344,13 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
330 struct user_i387_ia32_struct env; 344 struct user_i387_ia32_struct env;
331 int ret; 345 int ret;
332 346
333 if (!HAVE_HWFP)
334 return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
335
336 ret = init_fpu(target); 347 ret = init_fpu(target);
337 if (ret) 348 if (ret)
338 return ret; 349 return ret;
339 350
351 if (!HAVE_HWFP)
352 return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
353
340 if (!cpu_has_fxsr) { 354 if (!cpu_has_fxsr) {
341 return user_regset_copyout(&pos, &count, &kbuf, &ubuf, 355 return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
342 &target->thread.xstate->fsave, 0, 356 &target->thread.xstate->fsave, 0,
@@ -360,15 +374,15 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
360 struct user_i387_ia32_struct env; 374 struct user_i387_ia32_struct env;
361 int ret; 375 int ret;
362 376
363 if (!HAVE_HWFP)
364 return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
365
366 ret = init_fpu(target); 377 ret = init_fpu(target);
367 if (ret) 378 if (ret)
368 return ret; 379 return ret;
369 380
370 set_stopped_child_used_math(target); 381 set_stopped_child_used_math(target);
371 382
383 if (!HAVE_HWFP)
384 return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
385
372 if (!cpu_has_fxsr) { 386 if (!cpu_has_fxsr) {
373 return user_regset_copyin(&pos, &count, &kbuf, &ubuf, 387 return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
374 &target->thread.xstate->fsave, 0, -1); 388 &target->thread.xstate->fsave, 0, -1);
@@ -474,18 +488,18 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf)
474int restore_i387_ia32(struct _fpstate_ia32 __user *buf) 488int restore_i387_ia32(struct _fpstate_ia32 __user *buf)
475{ 489{
476 int err; 490 int err;
491 struct task_struct *tsk = current;
477 492
478 if (HAVE_HWFP) { 493 if (HAVE_HWFP)
479 struct task_struct *tsk = current;
480
481 clear_fpu(tsk); 494 clear_fpu(tsk);
482 495
483 if (!used_math()) { 496 if (!used_math()) {
484 err = init_fpu(tsk); 497 err = init_fpu(tsk);
485 if (err) 498 if (err)
486 return err; 499 return err;
487 } 500 }
488 501
502 if (HAVE_HWFP) {
489 if (cpu_has_fxsr) 503 if (cpu_has_fxsr)
490 err = restore_i387_fxsave(buf); 504 err = restore_i387_fxsave(buf);
491 else 505 else
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index 3d01e47777db..a4f93b4120c1 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -11,7 +11,6 @@
11#include <asm/desc.h> 11#include <asm/desc.h>
12 12
13static struct fs_struct init_fs = INIT_FS; 13static struct fs_struct init_fs = INIT_FS;
14static struct files_struct init_files = INIT_FILES;
15static struct signal_struct init_signals = INIT_SIGNALS(init_signals); 14static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
16static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 15static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
17struct mm_struct init_mm = INIT_MM(init_mm); 16struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index a40d54fc1fdd..4dc8600d9d20 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -2130,14 +2130,10 @@ static inline void __init check_timer(void)
2130{ 2130{
2131 int apic1, pin1, apic2, pin2; 2131 int apic1, pin1, apic2, pin2;
2132 int vector; 2132 int vector;
2133 unsigned int ver;
2134 unsigned long flags; 2133 unsigned long flags;
2135 2134
2136 local_irq_save(flags); 2135 local_irq_save(flags);
2137 2136
2138 ver = apic_read(APIC_LVR);
2139 ver = GET_APIC_VERSION(ver);
2140
2141 /* 2137 /*
2142 * get/set the timer IRQ vector: 2138 * get/set the timer IRQ vector:
2143 */ 2139 */
@@ -2150,15 +2146,11 @@ static inline void __init check_timer(void)
2150 * mode for the 8259A whenever interrupts are routed 2146 * mode for the 8259A whenever interrupts are routed
2151 * through I/O APICs. Also IRQ0 has to be enabled in 2147 * through I/O APICs. Also IRQ0 has to be enabled in
2152 * the 8259A which implies the virtual wire has to be 2148 * the 8259A which implies the virtual wire has to be
2153 * disabled in the local APIC. Finally timer interrupts 2149 * disabled in the local APIC.
2154 * need to be acknowledged manually in the 8259A for
2155 * timer_interrupt() and for the i82489DX when using
2156 * the NMI watchdog.
2157 */ 2150 */
2158 apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); 2151 apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2159 init_8259A(1); 2152 init_8259A(1);
2160 timer_ack = !cpu_has_tsc; 2153 timer_ack = 1;
2161 timer_ack |= (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
2162 if (timer_over_8254 > 0) 2154 if (timer_over_8254 > 0)
2163 enable_8259A_irq(0); 2155 enable_8259A_irq(0);
2164 2156
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 4bc1be5d5472..87edf1ceb1df 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -18,6 +18,7 @@
18 18
19#include <linux/clocksource.h> 19#include <linux/clocksource.h>
20#include <linux/kvm_para.h> 20#include <linux/kvm_para.h>
21#include <asm/pvclock.h>
21#include <asm/arch_hooks.h> 22#include <asm/arch_hooks.h>
22#include <asm/msr.h> 23#include <asm/msr.h>
23#include <asm/apic.h> 24#include <asm/apic.h>
@@ -36,83 +37,47 @@ static int parse_no_kvmclock(char *arg)
36early_param("no-kvmclock", parse_no_kvmclock); 37early_param("no-kvmclock", parse_no_kvmclock);
37 38
38/* The hypervisor will put information about time periodically here */ 39/* The hypervisor will put information about time periodically here */
39static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock); 40static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock);
40#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field 41static struct pvclock_wall_clock wall_clock;
41 42
42static inline u64 kvm_get_delta(u64 last_tsc)
43{
44 int cpu = smp_processor_id();
45 u64 delta = native_read_tsc() - last_tsc;
46 return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
47}
48
49static struct kvm_wall_clock wall_clock;
50static cycle_t kvm_clock_read(void);
51/* 43/*
52 * The wallclock is the time of day when we booted. Since then, some time may 44 * The wallclock is the time of day when we booted. Since then, some time may
53 * have elapsed since the hypervisor wrote the data. So we try to account for 45 * have elapsed since the hypervisor wrote the data. So we try to account for
54 * that with system time 46 * that with system time
55 */ 47 */
56unsigned long kvm_get_wallclock(void) 48static unsigned long kvm_get_wallclock(void)
57{ 49{
58 u32 wc_sec, wc_nsec; 50 struct pvclock_vcpu_time_info *vcpu_time;
59 u64 delta;
60 struct timespec ts; 51 struct timespec ts;
61 int version, nsec;
62 int low, high; 52 int low, high;
63 53
64 low = (int)__pa(&wall_clock); 54 low = (int)__pa(&wall_clock);
65 high = ((u64)__pa(&wall_clock) >> 32); 55 high = ((u64)__pa(&wall_clock) >> 32);
56 native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
66 57
67 delta = kvm_clock_read(); 58 vcpu_time = &get_cpu_var(hv_clock);
59 pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
60 put_cpu_var(hv_clock);
68 61
69 native_write_msr(MSR_KVM_WALL_CLOCK, low, high); 62 return ts.tv_sec;
70 do {
71 version = wall_clock.wc_version;
72 rmb();
73 wc_sec = wall_clock.wc_sec;
74 wc_nsec = wall_clock.wc_nsec;
75 rmb();
76 } while ((wall_clock.wc_version != version) || (version & 1));
77
78 delta = kvm_clock_read() - delta;
79 delta += wc_nsec;
80 nsec = do_div(delta, NSEC_PER_SEC);
81 set_normalized_timespec(&ts, wc_sec + delta, nsec);
82 /*
83 * Of all mechanisms of time adjustment I've tested, this one
84 * was the champion!
85 */
86 return ts.tv_sec + 1;
87} 63}
88 64
89int kvm_set_wallclock(unsigned long now) 65static int kvm_set_wallclock(unsigned long now)
90{ 66{
91 return 0; 67 return -1;
92} 68}
93 69
94/*
95 * This is our read_clock function. The host puts an tsc timestamp each time
96 * it updates a new time. Without the tsc adjustment, we can have a situation
97 * in which a vcpu starts to run earlier (smaller system_time), but probes
98 * time later (compared to another vcpu), leading to backwards time
99 */
100static cycle_t kvm_clock_read(void) 70static cycle_t kvm_clock_read(void)
101{ 71{
102 u64 last_tsc, now; 72 struct pvclock_vcpu_time_info *src;
103 int cpu; 73 cycle_t ret;
104 74
105 preempt_disable(); 75 src = &get_cpu_var(hv_clock);
106 cpu = smp_processor_id(); 76 ret = pvclock_clocksource_read(src);
107 77 put_cpu_var(hv_clock);
108 last_tsc = get_clock(cpu, tsc_timestamp); 78 return ret;
109 now = get_clock(cpu, system_time);
110
111 now += kvm_get_delta(last_tsc);
112 preempt_enable();
113
114 return now;
115} 79}
80
116static struct clocksource kvm_clock = { 81static struct clocksource kvm_clock = {
117 .name = "kvm-clock", 82 .name = "kvm-clock",
118 .read = kvm_clock_read, 83 .read = kvm_clock_read,
@@ -123,13 +88,14 @@ static struct clocksource kvm_clock = {
123 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 88 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
124}; 89};
125 90
126static int kvm_register_clock(void) 91static int kvm_register_clock(char *txt)
127{ 92{
128 int cpu = smp_processor_id(); 93 int cpu = smp_processor_id();
129 int low, high; 94 int low, high;
130 low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1; 95 low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
131 high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); 96 high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
132 97 printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
98 cpu, high, low, txt);
133 return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); 99 return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
134} 100}
135 101
@@ -140,12 +106,20 @@ static void kvm_setup_secondary_clock(void)
140 * Now that the first cpu already had this clocksource initialized, 106 * Now that the first cpu already had this clocksource initialized,
141 * we shouldn't fail. 107 * we shouldn't fail.
142 */ 108 */
143 WARN_ON(kvm_register_clock()); 109 WARN_ON(kvm_register_clock("secondary cpu clock"));
144 /* ok, done with our trickery, call native */ 110 /* ok, done with our trickery, call native */
145 setup_secondary_APIC_clock(); 111 setup_secondary_APIC_clock();
146} 112}
147#endif 113#endif
148 114
115#ifdef CONFIG_SMP
116void __init kvm_smp_prepare_boot_cpu(void)
117{
118 WARN_ON(kvm_register_clock("primary cpu clock"));
119 native_smp_prepare_boot_cpu();
120}
121#endif
122
149/* 123/*
150 * After the clock is registered, the host will keep writing to the 124 * After the clock is registered, the host will keep writing to the
151 * registered memory location. If the guest happens to shutdown, this memory 125 * registered memory location. If the guest happens to shutdown, this memory
@@ -174,7 +148,7 @@ void __init kvmclock_init(void)
174 return; 148 return;
175 149
176 if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { 150 if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
177 if (kvm_register_clock()) 151 if (kvm_register_clock("boot clock"))
178 return; 152 return;
179 pv_time_ops.get_wallclock = kvm_get_wallclock; 153 pv_time_ops.get_wallclock = kvm_get_wallclock;
180 pv_time_ops.set_wallclock = kvm_set_wallclock; 154 pv_time_ops.set_wallclock = kvm_set_wallclock;
@@ -182,6 +156,9 @@ void __init kvmclock_init(void)
182#ifdef CONFIG_X86_LOCAL_APIC 156#ifdef CONFIG_X86_LOCAL_APIC
183 pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; 157 pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
184#endif 158#endif
159#ifdef CONFIG_SMP
160 smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
161#endif
185 machine_ops.shutdown = kvm_shutdown; 162 machine_ops.shutdown = kvm_shutdown;
186#ifdef CONFIG_KEXEC 163#ifdef CONFIG_KEXEC
187 machine_ops.crash_shutdown = kvm_crash_shutdown; 164 machine_ops.crash_shutdown = kvm_crash_shutdown;
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 3cad17fe026b..07c0f828f488 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -155,6 +155,7 @@ int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable)
155 wrmsr(msr, value, dummy); 155 wrmsr(msr, value, dummy);
156 return 0; 156 return 0;
157} 157}
158EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event);
158 159
159int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable) 160int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable)
160{ 161{
@@ -222,6 +223,7 @@ int geode_mfgpt_alloc_timer(int timer, int domain)
222 /* No timers available - too bad */ 223 /* No timers available - too bad */
223 return -1; 224 return -1;
224} 225}
226EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer);
225 227
226 228
227#ifdef CONFIG_GEODE_MFGPT_TIMER 229#ifdef CONFIG_GEODE_MFGPT_TIMER
diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c
index 11b14bbaa61e..84160f74eeb0 100644
--- a/arch/x86/kernel/nmi_32.c
+++ b/arch/x86/kernel/nmi_32.c
@@ -26,7 +26,6 @@
26 26
27#include <asm/smp.h> 27#include <asm/smp.h>
28#include <asm/nmi.h> 28#include <asm/nmi.h>
29#include <asm/timer.h>
30 29
31#include "mach_traps.h" 30#include "mach_traps.h"
32 31
@@ -82,7 +81,7 @@ int __init check_nmi_watchdog(void)
82 81
83 prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); 82 prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
84 if (!prev_nmi_count) 83 if (!prev_nmi_count)
85 goto error; 84 return -1;
86 85
87 printk(KERN_INFO "Testing NMI watchdog ... "); 86 printk(KERN_INFO "Testing NMI watchdog ... ");
88 87
@@ -119,7 +118,7 @@ int __init check_nmi_watchdog(void)
119 if (!atomic_read(&nmi_active)) { 118 if (!atomic_read(&nmi_active)) {
120 kfree(prev_nmi_count); 119 kfree(prev_nmi_count);
121 atomic_set(&nmi_active, -1); 120 atomic_set(&nmi_active, -1);
122 goto error; 121 return -1;
123 } 122 }
124 printk("OK.\n"); 123 printk("OK.\n");
125 124
@@ -130,10 +129,6 @@ int __init check_nmi_watchdog(void)
130 129
131 kfree(prev_nmi_count); 130 kfree(prev_nmi_count);
132 return 0; 131 return 0;
133error:
134 timer_ack = !cpu_has_tsc;
135
136 return -1;
137} 132}
138 133
139static int __init setup_nmi_watchdog(char *str) 134static int __init setup_nmi_watchdog(char *str)
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index c5ef1af8e79d..dc00a1331ace 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -378,6 +378,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
378 struct page *page; 378 struct page *page;
379 unsigned long dma_mask = 0; 379 unsigned long dma_mask = 0;
380 dma_addr_t bus; 380 dma_addr_t bus;
381 int noretry = 0;
381 382
382 /* ignore region specifiers */ 383 /* ignore region specifiers */
383 gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); 384 gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
@@ -397,20 +398,25 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
397 if (dev->dma_mask == NULL) 398 if (dev->dma_mask == NULL)
398 return NULL; 399 return NULL;
399 400
400 /* Don't invoke OOM killer */ 401 /* Don't invoke OOM killer or retry in lower 16MB DMA zone */
401 gfp |= __GFP_NORETRY; 402 if (gfp & __GFP_DMA)
403 noretry = 1;
402 404
403#ifdef CONFIG_X86_64 405#ifdef CONFIG_X86_64
404 /* Why <=? Even when the mask is smaller than 4GB it is often 406 /* Why <=? Even when the mask is smaller than 4GB it is often
405 larger than 16MB and in this case we have a chance of 407 larger than 16MB and in this case we have a chance of
406 finding fitting memory in the next higher zone first. If 408 finding fitting memory in the next higher zone first. If
407 not retry with true GFP_DMA. -AK */ 409 not retry with true GFP_DMA. -AK */
408 if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) 410 if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
409 gfp |= GFP_DMA32; 411 gfp |= GFP_DMA32;
412 if (dma_mask < DMA_32BIT_MASK)
413 noretry = 1;
414 }
410#endif 415#endif
411 416
412 again: 417 again:
413 page = dma_alloc_pages(dev, gfp, get_order(size)); 418 page = dma_alloc_pages(dev,
419 noretry ? gfp | __GFP_NORETRY : gfp, get_order(size));
414 if (page == NULL) 420 if (page == NULL)
415 return NULL; 421 return NULL;
416 422
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index c07455d1695f..aa8ec928caa8 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -26,6 +26,7 @@
26#include <linux/kdebug.h> 26#include <linux/kdebug.h>
27#include <linux/scatterlist.h> 27#include <linux/scatterlist.h>
28#include <linux/iommu-helper.h> 28#include <linux/iommu-helper.h>
29#include <linux/sysdev.h>
29#include <asm/atomic.h> 30#include <asm/atomic.h>
30#include <asm/io.h> 31#include <asm/io.h>
31#include <asm/mtrr.h> 32#include <asm/mtrr.h>
@@ -548,6 +549,28 @@ static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
548 return aper_base; 549 return aper_base;
549} 550}
550 551
552static int gart_resume(struct sys_device *dev)
553{
554 return 0;
555}
556
557static int gart_suspend(struct sys_device *dev, pm_message_t state)
558{
559 return -EINVAL;
560}
561
562static struct sysdev_class gart_sysdev_class = {
563 .name = "gart",
564 .suspend = gart_suspend,
565 .resume = gart_resume,
566
567};
568
569static struct sys_device device_gart = {
570 .id = 0,
571 .cls = &gart_sysdev_class,
572};
573
551/* 574/*
552 * Private Northbridge GATT initialization in case we cannot use the 575 * Private Northbridge GATT initialization in case we cannot use the
553 * AGP driver for some reason. 576 * AGP driver for some reason.
@@ -558,7 +581,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
558 unsigned aper_base, new_aper_base; 581 unsigned aper_base, new_aper_base;
559 struct pci_dev *dev; 582 struct pci_dev *dev;
560 void *gatt; 583 void *gatt;
561 int i; 584 int i, error;
562 585
563 printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); 586 printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
564 aper_size = aper_base = info->aper_size = 0; 587 aper_size = aper_base = info->aper_size = 0;
@@ -606,6 +629,12 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
606 629
607 pci_write_config_dword(dev, 0x90, ctl); 630 pci_write_config_dword(dev, 0x90, ctl);
608 } 631 }
632
633 error = sysdev_class_register(&gart_sysdev_class);
634 if (!error)
635 error = sysdev_register(&device_gart);
636 if (error)
637 panic("Could not register gart_sysdev -- would corrupt data on next suspend");
609 flush_gart(); 638 flush_gart();
610 639
611 printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", 640 printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n",
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index f8476dfbb60d..e2db9ac5c61c 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -333,6 +333,7 @@ void flush_thread(void)
333 /* 333 /*
334 * Forget coprocessor state.. 334 * Forget coprocessor state..
335 */ 335 */
336 tsk->fpu_counter = 0;
336 clear_fpu(tsk); 337 clear_fpu(tsk);
337 clear_used_math(); 338 clear_used_math();
338} 339}
@@ -649,8 +650,11 @@ struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct
649 /* If the task has used fpu the last 5 timeslices, just do a full 650 /* If the task has used fpu the last 5 timeslices, just do a full
650 * restore of the math state immediately to avoid the trap; the 651 * restore of the math state immediately to avoid the trap; the
651 * chances of needing FPU soon are obviously high now 652 * chances of needing FPU soon are obviously high now
653 *
654 * tsk_used_math() checks prevent calling math_state_restore(),
655 * which can sleep in the case of !tsk_used_math()
652 */ 656 */
653 if (next_p->fpu_counter > 5) 657 if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
654 math_state_restore(); 658 math_state_restore();
655 659
656 /* 660 /*
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e2319f39988b..c6eb5c91e5f6 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -294,6 +294,7 @@ void flush_thread(void)
294 /* 294 /*
295 * Forget coprocessor state.. 295 * Forget coprocessor state..
296 */ 296 */
297 tsk->fpu_counter = 0;
297 clear_fpu(tsk); 298 clear_fpu(tsk);
298 clear_used_math(); 299 clear_used_math();
299} 300}
@@ -658,8 +659,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
658 /* If the task has used fpu the last 5 timeslices, just do a full 659 /* If the task has used fpu the last 5 timeslices, just do a full
659 * restore of the math state immediately to avoid the trap; the 660 * restore of the math state immediately to avoid the trap; the
660 * chances of needing FPU soon are obviously high now 661 * chances of needing FPU soon are obviously high now
662 *
663 * tsk_used_math() checks prevent calling math_state_restore(),
664 * which can sleep in the case of !tsk_used_math()
661 */ 665 */
662 if (next_p->fpu_counter>5) 666 if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
663 math_state_restore(); 667 math_state_restore();
664 return prev_p; 668 return prev_p;
665} 669}
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
new file mode 100644
index 000000000000..05fbe9a0325a
--- /dev/null
+++ b/arch/x86/kernel/pvclock.c
@@ -0,0 +1,141 @@
1/* paravirtual clock -- common code used by kvm/xen
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; either version 2 of the License, or
6 (at your option) any later version.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16*/
17
18#include <linux/kernel.h>
19#include <linux/percpu.h>
20#include <asm/pvclock.h>
21
22/*
23 * These are perodically updated
24 * xen: magic shared_info page
25 * kvm: gpa registered via msr
26 * and then copied here.
27 */
28struct pvclock_shadow_time {
29 u64 tsc_timestamp; /* TSC at last update of time vals. */
30 u64 system_timestamp; /* Time, in nanosecs, since boot. */
31 u32 tsc_to_nsec_mul;
32 int tsc_shift;
33 u32 version;
34};
35
36/*
37 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
38 * yielding a 64-bit result.
39 */
40static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
41{
42 u64 product;
43#ifdef __i386__
44 u32 tmp1, tmp2;
45#endif
46
47 if (shift < 0)
48 delta >>= -shift;
49 else
50 delta <<= shift;
51
52#ifdef __i386__
53 __asm__ (
54 "mul %5 ; "
55 "mov %4,%%eax ; "
56 "mov %%edx,%4 ; "
57 "mul %5 ; "
58 "xor %5,%5 ; "
59 "add %4,%%eax ; "
60 "adc %5,%%edx ; "
61 : "=A" (product), "=r" (tmp1), "=r" (tmp2)
62 : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
63#elif __x86_64__
64 __asm__ (
65 "mul %%rdx ; shrd $32,%%rdx,%%rax"
66 : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
67#else
68#error implement me!
69#endif
70
71 return product;
72}
73
74static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
75{
76 u64 delta = native_read_tsc() - shadow->tsc_timestamp;
77 return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
78}
79
80/*
81 * Reads a consistent set of time-base values from hypervisor,
82 * into a shadow data area.
83 */
84static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
85 struct pvclock_vcpu_time_info *src)
86{
87 do {
88 dst->version = src->version;
89 rmb(); /* fetch version before data */
90 dst->tsc_timestamp = src->tsc_timestamp;
91 dst->system_timestamp = src->system_time;
92 dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
93 dst->tsc_shift = src->tsc_shift;
94 rmb(); /* test version after fetching data */
95 } while ((src->version & 1) || (dst->version != src->version));
96
97 return dst->version;
98}
99
100cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
101{
102 struct pvclock_shadow_time shadow;
103 unsigned version;
104 cycle_t ret, offset;
105
106 do {
107 version = pvclock_get_time_values(&shadow, src);
108 barrier();
109 offset = pvclock_get_nsec_offset(&shadow);
110 ret = shadow.system_timestamp + offset;
111 barrier();
112 } while (version != src->version);
113
114 return ret;
115}
116
117void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
118 struct pvclock_vcpu_time_info *vcpu_time,
119 struct timespec *ts)
120{
121 u32 version;
122 u64 delta;
123 struct timespec now;
124
125 /* get wallclock at system boot */
126 do {
127 version = wall_clock->version;
128 rmb(); /* fetch version before time */
129 now.tv_sec = wall_clock->sec;
130 now.tv_nsec = wall_clock->nsec;
131 rmb(); /* fetch time before checking version */
132 } while ((wall_clock->version & 1) || (version != wall_clock->version));
133
134 delta = pvclock_clocksource_read(vcpu_time); /* time since system boot */
135 delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;
136
137 now.tv_nsec = do_div(delta, NSEC_PER_SEC);
138 now.tv_sec = delta;
139
140 set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
141}
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 9615eee9b775..05191bbc68b8 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -4,6 +4,8 @@
4#include <linux/acpi.h> 4#include <linux/acpi.h>
5#include <linux/bcd.h> 5#include <linux/bcd.h>
6#include <linux/mc146818rtc.h> 6#include <linux/mc146818rtc.h>
7#include <linux/platform_device.h>
8#include <linux/pnp.h>
7 9
8#include <asm/time.h> 10#include <asm/time.h>
9#include <asm/vsyscall.h> 11#include <asm/vsyscall.h>
@@ -197,3 +199,35 @@ unsigned long long native_read_tsc(void)
197} 199}
198EXPORT_SYMBOL(native_read_tsc); 200EXPORT_SYMBOL(native_read_tsc);
199 201
202
203static struct resource rtc_resources[] = {
204 [0] = {
205 .start = RTC_PORT(0),
206 .end = RTC_PORT(1),
207 .flags = IORESOURCE_IO,
208 },
209 [1] = {
210 .start = RTC_IRQ,
211 .end = RTC_IRQ,
212 .flags = IORESOURCE_IRQ,
213 }
214};
215
216static struct platform_device rtc_device = {
217 .name = "rtc_cmos",
218 .id = -1,
219 .resource = rtc_resources,
220 .num_resources = ARRAY_SIZE(rtc_resources),
221};
222
223static __init int add_rtc_cmos(void)
224{
225#ifdef CONFIG_PNP
226 if (!pnp_platform_devices)
227 platform_device_register(&rtc_device);
228#else
229 platform_device_register(&rtc_device);
230#endif /* CONFIG_PNP */
231 return 0;
232}
233device_initcall(add_rtc_cmos);
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 2c5f8b213e86..5a2f8e063887 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -532,10 +532,16 @@ static void __init reserve_crashkernel(void)
532 (unsigned long)(crash_size >> 20), 532 (unsigned long)(crash_size >> 20),
533 (unsigned long)(crash_base >> 20), 533 (unsigned long)(crash_base >> 20),
534 (unsigned long)(total_mem >> 20)); 534 (unsigned long)(total_mem >> 20));
535
536 if (reserve_bootmem(crash_base, crash_size,
537 BOOTMEM_EXCLUSIVE) < 0) {
538 printk(KERN_INFO "crashkernel reservation "
539 "failed - memory is in use\n");
540 return;
541 }
542
535 crashk_res.start = crash_base; 543 crashk_res.start = crash_base;
536 crashk_res.end = crash_base + crash_size - 1; 544 crashk_res.end = crash_base + crash_size - 1;
537 reserve_bootmem(crash_base, crash_size,
538 BOOTMEM_DEFAULT);
539 } else 545 } else
540 printk(KERN_INFO "crashkernel reservation failed - " 546 printk(KERN_INFO "crashkernel reservation failed - "
541 "you have to specify a base address\n"); 547 "you have to specify a base address\n");
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 38988491c622..56078d61c793 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1190,6 +1190,7 @@ static void __init smp_cpu_index_default(void)
1190 */ 1190 */
1191void __init native_smp_prepare_cpus(unsigned int max_cpus) 1191void __init native_smp_prepare_cpus(unsigned int max_cpus)
1192{ 1192{
1193 preempt_disable();
1193 nmi_watchdog_default(); 1194 nmi_watchdog_default();
1194 smp_cpu_index_default(); 1195 smp_cpu_index_default();
1195 current_cpu_data = boot_cpu_data; 1196 current_cpu_data = boot_cpu_data;
@@ -1206,7 +1207,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1206 if (smp_sanity_check(max_cpus) < 0) { 1207 if (smp_sanity_check(max_cpus) < 0) {
1207 printk(KERN_INFO "SMP disabled\n"); 1208 printk(KERN_INFO "SMP disabled\n");
1208 disable_smp(); 1209 disable_smp();
1209 return; 1210 goto out;
1210 } 1211 }
1211 1212
1212 preempt_disable(); 1213 preempt_disable();
@@ -1246,6 +1247,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1246 printk(KERN_INFO "CPU%d: ", 0); 1247 printk(KERN_INFO "CPU%d: ", 0);
1247 print_cpu_info(&cpu_data(0)); 1248 print_cpu_info(&cpu_data(0));
1248 setup_boot_clock(); 1249 setup_boot_clock();
1250out:
1251 preempt_enable();
1249} 1252}
1250/* 1253/*
1251 * Early setup to make printk work. 1254 * Early setup to make printk work.
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index bde6f63e15d5..08d752de4eee 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -544,6 +544,7 @@ vm86_trap:
544#define DO_ERROR(trapnr, signr, str, name) \ 544#define DO_ERROR(trapnr, signr, str, name) \
545void do_##name(struct pt_regs *regs, long error_code) \ 545void do_##name(struct pt_regs *regs, long error_code) \
546{ \ 546{ \
547 trace_hardirqs_fixup(); \
547 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 548 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
548 == NOTIFY_STOP) \ 549 == NOTIFY_STOP) \
549 return; \ 550 return; \
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
index e4790728b224..65b70637ad97 100644
--- a/arch/x86/kernel/tsc_32.c
+++ b/arch/x86/kernel/tsc_32.c
@@ -14,7 +14,10 @@
14 14
15#include "mach_timer.h" 15#include "mach_timer.h"
16 16
17static int tsc_enabled; 17/* native_sched_clock() is called before tsc_init(), so
18 we must start with the TSC soft disabled to prevent
19 erroneous rdtsc usage on !cpu_has_tsc processors */
20static int tsc_disabled = -1;
18 21
19/* 22/*
20 * On some systems the TSC frequency does not 23 * On some systems the TSC frequency does not
@@ -28,8 +31,8 @@ EXPORT_SYMBOL_GPL(tsc_khz);
28static int __init tsc_setup(char *str) 31static int __init tsc_setup(char *str)
29{ 32{
30 printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " 33 printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
31 "cannot disable TSC completely.\n"); 34 "cannot disable TSC completely.\n");
32 mark_tsc_unstable("user disabled TSC"); 35 tsc_disabled = 1;
33 return 1; 36 return 1;
34} 37}
35#else 38#else
@@ -120,7 +123,7 @@ unsigned long long native_sched_clock(void)
120 * very important for it to be as fast as the platform 123 * very important for it to be as fast as the platform
121 * can achive it. ) 124 * can achive it. )
122 */ 125 */
123 if (unlikely(!tsc_enabled && !tsc_unstable)) 126 if (unlikely(tsc_disabled))
124 /* No locking but a rare wrong value is not a big deal: */ 127 /* No locking but a rare wrong value is not a big deal: */
125 return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); 128 return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
126 129
@@ -322,7 +325,6 @@ void mark_tsc_unstable(char *reason)
322{ 325{
323 if (!tsc_unstable) { 326 if (!tsc_unstable) {
324 tsc_unstable = 1; 327 tsc_unstable = 1;
325 tsc_enabled = 0;
326 printk("Marking TSC unstable due to: %s.\n", reason); 328 printk("Marking TSC unstable due to: %s.\n", reason);
327 /* Can be called before registration */ 329 /* Can be called before registration */
328 if (clocksource_tsc.mult) 330 if (clocksource_tsc.mult)
@@ -336,7 +338,7 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable);
336static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d) 338static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d)
337{ 339{
338 printk(KERN_NOTICE "%s detected: marking TSC unstable.\n", 340 printk(KERN_NOTICE "%s detected: marking TSC unstable.\n",
339 d->ident); 341 d->ident);
340 tsc_unstable = 1; 342 tsc_unstable = 1;
341 return 0; 343 return 0;
342} 344}
@@ -403,7 +405,7 @@ void __init tsc_init(void)
403{ 405{
404 int cpu; 406 int cpu;
405 407
406 if (!cpu_has_tsc) 408 if (!cpu_has_tsc || tsc_disabled > 0)
407 return; 409 return;
408 410
409 cpu_khz = calculate_cpu_khz(); 411 cpu_khz = calculate_cpu_khz();
@@ -414,6 +416,9 @@ void __init tsc_init(void)
414 return; 416 return;
415 } 417 }
416 418
419 /* now allow native_sched_clock() to use rdtsc */
420 tsc_disabled = 0;
421
417 printk("Detected %lu.%03lu MHz processor.\n", 422 printk("Detected %lu.%03lu MHz processor.\n",
418 (unsigned long)cpu_khz / 1000, 423 (unsigned long)cpu_khz / 1000,
419 (unsigned long)cpu_khz % 1000); 424 (unsigned long)cpu_khz % 1000);
@@ -441,8 +446,6 @@ void __init tsc_init(void)
441 if (check_tsc_unstable()) { 446 if (check_tsc_unstable()) {
442 clocksource_tsc.rating = 0; 447 clocksource_tsc.rating = 0;
443 clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; 448 clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
444 } else 449 }
445 tsc_enabled = 1;
446
447 clocksource_register(&clocksource_tsc); 450 clocksource_register(&clocksource_tsc);
448} 451}
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
index fcc16e58609e..1784b8077a12 100644
--- a/arch/x86/kernel/tsc_64.c
+++ b/arch/x86/kernel/tsc_64.c
@@ -227,14 +227,14 @@ void __init tsc_calibrate(void)
227 /* hpet or pmtimer available ? */ 227 /* hpet or pmtimer available ? */
228 if (!hpet && !pm1 && !pm2) { 228 if (!hpet && !pm1 && !pm2) {
229 printk(KERN_INFO "TSC calibrated against PIT\n"); 229 printk(KERN_INFO "TSC calibrated against PIT\n");
230 return; 230 goto out;
231 } 231 }
232 232
233 /* Check, whether the sampling was disturbed by an SMI */ 233 /* Check, whether the sampling was disturbed by an SMI */
234 if (tsc1 == ULONG_MAX || tsc2 == ULONG_MAX) { 234 if (tsc1 == ULONG_MAX || tsc2 == ULONG_MAX) {
235 printk(KERN_WARNING "TSC calibration disturbed by SMI, " 235 printk(KERN_WARNING "TSC calibration disturbed by SMI, "
236 "using PIT calibration result\n"); 236 "using PIT calibration result\n");
237 return; 237 goto out;
238 } 238 }
239 239
240 tsc2 = (tsc2 - tsc1) * 1000000L; 240 tsc2 = (tsc2 - tsc1) * 1000000L;
@@ -255,6 +255,7 @@ void __init tsc_calibrate(void)
255 255
256 tsc_khz = tsc2 / tsc1; 256 tsc_khz = tsc2 / tsc1;
257 257
258out:
258 for_each_possible_cpu(cpu) 259 for_each_possible_cpu(cpu)
259 set_cyc2ns_scale(tsc_khz, cpu); 260 set_cyc2ns_scale(tsc_khz, cpu);
260} 261}