aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/acpi/boot.c16
-rw-r--r--arch/x86/kernel/acpi/realmode/wakeup.S38
-rw-r--r--arch/x86/kernel/acpi/realmode/wakeup.h5
-rw-r--r--arch/x86/kernel/acpi/sleep.c16
-rw-r--r--arch/x86/kernel/apic_64.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/longrun.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c15
-rw-r--r--arch/x86/kernel/efi_32.c8
-rw-r--r--arch/x86/kernel/entry_32.S1
-rw-r--r--arch/x86/kernel/geode_32.c5
-rw-r--r--arch/x86/kernel/head_32.S2
-rw-r--r--arch/x86/kernel/head_64.S2
-rw-r--r--arch/x86/kernel/i387.c48
-rw-r--r--arch/x86/kernel/io_apic_32.c12
-rw-r--r--arch/x86/kernel/kvmclock.c93
-rw-r--r--arch/x86/kernel/mfgpt_32.c2
-rw-r--r--arch/x86/kernel/nmi_32.c9
-rw-r--r--arch/x86/kernel/pci-dma.c14
-rw-r--r--arch/x86/kernel/pci-gart_64.c31
-rw-r--r--arch/x86/kernel/process_32.c6
-rw-r--r--arch/x86/kernel/process_64.c6
-rw-r--r--arch/x86/kernel/pvclock.c141
-rw-r--r--arch/x86/kernel/rtc.c34
-rw-r--r--arch/x86/kernel/setup_32.c10
-rw-r--r--arch/x86/kernel/smpboot.c6
-rw-r--r--arch/x86/kernel/traps_32.c1
-rw-r--r--arch/x86/kernel/tsc_32.c23
-rw-r--r--arch/x86/kernel/tsc_64.c5
29 files changed, 424 insertions, 130 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 739d49acd2f1..5ff67208d4ae 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -90,6 +90,7 @@ obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
90obj-$(CONFIG_KVM_GUEST) += kvm.o 90obj-$(CONFIG_KVM_GUEST) += kvm.o
91obj-$(CONFIG_KVM_CLOCK) += kvmclock.o 91obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
92obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o 92obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
93obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
93 94
94obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o 95obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
95 96
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index c49ebcc6c41e..33c5216fd3e1 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -242,12 +242,19 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
242 242
243static void __cpuinit acpi_register_lapic(int id, u8 enabled) 243static void __cpuinit acpi_register_lapic(int id, u8 enabled)
244{ 244{
245 unsigned int ver = 0;
246
245 if (!enabled) { 247 if (!enabled) {
246 ++disabled_cpus; 248 ++disabled_cpus;
247 return; 249 return;
248 } 250 }
249 251
250 generic_processor_info(id, 0); 252#ifdef CONFIG_X86_32
253 if (boot_cpu_physical_apicid != -1U)
254 ver = apic_version[boot_cpu_physical_apicid];
255#endif
256
257 generic_processor_info(id, ver);
251} 258}
252 259
253static int __init 260static int __init
@@ -767,8 +774,13 @@ static void __init acpi_register_lapic_address(unsigned long address)
767 mp_lapic_addr = address; 774 mp_lapic_addr = address;
768 775
769 set_fixmap_nocache(FIX_APIC_BASE, address); 776 set_fixmap_nocache(FIX_APIC_BASE, address);
770 if (boot_cpu_physical_apicid == -1U) 777 if (boot_cpu_physical_apicid == -1U) {
771 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 778 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
779#ifdef CONFIG_X86_32
780 apic_version[boot_cpu_physical_apicid] =
781 GET_APIC_VERSION(apic_read(APIC_LVR));
782#endif
783 }
772} 784}
773 785
774static int __init early_acpi_parse_madt_lapic_addr_ovr(void) 786static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S
index f9b77fb37e5b..3355973b12ac 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.S
+++ b/arch/x86/kernel/acpi/realmode/wakeup.S
@@ -5,6 +5,7 @@
5#include <asm/msr-index.h> 5#include <asm/msr-index.h>
6#include <asm/page.h> 6#include <asm/page.h>
7#include <asm/pgtable.h> 7#include <asm/pgtable.h>
8#include <asm/processor-flags.h>
8 9
9 .code16 10 .code16
10 .section ".header", "a" 11 .section ".header", "a"
@@ -24,6 +25,11 @@ pmode_gdt: .quad 0
24realmode_flags: .long 0 25realmode_flags: .long 0
25real_magic: .long 0 26real_magic: .long 0
26trampoline_segment: .word 0 27trampoline_segment: .word 0
28_pad1: .byte 0
29wakeup_jmp: .byte 0xea /* ljmpw */
30wakeup_jmp_off: .word 3f
31wakeup_jmp_seg: .word 0
32wakeup_gdt: .quad 0, 0, 0
27signature: .long 0x51ee1111 33signature: .long 0x51ee1111
28 34
29 .text 35 .text
@@ -34,11 +40,34 @@ _start:
34 cli 40 cli
35 cld 41 cld
36 42
43 /* Apparently some dimwit BIOS programmers don't know how to
44 program a PM to RM transition, and we might end up here with
45 junk in the data segment descriptor registers. The only way
46 to repair that is to go into PM and fix it ourselves... */
47 movw $16, %cx
48 lgdtl %cs:wakeup_gdt
49 movl %cr0, %eax
50 orb $X86_CR0_PE, %al
51 movl %eax, %cr0
52 jmp 1f
531: ljmpw $8, $2f
542:
55 movw %cx, %ds
56 movw %cx, %es
57 movw %cx, %ss
58 movw %cx, %fs
59 movw %cx, %gs
60
61 andb $~X86_CR0_PE, %al
62 movl %eax, %cr0
63 jmp wakeup_jmp
643:
37 /* Set up segments */ 65 /* Set up segments */
38 movw %cs, %ax 66 movw %cs, %ax
39 movw %ax, %ds 67 movw %ax, %ds
40 movw %ax, %es 68 movw %ax, %es
41 movw %ax, %ss 69 movw %ax, %ss
70 lidtl wakeup_idt
42 71
43 movl $wakeup_stack_end, %esp 72 movl $wakeup_stack_end, %esp
44 73
@@ -98,7 +127,14 @@ bogus_real_magic:
98 jmp 1b 127 jmp 1b
99 128
100 .data 129 .data
101 .balign 4 130 .balign 8
131
132 /* This is the standard real-mode IDT */
133wakeup_idt:
134 .word 0xffff /* limit */
135 .long 0 /* address */
136 .word 0
137
102 .globl HEAP, heap_end 138 .globl HEAP, heap_end
103HEAP: 139HEAP:
104 .long wakeup_heap 140 .long wakeup_heap
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.h b/arch/x86/kernel/acpi/realmode/wakeup.h
index ef8166fe8020..69d38d0b2b64 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.h
+++ b/arch/x86/kernel/acpi/realmode/wakeup.h
@@ -24,6 +24,11 @@ struct wakeup_header {
24 u32 realmode_flags; 24 u32 realmode_flags;
25 u32 real_magic; 25 u32 real_magic;
26 u16 trampoline_segment; /* segment with trampoline code, 64-bit only */ 26 u16 trampoline_segment; /* segment with trampoline code, 64-bit only */
27 u8 _pad1;
28 u8 wakeup_jmp;
29 u16 wakeup_jmp_off;
30 u16 wakeup_jmp_seg;
31 u64 wakeup_gdt[3];
27 u32 signature; /* To check we have correct structure */ 32 u32 signature; /* To check we have correct structure */
28} __attribute__((__packed__)); 33} __attribute__((__packed__));
29 34
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index afc25ee9964b..36af01f029ed 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -50,6 +50,20 @@ int acpi_save_state_mem(void)
50 50
51 header->video_mode = saved_video_mode; 51 header->video_mode = saved_video_mode;
52 52
53 header->wakeup_jmp_seg = acpi_wakeup_address >> 4;
54 /* GDT[0]: GDT self-pointer */
55 header->wakeup_gdt[0] =
56 (u64)(sizeof(header->wakeup_gdt) - 1) +
57 ((u64)(acpi_wakeup_address +
58 ((char *)&header->wakeup_gdt - (char *)acpi_realmode))
59 << 16);
60 /* GDT[1]: real-mode-like code segment */
61 header->wakeup_gdt[1] = (0x009bULL << 40) +
62 ((u64)acpi_wakeup_address << 16) + 0xffff;
63 /* GDT[2]: real-mode-like data segment */
64 header->wakeup_gdt[2] = (0x0093ULL << 40) +
65 ((u64)acpi_wakeup_address << 16) + 0xffff;
66
53#ifndef CONFIG_64BIT 67#ifndef CONFIG_64BIT
54 store_gdt((struct desc_ptr *)&header->pmode_gdt); 68 store_gdt((struct desc_ptr *)&header->pmode_gdt);
55 69
@@ -111,7 +125,7 @@ void __init acpi_reserve_bootmem(void)
111 return; 125 return;
112 } 126 }
113 127
114 acpi_wakeup_address = acpi_realmode; 128 acpi_wakeup_address = virt_to_phys((void *)acpi_realmode);
115} 129}
116 130
117 131
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 5910020c3f24..0633cfd0dc29 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -534,7 +534,7 @@ int setup_profiling_timer(unsigned int multiplier)
534 */ 534 */
535void clear_local_APIC(void) 535void clear_local_APIC(void)
536{ 536{
537 int maxlvt = lapic_get_maxlvt(); 537 int maxlvt;
538 u32 v; 538 u32 v;
539 539
540 /* APIC hasn't been mapped yet */ 540 /* APIC hasn't been mapped yet */
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c
index af4a867a097c..777a7ff075de 100644
--- a/arch/x86/kernel/cpu/cpufreq/longrun.c
+++ b/arch/x86/kernel/cpu/cpufreq/longrun.c
@@ -245,7 +245,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq,
245 if ((ecx > 95) || (ecx == 0) || (eax < ebx)) 245 if ((ecx > 95) || (ecx == 0) || (eax < ebx))
246 return -EIO; 246 return -EIO;
247 247
248 edx = (eax - ebx) / (100 - ecx); 248 edx = ((eax - ebx) * 100) / (100 - ecx);
249 *low_freq = edx * 1000; /* back to kHz */ 249 *low_freq = edx * 1000; /* back to kHz */
250 250
251 dprintk("low frequency is %u kHz\n", *low_freq); 251 dprintk("low frequency is %u kHz\n", *low_freq);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 46d4034d9f37..206791eb46e3 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1127,12 +1127,23 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1127 * an UP version, and is deprecated by AMD. 1127 * an UP version, and is deprecated by AMD.
1128 */ 1128 */
1129 if (num_online_cpus() != 1) { 1129 if (num_online_cpus() != 1) {
1130 printk(KERN_ERR PFX "MP systems not supported by PSB BIOS structure\n"); 1130#ifndef CONFIG_ACPI_PROCESSOR
1131 printk(KERN_ERR PFX "ACPI Processor support is required "
1132 "for SMP systems but is absent. Please load the "
1133 "ACPI Processor module before starting this "
1134 "driver.\n");
1135#else
1136 printk(KERN_ERR PFX "Your BIOS does not provide ACPI "
1137 "_PSS objects in a way that Linux understands. "
1138 "Please report this to the Linux ACPI maintainers"
1139 " and complain to your BIOS vendor.\n");
1140#endif
1131 kfree(data); 1141 kfree(data);
1132 return -ENODEV; 1142 return -ENODEV;
1133 } 1143 }
1134 if (pol->cpu != 0) { 1144 if (pol->cpu != 0) {
1135 printk(KERN_ERR PFX "No _PSS objects for CPU other than CPU0\n"); 1145 printk(KERN_ERR PFX "No ACPI _PSS objects for CPU other than "
1146 "CPU0. Complain to your BIOS vendor.\n");
1136 kfree(data); 1147 kfree(data);
1137 return -ENODEV; 1148 return -ENODEV;
1138 } 1149 }
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c
index 5d23d85624d4..4b63c8e1f13b 100644
--- a/arch/x86/kernel/efi_32.c
+++ b/arch/x86/kernel/efi_32.c
@@ -49,13 +49,13 @@ void efi_call_phys_prelog(void)
49 local_irq_save(efi_rt_eflags); 49 local_irq_save(efi_rt_eflags);
50 50
51 /* 51 /*
52 * If I don't have PSE, I should just duplicate two entries in page 52 * If I don't have PAE, I should just duplicate two entries in page
53 * directory. If I have PSE, I just need to duplicate one entry in 53 * directory. If I have PAE, I just need to duplicate one entry in
54 * page directory. 54 * page directory.
55 */ 55 */
56 cr4 = read_cr4(); 56 cr4 = read_cr4();
57 57
58 if (cr4 & X86_CR4_PSE) { 58 if (cr4 & X86_CR4_PAE) {
59 efi_bak_pg_dir_pointer[0].pgd = 59 efi_bak_pg_dir_pointer[0].pgd =
60 swapper_pg_dir[pgd_index(0)].pgd; 60 swapper_pg_dir[pgd_index(0)].pgd;
61 swapper_pg_dir[0].pgd = 61 swapper_pg_dir[0].pgd =
@@ -93,7 +93,7 @@ void efi_call_phys_epilog(void)
93 93
94 cr4 = read_cr4(); 94 cr4 = read_cr4();
95 95
96 if (cr4 & X86_CR4_PSE) { 96 if (cr4 & X86_CR4_PAE) {
97 swapper_pg_dir[pgd_index(0)].pgd = 97 swapper_pg_dir[pgd_index(0)].pgd =
98 efi_bak_pg_dir_pointer[0].pgd; 98 efi_bak_pg_dir_pointer[0].pgd;
99 } else { 99 } else {
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index e6517ce0b824..04ea83ccb979 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -248,6 +248,7 @@ ENTRY(resume_userspace)
248 DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt 248 DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
249 # setting need_resched or sigpending 249 # setting need_resched or sigpending
250 # between sampling and the iret 250 # between sampling and the iret
251 TRACE_IRQS_OFF
251 movl TI_flags(%ebp), %ecx 252 movl TI_flags(%ebp), %ecx
252 andl $_TIF_WORK_MASK, %ecx # is there any work to be done on 253 andl $_TIF_WORK_MASK, %ecx # is there any work to be done on
253 # int/exception return? 254 # int/exception return?
diff --git a/arch/x86/kernel/geode_32.c b/arch/x86/kernel/geode_32.c
index e8edd63ab000..9b08e852fd1a 100644
--- a/arch/x86/kernel/geode_32.c
+++ b/arch/x86/kernel/geode_32.c
@@ -166,6 +166,8 @@ int geode_has_vsa2(void)
166 static int has_vsa2 = -1; 166 static int has_vsa2 = -1;
167 167
168 if (has_vsa2 == -1) { 168 if (has_vsa2 == -1) {
169 u16 val;
170
169 /* 171 /*
170 * The VSA has virtual registers that we can query for a 172 * The VSA has virtual registers that we can query for a
171 * signature. 173 * signature.
@@ -173,7 +175,8 @@ int geode_has_vsa2(void)
173 outw(VSA_VR_UNLOCK, VSA_VRC_INDEX); 175 outw(VSA_VR_UNLOCK, VSA_VRC_INDEX);
174 outw(VSA_VR_SIGNATURE, VSA_VRC_INDEX); 176 outw(VSA_VR_SIGNATURE, VSA_VRC_INDEX);
175 177
176 has_vsa2 = (inw(VSA_VRC_DATA) == VSA_SIG); 178 val = inw(VSA_VRC_DATA);
179 has_vsa2 = (val == AMD_VSA_SIG || val == GSW_VSA_SIG);
177 } 180 }
178 181
179 return has_vsa2; 182 return has_vsa2;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index b2cc73768a9d..f7357cc0162c 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -189,7 +189,7 @@ default_entry:
189 * this stage. 189 * this stage.
190 */ 190 */
191 191
192#define KPMDS ((0x100000000-__PAGE_OFFSET) >> 30) /* Number of kernel PMDs */ 192#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
193 193
194 xorl %ebx,%ebx /* %ebx is kept at zero */ 194 xorl %ebx,%ebx /* %ebx is kept at zero */
195 195
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 10a1955bb1d1..b817974ef942 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -128,7 +128,7 @@ ident_complete:
128 /* Fixup phys_base */ 128 /* Fixup phys_base */
129 addq %rbp, phys_base(%rip) 129 addq %rbp, phys_base(%rip)
130 130
131#ifdef CONFIG_SMP 131#ifdef CONFIG_X86_TRAMPOLINE
132 addq %rbp, trampoline_level4_pgt + 0(%rip) 132 addq %rbp, trampoline_level4_pgt + 0(%rip)
133 addq %rbp, trampoline_level4_pgt + (511*8)(%rip) 133 addq %rbp, trampoline_level4_pgt + (511*8)(%rip)
134#endif 134#endif
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index e03cc952f233..95e80e5033c3 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -56,6 +56,11 @@ void __cpuinit mxcsr_feature_mask_init(void)
56 56
57void __init init_thread_xstate(void) 57void __init init_thread_xstate(void)
58{ 58{
59 if (!HAVE_HWFP) {
60 xstate_size = sizeof(struct i387_soft_struct);
61 return;
62 }
63
59 if (cpu_has_fxsr) 64 if (cpu_has_fxsr)
60 xstate_size = sizeof(struct i387_fxsave_struct); 65 xstate_size = sizeof(struct i387_fxsave_struct);
61#ifdef CONFIG_X86_32 66#ifdef CONFIG_X86_32
@@ -94,7 +99,7 @@ void __cpuinit fpu_init(void)
94int init_fpu(struct task_struct *tsk) 99int init_fpu(struct task_struct *tsk)
95{ 100{
96 if (tsk_used_math(tsk)) { 101 if (tsk_used_math(tsk)) {
97 if (tsk == current) 102 if (HAVE_HWFP && tsk == current)
98 unlazy_fpu(tsk); 103 unlazy_fpu(tsk);
99 return 0; 104 return 0;
100 } 105 }
@@ -109,6 +114,15 @@ int init_fpu(struct task_struct *tsk)
109 return -ENOMEM; 114 return -ENOMEM;
110 } 115 }
111 116
117#ifdef CONFIG_X86_32
118 if (!HAVE_HWFP) {
119 memset(tsk->thread.xstate, 0, xstate_size);
120 finit();
121 set_stopped_child_used_math(tsk);
122 return 0;
123 }
124#endif
125
112 if (cpu_has_fxsr) { 126 if (cpu_has_fxsr) {
113 struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; 127 struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
114 128
@@ -148,7 +162,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
148 int ret; 162 int ret;
149 163
150 if (!cpu_has_fxsr) 164 if (!cpu_has_fxsr)
151 return -ENODEV; 165 return -EIO;
152 166
153 ret = init_fpu(target); 167 ret = init_fpu(target);
154 if (ret) 168 if (ret)
@@ -165,7 +179,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
165 int ret; 179 int ret;
166 180
167 if (!cpu_has_fxsr) 181 if (!cpu_has_fxsr)
168 return -ENODEV; 182 return -EIO;
169 183
170 ret = init_fpu(target); 184 ret = init_fpu(target);
171 if (ret) 185 if (ret)
@@ -330,13 +344,13 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
330 struct user_i387_ia32_struct env; 344 struct user_i387_ia32_struct env;
331 int ret; 345 int ret;
332 346
333 if (!HAVE_HWFP)
334 return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
335
336 ret = init_fpu(target); 347 ret = init_fpu(target);
337 if (ret) 348 if (ret)
338 return ret; 349 return ret;
339 350
351 if (!HAVE_HWFP)
352 return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
353
340 if (!cpu_has_fxsr) { 354 if (!cpu_has_fxsr) {
341 return user_regset_copyout(&pos, &count, &kbuf, &ubuf, 355 return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
342 &target->thread.xstate->fsave, 0, 356 &target->thread.xstate->fsave, 0,
@@ -360,15 +374,15 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
360 struct user_i387_ia32_struct env; 374 struct user_i387_ia32_struct env;
361 int ret; 375 int ret;
362 376
363 if (!HAVE_HWFP)
364 return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
365
366 ret = init_fpu(target); 377 ret = init_fpu(target);
367 if (ret) 378 if (ret)
368 return ret; 379 return ret;
369 380
370 set_stopped_child_used_math(target); 381 set_stopped_child_used_math(target);
371 382
383 if (!HAVE_HWFP)
384 return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
385
372 if (!cpu_has_fxsr) { 386 if (!cpu_has_fxsr) {
373 return user_regset_copyin(&pos, &count, &kbuf, &ubuf, 387 return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
374 &target->thread.xstate->fsave, 0, -1); 388 &target->thread.xstate->fsave, 0, -1);
@@ -474,18 +488,18 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf)
474int restore_i387_ia32(struct _fpstate_ia32 __user *buf) 488int restore_i387_ia32(struct _fpstate_ia32 __user *buf)
475{ 489{
476 int err; 490 int err;
491 struct task_struct *tsk = current;
477 492
478 if (HAVE_HWFP) { 493 if (HAVE_HWFP)
479 struct task_struct *tsk = current;
480
481 clear_fpu(tsk); 494 clear_fpu(tsk);
482 495
483 if (!used_math()) { 496 if (!used_math()) {
484 err = init_fpu(tsk); 497 err = init_fpu(tsk);
485 if (err) 498 if (err)
486 return err; 499 return err;
487 } 500 }
488 501
502 if (HAVE_HWFP) {
489 if (cpu_has_fxsr) 503 if (cpu_has_fxsr)
490 err = restore_i387_fxsave(buf); 504 err = restore_i387_fxsave(buf);
491 else 505 else
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index a40d54fc1fdd..4dc8600d9d20 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -2130,14 +2130,10 @@ static inline void __init check_timer(void)
2130{ 2130{
2131 int apic1, pin1, apic2, pin2; 2131 int apic1, pin1, apic2, pin2;
2132 int vector; 2132 int vector;
2133 unsigned int ver;
2134 unsigned long flags; 2133 unsigned long flags;
2135 2134
2136 local_irq_save(flags); 2135 local_irq_save(flags);
2137 2136
2138 ver = apic_read(APIC_LVR);
2139 ver = GET_APIC_VERSION(ver);
2140
2141 /* 2137 /*
2142 * get/set the timer IRQ vector: 2138 * get/set the timer IRQ vector:
2143 */ 2139 */
@@ -2150,15 +2146,11 @@ static inline void __init check_timer(void)
2150 * mode for the 8259A whenever interrupts are routed 2146 * mode for the 8259A whenever interrupts are routed
2151 * through I/O APICs. Also IRQ0 has to be enabled in 2147 * through I/O APICs. Also IRQ0 has to be enabled in
2152 * the 8259A which implies the virtual wire has to be 2148 * the 8259A which implies the virtual wire has to be
2153 * disabled in the local APIC. Finally timer interrupts 2149 * disabled in the local APIC.
2154 * need to be acknowledged manually in the 8259A for
2155 * timer_interrupt() and for the i82489DX when using
2156 * the NMI watchdog.
2157 */ 2150 */
2158 apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); 2151 apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2159 init_8259A(1); 2152 init_8259A(1);
2160 timer_ack = !cpu_has_tsc; 2153 timer_ack = 1;
2161 timer_ack |= (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
2162 if (timer_over_8254 > 0) 2154 if (timer_over_8254 > 0)
2163 enable_8259A_irq(0); 2155 enable_8259A_irq(0);
2164 2156
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 4bc1be5d5472..87edf1ceb1df 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -18,6 +18,7 @@
18 18
19#include <linux/clocksource.h> 19#include <linux/clocksource.h>
20#include <linux/kvm_para.h> 20#include <linux/kvm_para.h>
21#include <asm/pvclock.h>
21#include <asm/arch_hooks.h> 22#include <asm/arch_hooks.h>
22#include <asm/msr.h> 23#include <asm/msr.h>
23#include <asm/apic.h> 24#include <asm/apic.h>
@@ -36,83 +37,47 @@ static int parse_no_kvmclock(char *arg)
36early_param("no-kvmclock", parse_no_kvmclock); 37early_param("no-kvmclock", parse_no_kvmclock);
37 38
38/* The hypervisor will put information about time periodically here */ 39/* The hypervisor will put information about time periodically here */
39static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock); 40static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock);
40#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field 41static struct pvclock_wall_clock wall_clock;
41 42
42static inline u64 kvm_get_delta(u64 last_tsc)
43{
44 int cpu = smp_processor_id();
45 u64 delta = native_read_tsc() - last_tsc;
46 return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
47}
48
49static struct kvm_wall_clock wall_clock;
50static cycle_t kvm_clock_read(void);
51/* 43/*
52 * The wallclock is the time of day when we booted. Since then, some time may 44 * The wallclock is the time of day when we booted. Since then, some time may
53 * have elapsed since the hypervisor wrote the data. So we try to account for 45 * have elapsed since the hypervisor wrote the data. So we try to account for
54 * that with system time 46 * that with system time
55 */ 47 */
56unsigned long kvm_get_wallclock(void) 48static unsigned long kvm_get_wallclock(void)
57{ 49{
58 u32 wc_sec, wc_nsec; 50 struct pvclock_vcpu_time_info *vcpu_time;
59 u64 delta;
60 struct timespec ts; 51 struct timespec ts;
61 int version, nsec;
62 int low, high; 52 int low, high;
63 53
64 low = (int)__pa(&wall_clock); 54 low = (int)__pa(&wall_clock);
65 high = ((u64)__pa(&wall_clock) >> 32); 55 high = ((u64)__pa(&wall_clock) >> 32);
56 native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
66 57
67 delta = kvm_clock_read(); 58 vcpu_time = &get_cpu_var(hv_clock);
59 pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
60 put_cpu_var(hv_clock);
68 61
69 native_write_msr(MSR_KVM_WALL_CLOCK, low, high); 62 return ts.tv_sec;
70 do {
71 version = wall_clock.wc_version;
72 rmb();
73 wc_sec = wall_clock.wc_sec;
74 wc_nsec = wall_clock.wc_nsec;
75 rmb();
76 } while ((wall_clock.wc_version != version) || (version & 1));
77
78 delta = kvm_clock_read() - delta;
79 delta += wc_nsec;
80 nsec = do_div(delta, NSEC_PER_SEC);
81 set_normalized_timespec(&ts, wc_sec + delta, nsec);
82 /*
83 * Of all mechanisms of time adjustment I've tested, this one
84 * was the champion!
85 */
86 return ts.tv_sec + 1;
87} 63}
88 64
89int kvm_set_wallclock(unsigned long now) 65static int kvm_set_wallclock(unsigned long now)
90{ 66{
91 return 0; 67 return -1;
92} 68}
93 69
94/*
95 * This is our read_clock function. The host puts an tsc timestamp each time
96 * it updates a new time. Without the tsc adjustment, we can have a situation
97 * in which a vcpu starts to run earlier (smaller system_time), but probes
98 * time later (compared to another vcpu), leading to backwards time
99 */
100static cycle_t kvm_clock_read(void) 70static cycle_t kvm_clock_read(void)
101{ 71{
102 u64 last_tsc, now; 72 struct pvclock_vcpu_time_info *src;
103 int cpu; 73 cycle_t ret;
104 74
105 preempt_disable(); 75 src = &get_cpu_var(hv_clock);
106 cpu = smp_processor_id(); 76 ret = pvclock_clocksource_read(src);
107 77 put_cpu_var(hv_clock);
108 last_tsc = get_clock(cpu, tsc_timestamp); 78 return ret;
109 now = get_clock(cpu, system_time);
110
111 now += kvm_get_delta(last_tsc);
112 preempt_enable();
113
114 return now;
115} 79}
80
116static struct clocksource kvm_clock = { 81static struct clocksource kvm_clock = {
117 .name = "kvm-clock", 82 .name = "kvm-clock",
118 .read = kvm_clock_read, 83 .read = kvm_clock_read,
@@ -123,13 +88,14 @@ static struct clocksource kvm_clock = {
123 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 88 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
124}; 89};
125 90
126static int kvm_register_clock(void) 91static int kvm_register_clock(char *txt)
127{ 92{
128 int cpu = smp_processor_id(); 93 int cpu = smp_processor_id();
129 int low, high; 94 int low, high;
130 low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1; 95 low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
131 high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); 96 high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
132 97 printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
98 cpu, high, low, txt);
133 return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); 99 return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
134} 100}
135 101
@@ -140,12 +106,20 @@ static void kvm_setup_secondary_clock(void)
140 * Now that the first cpu already had this clocksource initialized, 106 * Now that the first cpu already had this clocksource initialized,
141 * we shouldn't fail. 107 * we shouldn't fail.
142 */ 108 */
143 WARN_ON(kvm_register_clock()); 109 WARN_ON(kvm_register_clock("secondary cpu clock"));
144 /* ok, done with our trickery, call native */ 110 /* ok, done with our trickery, call native */
145 setup_secondary_APIC_clock(); 111 setup_secondary_APIC_clock();
146} 112}
147#endif 113#endif
148 114
115#ifdef CONFIG_SMP
116void __init kvm_smp_prepare_boot_cpu(void)
117{
118 WARN_ON(kvm_register_clock("primary cpu clock"));
119 native_smp_prepare_boot_cpu();
120}
121#endif
122
149/* 123/*
150 * After the clock is registered, the host will keep writing to the 124 * After the clock is registered, the host will keep writing to the
151 * registered memory location. If the guest happens to shutdown, this memory 125 * registered memory location. If the guest happens to shutdown, this memory
@@ -174,7 +148,7 @@ void __init kvmclock_init(void)
174 return; 148 return;
175 149
176 if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { 150 if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
177 if (kvm_register_clock()) 151 if (kvm_register_clock("boot clock"))
178 return; 152 return;
179 pv_time_ops.get_wallclock = kvm_get_wallclock; 153 pv_time_ops.get_wallclock = kvm_get_wallclock;
180 pv_time_ops.set_wallclock = kvm_set_wallclock; 154 pv_time_ops.set_wallclock = kvm_set_wallclock;
@@ -182,6 +156,9 @@ void __init kvmclock_init(void)
182#ifdef CONFIG_X86_LOCAL_APIC 156#ifdef CONFIG_X86_LOCAL_APIC
183 pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; 157 pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
184#endif 158#endif
159#ifdef CONFIG_SMP
160 smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
161#endif
185 machine_ops.shutdown = kvm_shutdown; 162 machine_ops.shutdown = kvm_shutdown;
186#ifdef CONFIG_KEXEC 163#ifdef CONFIG_KEXEC
187 machine_ops.crash_shutdown = kvm_crash_shutdown; 164 machine_ops.crash_shutdown = kvm_crash_shutdown;
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 3cad17fe026b..07c0f828f488 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -155,6 +155,7 @@ int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable)
155 wrmsr(msr, value, dummy); 155 wrmsr(msr, value, dummy);
156 return 0; 156 return 0;
157} 157}
158EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event);
158 159
159int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable) 160int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable)
160{ 161{
@@ -222,6 +223,7 @@ int geode_mfgpt_alloc_timer(int timer, int domain)
222 /* No timers available - too bad */ 223 /* No timers available - too bad */
223 return -1; 224 return -1;
224} 225}
226EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer);
225 227
226 228
227#ifdef CONFIG_GEODE_MFGPT_TIMER 229#ifdef CONFIG_GEODE_MFGPT_TIMER
diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c
index 11b14bbaa61e..84160f74eeb0 100644
--- a/arch/x86/kernel/nmi_32.c
+++ b/arch/x86/kernel/nmi_32.c
@@ -26,7 +26,6 @@
26 26
27#include <asm/smp.h> 27#include <asm/smp.h>
28#include <asm/nmi.h> 28#include <asm/nmi.h>
29#include <asm/timer.h>
30 29
31#include "mach_traps.h" 30#include "mach_traps.h"
32 31
@@ -82,7 +81,7 @@ int __init check_nmi_watchdog(void)
82 81
83 prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); 82 prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
84 if (!prev_nmi_count) 83 if (!prev_nmi_count)
85 goto error; 84 return -1;
86 85
87 printk(KERN_INFO "Testing NMI watchdog ... "); 86 printk(KERN_INFO "Testing NMI watchdog ... ");
88 87
@@ -119,7 +118,7 @@ int __init check_nmi_watchdog(void)
119 if (!atomic_read(&nmi_active)) { 118 if (!atomic_read(&nmi_active)) {
120 kfree(prev_nmi_count); 119 kfree(prev_nmi_count);
121 atomic_set(&nmi_active, -1); 120 atomic_set(&nmi_active, -1);
122 goto error; 121 return -1;
123 } 122 }
124 printk("OK.\n"); 123 printk("OK.\n");
125 124
@@ -130,10 +129,6 @@ int __init check_nmi_watchdog(void)
130 129
131 kfree(prev_nmi_count); 130 kfree(prev_nmi_count);
132 return 0; 131 return 0;
133error:
134 timer_ack = !cpu_has_tsc;
135
136 return -1;
137} 132}
138 133
139static int __init setup_nmi_watchdog(char *str) 134static int __init setup_nmi_watchdog(char *str)
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index c5ef1af8e79d..dc00a1331ace 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -378,6 +378,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
378 struct page *page; 378 struct page *page;
379 unsigned long dma_mask = 0; 379 unsigned long dma_mask = 0;
380 dma_addr_t bus; 380 dma_addr_t bus;
381 int noretry = 0;
381 382
382 /* ignore region specifiers */ 383 /* ignore region specifiers */
383 gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); 384 gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
@@ -397,20 +398,25 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
397 if (dev->dma_mask == NULL) 398 if (dev->dma_mask == NULL)
398 return NULL; 399 return NULL;
399 400
400 /* Don't invoke OOM killer */ 401 /* Don't invoke OOM killer or retry in lower 16MB DMA zone */
401 gfp |= __GFP_NORETRY; 402 if (gfp & __GFP_DMA)
403 noretry = 1;
402 404
403#ifdef CONFIG_X86_64 405#ifdef CONFIG_X86_64
404 /* Why <=? Even when the mask is smaller than 4GB it is often 406 /* Why <=? Even when the mask is smaller than 4GB it is often
405 larger than 16MB and in this case we have a chance of 407 larger than 16MB and in this case we have a chance of
406 finding fitting memory in the next higher zone first. If 408 finding fitting memory in the next higher zone first. If
407 not retry with true GFP_DMA. -AK */ 409 not retry with true GFP_DMA. -AK */
408 if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) 410 if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
409 gfp |= GFP_DMA32; 411 gfp |= GFP_DMA32;
412 if (dma_mask < DMA_32BIT_MASK)
413 noretry = 1;
414 }
410#endif 415#endif
411 416
412 again: 417 again:
413 page = dma_alloc_pages(dev, gfp, get_order(size)); 418 page = dma_alloc_pages(dev,
419 noretry ? gfp | __GFP_NORETRY : gfp, get_order(size));
414 if (page == NULL) 420 if (page == NULL)
415 return NULL; 421 return NULL;
416 422
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index c07455d1695f..aa8ec928caa8 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -26,6 +26,7 @@
26#include <linux/kdebug.h> 26#include <linux/kdebug.h>
27#include <linux/scatterlist.h> 27#include <linux/scatterlist.h>
28#include <linux/iommu-helper.h> 28#include <linux/iommu-helper.h>
29#include <linux/sysdev.h>
29#include <asm/atomic.h> 30#include <asm/atomic.h>
30#include <asm/io.h> 31#include <asm/io.h>
31#include <asm/mtrr.h> 32#include <asm/mtrr.h>
@@ -548,6 +549,28 @@ static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
548 return aper_base; 549 return aper_base;
549} 550}
550 551
552static int gart_resume(struct sys_device *dev)
553{
554 return 0;
555}
556
557static int gart_suspend(struct sys_device *dev, pm_message_t state)
558{
559 return -EINVAL;
560}
561
562static struct sysdev_class gart_sysdev_class = {
563 .name = "gart",
564 .suspend = gart_suspend,
565 .resume = gart_resume,
566
567};
568
569static struct sys_device device_gart = {
570 .id = 0,
571 .cls = &gart_sysdev_class,
572};
573
551/* 574/*
552 * Private Northbridge GATT initialization in case we cannot use the 575 * Private Northbridge GATT initialization in case we cannot use the
553 * AGP driver for some reason. 576 * AGP driver for some reason.
@@ -558,7 +581,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
558 unsigned aper_base, new_aper_base; 581 unsigned aper_base, new_aper_base;
559 struct pci_dev *dev; 582 struct pci_dev *dev;
560 void *gatt; 583 void *gatt;
561 int i; 584 int i, error;
562 585
563 printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); 586 printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
564 aper_size = aper_base = info->aper_size = 0; 587 aper_size = aper_base = info->aper_size = 0;
@@ -606,6 +629,12 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
606 629
607 pci_write_config_dword(dev, 0x90, ctl); 630 pci_write_config_dword(dev, 0x90, ctl);
608 } 631 }
632
633 error = sysdev_class_register(&gart_sysdev_class);
634 if (!error)
635 error = sysdev_register(&device_gart);
636 if (error)
637 panic("Could not register gart_sysdev -- would corrupt data on next suspend");
609 flush_gart(); 638 flush_gart();
610 639
611 printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", 640 printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n",
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a30aa1f2607a..347a7aba8b16 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -336,6 +336,7 @@ void flush_thread(void)
336 /* 336 /*
337 * Forget coprocessor state.. 337 * Forget coprocessor state..
338 */ 338 */
339 tsk->fpu_counter = 0;
339 clear_fpu(tsk); 340 clear_fpu(tsk);
340 clear_used_math(); 341 clear_used_math();
341} 342}
@@ -652,8 +653,11 @@ struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct
652 /* If the task has used fpu the last 5 timeslices, just do a full 653 /* If the task has used fpu the last 5 timeslices, just do a full
653 * restore of the math state immediately to avoid the trap; the 654 * restore of the math state immediately to avoid the trap; the
654 * chances of needing FPU soon are obviously high now 655 * chances of needing FPU soon are obviously high now
656 *
657 * tsk_used_math() checks prevent calling math_state_restore(),
658 * which can sleep in the case of !tsk_used_math()
655 */ 659 */
656 if (next_p->fpu_counter > 5) 660 if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
657 math_state_restore(); 661 math_state_restore();
658 662
659 /* 663 /*
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index dd349c92f051..ea090e6cfe39 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -297,6 +297,7 @@ void flush_thread(void)
297 /* 297 /*
298 * Forget coprocessor state.. 298 * Forget coprocessor state..
299 */ 299 */
300 tsk->fpu_counter = 0;
300 clear_fpu(tsk); 301 clear_fpu(tsk);
301 clear_used_math(); 302 clear_used_math();
302} 303}
@@ -661,8 +662,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
661 /* If the task has used fpu the last 5 timeslices, just do a full 662 /* If the task has used fpu the last 5 timeslices, just do a full
662 * restore of the math state immediately to avoid the trap; the 663 * restore of the math state immediately to avoid the trap; the
663 * chances of needing FPU soon are obviously high now 664 * chances of needing FPU soon are obviously high now
665 *
666 * tsk_used_math() checks prevent calling math_state_restore(),
667 * which can sleep in the case of !tsk_used_math()
664 */ 668 */
665 if (next_p->fpu_counter>5) 669 if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
666 math_state_restore(); 670 math_state_restore();
667 return prev_p; 671 return prev_p;
668} 672}
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
new file mode 100644
index 000000000000..05fbe9a0325a
--- /dev/null
+++ b/arch/x86/kernel/pvclock.c
@@ -0,0 +1,141 @@
1/* paravirtual clock -- common code used by kvm/xen
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; either version 2 of the License, or
6 (at your option) any later version.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16*/
17
18#include <linux/kernel.h>
19#include <linux/percpu.h>
20#include <asm/pvclock.h>
21
22/*
23 * These are perodically updated
24 * xen: magic shared_info page
25 * kvm: gpa registered via msr
26 * and then copied here.
27 */
28struct pvclock_shadow_time {
29 u64 tsc_timestamp; /* TSC at last update of time vals. */
30 u64 system_timestamp; /* Time, in nanosecs, since boot. */
31 u32 tsc_to_nsec_mul;
32 int tsc_shift;
33 u32 version;
34};
35
36/*
37 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
38 * yielding a 64-bit result.
39 */
40static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
41{
42 u64 product;
43#ifdef __i386__
44 u32 tmp1, tmp2;
45#endif
46
47 if (shift < 0)
48 delta >>= -shift;
49 else
50 delta <<= shift;
51
52#ifdef __i386__
53 __asm__ (
54 "mul %5 ; "
55 "mov %4,%%eax ; "
56 "mov %%edx,%4 ; "
57 "mul %5 ; "
58 "xor %5,%5 ; "
59 "add %4,%%eax ; "
60 "adc %5,%%edx ; "
61 : "=A" (product), "=r" (tmp1), "=r" (tmp2)
62 : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
63#elif __x86_64__
64 __asm__ (
65 "mul %%rdx ; shrd $32,%%rdx,%%rax"
66 : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
67#else
68#error implement me!
69#endif
70
71 return product;
72}
73
74static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
75{
76 u64 delta = native_read_tsc() - shadow->tsc_timestamp;
77 return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
78}
79
80/*
81 * Reads a consistent set of time-base values from hypervisor,
82 * into a shadow data area.
83 */
84static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
85 struct pvclock_vcpu_time_info *src)
86{
87 do {
88 dst->version = src->version;
89 rmb(); /* fetch version before data */
90 dst->tsc_timestamp = src->tsc_timestamp;
91 dst->system_timestamp = src->system_time;
92 dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
93 dst->tsc_shift = src->tsc_shift;
94 rmb(); /* test version after fetching data */
95 } while ((src->version & 1) || (dst->version != src->version));
96
97 return dst->version;
98}
99
100cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
101{
102 struct pvclock_shadow_time shadow;
103 unsigned version;
104 cycle_t ret, offset;
105
106 do {
107 version = pvclock_get_time_values(&shadow, src);
108 barrier();
109 offset = pvclock_get_nsec_offset(&shadow);
110 ret = shadow.system_timestamp + offset;
111 barrier();
112 } while (version != src->version);
113
114 return ret;
115}
116
117void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
118 struct pvclock_vcpu_time_info *vcpu_time,
119 struct timespec *ts)
120{
121 u32 version;
122 u64 delta;
123 struct timespec now;
124
125 /* get wallclock at system boot */
126 do {
127 version = wall_clock->version;
128 rmb(); /* fetch version before time */
129 now.tv_sec = wall_clock->sec;
130 now.tv_nsec = wall_clock->nsec;
131 rmb(); /* fetch time before checking version */
132 } while ((wall_clock->version & 1) || (version != wall_clock->version));
133
134 delta = pvclock_clocksource_read(vcpu_time); /* time since system boot */
135 delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;
136
137 now.tv_nsec = do_div(delta, NSEC_PER_SEC);
138 now.tv_sec = delta;
139
140 set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
141}
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 9615eee9b775..05191bbc68b8 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -4,6 +4,8 @@
4#include <linux/acpi.h> 4#include <linux/acpi.h>
5#include <linux/bcd.h> 5#include <linux/bcd.h>
6#include <linux/mc146818rtc.h> 6#include <linux/mc146818rtc.h>
7#include <linux/platform_device.h>
8#include <linux/pnp.h>
7 9
8#include <asm/time.h> 10#include <asm/time.h>
9#include <asm/vsyscall.h> 11#include <asm/vsyscall.h>
@@ -197,3 +199,35 @@ unsigned long long native_read_tsc(void)
197} 199}
198EXPORT_SYMBOL(native_read_tsc); 200EXPORT_SYMBOL(native_read_tsc);
199 201
202
203static struct resource rtc_resources[] = {
204 [0] = {
205 .start = RTC_PORT(0),
206 .end = RTC_PORT(1),
207 .flags = IORESOURCE_IO,
208 },
209 [1] = {
210 .start = RTC_IRQ,
211 .end = RTC_IRQ,
212 .flags = IORESOURCE_IRQ,
213 }
214};
215
216static struct platform_device rtc_device = {
217 .name = "rtc_cmos",
218 .id = -1,
219 .resource = rtc_resources,
220 .num_resources = ARRAY_SIZE(rtc_resources),
221};
222
223static __init int add_rtc_cmos(void)
224{
225#ifdef CONFIG_PNP
226 if (!pnp_platform_devices)
227 platform_device_register(&rtc_device);
228#else
229 platform_device_register(&rtc_device);
230#endif /* CONFIG_PNP */
231 return 0;
232}
233device_initcall(add_rtc_cmos);
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 2c5f8b213e86..5a2f8e063887 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -532,10 +532,16 @@ static void __init reserve_crashkernel(void)
532 (unsigned long)(crash_size >> 20), 532 (unsigned long)(crash_size >> 20),
533 (unsigned long)(crash_base >> 20), 533 (unsigned long)(crash_base >> 20),
534 (unsigned long)(total_mem >> 20)); 534 (unsigned long)(total_mem >> 20));
535
536 if (reserve_bootmem(crash_base, crash_size,
537 BOOTMEM_EXCLUSIVE) < 0) {
538 printk(KERN_INFO "crashkernel reservation "
539 "failed - memory is in use\n");
540 return;
541 }
542
535 crashk_res.start = crash_base; 543 crashk_res.start = crash_base;
536 crashk_res.end = crash_base + crash_size - 1; 544 crashk_res.end = crash_base + crash_size - 1;
537 reserve_bootmem(crash_base, crash_size,
538 BOOTMEM_DEFAULT);
539 } else 545 } else
540 printk(KERN_INFO "crashkernel reservation failed - " 546 printk(KERN_INFO "crashkernel reservation failed - "
541 "you have to specify a base address\n"); 547 "you have to specify a base address\n");
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 38988491c622..3e1cecedde42 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -996,7 +996,6 @@ do_rest:
996#endif 996#endif
997 cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */ 997 cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */
998 cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ 998 cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
999 cpu_clear(cpu, cpu_possible_map);
1000 cpu_clear(cpu, cpu_present_map); 999 cpu_clear(cpu, cpu_present_map);
1001 per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID; 1000 per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
1002 } 1001 }
@@ -1190,6 +1189,7 @@ static void __init smp_cpu_index_default(void)
1190 */ 1189 */
1191void __init native_smp_prepare_cpus(unsigned int max_cpus) 1190void __init native_smp_prepare_cpus(unsigned int max_cpus)
1192{ 1191{
1192 preempt_disable();
1193 nmi_watchdog_default(); 1193 nmi_watchdog_default();
1194 smp_cpu_index_default(); 1194 smp_cpu_index_default();
1195 current_cpu_data = boot_cpu_data; 1195 current_cpu_data = boot_cpu_data;
@@ -1206,7 +1206,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1206 if (smp_sanity_check(max_cpus) < 0) { 1206 if (smp_sanity_check(max_cpus) < 0) {
1207 printk(KERN_INFO "SMP disabled\n"); 1207 printk(KERN_INFO "SMP disabled\n");
1208 disable_smp(); 1208 disable_smp();
1209 return; 1209 goto out;
1210 } 1210 }
1211 1211
1212 preempt_disable(); 1212 preempt_disable();
@@ -1246,6 +1246,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1246 printk(KERN_INFO "CPU%d: ", 0); 1246 printk(KERN_INFO "CPU%d: ", 0);
1247 print_cpu_info(&cpu_data(0)); 1247 print_cpu_info(&cpu_data(0));
1248 setup_boot_clock(); 1248 setup_boot_clock();
1249out:
1250 preempt_enable();
1249} 1251}
1250/* 1252/*
1251 * Early setup to make printk work. 1253 * Early setup to make printk work.
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index bde6f63e15d5..08d752de4eee 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -544,6 +544,7 @@ vm86_trap:
544#define DO_ERROR(trapnr, signr, str, name) \ 544#define DO_ERROR(trapnr, signr, str, name) \
545void do_##name(struct pt_regs *regs, long error_code) \ 545void do_##name(struct pt_regs *regs, long error_code) \
546{ \ 546{ \
547 trace_hardirqs_fixup(); \
547 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 548 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
548 == NOTIFY_STOP) \ 549 == NOTIFY_STOP) \
549 return; \ 550 return; \
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
index e4790728b224..65b70637ad97 100644
--- a/arch/x86/kernel/tsc_32.c
+++ b/arch/x86/kernel/tsc_32.c
@@ -14,7 +14,10 @@
14 14
15#include "mach_timer.h" 15#include "mach_timer.h"
16 16
17static int tsc_enabled; 17/* native_sched_clock() is called before tsc_init(), so
18 we must start with the TSC soft disabled to prevent
19 erroneous rdtsc usage on !cpu_has_tsc processors */
20static int tsc_disabled = -1;
18 21
19/* 22/*
20 * On some systems the TSC frequency does not 23 * On some systems the TSC frequency does not
@@ -28,8 +31,8 @@ EXPORT_SYMBOL_GPL(tsc_khz);
28static int __init tsc_setup(char *str) 31static int __init tsc_setup(char *str)
29{ 32{
30 printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " 33 printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
31 "cannot disable TSC completely.\n"); 34 "cannot disable TSC completely.\n");
32 mark_tsc_unstable("user disabled TSC"); 35 tsc_disabled = 1;
33 return 1; 36 return 1;
34} 37}
35#else 38#else
@@ -120,7 +123,7 @@ unsigned long long native_sched_clock(void)
120 * very important for it to be as fast as the platform 123 * very important for it to be as fast as the platform
121 * can achive it. ) 124 * can achive it. )
122 */ 125 */
123 if (unlikely(!tsc_enabled && !tsc_unstable)) 126 if (unlikely(tsc_disabled))
124 /* No locking but a rare wrong value is not a big deal: */ 127 /* No locking but a rare wrong value is not a big deal: */
125 return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); 128 return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
126 129
@@ -322,7 +325,6 @@ void mark_tsc_unstable(char *reason)
322{ 325{
323 if (!tsc_unstable) { 326 if (!tsc_unstable) {
324 tsc_unstable = 1; 327 tsc_unstable = 1;
325 tsc_enabled = 0;
326 printk("Marking TSC unstable due to: %s.\n", reason); 328 printk("Marking TSC unstable due to: %s.\n", reason);
327 /* Can be called before registration */ 329 /* Can be called before registration */
328 if (clocksource_tsc.mult) 330 if (clocksource_tsc.mult)
@@ -336,7 +338,7 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable);
336static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d) 338static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d)
337{ 339{
338 printk(KERN_NOTICE "%s detected: marking TSC unstable.\n", 340 printk(KERN_NOTICE "%s detected: marking TSC unstable.\n",
339 d->ident); 341 d->ident);
340 tsc_unstable = 1; 342 tsc_unstable = 1;
341 return 0; 343 return 0;
342} 344}
@@ -403,7 +405,7 @@ void __init tsc_init(void)
403{ 405{
404 int cpu; 406 int cpu;
405 407
406 if (!cpu_has_tsc) 408 if (!cpu_has_tsc || tsc_disabled > 0)
407 return; 409 return;
408 410
409 cpu_khz = calculate_cpu_khz(); 411 cpu_khz = calculate_cpu_khz();
@@ -414,6 +416,9 @@ void __init tsc_init(void)
414 return; 416 return;
415 } 417 }
416 418
419 /* now allow native_sched_clock() to use rdtsc */
420 tsc_disabled = 0;
421
417 printk("Detected %lu.%03lu MHz processor.\n", 422 printk("Detected %lu.%03lu MHz processor.\n",
418 (unsigned long)cpu_khz / 1000, 423 (unsigned long)cpu_khz / 1000,
419 (unsigned long)cpu_khz % 1000); 424 (unsigned long)cpu_khz % 1000);
@@ -441,8 +446,6 @@ void __init tsc_init(void)
441 if (check_tsc_unstable()) { 446 if (check_tsc_unstable()) {
442 clocksource_tsc.rating = 0; 447 clocksource_tsc.rating = 0;
443 clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; 448 clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
444 } else 449 }
445 tsc_enabled = 1;
446
447 clocksource_register(&clocksource_tsc); 450 clocksource_register(&clocksource_tsc);
448} 451}
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
index fcc16e58609e..1784b8077a12 100644
--- a/arch/x86/kernel/tsc_64.c
+++ b/arch/x86/kernel/tsc_64.c
@@ -227,14 +227,14 @@ void __init tsc_calibrate(void)
227 /* hpet or pmtimer available ? */ 227 /* hpet or pmtimer available ? */
228 if (!hpet && !pm1 && !pm2) { 228 if (!hpet && !pm1 && !pm2) {
229 printk(KERN_INFO "TSC calibrated against PIT\n"); 229 printk(KERN_INFO "TSC calibrated against PIT\n");
230 return; 230 goto out;
231 } 231 }
232 232
233 /* Check, whether the sampling was disturbed by an SMI */ 233 /* Check, whether the sampling was disturbed by an SMI */
234 if (tsc1 == ULONG_MAX || tsc2 == ULONG_MAX) { 234 if (tsc1 == ULONG_MAX || tsc2 == ULONG_MAX) {
235 printk(KERN_WARNING "TSC calibration disturbed by SMI, " 235 printk(KERN_WARNING "TSC calibration disturbed by SMI, "
236 "using PIT calibration result\n"); 236 "using PIT calibration result\n");
237 return; 237 goto out;
238 } 238 }
239 239
240 tsc2 = (tsc2 - tsc1) * 1000000L; 240 tsc2 = (tsc2 - tsc1) * 1000000L;
@@ -255,6 +255,7 @@ void __init tsc_calibrate(void)
255 255
256 tsc_khz = tsc2 / tsc1; 256 tsc_khz = tsc2 / tsc1;
257 257
258out:
258 for_each_possible_cpu(cpu) 259 for_each_possible_cpu(cpu)
259 set_cyc2ns_scale(tsc_khz, cpu); 260 set_cyc2ns_scale(tsc_khz, cpu);
260} 261}