diff options
Diffstat (limited to 'arch/x86')
55 files changed, 698 insertions, 521 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0cd1695c24fb..09a28a36ff26 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -383,6 +383,7 @@ config VMI | |||
383 | config KVM_CLOCK | 383 | config KVM_CLOCK |
384 | bool "KVM paravirtualized clock" | 384 | bool "KVM paravirtualized clock" |
385 | select PARAVIRT | 385 | select PARAVIRT |
386 | select PARAVIRT_CLOCK | ||
386 | depends on !(X86_VISWS || X86_VOYAGER) | 387 | depends on !(X86_VISWS || X86_VOYAGER) |
387 | help | 388 | help |
388 | Turning on this option will allow you to run a paravirtualized clock | 389 | Turning on this option will allow you to run a paravirtualized clock |
@@ -410,6 +411,10 @@ config PARAVIRT | |||
410 | over full virtualization. However, when run without a hypervisor | 411 | over full virtualization. However, when run without a hypervisor |
411 | the kernel is theoretically slower and slightly larger. | 412 | the kernel is theoretically slower and slightly larger. |
412 | 413 | ||
414 | config PARAVIRT_CLOCK | ||
415 | bool | ||
416 | default n | ||
417 | |||
413 | endif | 418 | endif |
414 | 419 | ||
415 | config MEMTEST_BOOTPARAM | 420 | config MEMTEST_BOOTPARAM |
@@ -1505,13 +1510,13 @@ config PCI_GOMMCONFIG | |||
1505 | config PCI_GODIRECT | 1510 | config PCI_GODIRECT |
1506 | bool "Direct" | 1511 | bool "Direct" |
1507 | 1512 | ||
1508 | config PCI_GOANY | ||
1509 | bool "Any" | ||
1510 | |||
1511 | config PCI_GOOLPC | 1513 | config PCI_GOOLPC |
1512 | bool "OLPC" | 1514 | bool "OLPC" |
1513 | depends on OLPC | 1515 | depends on OLPC |
1514 | 1516 | ||
1517 | config PCI_GOANY | ||
1518 | bool "Any" | ||
1519 | |||
1515 | endchoice | 1520 | endchoice |
1516 | 1521 | ||
1517 | config PCI_BIOS | 1522 | config PCI_BIOS |
@@ -1528,9 +1533,8 @@ config PCI_MMCONFIG | |||
1528 | depends on X86_32 && PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY) | 1533 | depends on X86_32 && PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY) |
1529 | 1534 | ||
1530 | config PCI_OLPC | 1535 | config PCI_OLPC |
1531 | bool | 1536 | def_bool y |
1532 | depends on PCI && PCI_GOOLPC | 1537 | depends on PCI && OLPC && (PCI_GOOLPC || PCI_GOANY) |
1533 | default y | ||
1534 | 1538 | ||
1535 | config PCI_DOMAINS | 1539 | config PCI_DOMAINS |
1536 | def_bool y | 1540 | def_bool y |
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index b5c5b55d0437..a8d3c7e0414a 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug | |||
@@ -6,15 +6,19 @@ config TRACE_IRQFLAGS_SUPPORT | |||
6 | source "lib/Kconfig.debug" | 6 | source "lib/Kconfig.debug" |
7 | 7 | ||
8 | config NONPROMISC_DEVMEM | 8 | config NONPROMISC_DEVMEM |
9 | bool "Disable promiscuous /dev/mem" | 9 | bool "Filter access to /dev/mem" |
10 | help | 10 | help |
11 | The /dev/mem file by default only allows userspace access to PCI | 11 | If this option is left off, you allow userspace access to all |
12 | space and the BIOS code and data regions. This is sufficient for | 12 | of memory, including kernel and userspace memory. Accidental |
13 | dosemu and X and all common users of /dev/mem. With this config | 13 | access to this is obviously disastrous, but specific access can |
14 | option, you allow userspace access to all of memory, including | 14 | be used by people debugging the kernel. |
15 | kernel and userspace memory. Accidental access to this is | 15 | |
16 | obviously disasterous, but specific access can be used by people | 16 | If this option is switched on, the /dev/mem file only allows |
17 | debugging the kernel. | 17 | userspace access to PCI space and the BIOS code and data regions. |
18 | This is sufficient for dosemu and X and all common users of | ||
19 | /dev/mem. | ||
20 | |||
21 | If in doubt, say Y. | ||
18 | 22 | ||
19 | config EARLY_PRINTK | 23 | config EARLY_PRINTK |
20 | bool "Early printk" if EMBEDDED | 24 | bool "Early printk" if EMBEDDED |
diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c index 90943f83e84d..e01aafd03bde 100644 --- a/arch/x86/boot/a20.c +++ b/arch/x86/boot/a20.c | |||
@@ -115,8 +115,6 @@ static void enable_a20_fast(void) | |||
115 | 115 | ||
116 | int enable_a20(void) | 116 | int enable_a20(void) |
117 | { | 117 | { |
118 | int loops = A20_ENABLE_LOOPS; | ||
119 | |||
120 | #if defined(CONFIG_X86_ELAN) | 118 | #if defined(CONFIG_X86_ELAN) |
121 | /* Elan croaks if we try to touch the KBC */ | 119 | /* Elan croaks if we try to touch the KBC */ |
122 | enable_a20_fast(); | 120 | enable_a20_fast(); |
@@ -128,6 +126,7 @@ int enable_a20(void) | |||
128 | enable_a20_kbc(); | 126 | enable_a20_kbc(); |
129 | return 0; | 127 | return 0; |
130 | #else | 128 | #else |
129 | int loops = A20_ENABLE_LOOPS; | ||
131 | while (loops--) { | 130 | while (loops--) { |
132 | /* First, check to see if A20 is already enabled | 131 | /* First, check to see if A20 is already enabled |
133 | (legacy free, etc.) */ | 132 | (legacy free, etc.) */ |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 8161e5a91ca9..d001739d8b06 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -83,6 +83,7 @@ obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o | |||
83 | obj-$(CONFIG_KVM_GUEST) += kvm.o | 83 | obj-$(CONFIG_KVM_GUEST) += kvm.o |
84 | obj-$(CONFIG_KVM_CLOCK) += kvmclock.o | 84 | obj-$(CONFIG_KVM_CLOCK) += kvmclock.o |
85 | obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o | 85 | obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o |
86 | obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o | ||
86 | 87 | ||
87 | obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o | 88 | obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o |
88 | 89 | ||
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index c49ebcc6c41e..33c5216fd3e1 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -242,12 +242,19 @@ static int __init acpi_parse_madt(struct acpi_table_header *table) | |||
242 | 242 | ||
243 | static void __cpuinit acpi_register_lapic(int id, u8 enabled) | 243 | static void __cpuinit acpi_register_lapic(int id, u8 enabled) |
244 | { | 244 | { |
245 | unsigned int ver = 0; | ||
246 | |||
245 | if (!enabled) { | 247 | if (!enabled) { |
246 | ++disabled_cpus; | 248 | ++disabled_cpus; |
247 | return; | 249 | return; |
248 | } | 250 | } |
249 | 251 | ||
250 | generic_processor_info(id, 0); | 252 | #ifdef CONFIG_X86_32 |
253 | if (boot_cpu_physical_apicid != -1U) | ||
254 | ver = apic_version[boot_cpu_physical_apicid]; | ||
255 | #endif | ||
256 | |||
257 | generic_processor_info(id, ver); | ||
251 | } | 258 | } |
252 | 259 | ||
253 | static int __init | 260 | static int __init |
@@ -767,8 +774,13 @@ static void __init acpi_register_lapic_address(unsigned long address) | |||
767 | mp_lapic_addr = address; | 774 | mp_lapic_addr = address; |
768 | 775 | ||
769 | set_fixmap_nocache(FIX_APIC_BASE, address); | 776 | set_fixmap_nocache(FIX_APIC_BASE, address); |
770 | if (boot_cpu_physical_apicid == -1U) | 777 | if (boot_cpu_physical_apicid == -1U) { |
771 | boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); | 778 | boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); |
779 | #ifdef CONFIG_X86_32 | ||
780 | apic_version[boot_cpu_physical_apicid] = | ||
781 | GET_APIC_VERSION(apic_read(APIC_LVR)); | ||
782 | #endif | ||
783 | } | ||
772 | } | 784 | } |
773 | 785 | ||
774 | static int __init early_acpi_parse_madt_lapic_addr_ovr(void) | 786 | static int __init early_acpi_parse_madt_lapic_addr_ovr(void) |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 2a609dc3271c..c778e4fa55a2 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -248,6 +248,7 @@ ENTRY(resume_userspace) | |||
248 | DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt | 248 | DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt |
249 | # setting need_resched or sigpending | 249 | # setting need_resched or sigpending |
250 | # between sampling and the iret | 250 | # between sampling and the iret |
251 | TRACE_IRQS_OFF | ||
251 | movl TI_flags(%ebp), %ecx | 252 | movl TI_flags(%ebp), %ecx |
252 | andl $_TIF_WORK_MASK, %ecx # is there any work to be done on | 253 | andl $_TIF_WORK_MASK, %ecx # is there any work to be done on |
253 | # int/exception return? | 254 | # int/exception return? |
diff --git a/arch/x86/kernel/geode_32.c b/arch/x86/kernel/geode_32.c index e8edd63ab000..9b08e852fd1a 100644 --- a/arch/x86/kernel/geode_32.c +++ b/arch/x86/kernel/geode_32.c | |||
@@ -166,6 +166,8 @@ int geode_has_vsa2(void) | |||
166 | static int has_vsa2 = -1; | 166 | static int has_vsa2 = -1; |
167 | 167 | ||
168 | if (has_vsa2 == -1) { | 168 | if (has_vsa2 == -1) { |
169 | u16 val; | ||
170 | |||
169 | /* | 171 | /* |
170 | * The VSA has virtual registers that we can query for a | 172 | * The VSA has virtual registers that we can query for a |
171 | * signature. | 173 | * signature. |
@@ -173,7 +175,8 @@ int geode_has_vsa2(void) | |||
173 | outw(VSA_VR_UNLOCK, VSA_VRC_INDEX); | 175 | outw(VSA_VR_UNLOCK, VSA_VRC_INDEX); |
174 | outw(VSA_VR_SIGNATURE, VSA_VRC_INDEX); | 176 | outw(VSA_VR_SIGNATURE, VSA_VRC_INDEX); |
175 | 177 | ||
176 | has_vsa2 = (inw(VSA_VRC_DATA) == VSA_SIG); | 178 | val = inw(VSA_VRC_DATA); |
179 | has_vsa2 = (val == AMD_VSA_SIG || val == GSW_VSA_SIG); | ||
177 | } | 180 | } |
178 | 181 | ||
179 | return has_vsa2; | 182 | return has_vsa2; |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index b2cc73768a9d..f7357cc0162c 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -189,7 +189,7 @@ default_entry: | |||
189 | * this stage. | 189 | * this stage. |
190 | */ | 190 | */ |
191 | 191 | ||
192 | #define KPMDS ((0x100000000-__PAGE_OFFSET) >> 30) /* Number of kernel PMDs */ | 192 | #define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */ |
193 | 193 | ||
194 | xorl %ebx,%ebx /* %ebx is kept at zero */ | 194 | xorl %ebx,%ebx /* %ebx is kept at zero */ |
195 | 195 | ||
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index e03cc952f233..eb9ddd8efb82 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -56,6 +56,11 @@ void __cpuinit mxcsr_feature_mask_init(void) | |||
56 | 56 | ||
57 | void __init init_thread_xstate(void) | 57 | void __init init_thread_xstate(void) |
58 | { | 58 | { |
59 | if (!HAVE_HWFP) { | ||
60 | xstate_size = sizeof(struct i387_soft_struct); | ||
61 | return; | ||
62 | } | ||
63 | |||
59 | if (cpu_has_fxsr) | 64 | if (cpu_has_fxsr) |
60 | xstate_size = sizeof(struct i387_fxsave_struct); | 65 | xstate_size = sizeof(struct i387_fxsave_struct); |
61 | #ifdef CONFIG_X86_32 | 66 | #ifdef CONFIG_X86_32 |
@@ -94,7 +99,7 @@ void __cpuinit fpu_init(void) | |||
94 | int init_fpu(struct task_struct *tsk) | 99 | int init_fpu(struct task_struct *tsk) |
95 | { | 100 | { |
96 | if (tsk_used_math(tsk)) { | 101 | if (tsk_used_math(tsk)) { |
97 | if (tsk == current) | 102 | if (HAVE_HWFP && tsk == current) |
98 | unlazy_fpu(tsk); | 103 | unlazy_fpu(tsk); |
99 | return 0; | 104 | return 0; |
100 | } | 105 | } |
@@ -109,6 +114,15 @@ int init_fpu(struct task_struct *tsk) | |||
109 | return -ENOMEM; | 114 | return -ENOMEM; |
110 | } | 115 | } |
111 | 116 | ||
117 | #ifdef CONFIG_X86_32 | ||
118 | if (!HAVE_HWFP) { | ||
119 | memset(tsk->thread.xstate, 0, xstate_size); | ||
120 | finit(); | ||
121 | set_stopped_child_used_math(tsk); | ||
122 | return 0; | ||
123 | } | ||
124 | #endif | ||
125 | |||
112 | if (cpu_has_fxsr) { | 126 | if (cpu_has_fxsr) { |
113 | struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; | 127 | struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; |
114 | 128 | ||
@@ -330,13 +344,13 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, | |||
330 | struct user_i387_ia32_struct env; | 344 | struct user_i387_ia32_struct env; |
331 | int ret; | 345 | int ret; |
332 | 346 | ||
333 | if (!HAVE_HWFP) | ||
334 | return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); | ||
335 | |||
336 | ret = init_fpu(target); | 347 | ret = init_fpu(target); |
337 | if (ret) | 348 | if (ret) |
338 | return ret; | 349 | return ret; |
339 | 350 | ||
351 | if (!HAVE_HWFP) | ||
352 | return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); | ||
353 | |||
340 | if (!cpu_has_fxsr) { | 354 | if (!cpu_has_fxsr) { |
341 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, | 355 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, |
342 | &target->thread.xstate->fsave, 0, | 356 | &target->thread.xstate->fsave, 0, |
@@ -360,15 +374,15 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
360 | struct user_i387_ia32_struct env; | 374 | struct user_i387_ia32_struct env; |
361 | int ret; | 375 | int ret; |
362 | 376 | ||
363 | if (!HAVE_HWFP) | ||
364 | return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); | ||
365 | |||
366 | ret = init_fpu(target); | 377 | ret = init_fpu(target); |
367 | if (ret) | 378 | if (ret) |
368 | return ret; | 379 | return ret; |
369 | 380 | ||
370 | set_stopped_child_used_math(target); | 381 | set_stopped_child_used_math(target); |
371 | 382 | ||
383 | if (!HAVE_HWFP) | ||
384 | return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); | ||
385 | |||
372 | if (!cpu_has_fxsr) { | 386 | if (!cpu_has_fxsr) { |
373 | return user_regset_copyin(&pos, &count, &kbuf, &ubuf, | 387 | return user_regset_copyin(&pos, &count, &kbuf, &ubuf, |
374 | &target->thread.xstate->fsave, 0, -1); | 388 | &target->thread.xstate->fsave, 0, -1); |
@@ -474,18 +488,18 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) | |||
474 | int restore_i387_ia32(struct _fpstate_ia32 __user *buf) | 488 | int restore_i387_ia32(struct _fpstate_ia32 __user *buf) |
475 | { | 489 | { |
476 | int err; | 490 | int err; |
491 | struct task_struct *tsk = current; | ||
477 | 492 | ||
478 | if (HAVE_HWFP) { | 493 | if (HAVE_HWFP) |
479 | struct task_struct *tsk = current; | ||
480 | |||
481 | clear_fpu(tsk); | 494 | clear_fpu(tsk); |
482 | 495 | ||
483 | if (!used_math()) { | 496 | if (!used_math()) { |
484 | err = init_fpu(tsk); | 497 | err = init_fpu(tsk); |
485 | if (err) | 498 | if (err) |
486 | return err; | 499 | return err; |
487 | } | 500 | } |
488 | 501 | ||
502 | if (HAVE_HWFP) { | ||
489 | if (cpu_has_fxsr) | 503 | if (cpu_has_fxsr) |
490 | err = restore_i387_fxsave(buf); | 504 | err = restore_i387_fxsave(buf); |
491 | else | 505 | else |
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index a40d54fc1fdd..4dc8600d9d20 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c | |||
@@ -2130,14 +2130,10 @@ static inline void __init check_timer(void) | |||
2130 | { | 2130 | { |
2131 | int apic1, pin1, apic2, pin2; | 2131 | int apic1, pin1, apic2, pin2; |
2132 | int vector; | 2132 | int vector; |
2133 | unsigned int ver; | ||
2134 | unsigned long flags; | 2133 | unsigned long flags; |
2135 | 2134 | ||
2136 | local_irq_save(flags); | 2135 | local_irq_save(flags); |
2137 | 2136 | ||
2138 | ver = apic_read(APIC_LVR); | ||
2139 | ver = GET_APIC_VERSION(ver); | ||
2140 | |||
2141 | /* | 2137 | /* |
2142 | * get/set the timer IRQ vector: | 2138 | * get/set the timer IRQ vector: |
2143 | */ | 2139 | */ |
@@ -2150,15 +2146,11 @@ static inline void __init check_timer(void) | |||
2150 | * mode for the 8259A whenever interrupts are routed | 2146 | * mode for the 8259A whenever interrupts are routed |
2151 | * through I/O APICs. Also IRQ0 has to be enabled in | 2147 | * through I/O APICs. Also IRQ0 has to be enabled in |
2152 | * the 8259A which implies the virtual wire has to be | 2148 | * the 8259A which implies the virtual wire has to be |
2153 | * disabled in the local APIC. Finally timer interrupts | 2149 | * disabled in the local APIC. |
2154 | * need to be acknowledged manually in the 8259A for | ||
2155 | * timer_interrupt() and for the i82489DX when using | ||
2156 | * the NMI watchdog. | ||
2157 | */ | 2150 | */ |
2158 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); | 2151 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); |
2159 | init_8259A(1); | 2152 | init_8259A(1); |
2160 | timer_ack = !cpu_has_tsc; | 2153 | timer_ack = 1; |
2161 | timer_ack |= (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); | ||
2162 | if (timer_over_8254 > 0) | 2154 | if (timer_over_8254 > 0) |
2163 | enable_8259A_irq(0); | 2155 | enable_8259A_irq(0); |
2164 | 2156 | ||
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 08a30986d472..87edf1ceb1df 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -18,6 +18,7 @@ | |||
18 | 18 | ||
19 | #include <linux/clocksource.h> | 19 | #include <linux/clocksource.h> |
20 | #include <linux/kvm_para.h> | 20 | #include <linux/kvm_para.h> |
21 | #include <asm/pvclock.h> | ||
21 | #include <asm/arch_hooks.h> | 22 | #include <asm/arch_hooks.h> |
22 | #include <asm/msr.h> | 23 | #include <asm/msr.h> |
23 | #include <asm/apic.h> | 24 | #include <asm/apic.h> |
@@ -36,18 +37,9 @@ static int parse_no_kvmclock(char *arg) | |||
36 | early_param("no-kvmclock", parse_no_kvmclock); | 37 | early_param("no-kvmclock", parse_no_kvmclock); |
37 | 38 | ||
38 | /* The hypervisor will put information about time periodically here */ | 39 | /* The hypervisor will put information about time periodically here */ |
39 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock); | 40 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock); |
40 | #define get_clock(cpu, field) per_cpu(hv_clock, cpu).field | 41 | static struct pvclock_wall_clock wall_clock; |
41 | 42 | ||
42 | static inline u64 kvm_get_delta(u64 last_tsc) | ||
43 | { | ||
44 | int cpu = smp_processor_id(); | ||
45 | u64 delta = native_read_tsc() - last_tsc; | ||
46 | return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE; | ||
47 | } | ||
48 | |||
49 | static struct kvm_wall_clock wall_clock; | ||
50 | static cycle_t kvm_clock_read(void); | ||
51 | /* | 43 | /* |
52 | * The wallclock is the time of day when we booted. Since then, some time may | 44 | * The wallclock is the time of day when we booted. Since then, some time may |
53 | * have elapsed since the hypervisor wrote the data. So we try to account for | 45 | * have elapsed since the hypervisor wrote the data. So we try to account for |
@@ -55,64 +47,37 @@ static cycle_t kvm_clock_read(void); | |||
55 | */ | 47 | */ |
56 | static unsigned long kvm_get_wallclock(void) | 48 | static unsigned long kvm_get_wallclock(void) |
57 | { | 49 | { |
58 | u32 wc_sec, wc_nsec; | 50 | struct pvclock_vcpu_time_info *vcpu_time; |
59 | u64 delta; | ||
60 | struct timespec ts; | 51 | struct timespec ts; |
61 | int version, nsec; | ||
62 | int low, high; | 52 | int low, high; |
63 | 53 | ||
64 | low = (int)__pa(&wall_clock); | 54 | low = (int)__pa(&wall_clock); |
65 | high = ((u64)__pa(&wall_clock) >> 32); | 55 | high = ((u64)__pa(&wall_clock) >> 32); |
56 | native_write_msr(MSR_KVM_WALL_CLOCK, low, high); | ||
66 | 57 | ||
67 | delta = kvm_clock_read(); | 58 | vcpu_time = &get_cpu_var(hv_clock); |
59 | pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); | ||
60 | put_cpu_var(hv_clock); | ||
68 | 61 | ||
69 | native_write_msr(MSR_KVM_WALL_CLOCK, low, high); | 62 | return ts.tv_sec; |
70 | do { | ||
71 | version = wall_clock.wc_version; | ||
72 | rmb(); | ||
73 | wc_sec = wall_clock.wc_sec; | ||
74 | wc_nsec = wall_clock.wc_nsec; | ||
75 | rmb(); | ||
76 | } while ((wall_clock.wc_version != version) || (version & 1)); | ||
77 | |||
78 | delta = kvm_clock_read() - delta; | ||
79 | delta += wc_nsec; | ||
80 | nsec = do_div(delta, NSEC_PER_SEC); | ||
81 | set_normalized_timespec(&ts, wc_sec + delta, nsec); | ||
82 | /* | ||
83 | * Of all mechanisms of time adjustment I've tested, this one | ||
84 | * was the champion! | ||
85 | */ | ||
86 | return ts.tv_sec + 1; | ||
87 | } | 63 | } |
88 | 64 | ||
89 | static int kvm_set_wallclock(unsigned long now) | 65 | static int kvm_set_wallclock(unsigned long now) |
90 | { | 66 | { |
91 | return 0; | 67 | return -1; |
92 | } | 68 | } |
93 | 69 | ||
94 | /* | ||
95 | * This is our read_clock function. The host puts an tsc timestamp each time | ||
96 | * it updates a new time. Without the tsc adjustment, we can have a situation | ||
97 | * in which a vcpu starts to run earlier (smaller system_time), but probes | ||
98 | * time later (compared to another vcpu), leading to backwards time | ||
99 | */ | ||
100 | static cycle_t kvm_clock_read(void) | 70 | static cycle_t kvm_clock_read(void) |
101 | { | 71 | { |
102 | u64 last_tsc, now; | 72 | struct pvclock_vcpu_time_info *src; |
103 | int cpu; | 73 | cycle_t ret; |
104 | 74 | ||
105 | preempt_disable(); | 75 | src = &get_cpu_var(hv_clock); |
106 | cpu = smp_processor_id(); | 76 | ret = pvclock_clocksource_read(src); |
107 | 77 | put_cpu_var(hv_clock); | |
108 | last_tsc = get_clock(cpu, tsc_timestamp); | 78 | return ret; |
109 | now = get_clock(cpu, system_time); | ||
110 | |||
111 | now += kvm_get_delta(last_tsc); | ||
112 | preempt_enable(); | ||
113 | |||
114 | return now; | ||
115 | } | 79 | } |
80 | |||
116 | static struct clocksource kvm_clock = { | 81 | static struct clocksource kvm_clock = { |
117 | .name = "kvm-clock", | 82 | .name = "kvm-clock", |
118 | .read = kvm_clock_read, | 83 | .read = kvm_clock_read, |
@@ -123,13 +88,14 @@ static struct clocksource kvm_clock = { | |||
123 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | 88 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
124 | }; | 89 | }; |
125 | 90 | ||
126 | static int kvm_register_clock(void) | 91 | static int kvm_register_clock(char *txt) |
127 | { | 92 | { |
128 | int cpu = smp_processor_id(); | 93 | int cpu = smp_processor_id(); |
129 | int low, high; | 94 | int low, high; |
130 | low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1; | 95 | low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1; |
131 | high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); | 96 | high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); |
132 | 97 | printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", | |
98 | cpu, high, low, txt); | ||
133 | return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); | 99 | return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); |
134 | } | 100 | } |
135 | 101 | ||
@@ -140,12 +106,20 @@ static void kvm_setup_secondary_clock(void) | |||
140 | * Now that the first cpu already had this clocksource initialized, | 106 | * Now that the first cpu already had this clocksource initialized, |
141 | * we shouldn't fail. | 107 | * we shouldn't fail. |
142 | */ | 108 | */ |
143 | WARN_ON(kvm_register_clock()); | 109 | WARN_ON(kvm_register_clock("secondary cpu clock")); |
144 | /* ok, done with our trickery, call native */ | 110 | /* ok, done with our trickery, call native */ |
145 | setup_secondary_APIC_clock(); | 111 | setup_secondary_APIC_clock(); |
146 | } | 112 | } |
147 | #endif | 113 | #endif |
148 | 114 | ||
115 | #ifdef CONFIG_SMP | ||
116 | void __init kvm_smp_prepare_boot_cpu(void) | ||
117 | { | ||
118 | WARN_ON(kvm_register_clock("primary cpu clock")); | ||
119 | native_smp_prepare_boot_cpu(); | ||
120 | } | ||
121 | #endif | ||
122 | |||
149 | /* | 123 | /* |
150 | * After the clock is registered, the host will keep writing to the | 124 | * After the clock is registered, the host will keep writing to the |
151 | * registered memory location. If the guest happens to shutdown, this memory | 125 | * registered memory location. If the guest happens to shutdown, this memory |
@@ -174,7 +148,7 @@ void __init kvmclock_init(void) | |||
174 | return; | 148 | return; |
175 | 149 | ||
176 | if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { | 150 | if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { |
177 | if (kvm_register_clock()) | 151 | if (kvm_register_clock("boot clock")) |
178 | return; | 152 | return; |
179 | pv_time_ops.get_wallclock = kvm_get_wallclock; | 153 | pv_time_ops.get_wallclock = kvm_get_wallclock; |
180 | pv_time_ops.set_wallclock = kvm_set_wallclock; | 154 | pv_time_ops.set_wallclock = kvm_set_wallclock; |
@@ -182,6 +156,9 @@ void __init kvmclock_init(void) | |||
182 | #ifdef CONFIG_X86_LOCAL_APIC | 156 | #ifdef CONFIG_X86_LOCAL_APIC |
183 | pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; | 157 | pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; |
184 | #endif | 158 | #endif |
159 | #ifdef CONFIG_SMP | ||
160 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; | ||
161 | #endif | ||
185 | machine_ops.shutdown = kvm_shutdown; | 162 | machine_ops.shutdown = kvm_shutdown; |
186 | #ifdef CONFIG_KEXEC | 163 | #ifdef CONFIG_KEXEC |
187 | machine_ops.crash_shutdown = kvm_crash_shutdown; | 164 | machine_ops.crash_shutdown = kvm_crash_shutdown; |
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c index 3cad17fe026b..07c0f828f488 100644 --- a/arch/x86/kernel/mfgpt_32.c +++ b/arch/x86/kernel/mfgpt_32.c | |||
@@ -155,6 +155,7 @@ int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable) | |||
155 | wrmsr(msr, value, dummy); | 155 | wrmsr(msr, value, dummy); |
156 | return 0; | 156 | return 0; |
157 | } | 157 | } |
158 | EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event); | ||
158 | 159 | ||
159 | int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable) | 160 | int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable) |
160 | { | 161 | { |
@@ -222,6 +223,7 @@ int geode_mfgpt_alloc_timer(int timer, int domain) | |||
222 | /* No timers available - too bad */ | 223 | /* No timers available - too bad */ |
223 | return -1; | 224 | return -1; |
224 | } | 225 | } |
226 | EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer); | ||
225 | 227 | ||
226 | 228 | ||
227 | #ifdef CONFIG_GEODE_MFGPT_TIMER | 229 | #ifdef CONFIG_GEODE_MFGPT_TIMER |
diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index 11b14bbaa61e..84160f74eeb0 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c | |||
@@ -26,7 +26,6 @@ | |||
26 | 26 | ||
27 | #include <asm/smp.h> | 27 | #include <asm/smp.h> |
28 | #include <asm/nmi.h> | 28 | #include <asm/nmi.h> |
29 | #include <asm/timer.h> | ||
30 | 29 | ||
31 | #include "mach_traps.h" | 30 | #include "mach_traps.h" |
32 | 31 | ||
@@ -82,7 +81,7 @@ int __init check_nmi_watchdog(void) | |||
82 | 81 | ||
83 | prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); | 82 | prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); |
84 | if (!prev_nmi_count) | 83 | if (!prev_nmi_count) |
85 | goto error; | 84 | return -1; |
86 | 85 | ||
87 | printk(KERN_INFO "Testing NMI watchdog ... "); | 86 | printk(KERN_INFO "Testing NMI watchdog ... "); |
88 | 87 | ||
@@ -119,7 +118,7 @@ int __init check_nmi_watchdog(void) | |||
119 | if (!atomic_read(&nmi_active)) { | 118 | if (!atomic_read(&nmi_active)) { |
120 | kfree(prev_nmi_count); | 119 | kfree(prev_nmi_count); |
121 | atomic_set(&nmi_active, -1); | 120 | atomic_set(&nmi_active, -1); |
122 | goto error; | 121 | return -1; |
123 | } | 122 | } |
124 | printk("OK.\n"); | 123 | printk("OK.\n"); |
125 | 124 | ||
@@ -130,10 +129,6 @@ int __init check_nmi_watchdog(void) | |||
130 | 129 | ||
131 | kfree(prev_nmi_count); | 130 | kfree(prev_nmi_count); |
132 | return 0; | 131 | return 0; |
133 | error: | ||
134 | timer_ack = !cpu_has_tsc; | ||
135 | |||
136 | return -1; | ||
137 | } | 132 | } |
138 | 133 | ||
139 | static int __init setup_nmi_watchdog(char *str) | 134 | static int __init setup_nmi_watchdog(char *str) |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index c5ef1af8e79d..dc00a1331ace 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -378,6 +378,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | |||
378 | struct page *page; | 378 | struct page *page; |
379 | unsigned long dma_mask = 0; | 379 | unsigned long dma_mask = 0; |
380 | dma_addr_t bus; | 380 | dma_addr_t bus; |
381 | int noretry = 0; | ||
381 | 382 | ||
382 | /* ignore region specifiers */ | 383 | /* ignore region specifiers */ |
383 | gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); | 384 | gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); |
@@ -397,20 +398,25 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | |||
397 | if (dev->dma_mask == NULL) | 398 | if (dev->dma_mask == NULL) |
398 | return NULL; | 399 | return NULL; |
399 | 400 | ||
400 | /* Don't invoke OOM killer */ | 401 | /* Don't invoke OOM killer or retry in lower 16MB DMA zone */ |
401 | gfp |= __GFP_NORETRY; | 402 | if (gfp & __GFP_DMA) |
403 | noretry = 1; | ||
402 | 404 | ||
403 | #ifdef CONFIG_X86_64 | 405 | #ifdef CONFIG_X86_64 |
404 | /* Why <=? Even when the mask is smaller than 4GB it is often | 406 | /* Why <=? Even when the mask is smaller than 4GB it is often |
405 | larger than 16MB and in this case we have a chance of | 407 | larger than 16MB and in this case we have a chance of |
406 | finding fitting memory in the next higher zone first. If | 408 | finding fitting memory in the next higher zone first. If |
407 | not retry with true GFP_DMA. -AK */ | 409 | not retry with true GFP_DMA. -AK */ |
408 | if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) | 410 | if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) { |
409 | gfp |= GFP_DMA32; | 411 | gfp |= GFP_DMA32; |
412 | if (dma_mask < DMA_32BIT_MASK) | ||
413 | noretry = 1; | ||
414 | } | ||
410 | #endif | 415 | #endif |
411 | 416 | ||
412 | again: | 417 | again: |
413 | page = dma_alloc_pages(dev, gfp, get_order(size)); | 418 | page = dma_alloc_pages(dev, |
419 | noretry ? gfp | __GFP_NORETRY : gfp, get_order(size)); | ||
414 | if (page == NULL) | 420 | if (page == NULL) |
415 | return NULL; | 421 | return NULL; |
416 | 422 | ||
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index c07455d1695f..aa8ec928caa8 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/kdebug.h> | 26 | #include <linux/kdebug.h> |
27 | #include <linux/scatterlist.h> | 27 | #include <linux/scatterlist.h> |
28 | #include <linux/iommu-helper.h> | 28 | #include <linux/iommu-helper.h> |
29 | #include <linux/sysdev.h> | ||
29 | #include <asm/atomic.h> | 30 | #include <asm/atomic.h> |
30 | #include <asm/io.h> | 31 | #include <asm/io.h> |
31 | #include <asm/mtrr.h> | 32 | #include <asm/mtrr.h> |
@@ -548,6 +549,28 @@ static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) | |||
548 | return aper_base; | 549 | return aper_base; |
549 | } | 550 | } |
550 | 551 | ||
552 | static int gart_resume(struct sys_device *dev) | ||
553 | { | ||
554 | return 0; | ||
555 | } | ||
556 | |||
557 | static int gart_suspend(struct sys_device *dev, pm_message_t state) | ||
558 | { | ||
559 | return -EINVAL; | ||
560 | } | ||
561 | |||
562 | static struct sysdev_class gart_sysdev_class = { | ||
563 | .name = "gart", | ||
564 | .suspend = gart_suspend, | ||
565 | .resume = gart_resume, | ||
566 | |||
567 | }; | ||
568 | |||
569 | static struct sys_device device_gart = { | ||
570 | .id = 0, | ||
571 | .cls = &gart_sysdev_class, | ||
572 | }; | ||
573 | |||
551 | /* | 574 | /* |
552 | * Private Northbridge GATT initialization in case we cannot use the | 575 | * Private Northbridge GATT initialization in case we cannot use the |
553 | * AGP driver for some reason. | 576 | * AGP driver for some reason. |
@@ -558,7 +581,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
558 | unsigned aper_base, new_aper_base; | 581 | unsigned aper_base, new_aper_base; |
559 | struct pci_dev *dev; | 582 | struct pci_dev *dev; |
560 | void *gatt; | 583 | void *gatt; |
561 | int i; | 584 | int i, error; |
562 | 585 | ||
563 | printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); | 586 | printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); |
564 | aper_size = aper_base = info->aper_size = 0; | 587 | aper_size = aper_base = info->aper_size = 0; |
@@ -606,6 +629,12 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
606 | 629 | ||
607 | pci_write_config_dword(dev, 0x90, ctl); | 630 | pci_write_config_dword(dev, 0x90, ctl); |
608 | } | 631 | } |
632 | |||
633 | error = sysdev_class_register(&gart_sysdev_class); | ||
634 | if (!error) | ||
635 | error = sysdev_register(&device_gart); | ||
636 | if (error) | ||
637 | panic("Could not register gart_sysdev -- would corrupt data on next suspend"); | ||
609 | flush_gart(); | 638 | flush_gart(); |
610 | 639 | ||
611 | printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", | 640 | printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index f8476dfbb60d..e2db9ac5c61c 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -333,6 +333,7 @@ void flush_thread(void) | |||
333 | /* | 333 | /* |
334 | * Forget coprocessor state.. | 334 | * Forget coprocessor state.. |
335 | */ | 335 | */ |
336 | tsk->fpu_counter = 0; | ||
336 | clear_fpu(tsk); | 337 | clear_fpu(tsk); |
337 | clear_used_math(); | 338 | clear_used_math(); |
338 | } | 339 | } |
@@ -649,8 +650,11 @@ struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct | |||
649 | /* If the task has used fpu the last 5 timeslices, just do a full | 650 | /* If the task has used fpu the last 5 timeslices, just do a full |
650 | * restore of the math state immediately to avoid the trap; the | 651 | * restore of the math state immediately to avoid the trap; the |
651 | * chances of needing FPU soon are obviously high now | 652 | * chances of needing FPU soon are obviously high now |
653 | * | ||
654 | * tsk_used_math() checks prevent calling math_state_restore(), | ||
655 | * which can sleep in the case of !tsk_used_math() | ||
652 | */ | 656 | */ |
653 | if (next_p->fpu_counter > 5) | 657 | if (tsk_used_math(next_p) && next_p->fpu_counter > 5) |
654 | math_state_restore(); | 658 | math_state_restore(); |
655 | 659 | ||
656 | /* | 660 | /* |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index cce47f7fbf22..f73cfbc2c281 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -306,6 +306,7 @@ void flush_thread(void) | |||
306 | /* | 306 | /* |
307 | * Forget coprocessor state.. | 307 | * Forget coprocessor state.. |
308 | */ | 308 | */ |
309 | tsk->fpu_counter = 0; | ||
309 | clear_fpu(tsk); | 310 | clear_fpu(tsk); |
310 | clear_used_math(); | 311 | clear_used_math(); |
311 | } | 312 | } |
@@ -669,8 +670,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
669 | /* If the task has used fpu the last 5 timeslices, just do a full | 670 | /* If the task has used fpu the last 5 timeslices, just do a full |
670 | * restore of the math state immediately to avoid the trap; the | 671 | * restore of the math state immediately to avoid the trap; the |
671 | * chances of needing FPU soon are obviously high now | 672 | * chances of needing FPU soon are obviously high now |
673 | * | ||
674 | * tsk_used_math() checks prevent calling math_state_restore(), | ||
675 | * which can sleep in the case of !tsk_used_math() | ||
672 | */ | 676 | */ |
673 | if (next_p->fpu_counter>5) | 677 | if (tsk_used_math(next_p) && next_p->fpu_counter > 5) |
674 | math_state_restore(); | 678 | math_state_restore(); |
675 | return prev_p; | 679 | return prev_p; |
676 | } | 680 | } |
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c new file mode 100644 index 000000000000..05fbe9a0325a --- /dev/null +++ b/arch/x86/kernel/pvclock.c | |||
@@ -0,0 +1,141 @@ | |||
1 | /* paravirtual clock -- common code used by kvm/xen | ||
2 | |||
3 | This program is free software; you can redistribute it and/or modify | ||
4 | it under the terms of the GNU General Public License as published by | ||
5 | the Free Software Foundation; either version 2 of the License, or | ||
6 | (at your option) any later version. | ||
7 | |||
8 | This program is distributed in the hope that it will be useful, | ||
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | GNU General Public License for more details. | ||
12 | |||
13 | You should have received a copy of the GNU General Public License | ||
14 | along with this program; if not, write to the Free Software | ||
15 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
16 | */ | ||
17 | |||
18 | #include <linux/kernel.h> | ||
19 | #include <linux/percpu.h> | ||
20 | #include <asm/pvclock.h> | ||
21 | |||
22 | /* | ||
23 | * These are perodically updated | ||
24 | * xen: magic shared_info page | ||
25 | * kvm: gpa registered via msr | ||
26 | * and then copied here. | ||
27 | */ | ||
28 | struct pvclock_shadow_time { | ||
29 | u64 tsc_timestamp; /* TSC at last update of time vals. */ | ||
30 | u64 system_timestamp; /* Time, in nanosecs, since boot. */ | ||
31 | u32 tsc_to_nsec_mul; | ||
32 | int tsc_shift; | ||
33 | u32 version; | ||
34 | }; | ||
35 | |||
36 | /* | ||
37 | * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, | ||
38 | * yielding a 64-bit result. | ||
39 | */ | ||
40 | static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift) | ||
41 | { | ||
42 | u64 product; | ||
43 | #ifdef __i386__ | ||
44 | u32 tmp1, tmp2; | ||
45 | #endif | ||
46 | |||
47 | if (shift < 0) | ||
48 | delta >>= -shift; | ||
49 | else | ||
50 | delta <<= shift; | ||
51 | |||
52 | #ifdef __i386__ | ||
53 | __asm__ ( | ||
54 | "mul %5 ; " | ||
55 | "mov %4,%%eax ; " | ||
56 | "mov %%edx,%4 ; " | ||
57 | "mul %5 ; " | ||
58 | "xor %5,%5 ; " | ||
59 | "add %4,%%eax ; " | ||
60 | "adc %5,%%edx ; " | ||
61 | : "=A" (product), "=r" (tmp1), "=r" (tmp2) | ||
62 | : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); | ||
63 | #elif __x86_64__ | ||
64 | __asm__ ( | ||
65 | "mul %%rdx ; shrd $32,%%rdx,%%rax" | ||
66 | : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) ); | ||
67 | #else | ||
68 | #error implement me! | ||
69 | #endif | ||
70 | |||
71 | return product; | ||
72 | } | ||
73 | |||
74 | static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow) | ||
75 | { | ||
76 | u64 delta = native_read_tsc() - shadow->tsc_timestamp; | ||
77 | return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift); | ||
78 | } | ||
79 | |||
80 | /* | ||
81 | * Reads a consistent set of time-base values from hypervisor, | ||
82 | * into a shadow data area. | ||
83 | */ | ||
84 | static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst, | ||
85 | struct pvclock_vcpu_time_info *src) | ||
86 | { | ||
87 | do { | ||
88 | dst->version = src->version; | ||
89 | rmb(); /* fetch version before data */ | ||
90 | dst->tsc_timestamp = src->tsc_timestamp; | ||
91 | dst->system_timestamp = src->system_time; | ||
92 | dst->tsc_to_nsec_mul = src->tsc_to_system_mul; | ||
93 | dst->tsc_shift = src->tsc_shift; | ||
94 | rmb(); /* test version after fetching data */ | ||
95 | } while ((src->version & 1) || (dst->version != src->version)); | ||
96 | |||
97 | return dst->version; | ||
98 | } | ||
99 | |||
100 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) | ||
101 | { | ||
102 | struct pvclock_shadow_time shadow; | ||
103 | unsigned version; | ||
104 | cycle_t ret, offset; | ||
105 | |||
106 | do { | ||
107 | version = pvclock_get_time_values(&shadow, src); | ||
108 | barrier(); | ||
109 | offset = pvclock_get_nsec_offset(&shadow); | ||
110 | ret = shadow.system_timestamp + offset; | ||
111 | barrier(); | ||
112 | } while (version != src->version); | ||
113 | |||
114 | return ret; | ||
115 | } | ||
116 | |||
117 | void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, | ||
118 | struct pvclock_vcpu_time_info *vcpu_time, | ||
119 | struct timespec *ts) | ||
120 | { | ||
121 | u32 version; | ||
122 | u64 delta; | ||
123 | struct timespec now; | ||
124 | |||
125 | /* get wallclock at system boot */ | ||
126 | do { | ||
127 | version = wall_clock->version; | ||
128 | rmb(); /* fetch version before time */ | ||
129 | now.tv_sec = wall_clock->sec; | ||
130 | now.tv_nsec = wall_clock->nsec; | ||
131 | rmb(); /* fetch time before checking version */ | ||
132 | } while ((wall_clock->version & 1) || (version != wall_clock->version)); | ||
133 | |||
134 | delta = pvclock_clocksource_read(vcpu_time); /* time since system boot */ | ||
135 | delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec; | ||
136 | |||
137 | now.tv_nsec = do_div(delta, NSEC_PER_SEC); | ||
138 | now.tv_sec = delta; | ||
139 | |||
140 | set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); | ||
141 | } | ||
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 9615eee9b775..05191bbc68b8 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c | |||
@@ -4,6 +4,8 @@ | |||
4 | #include <linux/acpi.h> | 4 | #include <linux/acpi.h> |
5 | #include <linux/bcd.h> | 5 | #include <linux/bcd.h> |
6 | #include <linux/mc146818rtc.h> | 6 | #include <linux/mc146818rtc.h> |
7 | #include <linux/platform_device.h> | ||
8 | #include <linux/pnp.h> | ||
7 | 9 | ||
8 | #include <asm/time.h> | 10 | #include <asm/time.h> |
9 | #include <asm/vsyscall.h> | 11 | #include <asm/vsyscall.h> |
@@ -197,3 +199,35 @@ unsigned long long native_read_tsc(void) | |||
197 | } | 199 | } |
198 | EXPORT_SYMBOL(native_read_tsc); | 200 | EXPORT_SYMBOL(native_read_tsc); |
199 | 201 | ||
202 | |||
203 | static struct resource rtc_resources[] = { | ||
204 | [0] = { | ||
205 | .start = RTC_PORT(0), | ||
206 | .end = RTC_PORT(1), | ||
207 | .flags = IORESOURCE_IO, | ||
208 | }, | ||
209 | [1] = { | ||
210 | .start = RTC_IRQ, | ||
211 | .end = RTC_IRQ, | ||
212 | .flags = IORESOURCE_IRQ, | ||
213 | } | ||
214 | }; | ||
215 | |||
216 | static struct platform_device rtc_device = { | ||
217 | .name = "rtc_cmos", | ||
218 | .id = -1, | ||
219 | .resource = rtc_resources, | ||
220 | .num_resources = ARRAY_SIZE(rtc_resources), | ||
221 | }; | ||
222 | |||
223 | static __init int add_rtc_cmos(void) | ||
224 | { | ||
225 | #ifdef CONFIG_PNP | ||
226 | if (!pnp_platform_devices) | ||
227 | platform_device_register(&rtc_device); | ||
228 | #else | ||
229 | platform_device_register(&rtc_device); | ||
230 | #endif /* CONFIG_PNP */ | ||
231 | return 0; | ||
232 | } | ||
233 | device_initcall(add_rtc_cmos); | ||
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 2c5f8b213e86..5a2f8e063887 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c | |||
@@ -532,10 +532,16 @@ static void __init reserve_crashkernel(void) | |||
532 | (unsigned long)(crash_size >> 20), | 532 | (unsigned long)(crash_size >> 20), |
533 | (unsigned long)(crash_base >> 20), | 533 | (unsigned long)(crash_base >> 20), |
534 | (unsigned long)(total_mem >> 20)); | 534 | (unsigned long)(total_mem >> 20)); |
535 | |||
536 | if (reserve_bootmem(crash_base, crash_size, | ||
537 | BOOTMEM_EXCLUSIVE) < 0) { | ||
538 | printk(KERN_INFO "crashkernel reservation " | ||
539 | "failed - memory is in use\n"); | ||
540 | return; | ||
541 | } | ||
542 | |||
535 | crashk_res.start = crash_base; | 543 | crashk_res.start = crash_base; |
536 | crashk_res.end = crash_base + crash_size - 1; | 544 | crashk_res.end = crash_base + crash_size - 1; |
537 | reserve_bootmem(crash_base, crash_size, | ||
538 | BOOTMEM_DEFAULT); | ||
539 | } else | 545 | } else |
540 | printk(KERN_INFO "crashkernel reservation failed - " | 546 | printk(KERN_INFO "crashkernel reservation failed - " |
541 | "you have to specify a base address\n"); | 547 | "you have to specify a base address\n"); |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 38988491c622..56078d61c793 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -1190,6 +1190,7 @@ static void __init smp_cpu_index_default(void) | |||
1190 | */ | 1190 | */ |
1191 | void __init native_smp_prepare_cpus(unsigned int max_cpus) | 1191 | void __init native_smp_prepare_cpus(unsigned int max_cpus) |
1192 | { | 1192 | { |
1193 | preempt_disable(); | ||
1193 | nmi_watchdog_default(); | 1194 | nmi_watchdog_default(); |
1194 | smp_cpu_index_default(); | 1195 | smp_cpu_index_default(); |
1195 | current_cpu_data = boot_cpu_data; | 1196 | current_cpu_data = boot_cpu_data; |
@@ -1206,7 +1207,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1206 | if (smp_sanity_check(max_cpus) < 0) { | 1207 | if (smp_sanity_check(max_cpus) < 0) { |
1207 | printk(KERN_INFO "SMP disabled\n"); | 1208 | printk(KERN_INFO "SMP disabled\n"); |
1208 | disable_smp(); | 1209 | disable_smp(); |
1209 | return; | 1210 | goto out; |
1210 | } | 1211 | } |
1211 | 1212 | ||
1212 | preempt_disable(); | 1213 | preempt_disable(); |
@@ -1246,6 +1247,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1246 | printk(KERN_INFO "CPU%d: ", 0); | 1247 | printk(KERN_INFO "CPU%d: ", 0); |
1247 | print_cpu_info(&cpu_data(0)); | 1248 | print_cpu_info(&cpu_data(0)); |
1248 | setup_boot_clock(); | 1249 | setup_boot_clock(); |
1250 | out: | ||
1251 | preempt_enable(); | ||
1249 | } | 1252 | } |
1250 | /* | 1253 | /* |
1251 | * Early setup to make printk work. | 1254 | * Early setup to make printk work. |
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index bde6f63e15d5..08d752de4eee 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c | |||
@@ -544,6 +544,7 @@ vm86_trap: | |||
544 | #define DO_ERROR(trapnr, signr, str, name) \ | 544 | #define DO_ERROR(trapnr, signr, str, name) \ |
545 | void do_##name(struct pt_regs *regs, long error_code) \ | 545 | void do_##name(struct pt_regs *regs, long error_code) \ |
546 | { \ | 546 | { \ |
547 | trace_hardirqs_fixup(); \ | ||
547 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | 548 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ |
548 | == NOTIFY_STOP) \ | 549 | == NOTIFY_STOP) \ |
549 | return; \ | 550 | return; \ |
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index 068759db63dd..65b70637ad97 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c | |||
@@ -14,7 +14,10 @@ | |||
14 | 14 | ||
15 | #include "mach_timer.h" | 15 | #include "mach_timer.h" |
16 | 16 | ||
17 | static int tsc_disabled; | 17 | /* native_sched_clock() is called before tsc_init(), so |
18 | we must start with the TSC soft disabled to prevent | ||
19 | erroneous rdtsc usage on !cpu_has_tsc processors */ | ||
20 | static int tsc_disabled = -1; | ||
18 | 21 | ||
19 | /* | 22 | /* |
20 | * On some systems the TSC frequency does not | 23 | * On some systems the TSC frequency does not |
@@ -402,25 +405,20 @@ void __init tsc_init(void) | |||
402 | { | 405 | { |
403 | int cpu; | 406 | int cpu; |
404 | 407 | ||
405 | if (!cpu_has_tsc || tsc_disabled) { | 408 | if (!cpu_has_tsc || tsc_disabled > 0) |
406 | /* Disable the TSC in case of !cpu_has_tsc */ | ||
407 | tsc_disabled = 1; | ||
408 | return; | 409 | return; |
409 | } | ||
410 | 410 | ||
411 | cpu_khz = calculate_cpu_khz(); | 411 | cpu_khz = calculate_cpu_khz(); |
412 | tsc_khz = cpu_khz; | 412 | tsc_khz = cpu_khz; |
413 | 413 | ||
414 | if (!cpu_khz) { | 414 | if (!cpu_khz) { |
415 | mark_tsc_unstable("could not calculate TSC khz"); | 415 | mark_tsc_unstable("could not calculate TSC khz"); |
416 | /* | ||
417 | * We need to disable the TSC completely in this case | ||
418 | * to prevent sched_clock() from using it. | ||
419 | */ | ||
420 | tsc_disabled = 1; | ||
421 | return; | 416 | return; |
422 | } | 417 | } |
423 | 418 | ||
419 | /* now allow native_sched_clock() to use rdtsc */ | ||
420 | tsc_disabled = 0; | ||
421 | |||
424 | printk("Detected %lu.%03lu MHz processor.\n", | 422 | printk("Detected %lu.%03lu MHz processor.\n", |
425 | (unsigned long)cpu_khz / 1000, | 423 | (unsigned long)cpu_khz / 1000, |
426 | (unsigned long)cpu_khz % 1000); | 424 | (unsigned long)cpu_khz % 1000); |
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 7c077a9d9777..3829aa7b663f 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -200,10 +200,12 @@ int __pit_timer_fn(struct kvm_kpit_state *ps) | |||
200 | 200 | ||
201 | atomic_inc(&pt->pending); | 201 | atomic_inc(&pt->pending); |
202 | smp_mb__after_atomic_inc(); | 202 | smp_mb__after_atomic_inc(); |
203 | /* FIXME: handle case where the guest is in guest mode */ | 203 | if (vcpu0) { |
204 | if (vcpu0 && waitqueue_active(&vcpu0->wq)) { | 204 | set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests); |
205 | vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 205 | if (waitqueue_active(&vcpu0->wq)) { |
206 | wake_up_interruptible(&vcpu0->wq); | 206 | vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
207 | wake_up_interruptible(&vcpu0->wq); | ||
208 | } | ||
207 | } | 209 | } |
208 | 210 | ||
209 | pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period); | 211 | pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period); |
@@ -237,6 +239,19 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) | |||
237 | return HRTIMER_NORESTART; | 239 | return HRTIMER_NORESTART; |
238 | } | 240 | } |
239 | 241 | ||
242 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) | ||
243 | { | ||
244 | struct kvm_pit *pit = vcpu->kvm->arch.vpit; | ||
245 | struct hrtimer *timer; | ||
246 | |||
247 | if (vcpu->vcpu_id != 0 || !pit) | ||
248 | return; | ||
249 | |||
250 | timer = &pit->pit_state.pit_timer.timer; | ||
251 | if (hrtimer_cancel(timer)) | ||
252 | hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS); | ||
253 | } | ||
254 | |||
240 | static void destroy_pit_timer(struct kvm_kpit_timer *pt) | 255 | static void destroy_pit_timer(struct kvm_kpit_timer *pt) |
241 | { | 256 | { |
242 | pr_debug("pit: execute del timer!\n"); | 257 | pr_debug("pit: execute del timer!\n"); |
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index ce1f583459b1..76d736b5f664 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c | |||
@@ -94,3 +94,9 @@ void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec) | |||
94 | /* TODO: PIT, RTC etc. */ | 94 | /* TODO: PIT, RTC etc. */ |
95 | } | 95 | } |
96 | EXPORT_SYMBOL_GPL(kvm_timer_intr_post); | 96 | EXPORT_SYMBOL_GPL(kvm_timer_intr_post); |
97 | |||
98 | void __kvm_migrate_timers(struct kvm_vcpu *vcpu) | ||
99 | { | ||
100 | __kvm_migrate_apic_timer(vcpu); | ||
101 | __kvm_migrate_pit_timer(vcpu); | ||
102 | } | ||
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 1802134b836f..2a15be2275c0 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
@@ -84,6 +84,8 @@ void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec); | |||
84 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); | 84 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); |
85 | void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); | 85 | void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); |
86 | void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); | 86 | void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); |
87 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu); | ||
88 | void __kvm_migrate_timers(struct kvm_vcpu *vcpu); | ||
87 | 89 | ||
88 | int pit_has_pending_timer(struct kvm_vcpu *vcpu); | 90 | int pit_has_pending_timer(struct kvm_vcpu *vcpu); |
89 | int apic_has_pending_timer(struct kvm_vcpu *vcpu); | 91 | int apic_has_pending_timer(struct kvm_vcpu *vcpu); |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index c297c50eba63..ebc03f5ae162 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -940,6 +940,7 @@ static int __apic_timer_fn(struct kvm_lapic *apic) | |||
940 | wait_queue_head_t *q = &apic->vcpu->wq; | 940 | wait_queue_head_t *q = &apic->vcpu->wq; |
941 | 941 | ||
942 | atomic_inc(&apic->timer.pending); | 942 | atomic_inc(&apic->timer.pending); |
943 | set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests); | ||
943 | if (waitqueue_active(q)) { | 944 | if (waitqueue_active(q)) { |
944 | apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 945 | apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
945 | wake_up_interruptible(q); | 946 | wake_up_interruptible(q); |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7246b60afb96..7e7c3969f7a2 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -640,6 +640,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
640 | rmap_remove(kvm, spte); | 640 | rmap_remove(kvm, spte); |
641 | --kvm->stat.lpages; | 641 | --kvm->stat.lpages; |
642 | set_shadow_pte(spte, shadow_trap_nonpresent_pte); | 642 | set_shadow_pte(spte, shadow_trap_nonpresent_pte); |
643 | spte = NULL; | ||
643 | write_protected = 1; | 644 | write_protected = 1; |
644 | } | 645 | } |
645 | spte = rmap_next(kvm, rmapp, spte); | 646 | spte = rmap_next(kvm, rmapp, spte); |
@@ -658,7 +659,7 @@ static int is_empty_shadow_page(u64 *spt) | |||
658 | u64 *end; | 659 | u64 *end; |
659 | 660 | ||
660 | for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++) | 661 | for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++) |
661 | if (*pos != shadow_trap_nonpresent_pte) { | 662 | if (is_shadow_present_pte(*pos)) { |
662 | printk(KERN_ERR "%s: %p %llx\n", __func__, | 663 | printk(KERN_ERR "%s: %p %llx\n", __func__, |
663 | pos, *pos); | 664 | pos, *pos); |
664 | return 0; | 665 | return 0; |
@@ -1082,10 +1083,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | |||
1082 | struct kvm_mmu_page *shadow; | 1083 | struct kvm_mmu_page *shadow; |
1083 | 1084 | ||
1084 | spte |= PT_WRITABLE_MASK; | 1085 | spte |= PT_WRITABLE_MASK; |
1085 | if (user_fault) { | ||
1086 | mmu_unshadow(vcpu->kvm, gfn); | ||
1087 | goto unshadowed; | ||
1088 | } | ||
1089 | 1086 | ||
1090 | shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn); | 1087 | shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn); |
1091 | if (shadow || | 1088 | if (shadow || |
@@ -1102,8 +1099,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | |||
1102 | } | 1099 | } |
1103 | } | 1100 | } |
1104 | 1101 | ||
1105 | unshadowed: | ||
1106 | |||
1107 | if (pte_access & ACC_WRITE_MASK) | 1102 | if (pte_access & ACC_WRITE_MASK) |
1108 | mark_page_dirty(vcpu->kvm, gfn); | 1103 | mark_page_dirty(vcpu->kvm, gfn); |
1109 | 1104 | ||
@@ -1580,11 +1575,13 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | |||
1580 | u64 *spte, | 1575 | u64 *spte, |
1581 | const void *new) | 1576 | const void *new) |
1582 | { | 1577 | { |
1583 | if ((sp->role.level != PT_PAGE_TABLE_LEVEL) | 1578 | if (sp->role.level != PT_PAGE_TABLE_LEVEL) { |
1584 | && !vcpu->arch.update_pte.largepage) { | 1579 | if (!vcpu->arch.update_pte.largepage || |
1585 | ++vcpu->kvm->stat.mmu_pde_zapped; | 1580 | sp->role.glevels == PT32_ROOT_LEVEL) { |
1586 | return; | 1581 | ++vcpu->kvm->stat.mmu_pde_zapped; |
1587 | } | 1582 | return; |
1583 | } | ||
1584 | } | ||
1588 | 1585 | ||
1589 | ++vcpu->kvm->stat.mmu_pte_updated; | 1586 | ++vcpu->kvm->stat.mmu_pte_updated; |
1590 | if (sp->role.glevels == PT32_ROOT_LEVEL) | 1587 | if (sp->role.glevels == PT32_ROOT_LEVEL) |
@@ -1858,6 +1855,7 @@ static void free_mmu_pages(struct kvm_vcpu *vcpu) | |||
1858 | sp = container_of(vcpu->kvm->arch.active_mmu_pages.next, | 1855 | sp = container_of(vcpu->kvm->arch.active_mmu_pages.next, |
1859 | struct kvm_mmu_page, link); | 1856 | struct kvm_mmu_page, link); |
1860 | kvm_mmu_zap_page(vcpu->kvm, sp); | 1857 | kvm_mmu_zap_page(vcpu->kvm, sp); |
1858 | cond_resched(); | ||
1861 | } | 1859 | } |
1862 | free_page((unsigned long)vcpu->arch.mmu.pae_root); | 1860 | free_page((unsigned long)vcpu->arch.mmu.pae_root); |
1863 | } | 1861 | } |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 156fe10288ae..934c7b619396 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -418,7 +418,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
418 | 418 | ||
419 | /* mmio */ | 419 | /* mmio */ |
420 | if (is_error_pfn(pfn)) { | 420 | if (is_error_pfn(pfn)) { |
421 | pgprintk("gfn %x is mmio\n", walker.gfn); | 421 | pgprintk("gfn %lx is mmio\n", walker.gfn); |
422 | kvm_release_pfn_clean(pfn); | 422 | kvm_release_pfn_clean(pfn); |
423 | return 1; | 423 | return 1; |
424 | } | 424 | } |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ab22615eee89..6b0d5fa5bab3 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -688,7 +688,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
688 | delta = vcpu->arch.host_tsc - tsc_this; | 688 | delta = vcpu->arch.host_tsc - tsc_this; |
689 | svm->vmcb->control.tsc_offset += delta; | 689 | svm->vmcb->control.tsc_offset += delta; |
690 | vcpu->cpu = cpu; | 690 | vcpu->cpu = cpu; |
691 | kvm_migrate_apic_timer(vcpu); | 691 | kvm_migrate_timers(vcpu); |
692 | } | 692 | } |
693 | 693 | ||
694 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) | 694 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bfe4db11989c..540e95179074 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -566,7 +566,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) | |||
566 | load_transition_efer(vmx); | 566 | load_transition_efer(vmx); |
567 | } | 567 | } |
568 | 568 | ||
569 | static void vmx_load_host_state(struct vcpu_vmx *vmx) | 569 | static void __vmx_load_host_state(struct vcpu_vmx *vmx) |
570 | { | 570 | { |
571 | unsigned long flags; | 571 | unsigned long flags; |
572 | 572 | ||
@@ -596,6 +596,13 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx) | |||
596 | reload_host_efer(vmx); | 596 | reload_host_efer(vmx); |
597 | } | 597 | } |
598 | 598 | ||
599 | static void vmx_load_host_state(struct vcpu_vmx *vmx) | ||
600 | { | ||
601 | preempt_disable(); | ||
602 | __vmx_load_host_state(vmx); | ||
603 | preempt_enable(); | ||
604 | } | ||
605 | |||
599 | /* | 606 | /* |
600 | * Switches to specified vcpu, until a matching vcpu_put(), but assumes | 607 | * Switches to specified vcpu, until a matching vcpu_put(), but assumes |
601 | * vcpu mutex is already taken. | 608 | * vcpu mutex is already taken. |
@@ -608,7 +615,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
608 | 615 | ||
609 | if (vcpu->cpu != cpu) { | 616 | if (vcpu->cpu != cpu) { |
610 | vcpu_clear(vmx); | 617 | vcpu_clear(vmx); |
611 | kvm_migrate_apic_timer(vcpu); | 618 | kvm_migrate_timers(vcpu); |
612 | vpid_sync_vcpu_all(vmx); | 619 | vpid_sync_vcpu_all(vmx); |
613 | } | 620 | } |
614 | 621 | ||
@@ -654,7 +661,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
654 | 661 | ||
655 | static void vmx_vcpu_put(struct kvm_vcpu *vcpu) | 662 | static void vmx_vcpu_put(struct kvm_vcpu *vcpu) |
656 | { | 663 | { |
657 | vmx_load_host_state(to_vmx(vcpu)); | 664 | __vmx_load_host_state(to_vmx(vcpu)); |
658 | } | 665 | } |
659 | 666 | ||
660 | static void vmx_fpu_activate(struct kvm_vcpu *vcpu) | 667 | static void vmx_fpu_activate(struct kvm_vcpu *vcpu) |
@@ -884,11 +891,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
884 | switch (msr_index) { | 891 | switch (msr_index) { |
885 | #ifdef CONFIG_X86_64 | 892 | #ifdef CONFIG_X86_64 |
886 | case MSR_EFER: | 893 | case MSR_EFER: |
894 | vmx_load_host_state(vmx); | ||
887 | ret = kvm_set_msr_common(vcpu, msr_index, data); | 895 | ret = kvm_set_msr_common(vcpu, msr_index, data); |
888 | if (vmx->host_state.loaded) { | ||
889 | reload_host_efer(vmx); | ||
890 | load_transition_efer(vmx); | ||
891 | } | ||
892 | break; | 896 | break; |
893 | case MSR_FS_BASE: | 897 | case MSR_FS_BASE: |
894 | vmcs_writel(GUEST_FS_BASE, data); | 898 | vmcs_writel(GUEST_FS_BASE, data); |
@@ -910,11 +914,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
910 | guest_write_tsc(data); | 914 | guest_write_tsc(data); |
911 | break; | 915 | break; |
912 | default: | 916 | default: |
917 | vmx_load_host_state(vmx); | ||
913 | msr = find_msr_entry(vmx, msr_index); | 918 | msr = find_msr_entry(vmx, msr_index); |
914 | if (msr) { | 919 | if (msr) { |
915 | msr->data = data; | 920 | msr->data = data; |
916 | if (vmx->host_state.loaded) | ||
917 | load_msrs(vmx->guest_msrs, vmx->save_nmsrs); | ||
918 | break; | 921 | break; |
919 | } | 922 | } |
920 | ret = kvm_set_msr_common(vcpu, msr_index, data); | 923 | ret = kvm_set_msr_common(vcpu, msr_index, data); |
@@ -1036,6 +1039,7 @@ static void hardware_enable(void *garbage) | |||
1036 | static void hardware_disable(void *garbage) | 1039 | static void hardware_disable(void *garbage) |
1037 | { | 1040 | { |
1038 | asm volatile (ASM_VMX_VMXOFF : : : "cc"); | 1041 | asm volatile (ASM_VMX_VMXOFF : : : "cc"); |
1042 | write_cr4(read_cr4() & ~X86_CR4_VMXE); | ||
1039 | } | 1043 | } |
1040 | 1044 | ||
1041 | static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, | 1045 | static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 21338bdb28ff..63a77caa59f1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -492,8 +492,8 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) | |||
492 | static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) | 492 | static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) |
493 | { | 493 | { |
494 | static int version; | 494 | static int version; |
495 | struct kvm_wall_clock wc; | 495 | struct pvclock_wall_clock wc; |
496 | struct timespec wc_ts; | 496 | struct timespec now, sys, boot; |
497 | 497 | ||
498 | if (!wall_clock) | 498 | if (!wall_clock) |
499 | return; | 499 | return; |
@@ -502,10 +502,19 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) | |||
502 | 502 | ||
503 | kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); | 503 | kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); |
504 | 504 | ||
505 | wc_ts = current_kernel_time(); | 505 | /* |
506 | wc.wc_sec = wc_ts.tv_sec; | 506 | * The guest calculates current wall clock time by adding |
507 | wc.wc_nsec = wc_ts.tv_nsec; | 507 | * system time (updated by kvm_write_guest_time below) to the |
508 | wc.wc_version = version; | 508 | * wall clock specified here. guest system time equals host |
509 | * system time for us, thus we must fill in host boot time here. | ||
510 | */ | ||
511 | now = current_kernel_time(); | ||
512 | ktime_get_ts(&sys); | ||
513 | boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys)); | ||
514 | |||
515 | wc.sec = boot.tv_sec; | ||
516 | wc.nsec = boot.tv_nsec; | ||
517 | wc.version = version; | ||
509 | 518 | ||
510 | kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc)); | 519 | kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc)); |
511 | 520 | ||
@@ -513,6 +522,45 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) | |||
513 | kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); | 522 | kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); |
514 | } | 523 | } |
515 | 524 | ||
525 | static uint32_t div_frac(uint32_t dividend, uint32_t divisor) | ||
526 | { | ||
527 | uint32_t quotient, remainder; | ||
528 | |||
529 | /* Don't try to replace with do_div(), this one calculates | ||
530 | * "(dividend << 32) / divisor" */ | ||
531 | __asm__ ( "divl %4" | ||
532 | : "=a" (quotient), "=d" (remainder) | ||
533 | : "0" (0), "1" (dividend), "r" (divisor) ); | ||
534 | return quotient; | ||
535 | } | ||
536 | |||
537 | static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *hv_clock) | ||
538 | { | ||
539 | uint64_t nsecs = 1000000000LL; | ||
540 | int32_t shift = 0; | ||
541 | uint64_t tps64; | ||
542 | uint32_t tps32; | ||
543 | |||
544 | tps64 = tsc_khz * 1000LL; | ||
545 | while (tps64 > nsecs*2) { | ||
546 | tps64 >>= 1; | ||
547 | shift--; | ||
548 | } | ||
549 | |||
550 | tps32 = (uint32_t)tps64; | ||
551 | while (tps32 <= (uint32_t)nsecs) { | ||
552 | tps32 <<= 1; | ||
553 | shift++; | ||
554 | } | ||
555 | |||
556 | hv_clock->tsc_shift = shift; | ||
557 | hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32); | ||
558 | |||
559 | pr_debug("%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n", | ||
560 | __FUNCTION__, tsc_khz, hv_clock->tsc_shift, | ||
561 | hv_clock->tsc_to_system_mul); | ||
562 | } | ||
563 | |||
516 | static void kvm_write_guest_time(struct kvm_vcpu *v) | 564 | static void kvm_write_guest_time(struct kvm_vcpu *v) |
517 | { | 565 | { |
518 | struct timespec ts; | 566 | struct timespec ts; |
@@ -523,6 +571,11 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) | |||
523 | if ((!vcpu->time_page)) | 571 | if ((!vcpu->time_page)) |
524 | return; | 572 | return; |
525 | 573 | ||
574 | if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) { | ||
575 | kvm_set_time_scale(tsc_khz, &vcpu->hv_clock); | ||
576 | vcpu->hv_clock_tsc_khz = tsc_khz; | ||
577 | } | ||
578 | |||
526 | /* Keep irq disabled to prevent changes to the clock */ | 579 | /* Keep irq disabled to prevent changes to the clock */ |
527 | local_irq_save(flags); | 580 | local_irq_save(flags); |
528 | kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER, | 581 | kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER, |
@@ -537,14 +590,14 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) | |||
537 | /* | 590 | /* |
538 | * The interface expects us to write an even number signaling that the | 591 | * The interface expects us to write an even number signaling that the |
539 | * update is finished. Since the guest won't see the intermediate | 592 | * update is finished. Since the guest won't see the intermediate |
540 | * state, we just write "2" at the end | 593 | * state, we just increase by 2 at the end. |
541 | */ | 594 | */ |
542 | vcpu->hv_clock.version = 2; | 595 | vcpu->hv_clock.version += 2; |
543 | 596 | ||
544 | shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0); | 597 | shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0); |
545 | 598 | ||
546 | memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock, | 599 | memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock, |
547 | sizeof(vcpu->hv_clock)); | 600 | sizeof(vcpu->hv_clock)); |
548 | 601 | ||
549 | kunmap_atomic(shared_kaddr, KM_USER0); | 602 | kunmap_atomic(shared_kaddr, KM_USER0); |
550 | 603 | ||
@@ -599,10 +652,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
599 | /* ...but clean it before doing the actual write */ | 652 | /* ...but clean it before doing the actual write */ |
600 | vcpu->arch.time_offset = data & ~(PAGE_MASK | 1); | 653 | vcpu->arch.time_offset = data & ~(PAGE_MASK | 1); |
601 | 654 | ||
602 | vcpu->arch.hv_clock.tsc_to_system_mul = | ||
603 | clocksource_khz2mult(tsc_khz, 22); | ||
604 | vcpu->arch.hv_clock.tsc_shift = 22; | ||
605 | |||
606 | down_read(¤t->mm->mmap_sem); | 655 | down_read(¤t->mm->mmap_sem); |
607 | vcpu->arch.time_page = | 656 | vcpu->arch.time_page = |
608 | gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); | 657 | gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); |
@@ -2758,7 +2807,9 @@ again: | |||
2758 | 2807 | ||
2759 | if (vcpu->requests) { | 2808 | if (vcpu->requests) { |
2760 | if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) | 2809 | if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) |
2761 | __kvm_migrate_apic_timer(vcpu); | 2810 | __kvm_migrate_timers(vcpu); |
2811 | if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) | ||
2812 | kvm_x86_ops->tlb_flush(vcpu); | ||
2762 | if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, | 2813 | if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, |
2763 | &vcpu->requests)) { | 2814 | &vcpu->requests)) { |
2764 | kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS; | 2815 | kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS; |
@@ -2772,6 +2823,7 @@ again: | |||
2772 | } | 2823 | } |
2773 | } | 2824 | } |
2774 | 2825 | ||
2826 | clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests); | ||
2775 | kvm_inject_pending_timer_irqs(vcpu); | 2827 | kvm_inject_pending_timer_irqs(vcpu); |
2776 | 2828 | ||
2777 | preempt_disable(); | 2829 | preempt_disable(); |
@@ -2781,21 +2833,13 @@ again: | |||
2781 | 2833 | ||
2782 | local_irq_disable(); | 2834 | local_irq_disable(); |
2783 | 2835 | ||
2784 | if (need_resched()) { | 2836 | if (vcpu->requests || need_resched()) { |
2785 | local_irq_enable(); | 2837 | local_irq_enable(); |
2786 | preempt_enable(); | 2838 | preempt_enable(); |
2787 | r = 1; | 2839 | r = 1; |
2788 | goto out; | 2840 | goto out; |
2789 | } | 2841 | } |
2790 | 2842 | ||
2791 | if (vcpu->requests) | ||
2792 | if (test_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) { | ||
2793 | local_irq_enable(); | ||
2794 | preempt_enable(); | ||
2795 | r = 1; | ||
2796 | goto out; | ||
2797 | } | ||
2798 | |||
2799 | if (signal_pending(current)) { | 2843 | if (signal_pending(current)) { |
2800 | local_irq_enable(); | 2844 | local_irq_enable(); |
2801 | preempt_enable(); | 2845 | preempt_enable(); |
@@ -2825,9 +2869,6 @@ again: | |||
2825 | 2869 | ||
2826 | kvm_guest_enter(); | 2870 | kvm_guest_enter(); |
2827 | 2871 | ||
2828 | if (vcpu->requests) | ||
2829 | if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) | ||
2830 | kvm_x86_ops->tlb_flush(vcpu); | ||
2831 | 2872 | ||
2832 | KVMTRACE_0D(VMENTRY, vcpu, entryexit); | 2873 | KVMTRACE_0D(VMENTRY, vcpu, entryexit); |
2833 | kvm_x86_ops->run(vcpu, kvm_run); | 2874 | kvm_x86_ops->run(vcpu, kvm_run); |
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index 8a96320ab071..932f216d890c 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c | |||
@@ -1727,7 +1727,8 @@ twobyte_insn: | |||
1727 | if (rc) | 1727 | if (rc) |
1728 | goto done; | 1728 | goto done; |
1729 | 1729 | ||
1730 | kvm_emulate_hypercall(ctxt->vcpu); | 1730 | /* Let the processor re-execute the fixed hypercall */ |
1731 | c->eip = ctxt->vcpu->arch.rip; | ||
1731 | /* Disable writeback. */ | 1732 | /* Disable writeback. */ |
1732 | c->dst.type = OP_NONE; | 1733 | c->dst.type = OP_NONE; |
1733 | break; | 1734 | break; |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index af65b2da3ba0..5c7e2fd52075 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -582,8 +582,9 @@ static void __init lguest_init_IRQ(void) | |||
582 | int vector = FIRST_EXTERNAL_VECTOR + i; | 582 | int vector = FIRST_EXTERNAL_VECTOR + i; |
583 | if (vector != SYSCALL_VECTOR) { | 583 | if (vector != SYSCALL_VECTOR) { |
584 | set_intr_gate(vector, interrupt[i]); | 584 | set_intr_gate(vector, interrupt[i]); |
585 | set_irq_chip_and_handler(i, &lguest_irq_controller, | 585 | set_irq_chip_and_handler_name(i, &lguest_irq_controller, |
586 | handle_level_irq); | 586 | handle_level_irq, |
587 | "level"); | ||
587 | } | 588 | } |
588 | } | 589 | } |
589 | /* This call is required to set up for 4k stacks, where we have | 590 | /* This call is required to set up for 4k stacks, where we have |
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 70bebd310408..ee1c3f635157 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S | |||
@@ -217,19 +217,19 @@ ENTRY(copy_user_generic_unrolled) | |||
217 | /* table sorted by exception address */ | 217 | /* table sorted by exception address */ |
218 | .section __ex_table,"a" | 218 | .section __ex_table,"a" |
219 | .align 8 | 219 | .align 8 |
220 | .quad .Ls1,.Ls1e | 220 | .quad .Ls1,.Ls1e /* Ls1-Ls4 have copied zero bytes */ |
221 | .quad .Ls2,.Ls2e | 221 | .quad .Ls2,.Ls1e |
222 | .quad .Ls3,.Ls3e | 222 | .quad .Ls3,.Ls1e |
223 | .quad .Ls4,.Ls4e | 223 | .quad .Ls4,.Ls1e |
224 | .quad .Ld1,.Ls1e | 224 | .quad .Ld1,.Ls1e /* Ld1-Ld4 have copied 0-24 bytes */ |
225 | .quad .Ld2,.Ls2e | 225 | .quad .Ld2,.Ls2e |
226 | .quad .Ld3,.Ls3e | 226 | .quad .Ld3,.Ls3e |
227 | .quad .Ld4,.Ls4e | 227 | .quad .Ld4,.Ls4e |
228 | .quad .Ls5,.Ls5e | 228 | .quad .Ls5,.Ls5e /* Ls5-Ls8 have copied 32 bytes */ |
229 | .quad .Ls6,.Ls6e | 229 | .quad .Ls6,.Ls5e |
230 | .quad .Ls7,.Ls7e | 230 | .quad .Ls7,.Ls5e |
231 | .quad .Ls8,.Ls8e | 231 | .quad .Ls8,.Ls5e |
232 | .quad .Ld5,.Ls5e | 232 | .quad .Ld5,.Ls5e /* Ld5-Ld8 have copied 32-56 bytes */ |
233 | .quad .Ld6,.Ls6e | 233 | .quad .Ld6,.Ls6e |
234 | .quad .Ld7,.Ls7e | 234 | .quad .Ld7,.Ls7e |
235 | .quad .Ld8,.Ls8e | 235 | .quad .Ld8,.Ls8e |
@@ -244,11 +244,8 @@ ENTRY(copy_user_generic_unrolled) | |||
244 | .quad .Le5,.Le_zero | 244 | .quad .Le5,.Le_zero |
245 | .previous | 245 | .previous |
246 | 246 | ||
247 | /* compute 64-offset for main loop. 8 bytes accuracy with error on the | ||
248 | pessimistic side. this is gross. it would be better to fix the | ||
249 | interface. */ | ||
250 | /* eax: zero, ebx: 64 */ | 247 | /* eax: zero, ebx: 64 */ |
251 | .Ls1e: addl $8,%eax | 248 | .Ls1e: addl $8,%eax /* eax is bytes left uncopied within the loop (Ls1e: 64 .. Ls8e: 8) */ |
252 | .Ls2e: addl $8,%eax | 249 | .Ls2e: addl $8,%eax |
253 | .Ls3e: addl $8,%eax | 250 | .Ls3e: addl $8,%eax |
254 | .Ls4e: addl $8,%eax | 251 | .Ls4e: addl $8,%eax |
diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S index 5196762b3b0e..9d3d1ab83763 100644 --- a/arch/x86/lib/copy_user_nocache_64.S +++ b/arch/x86/lib/copy_user_nocache_64.S | |||
@@ -145,19 +145,19 @@ ENTRY(__copy_user_nocache) | |||
145 | /* table sorted by exception address */ | 145 | /* table sorted by exception address */ |
146 | .section __ex_table,"a" | 146 | .section __ex_table,"a" |
147 | .align 8 | 147 | .align 8 |
148 | .quad .Ls1,.Ls1e | 148 | .quad .Ls1,.Ls1e /* .Ls[1-4] - 0 bytes copied */ |
149 | .quad .Ls2,.Ls2e | 149 | .quad .Ls2,.Ls1e |
150 | .quad .Ls3,.Ls3e | 150 | .quad .Ls3,.Ls1e |
151 | .quad .Ls4,.Ls4e | 151 | .quad .Ls4,.Ls1e |
152 | .quad .Ld1,.Ls1e | 152 | .quad .Ld1,.Ls1e /* .Ld[1-4] - 0..24 bytes coped */ |
153 | .quad .Ld2,.Ls2e | 153 | .quad .Ld2,.Ls2e |
154 | .quad .Ld3,.Ls3e | 154 | .quad .Ld3,.Ls3e |
155 | .quad .Ld4,.Ls4e | 155 | .quad .Ld4,.Ls4e |
156 | .quad .Ls5,.Ls5e | 156 | .quad .Ls5,.Ls5e /* .Ls[5-8] - 32 bytes copied */ |
157 | .quad .Ls6,.Ls6e | 157 | .quad .Ls6,.Ls5e |
158 | .quad .Ls7,.Ls7e | 158 | .quad .Ls7,.Ls5e |
159 | .quad .Ls8,.Ls8e | 159 | .quad .Ls8,.Ls5e |
160 | .quad .Ld5,.Ls5e | 160 | .quad .Ld5,.Ls5e /* .Ld[5-8] - 32..56 bytes copied */ |
161 | .quad .Ld6,.Ls6e | 161 | .quad .Ld6,.Ls6e |
162 | .quad .Ld7,.Ls7e | 162 | .quad .Ld7,.Ls7e |
163 | .quad .Ld8,.Ls8e | 163 | .quad .Ld8,.Ls8e |
@@ -172,11 +172,8 @@ ENTRY(__copy_user_nocache) | |||
172 | .quad .Le5,.Le_zero | 172 | .quad .Le5,.Le_zero |
173 | .previous | 173 | .previous |
174 | 174 | ||
175 | /* compute 64-offset for main loop. 8 bytes accuracy with error on the | ||
176 | pessimistic side. this is gross. it would be better to fix the | ||
177 | interface. */ | ||
178 | /* eax: zero, ebx: 64 */ | 175 | /* eax: zero, ebx: 64 */ |
179 | .Ls1e: addl $8,%eax | 176 | .Ls1e: addl $8,%eax /* eax: bytes left uncopied: Ls1e: 64 .. Ls8e: 8 */ |
180 | .Ls2e: addl $8,%eax | 177 | .Ls2e: addl $8,%eax |
181 | .Ls3e: addl $8,%eax | 178 | .Ls3e: addl $8,%eax |
182 | .Ls4e: addl $8,%eax | 179 | .Ls4e: addl $8,%eax |
diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay_32.c index 4535e6d147ad..d710f2d167bb 100644 --- a/arch/x86/lib/delay_32.c +++ b/arch/x86/lib/delay_32.c | |||
@@ -44,13 +44,36 @@ static void delay_loop(unsigned long loops) | |||
44 | static void delay_tsc(unsigned long loops) | 44 | static void delay_tsc(unsigned long loops) |
45 | { | 45 | { |
46 | unsigned long bclock, now; | 46 | unsigned long bclock, now; |
47 | int cpu; | ||
47 | 48 | ||
48 | preempt_disable(); /* TSC's are per-cpu */ | 49 | preempt_disable(); |
50 | cpu = smp_processor_id(); | ||
49 | rdtscl(bclock); | 51 | rdtscl(bclock); |
50 | do { | 52 | for (;;) { |
51 | rep_nop(); | ||
52 | rdtscl(now); | 53 | rdtscl(now); |
53 | } while ((now-bclock) < loops); | 54 | if ((now - bclock) >= loops) |
55 | break; | ||
56 | |||
57 | /* Allow RT tasks to run */ | ||
58 | preempt_enable(); | ||
59 | rep_nop(); | ||
60 | preempt_disable(); | ||
61 | |||
62 | /* | ||
63 | * It is possible that we moved to another CPU, and | ||
64 | * since TSC's are per-cpu we need to calculate | ||
65 | * that. The delay must guarantee that we wait "at | ||
66 | * least" the amount of time. Being moved to another | ||
67 | * CPU could make the wait longer but we just need to | ||
68 | * make sure we waited long enough. Rebalance the | ||
69 | * counter for this CPU. | ||
70 | */ | ||
71 | if (unlikely(cpu != smp_processor_id())) { | ||
72 | loops -= (now - bclock); | ||
73 | cpu = smp_processor_id(); | ||
74 | rdtscl(bclock); | ||
75 | } | ||
76 | } | ||
54 | preempt_enable(); | 77 | preempt_enable(); |
55 | } | 78 | } |
56 | 79 | ||
diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c index bbc610518516..4c441be92641 100644 --- a/arch/x86/lib/delay_64.c +++ b/arch/x86/lib/delay_64.c | |||
@@ -31,14 +31,36 @@ int __devinit read_current_timer(unsigned long *timer_value) | |||
31 | void __delay(unsigned long loops) | 31 | void __delay(unsigned long loops) |
32 | { | 32 | { |
33 | unsigned bclock, now; | 33 | unsigned bclock, now; |
34 | int cpu; | ||
34 | 35 | ||
35 | preempt_disable(); /* TSC's are pre-cpu */ | 36 | preempt_disable(); |
37 | cpu = smp_processor_id(); | ||
36 | rdtscl(bclock); | 38 | rdtscl(bclock); |
37 | do { | 39 | for (;;) { |
38 | rep_nop(); | ||
39 | rdtscl(now); | 40 | rdtscl(now); |
41 | if ((now - bclock) >= loops) | ||
42 | break; | ||
43 | |||
44 | /* Allow RT tasks to run */ | ||
45 | preempt_enable(); | ||
46 | rep_nop(); | ||
47 | preempt_disable(); | ||
48 | |||
49 | /* | ||
50 | * It is possible that we moved to another CPU, and | ||
51 | * since TSC's are per-cpu we need to calculate | ||
52 | * that. The delay must guarantee that we wait "at | ||
53 | * least" the amount of time. Being moved to another | ||
54 | * CPU could make the wait longer but we just need to | ||
55 | * make sure we waited long enough. Rebalance the | ||
56 | * counter for this CPU. | ||
57 | */ | ||
58 | if (unlikely(cpu != smp_processor_id())) { | ||
59 | loops -= (now - bclock); | ||
60 | cpu = smp_processor_id(); | ||
61 | rdtscl(bclock); | ||
62 | } | ||
40 | } | 63 | } |
41 | while ((now-bclock) < loops); | ||
42 | preempt_enable(); | 64 | preempt_enable(); |
43 | } | 65 | } |
44 | EXPORT_SYMBOL(__delay); | 66 | EXPORT_SYMBOL(__delay); |
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 6e38d877ea77..c7b06feb139b 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <asm/uaccess.h> | 30 | #include <asm/uaccess.h> |
31 | #include <asm/desc.h> | 31 | #include <asm/desc.h> |
32 | #include <asm/user.h> | 32 | #include <asm/user.h> |
33 | #include <asm/i387.h> | ||
33 | 34 | ||
34 | #include "fpu_system.h" | 35 | #include "fpu_system.h" |
35 | #include "fpu_emu.h" | 36 | #include "fpu_emu.h" |
@@ -146,6 +147,13 @@ asmlinkage void math_emulate(long arg) | |||
146 | unsigned long code_limit = 0; /* Initialized to stop compiler warnings */ | 147 | unsigned long code_limit = 0; /* Initialized to stop compiler warnings */ |
147 | struct desc_struct code_descriptor; | 148 | struct desc_struct code_descriptor; |
148 | 149 | ||
150 | if (!used_math()) { | ||
151 | if (init_fpu(current)) { | ||
152 | do_group_exit(SIGKILL); | ||
153 | return; | ||
154 | } | ||
155 | } | ||
156 | |||
149 | #ifdef RE_ENTRANT_CHECKING | 157 | #ifdef RE_ENTRANT_CHECKING |
150 | if (emulating) { | 158 | if (emulating) { |
151 | printk("ERROR: wm-FPU-emu is not RE-ENTRANT!\n"); | 159 | printk("ERROR: wm-FPU-emu is not RE-ENTRANT!\n"); |
@@ -153,11 +161,6 @@ asmlinkage void math_emulate(long arg) | |||
153 | RE_ENTRANT_CHECK_ON; | 161 | RE_ENTRANT_CHECK_ON; |
154 | #endif /* RE_ENTRANT_CHECKING */ | 162 | #endif /* RE_ENTRANT_CHECKING */ |
155 | 163 | ||
156 | if (!used_math()) { | ||
157 | finit(); | ||
158 | set_used_math(); | ||
159 | } | ||
160 | |||
161 | SETUP_DATA_AREA(arg); | 164 | SETUP_DATA_AREA(arg); |
162 | 165 | ||
163 | FPU_ORIG_EIP = FPU_EIP; | 166 | FPU_ORIG_EIP = FPU_EIP; |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 1f524df68b96..0c5dcee23bb1 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -498,6 +498,11 @@ static int vmalloc_fault(unsigned long address) | |||
498 | unsigned long pgd_paddr; | 498 | unsigned long pgd_paddr; |
499 | pmd_t *pmd_k; | 499 | pmd_t *pmd_k; |
500 | pte_t *pte_k; | 500 | pte_t *pte_k; |
501 | |||
502 | /* Make sure we are in vmalloc area */ | ||
503 | if (!(address >= VMALLOC_START && address < VMALLOC_END)) | ||
504 | return -1; | ||
505 | |||
501 | /* | 506 | /* |
502 | * Synchronize this task's top level page-table | 507 | * Synchronize this task's top level page-table |
503 | * with the 'reference' page table. | 508 | * with the 'reference' page table. |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 32ba13b0f818..156e6d7b0e32 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -206,7 +206,7 @@ void __init cleanup_highmap(void) | |||
206 | pmd_t *last_pmd = pmd + PTRS_PER_PMD; | 206 | pmd_t *last_pmd = pmd + PTRS_PER_PMD; |
207 | 207 | ||
208 | for (; pmd < last_pmd; pmd++, vaddr += PMD_SIZE) { | 208 | for (; pmd < last_pmd; pmd++, vaddr += PMD_SIZE) { |
209 | if (!pmd_present(*pmd)) | 209 | if (pmd_none(*pmd)) |
210 | continue; | 210 | continue; |
211 | if (vaddr < (unsigned long) _text || vaddr > end) | 211 | if (vaddr < (unsigned long) _text || vaddr > end) |
212 | set_pmd(pmd, __pmd(0)); | 212 | set_pmd(pmd, __pmd(0)); |
@@ -506,7 +506,7 @@ early_param("memtest", parse_memtest); | |||
506 | 506 | ||
507 | static void __init early_memtest(unsigned long start, unsigned long end) | 507 | static void __init early_memtest(unsigned long start, unsigned long end) |
508 | { | 508 | { |
509 | unsigned long t_start, t_size; | 509 | u64 t_start, t_size; |
510 | unsigned pattern; | 510 | unsigned pattern; |
511 | 511 | ||
512 | if (!memtest_pattern) | 512 | if (!memtest_pattern) |
@@ -525,7 +525,7 @@ static void __init early_memtest(unsigned long start, unsigned long end) | |||
525 | if (t_start + t_size > end) | 525 | if (t_start + t_size > end) |
526 | t_size = end - t_start; | 526 | t_size = end - t_start; |
527 | 527 | ||
528 | printk(KERN_CONT "\n %016lx - %016lx pattern %d", | 528 | printk(KERN_CONT "\n %016llx - %016llx pattern %d", |
529 | t_start, t_start + t_size, pattern); | 529 | t_start, t_start + t_size, pattern); |
530 | 530 | ||
531 | memtest(t_start, t_size, pattern); | 531 | memtest(t_start, t_size, pattern); |
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 71bb3159031a..2b2bb3f9b683 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -593,10 +593,11 @@ void __init early_iounmap(void *addr, unsigned long size) | |||
593 | unsigned long offset; | 593 | unsigned long offset; |
594 | unsigned int nrpages; | 594 | unsigned int nrpages; |
595 | enum fixed_addresses idx; | 595 | enum fixed_addresses idx; |
596 | unsigned int nesting; | 596 | int nesting; |
597 | 597 | ||
598 | nesting = --early_ioremap_nested; | 598 | nesting = --early_ioremap_nested; |
599 | WARN_ON(nesting < 0); | 599 | if (WARN_ON(nesting < 0)) |
600 | return; | ||
600 | 601 | ||
601 | if (early_ioremap_debug) { | 602 | if (early_ioremap_debug) { |
602 | printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr, | 603 | printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr, |
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index de3a99812450..06b7a1c90fb8 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
@@ -34,7 +34,7 @@ void __cpuinit pat_disable(char *reason) | |||
34 | printk(KERN_INFO "%s\n", reason); | 34 | printk(KERN_INFO "%s\n", reason); |
35 | } | 35 | } |
36 | 36 | ||
37 | static int nopat(char *str) | 37 | static int __init nopat(char *str) |
38 | { | 38 | { |
39 | pat_disable("PAT support disabled."); | 39 | pat_disable("PAT support disabled."); |
40 | return 0; | 40 | return 0; |
@@ -151,32 +151,33 @@ static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot, | |||
151 | unsigned long pat_type; | 151 | unsigned long pat_type; |
152 | u8 mtrr_type; | 152 | u8 mtrr_type; |
153 | 153 | ||
154 | mtrr_type = mtrr_type_lookup(start, end); | ||
155 | if (mtrr_type == 0xFF) { /* MTRR not enabled */ | ||
156 | *ret_prot = prot; | ||
157 | return 0; | ||
158 | } | ||
159 | if (mtrr_type == 0xFE) { /* MTRR match error */ | ||
160 | *ret_prot = _PAGE_CACHE_UC; | ||
161 | return -1; | ||
162 | } | ||
163 | if (mtrr_type != MTRR_TYPE_UNCACHABLE && | ||
164 | mtrr_type != MTRR_TYPE_WRBACK && | ||
165 | mtrr_type != MTRR_TYPE_WRCOMB) { /* MTRR type unhandled */ | ||
166 | *ret_prot = _PAGE_CACHE_UC; | ||
167 | return -1; | ||
168 | } | ||
169 | |||
170 | pat_type = prot & _PAGE_CACHE_MASK; | 154 | pat_type = prot & _PAGE_CACHE_MASK; |
171 | prot &= (~_PAGE_CACHE_MASK); | 155 | prot &= (~_PAGE_CACHE_MASK); |
172 | 156 | ||
173 | /* Currently doing intersection by hand. Optimize it later. */ | 157 | /* |
158 | * We return the PAT request directly for types where PAT takes | ||
159 | * precedence with respect to MTRR and for UC_MINUS. | ||
160 | * Consistency checks with other PAT requests is done later | ||
161 | * while going through memtype list. | ||
162 | */ | ||
174 | if (pat_type == _PAGE_CACHE_WC) { | 163 | if (pat_type == _PAGE_CACHE_WC) { |
175 | *ret_prot = prot | _PAGE_CACHE_WC; | 164 | *ret_prot = prot | _PAGE_CACHE_WC; |
165 | return 0; | ||
176 | } else if (pat_type == _PAGE_CACHE_UC_MINUS) { | 166 | } else if (pat_type == _PAGE_CACHE_UC_MINUS) { |
177 | *ret_prot = prot | _PAGE_CACHE_UC_MINUS; | 167 | *ret_prot = prot | _PAGE_CACHE_UC_MINUS; |
178 | } else if (pat_type == _PAGE_CACHE_UC || | 168 | return 0; |
179 | mtrr_type == MTRR_TYPE_UNCACHABLE) { | 169 | } else if (pat_type == _PAGE_CACHE_UC) { |
170 | *ret_prot = prot | _PAGE_CACHE_UC; | ||
171 | return 0; | ||
172 | } | ||
173 | |||
174 | /* | ||
175 | * Look for MTRR hint to get the effective type in case where PAT | ||
176 | * request is for WB. | ||
177 | */ | ||
178 | mtrr_type = mtrr_type_lookup(start, end); | ||
179 | |||
180 | if (mtrr_type == MTRR_TYPE_UNCACHABLE) { | ||
180 | *ret_prot = prot | _PAGE_CACHE_UC; | 181 | *ret_prot = prot | _PAGE_CACHE_UC; |
181 | } else if (mtrr_type == MTRR_TYPE_WRCOMB) { | 182 | } else if (mtrr_type == MTRR_TYPE_WRCOMB) { |
182 | *ret_prot = prot | _PAGE_CACHE_WC; | 183 | *ret_prot = prot | _PAGE_CACHE_WC; |
@@ -233,14 +234,12 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
233 | 234 | ||
234 | if (req_type == -1) { | 235 | if (req_type == -1) { |
235 | /* | 236 | /* |
236 | * Special case where caller wants to inherit from mtrr or | 237 | * Call mtrr_lookup to get the type hint. This is an |
237 | * existing pat mapping, defaulting to UC_MINUS in case of | 238 | * optimization for /dev/mem mmap'ers into WB memory (BIOS |
238 | * no match. | 239 | * tools and ACPI tools). Use WB request for WB memory and use |
240 | * UC_MINUS otherwise. | ||
239 | */ | 241 | */ |
240 | u8 mtrr_type = mtrr_type_lookup(start, end); | 242 | u8 mtrr_type = mtrr_type_lookup(start, end); |
241 | if (mtrr_type == 0xFE) { /* MTRR match error */ | ||
242 | err = -1; | ||
243 | } | ||
244 | 243 | ||
245 | if (mtrr_type == MTRR_TYPE_WRBACK) { | 244 | if (mtrr_type == MTRR_TYPE_WRBACK) { |
246 | req_type = _PAGE_CACHE_WB; | 245 | req_type = _PAGE_CACHE_WB; |
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 3890234e5b26..99649dccad28 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -97,36 +97,9 @@ static __init inline int srat_disabled(void) | |||
97 | return numa_off || acpi_numa < 0; | 97 | return numa_off || acpi_numa < 0; |
98 | } | 98 | } |
99 | 99 | ||
100 | /* | ||
101 | * A lot of BIOS fill in 10 (= no distance) everywhere. This messes | ||
102 | * up the NUMA heuristics which wants the local node to have a smaller | ||
103 | * distance than the others. | ||
104 | * Do some quick checks here and only use the SLIT if it passes. | ||
105 | */ | ||
106 | static __init int slit_valid(struct acpi_table_slit *slit) | ||
107 | { | ||
108 | int i, j; | ||
109 | int d = slit->locality_count; | ||
110 | for (i = 0; i < d; i++) { | ||
111 | for (j = 0; j < d; j++) { | ||
112 | u8 val = slit->entry[d*i + j]; | ||
113 | if (i == j) { | ||
114 | if (val != LOCAL_DISTANCE) | ||
115 | return 0; | ||
116 | } else if (val <= LOCAL_DISTANCE) | ||
117 | return 0; | ||
118 | } | ||
119 | } | ||
120 | return 1; | ||
121 | } | ||
122 | |||
123 | /* Callback for SLIT parsing */ | 100 | /* Callback for SLIT parsing */ |
124 | void __init acpi_numa_slit_init(struct acpi_table_slit *slit) | 101 | void __init acpi_numa_slit_init(struct acpi_table_slit *slit) |
125 | { | 102 | { |
126 | if (!slit_valid(slit)) { | ||
127 | printk(KERN_INFO "ACPI: SLIT table looks invalid. Not used.\n"); | ||
128 | return; | ||
129 | } | ||
130 | acpi_slit = slit; | 103 | acpi_slit = slit; |
131 | } | 104 | } |
132 | 105 | ||
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 6e64aaf00d1d..940185ecaeda 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c | |||
@@ -328,18 +328,18 @@ static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = { | |||
328 | #endif | 328 | #endif |
329 | { | 329 | { |
330 | .callback = set_bf_sort, | 330 | .callback = set_bf_sort, |
331 | .ident = "HP ProLiant DL385 G2", | 331 | .ident = "HP ProLiant DL360", |
332 | .matches = { | 332 | .matches = { |
333 | DMI_MATCH(DMI_SYS_VENDOR, "HP"), | 333 | DMI_MATCH(DMI_SYS_VENDOR, "HP"), |
334 | DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL385 G2"), | 334 | DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL360"), |
335 | }, | 335 | }, |
336 | }, | 336 | }, |
337 | { | 337 | { |
338 | .callback = set_bf_sort, | 338 | .callback = set_bf_sort, |
339 | .ident = "HP ProLiant DL585 G2", | 339 | .ident = "HP ProLiant DL380", |
340 | .matches = { | 340 | .matches = { |
341 | DMI_MATCH(DMI_SYS_VENDOR, "HP"), | 341 | DMI_MATCH(DMI_SYS_VENDOR, "HP"), |
342 | DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL585 G2"), | 342 | DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL380"), |
343 | }, | 343 | }, |
344 | }, | 344 | }, |
345 | {} | 345 | {} |
diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c index e70b9c57b88e..b821f4462d99 100644 --- a/arch/x86/pci/init.c +++ b/arch/x86/pci/init.c | |||
@@ -15,7 +15,8 @@ static __init int pci_access_init(void) | |||
15 | pci_mmcfg_early_init(); | 15 | pci_mmcfg_early_init(); |
16 | 16 | ||
17 | #ifdef CONFIG_PCI_OLPC | 17 | #ifdef CONFIG_PCI_OLPC |
18 | pci_olpc_init(); | 18 | if (!pci_olpc_init()) |
19 | return 0; /* skip additional checks if it's an XO */ | ||
19 | #endif | 20 | #endif |
20 | #ifdef CONFIG_PCI_BIOS | 21 | #ifdef CONFIG_PCI_BIOS |
21 | pci_pcbios_init(); | 22 | pci_pcbios_init(); |
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 0908fca901bf..ca8df9c260bc 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c | |||
@@ -621,6 +621,13 @@ static __init int via_router_probe(struct irq_router *r, | |||
621 | */ | 621 | */ |
622 | device = PCI_DEVICE_ID_VIA_8235; | 622 | device = PCI_DEVICE_ID_VIA_8235; |
623 | break; | 623 | break; |
624 | case PCI_DEVICE_ID_VIA_8237: | ||
625 | /** | ||
626 | * Asus a7v600 bios wrongly reports 8237 | ||
627 | * as 586-compatible | ||
628 | */ | ||
629 | device = PCI_DEVICE_ID_VIA_8237; | ||
630 | break; | ||
624 | } | 631 | } |
625 | } | 632 | } |
626 | 633 | ||
diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c index 5e7636558c02..e11e9e803d5f 100644 --- a/arch/x86/pci/olpc.c +++ b/arch/x86/pci/olpc.c | |||
@@ -302,12 +302,13 @@ static struct pci_raw_ops pci_olpc_conf = { | |||
302 | .write = pci_olpc_write, | 302 | .write = pci_olpc_write, |
303 | }; | 303 | }; |
304 | 304 | ||
305 | void __init pci_olpc_init(void) | 305 | int __init pci_olpc_init(void) |
306 | { | 306 | { |
307 | if (!machine_is_olpc() || olpc_has_vsa()) | 307 | if (!machine_is_olpc() || olpc_has_vsa()) |
308 | return; | 308 | return -ENODEV; |
309 | 309 | ||
310 | printk(KERN_INFO "PCI: Using configuration type OLPC\n"); | 310 | printk(KERN_INFO "PCI: Using configuration type OLPC\n"); |
311 | raw_pci_ops = &pci_olpc_conf; | 311 | raw_pci_ops = &pci_olpc_conf; |
312 | is_lx = is_geode_lx(); | 312 | is_lx = is_geode_lx(); |
313 | return 0; | ||
313 | } | 314 | } |
diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h index f3972b12c60a..720c4c554534 100644 --- a/arch/x86/pci/pci.h +++ b/arch/x86/pci/pci.h | |||
@@ -101,7 +101,7 @@ extern struct pci_raw_ops pci_direct_conf1; | |||
101 | extern int pci_direct_probe(void); | 101 | extern int pci_direct_probe(void); |
102 | extern void pci_direct_init(int type); | 102 | extern void pci_direct_init(int type); |
103 | extern void pci_pcbios_init(void); | 103 | extern void pci_pcbios_init(void); |
104 | extern void pci_olpc_init(void); | 104 | extern int pci_olpc_init(void); |
105 | 105 | ||
106 | /* pci-mmconfig.c */ | 106 | /* pci-mmconfig.c */ |
107 | 107 | ||
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 2e641be2737e..6c388e593bc8 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig | |||
@@ -5,8 +5,9 @@ | |||
5 | config XEN | 5 | config XEN |
6 | bool "Xen guest support" | 6 | bool "Xen guest support" |
7 | select PARAVIRT | 7 | select PARAVIRT |
8 | select PARAVIRT_CLOCK | ||
8 | depends on X86_32 | 9 | depends on X86_32 |
9 | depends on X86_CMPXCHG && X86_TSC && !(X86_VISWS || X86_VOYAGER) | 10 | depends on X86_CMPXCHG && X86_TSC && X86_PAE && !(X86_VISWS || X86_VOYAGER) |
10 | help | 11 | help |
11 | This is the Linux Xen port. Enabling this will allow the | 12 | This is the Linux Xen port. Enabling this will allow the |
12 | kernel to boot in a paravirtualized environment under the | 13 | kernel to boot in a paravirtualized environment under the |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c8a56e457d61..f09c1c69c37a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -785,38 +785,35 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) | |||
785 | static __init void xen_pagetable_setup_start(pgd_t *base) | 785 | static __init void xen_pagetable_setup_start(pgd_t *base) |
786 | { | 786 | { |
787 | pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base; | 787 | pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base; |
788 | int i; | ||
788 | 789 | ||
789 | /* special set_pte for pagetable initialization */ | 790 | /* special set_pte for pagetable initialization */ |
790 | pv_mmu_ops.set_pte = xen_set_pte_init; | 791 | pv_mmu_ops.set_pte = xen_set_pte_init; |
791 | 792 | ||
792 | init_mm.pgd = base; | 793 | init_mm.pgd = base; |
793 | /* | 794 | /* |
794 | * copy top-level of Xen-supplied pagetable into place. For | 795 | * copy top-level of Xen-supplied pagetable into place. This |
795 | * !PAE we can use this as-is, but for PAE it is a stand-in | 796 | * is a stand-in while we copy the pmd pages. |
796 | * while we copy the pmd pages. | ||
797 | */ | 797 | */ |
798 | memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t)); | 798 | memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t)); |
799 | 799 | ||
800 | if (PTRS_PER_PMD > 1) { | 800 | /* |
801 | int i; | 801 | * For PAE, need to allocate new pmds, rather than |
802 | /* | 802 | * share Xen's, since Xen doesn't like pmd's being |
803 | * For PAE, need to allocate new pmds, rather than | 803 | * shared between address spaces. |
804 | * share Xen's, since Xen doesn't like pmd's being | 804 | */ |
805 | * shared between address spaces. | 805 | for (i = 0; i < PTRS_PER_PGD; i++) { |
806 | */ | 806 | if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) { |
807 | for (i = 0; i < PTRS_PER_PGD; i++) { | 807 | pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); |
808 | if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) { | ||
809 | pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); | ||
810 | 808 | ||
811 | memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]), | 809 | memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]), |
812 | PAGE_SIZE); | 810 | PAGE_SIZE); |
813 | 811 | ||
814 | make_lowmem_page_readonly(pmd); | 812 | make_lowmem_page_readonly(pmd); |
815 | 813 | ||
816 | set_pgd(&base[i], __pgd(1 + __pa(pmd))); | 814 | set_pgd(&base[i], __pgd(1 + __pa(pmd))); |
817 | } else | 815 | } else |
818 | pgd_clear(&base[i]); | 816 | pgd_clear(&base[i]); |
819 | } | ||
820 | } | 817 | } |
821 | 818 | ||
822 | /* make sure zero_page is mapped RO so we can use it in pagetables */ | 819 | /* make sure zero_page is mapped RO so we can use it in pagetables */ |
@@ -873,17 +870,7 @@ static __init void xen_pagetable_setup_done(pgd_t *base) | |||
873 | 870 | ||
874 | /* Actually pin the pagetable down, but we can't set PG_pinned | 871 | /* Actually pin the pagetable down, but we can't set PG_pinned |
875 | yet because the page structures don't exist yet. */ | 872 | yet because the page structures don't exist yet. */ |
876 | { | 873 | pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base))); |
877 | unsigned level; | ||
878 | |||
879 | #ifdef CONFIG_X86_PAE | ||
880 | level = MMUEXT_PIN_L3_TABLE; | ||
881 | #else | ||
882 | level = MMUEXT_PIN_L2_TABLE; | ||
883 | #endif | ||
884 | |||
885 | pin_pagetable_pfn(level, PFN_DOWN(__pa(base))); | ||
886 | } | ||
887 | } | 874 | } |
888 | 875 | ||
889 | /* This is called once we have the cpu_possible_map */ | 876 | /* This is called once we have the cpu_possible_map */ |
@@ -1093,7 +1080,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
1093 | .make_pte = xen_make_pte, | 1080 | .make_pte = xen_make_pte, |
1094 | .make_pgd = xen_make_pgd, | 1081 | .make_pgd = xen_make_pgd, |
1095 | 1082 | ||
1096 | #ifdef CONFIG_X86_PAE | ||
1097 | .set_pte_atomic = xen_set_pte_atomic, | 1083 | .set_pte_atomic = xen_set_pte_atomic, |
1098 | .set_pte_present = xen_set_pte_at, | 1084 | .set_pte_present = xen_set_pte_at, |
1099 | .set_pud = xen_set_pud, | 1085 | .set_pud = xen_set_pud, |
@@ -1102,7 +1088,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
1102 | 1088 | ||
1103 | .make_pmd = xen_make_pmd, | 1089 | .make_pmd = xen_make_pmd, |
1104 | .pmd_val = xen_pmd_val, | 1090 | .pmd_val = xen_pmd_val, |
1105 | #endif /* PAE */ | ||
1106 | 1091 | ||
1107 | .activate_mm = xen_activate_mm, | 1092 | .activate_mm = xen_activate_mm, |
1108 | .dup_mmap = xen_dup_mmap, | 1093 | .dup_mmap = xen_dup_mmap, |
@@ -1228,6 +1213,11 @@ asmlinkage void __init xen_start_kernel(void) | |||
1228 | if (xen_feature(XENFEAT_supervisor_mode_kernel)) | 1213 | if (xen_feature(XENFEAT_supervisor_mode_kernel)) |
1229 | pv_info.kernel_rpl = 0; | 1214 | pv_info.kernel_rpl = 0; |
1230 | 1215 | ||
1216 | /* Prevent unwanted bits from being set in PTEs. */ | ||
1217 | __supported_pte_mask &= ~_PAGE_GLOBAL; | ||
1218 | if (!is_initial_xendomain()) | ||
1219 | __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); | ||
1220 | |||
1231 | /* set the limit of our address space */ | 1221 | /* set the limit of our address space */ |
1232 | xen_reserve_top(); | 1222 | xen_reserve_top(); |
1233 | 1223 | ||
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 3525ef523a74..df40bf74ea75 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -179,50 +179,56 @@ out: | |||
179 | preempt_enable(); | 179 | preempt_enable(); |
180 | } | 180 | } |
181 | 181 | ||
182 | pteval_t xen_pte_val(pte_t pte) | 182 | /* Assume pteval_t is equivalent to all the other *val_t types. */ |
183 | static pteval_t pte_mfn_to_pfn(pteval_t val) | ||
184 | { | ||
185 | if (val & _PAGE_PRESENT) { | ||
186 | unsigned long mfn = (val & PTE_MASK) >> PAGE_SHIFT; | ||
187 | pteval_t flags = val & ~PTE_MASK; | ||
188 | val = (mfn_to_pfn(mfn) << PAGE_SHIFT) | flags; | ||
189 | } | ||
190 | |||
191 | return val; | ||
192 | } | ||
193 | |||
194 | static pteval_t pte_pfn_to_mfn(pteval_t val) | ||
183 | { | 195 | { |
184 | pteval_t ret = pte.pte; | 196 | if (val & _PAGE_PRESENT) { |
197 | unsigned long pfn = (val & PTE_MASK) >> PAGE_SHIFT; | ||
198 | pteval_t flags = val & ~PTE_MASK; | ||
199 | val = (pfn_to_mfn(pfn) << PAGE_SHIFT) | flags; | ||
200 | } | ||
185 | 201 | ||
186 | if (ret & _PAGE_PRESENT) | 202 | return val; |
187 | ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT; | 203 | } |
188 | 204 | ||
189 | return ret; | 205 | pteval_t xen_pte_val(pte_t pte) |
206 | { | ||
207 | return pte_mfn_to_pfn(pte.pte); | ||
190 | } | 208 | } |
191 | 209 | ||
192 | pgdval_t xen_pgd_val(pgd_t pgd) | 210 | pgdval_t xen_pgd_val(pgd_t pgd) |
193 | { | 211 | { |
194 | pgdval_t ret = pgd.pgd; | 212 | return pte_mfn_to_pfn(pgd.pgd); |
195 | if (ret & _PAGE_PRESENT) | ||
196 | ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT; | ||
197 | return ret; | ||
198 | } | 213 | } |
199 | 214 | ||
200 | pte_t xen_make_pte(pteval_t pte) | 215 | pte_t xen_make_pte(pteval_t pte) |
201 | { | 216 | { |
202 | if (pte & _PAGE_PRESENT) { | 217 | pte = pte_pfn_to_mfn(pte); |
203 | pte = phys_to_machine(XPADDR(pte)).maddr; | 218 | return native_make_pte(pte); |
204 | pte &= ~(_PAGE_PCD | _PAGE_PWT); | ||
205 | } | ||
206 | |||
207 | return (pte_t){ .pte = pte }; | ||
208 | } | 219 | } |
209 | 220 | ||
210 | pgd_t xen_make_pgd(pgdval_t pgd) | 221 | pgd_t xen_make_pgd(pgdval_t pgd) |
211 | { | 222 | { |
212 | if (pgd & _PAGE_PRESENT) | 223 | pgd = pte_pfn_to_mfn(pgd); |
213 | pgd = phys_to_machine(XPADDR(pgd)).maddr; | 224 | return native_make_pgd(pgd); |
214 | |||
215 | return (pgd_t){ pgd }; | ||
216 | } | 225 | } |
217 | 226 | ||
218 | pmdval_t xen_pmd_val(pmd_t pmd) | 227 | pmdval_t xen_pmd_val(pmd_t pmd) |
219 | { | 228 | { |
220 | pmdval_t ret = native_pmd_val(pmd); | 229 | return pte_mfn_to_pfn(pmd.pmd); |
221 | if (ret & _PAGE_PRESENT) | ||
222 | ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT; | ||
223 | return ret; | ||
224 | } | 230 | } |
225 | #ifdef CONFIG_X86_PAE | 231 | |
226 | void xen_set_pud(pud_t *ptr, pud_t val) | 232 | void xen_set_pud(pud_t *ptr, pud_t val) |
227 | { | 233 | { |
228 | struct multicall_space mcs; | 234 | struct multicall_space mcs; |
@@ -267,17 +273,9 @@ void xen_pmd_clear(pmd_t *pmdp) | |||
267 | 273 | ||
268 | pmd_t xen_make_pmd(pmdval_t pmd) | 274 | pmd_t xen_make_pmd(pmdval_t pmd) |
269 | { | 275 | { |
270 | if (pmd & _PAGE_PRESENT) | 276 | pmd = pte_pfn_to_mfn(pmd); |
271 | pmd = phys_to_machine(XPADDR(pmd)).maddr; | ||
272 | |||
273 | return native_make_pmd(pmd); | 277 | return native_make_pmd(pmd); |
274 | } | 278 | } |
275 | #else /* !PAE */ | ||
276 | void xen_set_pte(pte_t *ptep, pte_t pte) | ||
277 | { | ||
278 | *ptep = pte; | ||
279 | } | ||
280 | #endif /* CONFIG_X86_PAE */ | ||
281 | 279 | ||
282 | /* | 280 | /* |
283 | (Yet another) pagetable walker. This one is intended for pinning a | 281 | (Yet another) pagetable walker. This one is intended for pinning a |
@@ -430,8 +428,6 @@ static int pin_page(struct page *page, enum pt_level level) | |||
430 | read-only, and can be pinned. */ | 428 | read-only, and can be pinned. */ |
431 | void xen_pgd_pin(pgd_t *pgd) | 429 | void xen_pgd_pin(pgd_t *pgd) |
432 | { | 430 | { |
433 | unsigned level; | ||
434 | |||
435 | xen_mc_batch(); | 431 | xen_mc_batch(); |
436 | 432 | ||
437 | if (pgd_walk(pgd, pin_page, TASK_SIZE)) { | 433 | if (pgd_walk(pgd, pin_page, TASK_SIZE)) { |
@@ -441,14 +437,7 @@ void xen_pgd_pin(pgd_t *pgd) | |||
441 | xen_mc_batch(); | 437 | xen_mc_batch(); |
442 | } | 438 | } |
443 | 439 | ||
444 | #ifdef CONFIG_X86_PAE | 440 | xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd))); |
445 | level = MMUEXT_PIN_L3_TABLE; | ||
446 | #else | ||
447 | level = MMUEXT_PIN_L2_TABLE; | ||
448 | #endif | ||
449 | |||
450 | xen_do_pin(level, PFN_DOWN(__pa(pgd))); | ||
451 | |||
452 | xen_mc_issue(0); | 441 | xen_mc_issue(0); |
453 | } | 442 | } |
454 | 443 | ||
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index b5e189b1519d..5fe961caffd4 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h | |||
@@ -37,14 +37,13 @@ void xen_exit_mmap(struct mm_struct *mm); | |||
37 | void xen_pgd_pin(pgd_t *pgd); | 37 | void xen_pgd_pin(pgd_t *pgd); |
38 | //void xen_pgd_unpin(pgd_t *pgd); | 38 | //void xen_pgd_unpin(pgd_t *pgd); |
39 | 39 | ||
40 | #ifdef CONFIG_X86_PAE | 40 | pteval_t xen_pte_val(pte_t); |
41 | unsigned long long xen_pte_val(pte_t); | 41 | pmdval_t xen_pmd_val(pmd_t); |
42 | unsigned long long xen_pmd_val(pmd_t); | 42 | pgdval_t xen_pgd_val(pgd_t); |
43 | unsigned long long xen_pgd_val(pgd_t); | ||
44 | 43 | ||
45 | pte_t xen_make_pte(unsigned long long); | 44 | pte_t xen_make_pte(pteval_t); |
46 | pmd_t xen_make_pmd(unsigned long long); | 45 | pmd_t xen_make_pmd(pmdval_t); |
47 | pgd_t xen_make_pgd(unsigned long long); | 46 | pgd_t xen_make_pgd(pgdval_t); |
48 | 47 | ||
49 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | 48 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, |
50 | pte_t *ptep, pte_t pteval); | 49 | pte_t *ptep, pte_t pteval); |
@@ -53,15 +52,4 @@ void xen_set_pud(pud_t *ptr, pud_t val); | |||
53 | void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); | 52 | void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); |
54 | void xen_pmd_clear(pmd_t *pmdp); | 53 | void xen_pmd_clear(pmd_t *pmdp); |
55 | 54 | ||
56 | |||
57 | #else | ||
58 | unsigned long xen_pte_val(pte_t); | ||
59 | unsigned long xen_pmd_val(pmd_t); | ||
60 | unsigned long xen_pgd_val(pgd_t); | ||
61 | |||
62 | pte_t xen_make_pte(unsigned long); | ||
63 | pmd_t xen_make_pmd(unsigned long); | ||
64 | pgd_t xen_make_pgd(unsigned long); | ||
65 | #endif | ||
66 | |||
67 | #endif /* _XEN_MMU_H */ | 55 | #endif /* _XEN_MMU_H */ |
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index c39e1a5aa241..41e217503c96 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -12,7 +12,9 @@ | |||
12 | #include <linux/clocksource.h> | 12 | #include <linux/clocksource.h> |
13 | #include <linux/clockchips.h> | 13 | #include <linux/clockchips.h> |
14 | #include <linux/kernel_stat.h> | 14 | #include <linux/kernel_stat.h> |
15 | #include <linux/math64.h> | ||
15 | 16 | ||
17 | #include <asm/pvclock.h> | ||
16 | #include <asm/xen/hypervisor.h> | 18 | #include <asm/xen/hypervisor.h> |
17 | #include <asm/xen/hypercall.h> | 19 | #include <asm/xen/hypercall.h> |
18 | 20 | ||
@@ -30,17 +32,6 @@ | |||
30 | 32 | ||
31 | static cycle_t xen_clocksource_read(void); | 33 | static cycle_t xen_clocksource_read(void); |
32 | 34 | ||
33 | /* These are perodically updated in shared_info, and then copied here. */ | ||
34 | struct shadow_time_info { | ||
35 | u64 tsc_timestamp; /* TSC at last update of time vals. */ | ||
36 | u64 system_timestamp; /* Time, in nanosecs, since boot. */ | ||
37 | u32 tsc_to_nsec_mul; | ||
38 | int tsc_shift; | ||
39 | u32 version; | ||
40 | }; | ||
41 | |||
42 | static DEFINE_PER_CPU(struct shadow_time_info, shadow_time); | ||
43 | |||
44 | /* runstate info updated by Xen */ | 35 | /* runstate info updated by Xen */ |
45 | static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); | 36 | static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); |
46 | 37 | ||
@@ -150,11 +141,7 @@ static void do_stolen_accounting(void) | |||
150 | if (stolen < 0) | 141 | if (stolen < 0) |
151 | stolen = 0; | 142 | stolen = 0; |
152 | 143 | ||
153 | ticks = 0; | 144 | ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); |
154 | while (stolen >= NS_PER_TICK) { | ||
155 | ticks++; | ||
156 | stolen -= NS_PER_TICK; | ||
157 | } | ||
158 | __get_cpu_var(residual_stolen) = stolen; | 145 | __get_cpu_var(residual_stolen) = stolen; |
159 | account_steal_time(NULL, ticks); | 146 | account_steal_time(NULL, ticks); |
160 | 147 | ||
@@ -166,11 +153,7 @@ static void do_stolen_accounting(void) | |||
166 | if (blocked < 0) | 153 | if (blocked < 0) |
167 | blocked = 0; | 154 | blocked = 0; |
168 | 155 | ||
169 | ticks = 0; | 156 | ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked); |
170 | while (blocked >= NS_PER_TICK) { | ||
171 | ticks++; | ||
172 | blocked -= NS_PER_TICK; | ||
173 | } | ||
174 | __get_cpu_var(residual_blocked) = blocked; | 157 | __get_cpu_var(residual_blocked) = blocked; |
175 | account_steal_time(idle_task(smp_processor_id()), ticks); | 158 | account_steal_time(idle_task(smp_processor_id()), ticks); |
176 | } | 159 | } |
@@ -218,7 +201,7 @@ unsigned long long xen_sched_clock(void) | |||
218 | unsigned long xen_cpu_khz(void) | 201 | unsigned long xen_cpu_khz(void) |
219 | { | 202 | { |
220 | u64 xen_khz = 1000000ULL << 32; | 203 | u64 xen_khz = 1000000ULL << 32; |
221 | const struct vcpu_time_info *info = | 204 | const struct pvclock_vcpu_time_info *info = |
222 | &HYPERVISOR_shared_info->vcpu_info[0].time; | 205 | &HYPERVISOR_shared_info->vcpu_info[0].time; |
223 | 206 | ||
224 | do_div(xen_khz, info->tsc_to_system_mul); | 207 | do_div(xen_khz, info->tsc_to_system_mul); |
@@ -230,121 +213,26 @@ unsigned long xen_cpu_khz(void) | |||
230 | return xen_khz; | 213 | return xen_khz; |
231 | } | 214 | } |
232 | 215 | ||
233 | /* | ||
234 | * Reads a consistent set of time-base values from Xen, into a shadow data | ||
235 | * area. | ||
236 | */ | ||
237 | static unsigned get_time_values_from_xen(void) | ||
238 | { | ||
239 | struct vcpu_time_info *src; | ||
240 | struct shadow_time_info *dst; | ||
241 | |||
242 | /* src is shared memory with the hypervisor, so we need to | ||
243 | make sure we get a consistent snapshot, even in the face of | ||
244 | being preempted. */ | ||
245 | src = &__get_cpu_var(xen_vcpu)->time; | ||
246 | dst = &__get_cpu_var(shadow_time); | ||
247 | |||
248 | do { | ||
249 | dst->version = src->version; | ||
250 | rmb(); /* fetch version before data */ | ||
251 | dst->tsc_timestamp = src->tsc_timestamp; | ||
252 | dst->system_timestamp = src->system_time; | ||
253 | dst->tsc_to_nsec_mul = src->tsc_to_system_mul; | ||
254 | dst->tsc_shift = src->tsc_shift; | ||
255 | rmb(); /* test version after fetching data */ | ||
256 | } while ((src->version & 1) | (dst->version ^ src->version)); | ||
257 | |||
258 | return dst->version; | ||
259 | } | ||
260 | |||
261 | /* | ||
262 | * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, | ||
263 | * yielding a 64-bit result. | ||
264 | */ | ||
265 | static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift) | ||
266 | { | ||
267 | u64 product; | ||
268 | #ifdef __i386__ | ||
269 | u32 tmp1, tmp2; | ||
270 | #endif | ||
271 | |||
272 | if (shift < 0) | ||
273 | delta >>= -shift; | ||
274 | else | ||
275 | delta <<= shift; | ||
276 | |||
277 | #ifdef __i386__ | ||
278 | __asm__ ( | ||
279 | "mul %5 ; " | ||
280 | "mov %4,%%eax ; " | ||
281 | "mov %%edx,%4 ; " | ||
282 | "mul %5 ; " | ||
283 | "xor %5,%5 ; " | ||
284 | "add %4,%%eax ; " | ||
285 | "adc %5,%%edx ; " | ||
286 | : "=A" (product), "=r" (tmp1), "=r" (tmp2) | ||
287 | : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); | ||
288 | #elif __x86_64__ | ||
289 | __asm__ ( | ||
290 | "mul %%rdx ; shrd $32,%%rdx,%%rax" | ||
291 | : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) ); | ||
292 | #else | ||
293 | #error implement me! | ||
294 | #endif | ||
295 | |||
296 | return product; | ||
297 | } | ||
298 | |||
299 | static u64 get_nsec_offset(struct shadow_time_info *shadow) | ||
300 | { | ||
301 | u64 now, delta; | ||
302 | now = native_read_tsc(); | ||
303 | delta = now - shadow->tsc_timestamp; | ||
304 | return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift); | ||
305 | } | ||
306 | |||
307 | static cycle_t xen_clocksource_read(void) | 216 | static cycle_t xen_clocksource_read(void) |
308 | { | 217 | { |
309 | struct shadow_time_info *shadow = &get_cpu_var(shadow_time); | 218 | struct pvclock_vcpu_time_info *src; |
310 | cycle_t ret; | 219 | cycle_t ret; |
311 | unsigned version; | ||
312 | |||
313 | do { | ||
314 | version = get_time_values_from_xen(); | ||
315 | barrier(); | ||
316 | ret = shadow->system_timestamp + get_nsec_offset(shadow); | ||
317 | barrier(); | ||
318 | } while (version != __get_cpu_var(xen_vcpu)->time.version); | ||
319 | |||
320 | put_cpu_var(shadow_time); | ||
321 | 220 | ||
221 | src = &get_cpu_var(xen_vcpu)->time; | ||
222 | ret = pvclock_clocksource_read(src); | ||
223 | put_cpu_var(xen_vcpu); | ||
322 | return ret; | 224 | return ret; |
323 | } | 225 | } |
324 | 226 | ||
325 | static void xen_read_wallclock(struct timespec *ts) | 227 | static void xen_read_wallclock(struct timespec *ts) |
326 | { | 228 | { |
327 | const struct shared_info *s = HYPERVISOR_shared_info; | 229 | struct shared_info *s = HYPERVISOR_shared_info; |
328 | u32 version; | 230 | struct pvclock_wall_clock *wall_clock = &(s->wc); |
329 | u64 delta; | 231 | struct pvclock_vcpu_time_info *vcpu_time; |
330 | struct timespec now; | ||
331 | |||
332 | /* get wallclock at system boot */ | ||
333 | do { | ||
334 | version = s->wc_version; | ||
335 | rmb(); /* fetch version before time */ | ||
336 | now.tv_sec = s->wc_sec; | ||
337 | now.tv_nsec = s->wc_nsec; | ||
338 | rmb(); /* fetch time before checking version */ | ||
339 | } while ((s->wc_version & 1) | (version ^ s->wc_version)); | ||
340 | 232 | ||
341 | delta = xen_clocksource_read(); /* time since system boot */ | 233 | vcpu_time = &get_cpu_var(xen_vcpu)->time; |
342 | delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec; | 234 | pvclock_read_wallclock(wall_clock, vcpu_time, ts); |
343 | 235 | put_cpu_var(xen_vcpu); | |
344 | now.tv_nsec = do_div(delta, NSEC_PER_SEC); | ||
345 | now.tv_sec = delta; | ||
346 | |||
347 | set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); | ||
348 | } | 236 | } |
349 | 237 | ||
350 | unsigned long xen_get_wallclock(void) | 238 | unsigned long xen_get_wallclock(void) |
@@ -352,7 +240,6 @@ unsigned long xen_get_wallclock(void) | |||
352 | struct timespec ts; | 240 | struct timespec ts; |
353 | 241 | ||
354 | xen_read_wallclock(&ts); | 242 | xen_read_wallclock(&ts); |
355 | |||
356 | return ts.tv_sec; | 243 | return ts.tv_sec; |
357 | } | 244 | } |
358 | 245 | ||
@@ -576,8 +463,6 @@ __init void xen_time_init(void) | |||
576 | { | 463 | { |
577 | int cpu = smp_processor_id(); | 464 | int cpu = smp_processor_id(); |
578 | 465 | ||
579 | get_time_values_from_xen(); | ||
580 | |||
581 | clocksource_register(&xen_clocksource); | 466 | clocksource_register(&xen_clocksource); |
582 | 467 | ||
583 | if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) { | 468 | if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) { |
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 288d587ce73c..6ec3b4f7719b 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S | |||
@@ -17,7 +17,7 @@ ENTRY(startup_xen) | |||
17 | 17 | ||
18 | __FINIT | 18 | __FINIT |
19 | 19 | ||
20 | .pushsection .bss.page_aligned | 20 | .pushsection .text |
21 | .align PAGE_SIZE_asm | 21 | .align PAGE_SIZE_asm |
22 | ENTRY(hypercall_page) | 22 | ENTRY(hypercall_page) |
23 | .skip 0x1000 | 23 | .skip 0x1000 |
@@ -30,11 +30,7 @@ ENTRY(hypercall_page) | |||
30 | ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_xen) | 30 | ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_xen) |
31 | ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page) | 31 | ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page) |
32 | ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") | 32 | ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") |
33 | #ifdef CONFIG_X86_PAE | ||
34 | ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") | 33 | ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") |
35 | #else | ||
36 | ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "no") | ||
37 | #endif | ||
38 | ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") | 34 | ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") |
39 | 35 | ||
40 | #endif /*CONFIG_XEN */ | 36 | #endif /*CONFIG_XEN */ |