Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig.debug                     |  24
-rw-r--r--  arch/x86/include/asm/kvm.h                 |   7
-rw-r--r--  arch/x86/include/asm/page.h                |   1
-rw-r--r--  arch/x86/include/asm/paravirt.h            |  17
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.c  |  12
-rw-r--r--  arch/x86/kernel/hpet.c                     |   2
-rw-r--r--  arch/x86/kernel/olpc.c                     |   2
-rw-r--r--  arch/x86/kernel/paravirt.c                 |  26
-rw-r--r--  arch/x86/kernel/ptrace.c                   |  16
-rw-r--r--  arch/x86/kernel/traps.c                    |  10
-rw-r--r--  arch/x86/kvm/i8254.c                       |   2
-rw-r--r--  arch/x86/kvm/irq.c                         |   7
-rw-r--r--  arch/x86/kvm/irq.h                         |   1
-rw-r--r--  arch/x86/kvm/lapic.c                       |  66
-rw-r--r--  arch/x86/kvm/lapic.h                       |   2
-rw-r--r--  arch/x86/kvm/mmu.c                         |   9
-rw-r--r--  arch/x86/kvm/svm.c                         |   1
-rw-r--r--  arch/x86/kvm/vmx.c                         |   5
-rw-r--r--  arch/x86/kvm/x86.c                         |  10
-rw-r--r--  arch/x86/mm/ioremap.c                      |  19
-rw-r--r--  arch/x86/mm/pageattr.c                     |  15
-rw-r--r--  arch/x86/mm/pat.c                          |  83
22 files changed, 158 insertions, 179 deletions
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 10d6cc3fd052..e1983fa025d2 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -174,28 +174,8 @@ config IOMMU_LEAK
 	  Add a simple leak tracer to the IOMMU code. This is useful when you
 	  are debugging a buggy device driver that leaks IOMMU mappings.
 
-config MMIOTRACE
-	bool "Memory mapped IO tracing"
-	depends on DEBUG_KERNEL && PCI
-	select TRACING
-	help
-	  Mmiotrace traces Memory Mapped I/O access and is meant for
-	  debugging and reverse engineering. It is called from the ioremap
-	  implementation and works via page faults. Tracing is disabled by
-	  default and can be enabled at run-time.
-
-	  See Documentation/tracers/mmiotrace.txt.
-	  If you are not helping to develop drivers, say N.
-
-config MMIOTRACE_TEST
-	tristate "Test module for mmiotrace"
-	depends on MMIOTRACE && m
-	help
-	  This is a dumb module for testing mmiotrace. It is very dangerous
-	  as it will write garbage to IO memory starting at a given address.
-	  However, it should be safe to use on e.g. unused portion of VRAM.
-
-	  Say N, unless you absolutely know what you are doing.
+config HAVE_MMIOTRACE_SUPPORT
+	def_bool y
 
 #
 # IO delay types:
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index d2e3bf3608af..886c9402ec45 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -9,6 +9,13 @@
 #include <linux/types.h>
 #include <linux/ioctl.h>
 
+/* Select x86 specific features in <linux/kvm.h> */
+#define __KVM_HAVE_PIT
+#define __KVM_HAVE_IOAPIC
+#define __KVM_HAVE_DEVICE_ASSIGNMENT
+#define __KVM_HAVE_MSI
+#define __KVM_HAVE_USER_NMI
+
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
 
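The __KVM_HAVE_* macros above let the arch header advertise features to the generic <linux/kvm.h>, which only exposes the matching ABI when the macro is set. A freestanding sketch of that pattern; the guarded capability name and its value below are illustrative, not quoted from linux/kvm.h:

#include <stdio.h>

#define __KVM_HAVE_PIT			/* as asm/kvm.h now does on x86 */

#ifdef __KVM_HAVE_PIT
#define KVM_CAP_PIT 11			/* illustrative value */
#endif

int main(void)
{
#ifdef KVM_CAP_PIT
	printf("PIT ABI exposed: KVM_CAP_PIT = %d\n", KVM_CAP_PIT);
#else
	printf("PIT ABI not available on this architecture\n");
#endif
	return 0;
}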
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index e9873a2e8695..776579119a00 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -57,7 +57,6 @@ typedef struct { pgdval_t pgd; } pgd_t;
 typedef struct { pgprotval_t pgprot; } pgprot_t;
 
 extern int page_is_ram(unsigned long pagenr);
-extern int pagerange_is_ram(unsigned long start, unsigned long end);
 extern int devmem_is_allowed(unsigned long pagenr);
 extern void map_devmem(unsigned long pfn, unsigned long size,
 		       pgprot_t vma_prot);
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index c09a14127584..e299287e8e33 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -1352,14 +1352,7 @@ static inline void arch_leave_lazy_cpu_mode(void)
 	PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave);
 }
 
-static inline void arch_flush_lazy_cpu_mode(void)
-{
-	if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)) {
-		arch_leave_lazy_cpu_mode();
-		arch_enter_lazy_cpu_mode();
-	}
-}
-
+void arch_flush_lazy_cpu_mode(void);
 
 #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
 static inline void arch_enter_lazy_mmu_mode(void)
@@ -1372,13 +1365,7 @@ static inline void arch_leave_lazy_mmu_mode(void)
 	PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave);
 }
 
-static inline void arch_flush_lazy_mmu_mode(void)
-{
-	if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU)) {
-		arch_leave_lazy_mmu_mode();
-		arch_enter_lazy_mmu_mode();
-	}
-}
+void arch_flush_lazy_mmu_mode(void);
 
 static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
 				unsigned long phys, pgprot_t flags)
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index fb039cd345d8..6428aa17b40e 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1157,8 +1157,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 	data->cpu = pol->cpu;
 	data->currpstate = HW_PSTATE_INVALID;
 
-	rc = powernow_k8_cpu_init_acpi(data);
-	if (rc) {
+	if (powernow_k8_cpu_init_acpi(data)) {
 		/*
 		 * Use the PSB BIOS structure. This is only availabe on
 		 * an UP version, and is deprecated by AMD.
@@ -1176,17 +1175,20 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 		       "ACPI maintainers and complain to your BIOS "
 		       "vendor.\n");
 #endif
-		goto err_out;
+		kfree(data);
+		return -ENODEV;
 	}
 	if (pol->cpu != 0) {
 		printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
 		       "CPU other than CPU0. Complain to your BIOS "
 		       "vendor.\n");
-		goto err_out;
+		kfree(data);
+		return -ENODEV;
 	}
 	rc = find_psb_table(data);
 	if (rc) {
-		goto err_out;
+		kfree(data);
+		return -ENODEV;
 	}
 	/* Take a crude guess here.
 	 * That guess was in microseconds, so multiply with 1000 */
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 388254f69a2a..a00545fe5cdd 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -269,6 +269,8 @@ static void hpet_set_mode(enum clock_event_mode mode,
 		now = hpet_readl(HPET_COUNTER);
 		cmp = now + (unsigned long) delta;
 		cfg = hpet_readl(HPET_Tn_CFG(timer));
+		/* Make sure we use edge triggered interrupts */
+		cfg &= ~HPET_TN_LEVEL;
 		cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
 		       HPET_TN_SETVAL | HPET_TN_32BIT;
 		hpet_writel(cfg, HPET_Tn_CFG(timer));
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c
index 7a13fac63a1f..4006c522adc7 100644
--- a/arch/x86/kernel/olpc.c
+++ b/arch/x86/kernel/olpc.c
@@ -203,7 +203,7 @@ static void __init platform_detect(void)
 static void __init platform_detect(void)
 {
 	/* stopgap until OFW support is added to the kernel */
-	olpc_platform_info.boardrev = 0xc2;
+	olpc_platform_info.boardrev = olpc_board(0xc2);
 }
 #endif
 
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e4c8fb608873..c6520a4e85d4 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -268,6 +268,32 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
 	return __get_cpu_var(paravirt_lazy_mode);
 }
 
+void arch_flush_lazy_mmu_mode(void)
+{
+	preempt_disable();
+
+	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
+		WARN_ON(preempt_count() == 1);
+		arch_leave_lazy_mmu_mode();
+		arch_enter_lazy_mmu_mode();
+	}
+
+	preempt_enable();
+}
+
+void arch_flush_lazy_cpu_mode(void)
+{
+	preempt_disable();
+
+	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
+		WARN_ON(preempt_count() == 1);
+		arch_leave_lazy_cpu_mode();
+		arch_enter_lazy_cpu_mode();
+	}
+
+	preempt_enable();
+}
+
 struct pv_info pv_info = {
 	.name = "bare hardware",
 	.paravirt_enabled = 0,
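The new out-of-line flush helpers exist because a hypervisor backend may batch MMU or CPU-state updates while a lazy mode is active; flushing is simply leave-then-re-enter, done here under preemption protection so the per-CPU mode cannot change underfoot. A freestanding user-space sketch of that batching pattern; all names below are illustrative, not the kernel's:

#include <stdio.h>

enum lazy_mode { LAZY_NONE, LAZY_MMU };

static enum lazy_mode mode = LAZY_NONE;
static int queued;			/* pending batched updates */

static void mmu_enter_lazy(void) { mode = LAZY_MMU; }

static void mmu_leave_lazy(void)
{
	if (queued)
		printf("issuing %d batched updates in one call\n", queued);
	queued = 0;
	mode = LAZY_NONE;
}

/* Mirrors the new arch_flush_lazy_mmu_mode(): make pending state
 * visible without ending the lazy section for the caller. */
static void flush_lazy_mmu(void)
{
	if (mode == LAZY_MMU) {
		mmu_leave_lazy();
		mmu_enter_lazy();
	}
}

int main(void)
{
	mmu_enter_lazy();
	queued = 3;		/* pretend three PTE updates were queued */
	flush_lazy_mmu();	/* a reader needs up-to-date PTEs now */
	queued = 1;
	mmu_leave_lazy();
	return 0;
}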
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0a5df5f82fb9..5a4c23d89892 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -810,12 +810,16 @@ static void ptrace_bts_untrace(struct task_struct *child)
 
 static void ptrace_bts_detach(struct task_struct *child)
 {
-	if (unlikely(child->bts)) {
-		ds_release_bts(child->bts);
-		child->bts = NULL;
-
-		ptrace_bts_free_buffer(child);
-	}
+	/*
+	 * Ptrace_detach() races with ptrace_untrace() in case
+	 * the child dies and is reaped by another thread.
+	 *
+	 * We only do the memory accounting at this point and
+	 * leave the buffer deallocation and the bts tracer
+	 * release to ptrace_bts_untrace() which will be called
+	 * later on with tasklist_lock held.
+	 */
+	release_locked_buffer(child->bts_buffer, child->bts_size);
 }
 #else
 static inline void ptrace_bts_fork(struct task_struct *tsk) {}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 7932338d7cb3..a9e7548e1790 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -99,6 +99,12 @@ static inline void preempt_conditional_sti(struct pt_regs *regs)
 		local_irq_enable();
 }
 
+static inline void conditional_cli(struct pt_regs *regs)
+{
+	if (regs->flags & X86_EFLAGS_IF)
+		local_irq_disable();
+}
+
 static inline void preempt_conditional_cli(struct pt_regs *regs)
 {
 	if (regs->flags & X86_EFLAGS_IF)
@@ -626,8 +632,10 @@ clear_dr7:
 
 #ifdef CONFIG_X86_32
 debug_vm86:
+	/* reenable preemption: handle_vm86_trap() might sleep */
+	dec_preempt_count();
 	handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
-	preempt_conditional_cli(regs);
+	conditional_cli(regs);
 	return;
 #endif
 
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index e665d1c623ca..72bd275a9b5c 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -207,7 +207,7 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps)
 		hrtimer_add_expires_ns(&pt->timer, pt->period);
 		pt->scheduled = hrtimer_get_expires_ns(&pt->timer);
 		if (pt->period)
-			ps->channels[0].count_load_time = hrtimer_get_expires(&pt->timer);
+			ps->channels[0].count_load_time = ktime_get();
 
 	return (pt->period == 0 ? 0 : 1);
 }
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index c019b8edcdb7..cf17ed52f6fb 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -87,13 +87,6 @@ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
 
-void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
-{
-	kvm_apic_timer_intr_post(vcpu, vec);
-	/* TODO: PIT, RTC etc. */
-}
-EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
-
 void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
 {
 	__kvm_migrate_apic_timer(vcpu);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 2bf32a03ceec..82579ee538d0 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -89,7 +89,6 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
 
 void kvm_pic_reset(struct kvm_kpic_state *s);
 
-void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index afac68c0815c..f0b67f2cdd69 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -35,6 +35,12 @@
 #include "kvm_cache_regs.h"
 #include "irq.h"
 
+#ifndef CONFIG_X86_64
+#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
+#else
+#define mod_64(x, y) ((x) % (y))
+#endif
+
 #define PRId64 "d"
 #define PRIx64 "llx"
 #define PRIu64 "u"
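The mod_64() macro avoids the 64-bit '%' operator on 32-bit x86, where the compiler would emit a call to a libgcc division helper the kernel does not provide; the remainder is instead recovered from the quotient via div64_u64(). A user-space sketch of the identity, with plain '/' standing in for div64_u64():

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t mod_64(uint64_t x, uint64_t y)
{
	return x - y * (x / y);	/* x - y * div64_u64(x, y) in the kernel */
}

int main(void)
{
	uint64_t ns = 123456789ULL, period = 1000000ULL;

	assert(mod_64(ns, period) == ns % period);
	printf("remainder: %llu\n", (unsigned long long)mod_64(ns, period));
	return 0;
}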
@@ -511,52 +517,22 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 
 static u32 apic_get_tmcct(struct kvm_lapic *apic)
 {
-	u64 counter_passed;
-	ktime_t passed, now;
+	ktime_t remaining;
+	s64 ns;
 	u32 tmcct;
 
 	ASSERT(apic != NULL);
 
-	now = apic->timer.dev.base->get_time();
-	tmcct = apic_get_reg(apic, APIC_TMICT);
-
 	/* if initial count is 0, current count should also be 0 */
-	if (tmcct == 0)
+	if (apic_get_reg(apic, APIC_TMICT) == 0)
 		return 0;
 
-	if (unlikely(ktime_to_ns(now) <=
-		ktime_to_ns(apic->timer.last_update))) {
-		/* Wrap around */
-		passed = ktime_add(( {
-			(ktime_t) {
-				.tv64 = KTIME_MAX -
-				(apic->timer.last_update).tv64}; }
-			), now);
-		apic_debug("time elapsed\n");
-	} else
-		passed = ktime_sub(now, apic->timer.last_update);
-
-	counter_passed = div64_u64(ktime_to_ns(passed),
-				   (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
-
-	if (counter_passed > tmcct) {
-		if (unlikely(!apic_lvtt_period(apic))) {
-			/* one-shot timers stick at 0 until reset */
-			tmcct = 0;
-		} else {
-			/*
-			 * periodic timers reset to APIC_TMICT when they
-			 * hit 0. The while loop simulates this happening N
-			 * times. (counter_passed %= tmcct) would also work,
-			 * but might be slower or not work on 32-bit??
-			 */
-			while (counter_passed > tmcct)
-				counter_passed -= tmcct;
-			tmcct -= counter_passed;
-		}
-	} else {
-		tmcct -= counter_passed;
-	}
+	remaining = hrtimer_expires_remaining(&apic->timer.dev);
+	if (ktime_to_ns(remaining) < 0)
+		remaining = ktime_set(0, 0);
+
+	ns = mod_64(ktime_to_ns(remaining), apic->timer.period);
+	tmcct = div64_u64(ns,
+			  (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
 
 	return tmcct;
 }
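The rewritten apic_get_tmcct() derives the current count purely from the hrtimer: the time left until expiry, folded into the current period, then scaled back to APIC bus cycles. A user-space sketch with made-up numbers; APIC_BUS_CYCLE_NS = 1 is an assumption about the value in lapic.h:

#include <stdint.h>
#include <stdio.h>

#define APIC_BUS_CYCLE_NS 1ULL	/* assumed to match lapic.h */

int main(void)
{
	uint64_t period = 1000000ULL;	/* ns per period (TMICT * cycle * div) */
	uint64_t remaining = 750000ULL;	/* ns until the hrtimer fires */
	uint64_t divide_count = 2ULL;	/* from the APIC divide configuration */

	/* fold into the current period, then convert ns back to counts */
	uint64_t ns = remaining % period;
	uint64_t tmcct = ns / (APIC_BUS_CYCLE_NS * divide_count);

	printf("TMCCT = %llu\n", (unsigned long long)tmcct);	/* 375000 */
	return 0;
}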
@@ -653,8 +629,6 @@ static void start_apic_timer(struct kvm_lapic *apic)
 {
 	ktime_t now = apic->timer.dev.base->get_time();
 
-	apic->timer.last_update = now;
-
 	apic->timer.period = apic_get_reg(apic, APIC_TMICT) *
 		APIC_BUS_CYCLE_NS * apic->timer.divide_count;
 	atomic_set(&apic->timer.pending, 0);
@@ -1110,16 +1084,6 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
 	}
 }
 
-void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
-{
-	struct kvm_lapic *apic = vcpu->arch.apic;
-
-	if (apic && apic_lvt_vector(apic, APIC_LVTT) == vec)
-		apic->timer.last_update = ktime_add_ns(
-				apic->timer.last_update,
-				apic->timer.period);
-}
-
 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
 {
 	int vector = kvm_apic_has_interrupt(vcpu);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 81858881287e..45ab6ee71209 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -12,7 +12,6 @@ struct kvm_lapic {
 		atomic_t pending;
 		s64 period;	/* unit: ns */
 		u32 divide_count;
-		ktime_t last_update;
 		struct hrtimer dev;
 	} timer;
 	struct kvm_vcpu *vcpu;
@@ -42,7 +41,6 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
 void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
 int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
-void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
 
 void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
 void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 83f11c7474a1..2d4477c71473 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1698,8 +1698,13 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 	if (largepage)
 		spte |= PT_PAGE_SIZE_MASK;
 	if (mt_mask) {
-		mt_mask = get_memory_type(vcpu, gfn) <<
-			  kvm_x86_ops->get_mt_mask_shift();
+		if (!kvm_is_mmio_pfn(pfn)) {
+			mt_mask = get_memory_type(vcpu, gfn) <<
+				kvm_x86_ops->get_mt_mask_shift();
+			mt_mask |= VMX_EPT_IGMT_BIT;
+		} else
+			mt_mask = MTRR_TYPE_UNCACHABLE <<
+				kvm_x86_ops->get_mt_mask_shift();
 		spte |= mt_mask;
 	}
 
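This mmu.c change forces an uncacheable memory type for MMIO pfns and sets the ignore-guest-PAT bit only for RAM; VMX_EPT_IGMT_BIT correspondingly drops out of the base PTE mask in vmx.c below. A sketch of the resulting EPT PTE field composition; the shift and bit positions follow the VMX EPT format and are stated here as assumptions, not quoted from the patch:

#include <stdint.h>
#include <stdio.h>

#define EPT_MT_SHIFT   3		/* bits 5:3 hold the memory type */
#define EPT_IGMT_BIT   (1ULL << 6)	/* ignore guest PAT */
#define MT_WRITEBACK   6ULL
#define MT_UNCACHABLE  0ULL

int main(void)
{
	/* normal RAM: honor the guest-derived type, ignore guest PAT */
	uint64_t ram_bits  = (MT_WRITEBACK << EPT_MT_SHIFT) | EPT_IGMT_BIT;
	/* MMIO: force UC so device accesses are never cached */
	uint64_t mmio_bits = MT_UNCACHABLE << EPT_MT_SHIFT;

	printf("RAM spte bits: 0x%llx, MMIO spte bits: 0x%llx\n",
	       (unsigned long long)ram_bits, (unsigned long long)mmio_bits);
	return 0;
}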
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1452851ae258..a9e769e4e251 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1600,7 +1600,6 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
 	/* Okay, we can deliver the interrupt: grab it and update PIC state. */
 	intr_vector = kvm_cpu_get_interrupt(vcpu);
 	svm_inject_irq(svm, intr_vector);
-	kvm_timer_intr_post(vcpu, intr_vector);
 out:
 	update_cr8_intercept(vcpu);
 }
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6259d7467648..7611af576829 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -903,6 +903,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 		data = vmcs_readl(GUEST_SYSENTER_ESP);
 		break;
 	default:
+		vmx_load_host_state(to_vmx(vcpu));
 		msr = find_msr_entry(to_vmx(vcpu), msr_index);
 		if (msr) {
 			data = msr->data;
@@ -3285,7 +3286,6 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 	}
 	if (vcpu->arch.interrupt.pending) {
 		vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
-		kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
 		if (kvm_cpu_has_interrupt(vcpu))
 			enable_irq_window(vcpu);
 	}
@@ -3687,8 +3687,7 @@ static int __init vmx_init(void)
 	if (vm_need_ept()) {
 		bypass_guest_pf = 0;
 		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
-			VMX_EPT_WRITABLE_MASK |
-			VMX_EPT_IGMT_BIT);
+			VMX_EPT_WRITABLE_MASK);
 		kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
 			VMX_EPT_EXECUTABLE_MASK,
 			VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cc17546a2406..758b7a155ae9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -967,7 +967,6 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
 	case KVM_CAP_SET_TSS_ADDR:
 	case KVM_CAP_EXT_CPUID:
-	case KVM_CAP_CLOCKSOURCE:
 	case KVM_CAP_PIT:
 	case KVM_CAP_NOP_IO_DELAY:
 	case KVM_CAP_MP_STATE:
@@ -992,6 +991,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_IOMMU:
 		r = iommu_found();
 		break;
+	case KVM_CAP_CLOCKSOURCE:
+		r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC);
+		break;
 	default:
 		r = 0;
 		break;
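With this hunk KVM_CAP_CLOCKSOURCE is no longer unconditionally advertised; it now reflects whether the host TSC runs at a constant rate. A minimal user-space check via the standard KVM_CHECK_EXTENSION ioctl, with error handling kept minimal:

#include <fcntl.h>
#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	if (kvm < 0) {
		perror("open /dev/kvm");
		return 1;
	}

	int r = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_CLOCKSOURCE);
	printf("KVM_CAP_CLOCKSOURCE: %s\n", r > 0 ? "available" : "absent");

	close(kvm);
	return 0;
}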
@@ -4127,9 +4129,13 @@ static void kvm_free_vcpus(struct kvm *kvm)
 
 }
 
-void kvm_arch_destroy_vm(struct kvm *kvm)
+void kvm_arch_sync_events(struct kvm *kvm)
 {
 	kvm_free_all_assigned_devices(kvm);
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
 	kvm_iommu_unmap_guest(kvm);
 	kvm_free_pit(kvm);
 	kfree(kvm->arch.vpic);
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index af750ab973b6..f45d5e29a72e 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -134,25 +134,6 @@ int page_is_ram(unsigned long pagenr)
 	return 0;
 }
 
-int pagerange_is_ram(unsigned long start, unsigned long end)
-{
-	int ram_page = 0, not_rampage = 0;
-	unsigned long page_nr;
-
-	for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
-	     ++page_nr) {
-		if (page_is_ram(page_nr))
-			ram_page = 1;
-		else
-			not_rampage = 1;
-
-		if (ram_page == not_rampage)
-			return -1;
-	}
-
-	return ram_page;
-}
-
 /*
  * Fix up the linear direct mapping of the kernel to avoid cache attribute
  * conflicts.
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 84ba74820ad6..8ca0d8566fc8 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -575,7 +575,6 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
 		address = cpa->vaddr[cpa->curpage];
 	else
 		address = *cpa->vaddr;
-
 repeat:
 	kpte = lookup_address(address, &level);
 	if (!kpte)
@@ -812,6 +811,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 
 	vm_unmap_aliases();
 
+	/*
+	 * If we're called with lazy mmu updates enabled, the
+	 * in-memory pte state may be stale. Flush pending updates to
+	 * bring them up to date.
+	 */
+	arch_flush_lazy_mmu_mode();
+
 	cpa.vaddr = addr;
 	cpa.numpages = numpages;
 	cpa.mask_set = mask_set;
@@ -854,6 +860,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 	} else
 		cpa_flush_all(cache);
 
+	/*
+	 * If we've been called with lazy mmu updates enabled, then
+	 * make sure that everything gets flushed out before we
+	 * return.
+	 */
+	arch_flush_lazy_mmu_mode();
+
 out:
 	return ret;
 }
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 7b61036427df..aebbf67a79d0 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -211,6 +211,33 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type)
 static struct memtype *cached_entry;
 static u64 cached_start;
 
+static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
+{
+	int ram_page = 0, not_rampage = 0;
+	unsigned long page_nr;
+
+	for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
+	     ++page_nr) {
+		/*
+		 * For legacy reasons, physical address range in the legacy ISA
+		 * region is tracked as non-RAM. This will allow users of
+		 * /dev/mem to map portions of legacy ISA region, even when
+		 * some of those portions are listed(or not even listed) with
+		 * different e820 types(RAM/reserved/..)
+		 */
+		if (page_nr >= (ISA_END_ADDRESS >> PAGE_SHIFT) &&
+		    page_is_ram(page_nr))
+			ram_page = 1;
+		else
+			not_rampage = 1;
+
+		if (ram_page == not_rampage)
+			return -1;
+	}
+
+	return ram_page;
+}
+
 /*
  * For RAM pages, mark the pages as non WB memory type using
  * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or
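pat_pagerange_is_ram() returns a tri-state: 1 when every page in the range is RAM, 0 when none is, and -1 for a mixed range, which the callers below turn into -EINVAL. A freestanding sketch of that contract, with page_is_ram() faked by a threshold and the ISA carve-out omitted:

#include <stdio.h>

#define PAGE_SHIFT 12

static int page_is_ram(unsigned long page_nr)
{
	return page_nr >= 0x100;	/* pretend RAM starts at 1MB */
}

static int range_is_ram(unsigned long start, unsigned long end)
{
	int ram_page = 0, not_rampage = 0;
	unsigned long page_nr;

	for (page_nr = start >> PAGE_SHIFT; page_nr < (end >> PAGE_SHIFT);
	     ++page_nr) {
		if (page_is_ram(page_nr))
			ram_page = 1;
		else
			not_rampage = 1;
		if (ram_page == not_rampage)
			return -1;	/* mixed range */
	}
	return ram_page;
}

int main(void)
{
	printf("%d\n", range_is_ram(0x200000, 0x300000));	/* 1: all RAM */
	printf("%d\n", range_is_ram(0x0, 0x10000));		/* 0: no RAM */
	printf("%d\n", range_is_ram(0xf0000, 0x200000));	/* -1: mixed */
	return 0;
}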
@@ -336,20 +363,12 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 	if (new_type)
 		*new_type = actual_type;
 
-	/*
-	 * For legacy reasons, some parts of the physical address range in the
-	 * legacy 1MB region is treated as non-RAM (even when listed as RAM in
-	 * the e820 tables). So we will track the memory attributes of this
-	 * legacy 1MB region using the linear memtype_list always.
-	 */
-	if (end >= ISA_END_ADDRESS) {
-		is_range_ram = pagerange_is_ram(start, end);
-		if (is_range_ram == 1)
-			return reserve_ram_pages_type(start, end, req_type,
-						      new_type);
-		else if (is_range_ram < 0)
-			return -EINVAL;
-	}
+	is_range_ram = pat_pagerange_is_ram(start, end);
+	if (is_range_ram == 1)
+		return reserve_ram_pages_type(start, end, req_type,
+					      new_type);
+	else if (is_range_ram < 0)
+		return -EINVAL;
 
 	new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
 	if (!new)
@@ -446,19 +465,11 @@ int free_memtype(u64 start, u64 end)
 	if (is_ISA_range(start, end - 1))
 		return 0;
 
-	/*
-	 * For legacy reasons, some parts of the physical address range in the
-	 * legacy 1MB region is treated as non-RAM (even when listed as RAM in
-	 * the e820 tables). So we will track the memory attributes of this
-	 * legacy 1MB region using the linear memtype_list always.
-	 */
-	if (end >= ISA_END_ADDRESS) {
-		is_range_ram = pagerange_is_ram(start, end);
-		if (is_range_ram == 1)
-			return free_ram_pages_type(start, end);
-		else if (is_range_ram < 0)
-			return -EINVAL;
-	}
+	is_range_ram = pat_pagerange_is_ram(start, end);
+	if (is_range_ram == 1)
+		return free_ram_pages_type(start, end);
+	else if (is_range_ram < 0)
+		return -EINVAL;
 
 	spin_lock(&memtype_lock);
 	list_for_each_entry(entry, &memtype_list, nd) {
@@ -626,17 +637,13 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
 	unsigned long flags;
 	unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK);
 
-	is_ram = pagerange_is_ram(paddr, paddr + size);
+	is_ram = pat_pagerange_is_ram(paddr, paddr + size);
 
-	if (is_ram != 0) {
-		/*
-		 * For mapping RAM pages, drivers need to call
-		 * set_memory_[uc|wc|wb] directly, for reserve and free, before
-		 * setting up the PTE.
-		 */
-		WARN_ON_ONCE(1);
-		return 0;
-	}
+	/*
+	 * reserve_pfn_range() doesn't support RAM pages.
+	 */
+	if (is_ram != 0)
+		return -EINVAL;
 
 	ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
 	if (ret)
@@ -693,7 +700,7 @@ static void free_pfn_range(u64 paddr, unsigned long size)
 {
 	int is_ram;
 
-	is_ram = pagerange_is_ram(paddr, paddr + size);
+	is_ram = pat_pagerange_is_ram(paddr, paddr + size);
 	if (is_ram == 0)
 		free_memtype(paddr, paddr + size);
 }