aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-02-19 03:00:35 -0500
committerIngo Molnar <mingo@elte.hu>2009-02-19 03:00:35 -0500
commit72c26c9a26ea7f2f3d14f162c2ebb07805f724ea (patch)
treebf1b4bc0b69f96c79474f9edb9cf0e811c95f2dc /arch/x86
parent37bd824a35a60abc73e5fa8816bd5f50c913d69b (diff)
parentba95fd47d177d46743ad94055908d22840370e06 (diff)
Merge branch 'linus' into tracing/blktrace
Conflicts: block/blktrace.c Semantic merge: kernel/trace/blktrace.c Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/kvm.h7
-rw-r--r--arch/x86/include/asm/mmzone_32.h2
-rw-r--r--arch/x86/include/asm/mmzone_64.h2
-rw-r--r--arch/x86/include/asm/page.h1
-rw-r--r--arch/x86/include/asm/paravirt.h17
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c12
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/olpc.c2
-rw-r--r--arch/x86/kernel/paravirt.c26
-rw-r--r--arch/x86/kernel/ptrace.c16
-rw-r--r--arch/x86/kernel/traps.c10
-rw-r--r--arch/x86/kvm/i8254.c2
-rw-r--r--arch/x86/kvm/irq.c7
-rw-r--r--arch/x86/kvm/irq.h1
-rw-r--r--arch/x86/kvm/lapic.c66
-rw-r--r--arch/x86/kvm/lapic.h2
-rw-r--r--arch/x86/kvm/mmu.c9
-rw-r--r--arch/x86/kvm/svm.c1
-rw-r--r--arch/x86/kvm/vmx.c5
-rw-r--r--arch/x86/kvm/x86.c10
-rw-r--r--arch/x86/mm/ioremap.c19
-rw-r--r--arch/x86/mm/numa_64.c2
-rw-r--r--arch/x86/mm/pageattr.c15
-rw-r--r--arch/x86/mm/pat.c83
24 files changed, 157 insertions, 162 deletions
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index d2e3bf3608af..886c9402ec45 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -9,6 +9,13 @@
9#include <linux/types.h> 9#include <linux/types.h>
10#include <linux/ioctl.h> 10#include <linux/ioctl.h>
11 11
12/* Select x86 specific features in <linux/kvm.h> */
13#define __KVM_HAVE_PIT
14#define __KVM_HAVE_IOAPIC
15#define __KVM_HAVE_DEVICE_ASSIGNMENT
16#define __KVM_HAVE_MSI
17#define __KVM_HAVE_USER_NMI
18
12/* Architectural interrupt line count. */ 19/* Architectural interrupt line count. */
13#define KVM_NR_INTERRUPTS 256 20#define KVM_NR_INTERRUPTS 256
14 21
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index 07f1af494ca5..105fb90a0635 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -32,8 +32,6 @@ static inline void get_memcfg_numa(void)
32 get_memcfg_numa_flat(); 32 get_memcfg_numa_flat();
33} 33}
34 34
35extern int early_pfn_to_nid(unsigned long pfn);
36
37extern void resume_map_numa_kva(pgd_t *pgd); 35extern void resume_map_numa_kva(pgd_t *pgd);
38 36
39#else /* !CONFIG_NUMA */ 37#else /* !CONFIG_NUMA */
diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h
index a5b3817d4b9e..a29f48c2a322 100644
--- a/arch/x86/include/asm/mmzone_64.h
+++ b/arch/x86/include/asm/mmzone_64.h
@@ -40,8 +40,6 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
40#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ 40#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \
41 NODE_DATA(nid)->node_spanned_pages) 41 NODE_DATA(nid)->node_spanned_pages)
42 42
43extern int early_pfn_to_nid(unsigned long pfn);
44
45#ifdef CONFIG_NUMA_EMU 43#ifdef CONFIG_NUMA_EMU
46#define FAKE_NODE_MIN_SIZE (64 * 1024 * 1024) 44#define FAKE_NODE_MIN_SIZE (64 * 1024 * 1024)
47#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) 45#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL))
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index e9873a2e8695..776579119a00 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -57,7 +57,6 @@ typedef struct { pgdval_t pgd; } pgd_t;
57typedef struct { pgprotval_t pgprot; } pgprot_t; 57typedef struct { pgprotval_t pgprot; } pgprot_t;
58 58
59extern int page_is_ram(unsigned long pagenr); 59extern int page_is_ram(unsigned long pagenr);
60extern int pagerange_is_ram(unsigned long start, unsigned long end);
61extern int devmem_is_allowed(unsigned long pagenr); 60extern int devmem_is_allowed(unsigned long pagenr);
62extern void map_devmem(unsigned long pfn, unsigned long size, 61extern void map_devmem(unsigned long pfn, unsigned long size,
63 pgprot_t vma_prot); 62 pgprot_t vma_prot);
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index c09a14127584..e299287e8e33 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -1352,14 +1352,7 @@ static inline void arch_leave_lazy_cpu_mode(void)
1352 PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); 1352 PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave);
1353} 1353}
1354 1354
1355static inline void arch_flush_lazy_cpu_mode(void) 1355void arch_flush_lazy_cpu_mode(void);
1356{
1357 if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)) {
1358 arch_leave_lazy_cpu_mode();
1359 arch_enter_lazy_cpu_mode();
1360 }
1361}
1362
1363 1356
1364#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE 1357#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
1365static inline void arch_enter_lazy_mmu_mode(void) 1358static inline void arch_enter_lazy_mmu_mode(void)
@@ -1372,13 +1365,7 @@ static inline void arch_leave_lazy_mmu_mode(void)
1372 PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave); 1365 PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave);
1373} 1366}
1374 1367
1375static inline void arch_flush_lazy_mmu_mode(void) 1368void arch_flush_lazy_mmu_mode(void);
1376{
1377 if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU)) {
1378 arch_leave_lazy_mmu_mode();
1379 arch_enter_lazy_mmu_mode();
1380 }
1381}
1382 1369
1383static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, 1370static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
1384 unsigned long phys, pgprot_t flags) 1371 unsigned long phys, pgprot_t flags)
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index fb039cd345d8..6428aa17b40e 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1157,8 +1157,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1157 data->cpu = pol->cpu; 1157 data->cpu = pol->cpu;
1158 data->currpstate = HW_PSTATE_INVALID; 1158 data->currpstate = HW_PSTATE_INVALID;
1159 1159
1160 rc = powernow_k8_cpu_init_acpi(data); 1160 if (powernow_k8_cpu_init_acpi(data)) {
1161 if (rc) {
1162 /* 1161 /*
1163 * Use the PSB BIOS structure. This is only availabe on 1162 * Use the PSB BIOS structure. This is only availabe on
1164 * an UP version, and is deprecated by AMD. 1163 * an UP version, and is deprecated by AMD.
@@ -1176,17 +1175,20 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1176 "ACPI maintainers and complain to your BIOS " 1175 "ACPI maintainers and complain to your BIOS "
1177 "vendor.\n"); 1176 "vendor.\n");
1178#endif 1177#endif
1179 goto err_out; 1178 kfree(data);
1179 return -ENODEV;
1180 } 1180 }
1181 if (pol->cpu != 0) { 1181 if (pol->cpu != 0) {
1182 printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " 1182 printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
1183 "CPU other than CPU0. Complain to your BIOS " 1183 "CPU other than CPU0. Complain to your BIOS "
1184 "vendor.\n"); 1184 "vendor.\n");
1185 goto err_out; 1185 kfree(data);
1186 return -ENODEV;
1186 } 1187 }
1187 rc = find_psb_table(data); 1188 rc = find_psb_table(data);
1188 if (rc) { 1189 if (rc) {
1189 goto err_out; 1190 kfree(data);
1191 return -ENODEV;
1190 } 1192 }
1191 /* Take a crude guess here. 1193 /* Take a crude guess here.
1192 * That guess was in microseconds, so multiply with 1000 */ 1194 * That guess was in microseconds, so multiply with 1000 */
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 388254f69a2a..a00545fe5cdd 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -269,6 +269,8 @@ static void hpet_set_mode(enum clock_event_mode mode,
269 now = hpet_readl(HPET_COUNTER); 269 now = hpet_readl(HPET_COUNTER);
270 cmp = now + (unsigned long) delta; 270 cmp = now + (unsigned long) delta;
271 cfg = hpet_readl(HPET_Tn_CFG(timer)); 271 cfg = hpet_readl(HPET_Tn_CFG(timer));
272 /* Make sure we use edge triggered interrupts */
273 cfg &= ~HPET_TN_LEVEL;
272 cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | 274 cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
273 HPET_TN_SETVAL | HPET_TN_32BIT; 275 HPET_TN_SETVAL | HPET_TN_32BIT;
274 hpet_writel(cfg, HPET_Tn_CFG(timer)); 276 hpet_writel(cfg, HPET_Tn_CFG(timer));
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c
index 7a13fac63a1f..4006c522adc7 100644
--- a/arch/x86/kernel/olpc.c
+++ b/arch/x86/kernel/olpc.c
@@ -203,7 +203,7 @@ static void __init platform_detect(void)
203static void __init platform_detect(void) 203static void __init platform_detect(void)
204{ 204{
205 /* stopgap until OFW support is added to the kernel */ 205 /* stopgap until OFW support is added to the kernel */
206 olpc_platform_info.boardrev = 0xc2; 206 olpc_platform_info.boardrev = olpc_board(0xc2);
207} 207}
208#endif 208#endif
209 209
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e4c8fb608873..c6520a4e85d4 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -268,6 +268,32 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
268 return __get_cpu_var(paravirt_lazy_mode); 268 return __get_cpu_var(paravirt_lazy_mode);
269} 269}
270 270
271void arch_flush_lazy_mmu_mode(void)
272{
273 preempt_disable();
274
275 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
276 WARN_ON(preempt_count() == 1);
277 arch_leave_lazy_mmu_mode();
278 arch_enter_lazy_mmu_mode();
279 }
280
281 preempt_enable();
282}
283
284void arch_flush_lazy_cpu_mode(void)
285{
286 preempt_disable();
287
288 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
289 WARN_ON(preempt_count() == 1);
290 arch_leave_lazy_cpu_mode();
291 arch_enter_lazy_cpu_mode();
292 }
293
294 preempt_enable();
295}
296
271struct pv_info pv_info = { 297struct pv_info pv_info = {
272 .name = "bare hardware", 298 .name = "bare hardware",
273 .paravirt_enabled = 0, 299 .paravirt_enabled = 0,
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0a5df5f82fb9..5a4c23d89892 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -810,12 +810,16 @@ static void ptrace_bts_untrace(struct task_struct *child)
810 810
811static void ptrace_bts_detach(struct task_struct *child) 811static void ptrace_bts_detach(struct task_struct *child)
812{ 812{
813 if (unlikely(child->bts)) { 813 /*
814 ds_release_bts(child->bts); 814 * Ptrace_detach() races with ptrace_untrace() in case
815 child->bts = NULL; 815 * the child dies and is reaped by another thread.
816 816 *
817 ptrace_bts_free_buffer(child); 817 * We only do the memory accounting at this point and
818 } 818 * leave the buffer deallocation and the bts tracer
819 * release to ptrace_bts_untrace() which will be called
820 * later on with tasklist_lock held.
821 */
822 release_locked_buffer(child->bts_buffer, child->bts_size);
819} 823}
820#else 824#else
821static inline void ptrace_bts_fork(struct task_struct *tsk) {} 825static inline void ptrace_bts_fork(struct task_struct *tsk) {}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 7932338d7cb3..a9e7548e1790 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -99,6 +99,12 @@ static inline void preempt_conditional_sti(struct pt_regs *regs)
99 local_irq_enable(); 99 local_irq_enable();
100} 100}
101 101
102static inline void conditional_cli(struct pt_regs *regs)
103{
104 if (regs->flags & X86_EFLAGS_IF)
105 local_irq_disable();
106}
107
102static inline void preempt_conditional_cli(struct pt_regs *regs) 108static inline void preempt_conditional_cli(struct pt_regs *regs)
103{ 109{
104 if (regs->flags & X86_EFLAGS_IF) 110 if (regs->flags & X86_EFLAGS_IF)
@@ -626,8 +632,10 @@ clear_dr7:
626 632
627#ifdef CONFIG_X86_32 633#ifdef CONFIG_X86_32
628debug_vm86: 634debug_vm86:
635 /* reenable preemption: handle_vm86_trap() might sleep */
636 dec_preempt_count();
629 handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); 637 handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
630 preempt_conditional_cli(regs); 638 conditional_cli(regs);
631 return; 639 return;
632#endif 640#endif
633 641
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index e665d1c623ca..72bd275a9b5c 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -207,7 +207,7 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps)
207 hrtimer_add_expires_ns(&pt->timer, pt->period); 207 hrtimer_add_expires_ns(&pt->timer, pt->period);
208 pt->scheduled = hrtimer_get_expires_ns(&pt->timer); 208 pt->scheduled = hrtimer_get_expires_ns(&pt->timer);
209 if (pt->period) 209 if (pt->period)
210 ps->channels[0].count_load_time = hrtimer_get_expires(&pt->timer); 210 ps->channels[0].count_load_time = ktime_get();
211 211
212 return (pt->period == 0 ? 0 : 1); 212 return (pt->period == 0 ? 0 : 1);
213} 213}
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index c019b8edcdb7..cf17ed52f6fb 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -87,13 +87,6 @@ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
87} 87}
88EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); 88EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
89 89
90void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
91{
92 kvm_apic_timer_intr_post(vcpu, vec);
93 /* TODO: PIT, RTC etc. */
94}
95EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
96
97void __kvm_migrate_timers(struct kvm_vcpu *vcpu) 90void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
98{ 91{
99 __kvm_migrate_apic_timer(vcpu); 92 __kvm_migrate_apic_timer(vcpu);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 2bf32a03ceec..82579ee538d0 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -89,7 +89,6 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
89 89
90void kvm_pic_reset(struct kvm_kpic_state *s); 90void kvm_pic_reset(struct kvm_kpic_state *s);
91 91
92void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
93void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); 92void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
94void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); 93void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
95void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu); 94void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index afac68c0815c..f0b67f2cdd69 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -35,6 +35,12 @@
35#include "kvm_cache_regs.h" 35#include "kvm_cache_regs.h"
36#include "irq.h" 36#include "irq.h"
37 37
38#ifndef CONFIG_X86_64
39#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
40#else
41#define mod_64(x, y) ((x) % (y))
42#endif
43
38#define PRId64 "d" 44#define PRId64 "d"
39#define PRIx64 "llx" 45#define PRIx64 "llx"
40#define PRIu64 "u" 46#define PRIu64 "u"
@@ -511,52 +517,22 @@ static void apic_send_ipi(struct kvm_lapic *apic)
511 517
512static u32 apic_get_tmcct(struct kvm_lapic *apic) 518static u32 apic_get_tmcct(struct kvm_lapic *apic)
513{ 519{
514 u64 counter_passed; 520 ktime_t remaining;
515 ktime_t passed, now; 521 s64 ns;
516 u32 tmcct; 522 u32 tmcct;
517 523
518 ASSERT(apic != NULL); 524 ASSERT(apic != NULL);
519 525
520 now = apic->timer.dev.base->get_time();
521 tmcct = apic_get_reg(apic, APIC_TMICT);
522
523 /* if initial count is 0, current count should also be 0 */ 526 /* if initial count is 0, current count should also be 0 */
524 if (tmcct == 0) 527 if (apic_get_reg(apic, APIC_TMICT) == 0)
525 return 0; 528 return 0;
526 529
527 if (unlikely(ktime_to_ns(now) <= 530 remaining = hrtimer_expires_remaining(&apic->timer.dev);
528 ktime_to_ns(apic->timer.last_update))) { 531 if (ktime_to_ns(remaining) < 0)
529 /* Wrap around */ 532 remaining = ktime_set(0, 0);
530 passed = ktime_add(( { 533
531 (ktime_t) { 534 ns = mod_64(ktime_to_ns(remaining), apic->timer.period);
532 .tv64 = KTIME_MAX - 535 tmcct = div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
533 (apic->timer.last_update).tv64}; }
534 ), now);
535 apic_debug("time elapsed\n");
536 } else
537 passed = ktime_sub(now, apic->timer.last_update);
538
539 counter_passed = div64_u64(ktime_to_ns(passed),
540 (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
541
542 if (counter_passed > tmcct) {
543 if (unlikely(!apic_lvtt_period(apic))) {
544 /* one-shot timers stick at 0 until reset */
545 tmcct = 0;
546 } else {
547 /*
548 * periodic timers reset to APIC_TMICT when they
549 * hit 0. The while loop simulates this happening N
550 * times. (counter_passed %= tmcct) would also work,
551 * but might be slower or not work on 32-bit??
552 */
553 while (counter_passed > tmcct)
554 counter_passed -= tmcct;
555 tmcct -= counter_passed;
556 }
557 } else {
558 tmcct -= counter_passed;
559 }
560 536
561 return tmcct; 537 return tmcct;
562} 538}
@@ -653,8 +629,6 @@ static void start_apic_timer(struct kvm_lapic *apic)
653{ 629{
654 ktime_t now = apic->timer.dev.base->get_time(); 630 ktime_t now = apic->timer.dev.base->get_time();
655 631
656 apic->timer.last_update = now;
657
658 apic->timer.period = apic_get_reg(apic, APIC_TMICT) * 632 apic->timer.period = apic_get_reg(apic, APIC_TMICT) *
659 APIC_BUS_CYCLE_NS * apic->timer.divide_count; 633 APIC_BUS_CYCLE_NS * apic->timer.divide_count;
660 atomic_set(&apic->timer.pending, 0); 634 atomic_set(&apic->timer.pending, 0);
@@ -1110,16 +1084,6 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
1110 } 1084 }
1111} 1085}
1112 1086
1113void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
1114{
1115 struct kvm_lapic *apic = vcpu->arch.apic;
1116
1117 if (apic && apic_lvt_vector(apic, APIC_LVTT) == vec)
1118 apic->timer.last_update = ktime_add_ns(
1119 apic->timer.last_update,
1120 apic->timer.period);
1121}
1122
1123int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) 1087int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
1124{ 1088{
1125 int vector = kvm_apic_has_interrupt(vcpu); 1089 int vector = kvm_apic_has_interrupt(vcpu);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 81858881287e..45ab6ee71209 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -12,7 +12,6 @@ struct kvm_lapic {
12 atomic_t pending; 12 atomic_t pending;
13 s64 period; /* unit: ns */ 13 s64 period; /* unit: ns */
14 u32 divide_count; 14 u32 divide_count;
15 ktime_t last_update;
16 struct hrtimer dev; 15 struct hrtimer dev;
17 } timer; 16 } timer;
18 struct kvm_vcpu *vcpu; 17 struct kvm_vcpu *vcpu;
@@ -42,7 +41,6 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
42void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu); 41void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
43int kvm_lapic_enabled(struct kvm_vcpu *vcpu); 42int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
44int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); 43int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
45void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
46 44
47void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); 45void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
48void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); 46void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 83f11c7474a1..2d4477c71473 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1698,8 +1698,13 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1698 if (largepage) 1698 if (largepage)
1699 spte |= PT_PAGE_SIZE_MASK; 1699 spte |= PT_PAGE_SIZE_MASK;
1700 if (mt_mask) { 1700 if (mt_mask) {
1701 mt_mask = get_memory_type(vcpu, gfn) << 1701 if (!kvm_is_mmio_pfn(pfn)) {
1702 kvm_x86_ops->get_mt_mask_shift(); 1702 mt_mask = get_memory_type(vcpu, gfn) <<
1703 kvm_x86_ops->get_mt_mask_shift();
1704 mt_mask |= VMX_EPT_IGMT_BIT;
1705 } else
1706 mt_mask = MTRR_TYPE_UNCACHABLE <<
1707 kvm_x86_ops->get_mt_mask_shift();
1703 spte |= mt_mask; 1708 spte |= mt_mask;
1704 } 1709 }
1705 1710
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1452851ae258..a9e769e4e251 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1600,7 +1600,6 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
1600 /* Okay, we can deliver the interrupt: grab it and update PIC state. */ 1600 /* Okay, we can deliver the interrupt: grab it and update PIC state. */
1601 intr_vector = kvm_cpu_get_interrupt(vcpu); 1601 intr_vector = kvm_cpu_get_interrupt(vcpu);
1602 svm_inject_irq(svm, intr_vector); 1602 svm_inject_irq(svm, intr_vector);
1603 kvm_timer_intr_post(vcpu, intr_vector);
1604out: 1603out:
1605 update_cr8_intercept(vcpu); 1604 update_cr8_intercept(vcpu);
1606} 1605}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6259d7467648..7611af576829 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -903,6 +903,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
903 data = vmcs_readl(GUEST_SYSENTER_ESP); 903 data = vmcs_readl(GUEST_SYSENTER_ESP);
904 break; 904 break;
905 default: 905 default:
906 vmx_load_host_state(to_vmx(vcpu));
906 msr = find_msr_entry(to_vmx(vcpu), msr_index); 907 msr = find_msr_entry(to_vmx(vcpu), msr_index);
907 if (msr) { 908 if (msr) {
908 data = msr->data; 909 data = msr->data;
@@ -3285,7 +3286,6 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
3285 } 3286 }
3286 if (vcpu->arch.interrupt.pending) { 3287 if (vcpu->arch.interrupt.pending) {
3287 vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); 3288 vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
3288 kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
3289 if (kvm_cpu_has_interrupt(vcpu)) 3289 if (kvm_cpu_has_interrupt(vcpu))
3290 enable_irq_window(vcpu); 3290 enable_irq_window(vcpu);
3291 } 3291 }
@@ -3687,8 +3687,7 @@ static int __init vmx_init(void)
3687 if (vm_need_ept()) { 3687 if (vm_need_ept()) {
3688 bypass_guest_pf = 0; 3688 bypass_guest_pf = 0;
3689 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | 3689 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
3690 VMX_EPT_WRITABLE_MASK | 3690 VMX_EPT_WRITABLE_MASK);
3691 VMX_EPT_IGMT_BIT);
3692 kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, 3691 kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
3693 VMX_EPT_EXECUTABLE_MASK, 3692 VMX_EPT_EXECUTABLE_MASK,
3694 VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); 3693 VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cc17546a2406..758b7a155ae9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -967,7 +967,6 @@ int kvm_dev_ioctl_check_extension(long ext)
967 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: 967 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
968 case KVM_CAP_SET_TSS_ADDR: 968 case KVM_CAP_SET_TSS_ADDR:
969 case KVM_CAP_EXT_CPUID: 969 case KVM_CAP_EXT_CPUID:
970 case KVM_CAP_CLOCKSOURCE:
971 case KVM_CAP_PIT: 970 case KVM_CAP_PIT:
972 case KVM_CAP_NOP_IO_DELAY: 971 case KVM_CAP_NOP_IO_DELAY:
973 case KVM_CAP_MP_STATE: 972 case KVM_CAP_MP_STATE:
@@ -992,6 +991,9 @@ int kvm_dev_ioctl_check_extension(long ext)
992 case KVM_CAP_IOMMU: 991 case KVM_CAP_IOMMU:
993 r = iommu_found(); 992 r = iommu_found();
994 break; 993 break;
994 case KVM_CAP_CLOCKSOURCE:
995 r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC);
996 break;
995 default: 997 default:
996 r = 0; 998 r = 0;
997 break; 999 break;
@@ -4127,9 +4129,13 @@ static void kvm_free_vcpus(struct kvm *kvm)
4127 4129
4128} 4130}
4129 4131
4130void kvm_arch_destroy_vm(struct kvm *kvm) 4132void kvm_arch_sync_events(struct kvm *kvm)
4131{ 4133{
4132 kvm_free_all_assigned_devices(kvm); 4134 kvm_free_all_assigned_devices(kvm);
4135}
4136
4137void kvm_arch_destroy_vm(struct kvm *kvm)
4138{
4133 kvm_iommu_unmap_guest(kvm); 4139 kvm_iommu_unmap_guest(kvm);
4134 kvm_free_pit(kvm); 4140 kvm_free_pit(kvm);
4135 kfree(kvm->arch.vpic); 4141 kfree(kvm->arch.vpic);
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index af750ab973b6..f45d5e29a72e 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -134,25 +134,6 @@ int page_is_ram(unsigned long pagenr)
134 return 0; 134 return 0;
135} 135}
136 136
137int pagerange_is_ram(unsigned long start, unsigned long end)
138{
139 int ram_page = 0, not_rampage = 0;
140 unsigned long page_nr;
141
142 for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
143 ++page_nr) {
144 if (page_is_ram(page_nr))
145 ram_page = 1;
146 else
147 not_rampage = 1;
148
149 if (ram_page == not_rampage)
150 return -1;
151 }
152
153 return ram_page;
154}
155
156/* 137/*
157 * Fix up the linear direct mapping of the kernel to avoid cache attribute 138 * Fix up the linear direct mapping of the kernel to avoid cache attribute
158 * conflicts. 139 * conflicts.
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 71a14f89f89e..f3516da035d1 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -145,7 +145,7 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes,
145 return shift; 145 return shift;
146} 146}
147 147
148int early_pfn_to_nid(unsigned long pfn) 148int __meminit __early_pfn_to_nid(unsigned long pfn)
149{ 149{
150 return phys_to_nid(pfn << PAGE_SHIFT); 150 return phys_to_nid(pfn << PAGE_SHIFT);
151} 151}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 84ba74820ad6..8ca0d8566fc8 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -575,7 +575,6 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
575 address = cpa->vaddr[cpa->curpage]; 575 address = cpa->vaddr[cpa->curpage];
576 else 576 else
577 address = *cpa->vaddr; 577 address = *cpa->vaddr;
578
579repeat: 578repeat:
580 kpte = lookup_address(address, &level); 579 kpte = lookup_address(address, &level);
581 if (!kpte) 580 if (!kpte)
@@ -812,6 +811,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
812 811
813 vm_unmap_aliases(); 812 vm_unmap_aliases();
814 813
814 /*
815 * If we're called with lazy mmu updates enabled, the
816 * in-memory pte state may be stale. Flush pending updates to
817 * bring them up to date.
818 */
819 arch_flush_lazy_mmu_mode();
820
815 cpa.vaddr = addr; 821 cpa.vaddr = addr;
816 cpa.numpages = numpages; 822 cpa.numpages = numpages;
817 cpa.mask_set = mask_set; 823 cpa.mask_set = mask_set;
@@ -854,6 +860,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
854 } else 860 } else
855 cpa_flush_all(cache); 861 cpa_flush_all(cache);
856 862
863 /*
864 * If we've been called with lazy mmu updates enabled, then
865 * make sure that everything gets flushed out before we
866 * return.
867 */
868 arch_flush_lazy_mmu_mode();
869
857out: 870out:
858 return ret; 871 return ret;
859} 872}
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 7b61036427df..aebbf67a79d0 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -211,6 +211,33 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type)
211static struct memtype *cached_entry; 211static struct memtype *cached_entry;
212static u64 cached_start; 212static u64 cached_start;
213 213
214static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
215{
216 int ram_page = 0, not_rampage = 0;
217 unsigned long page_nr;
218
219 for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
220 ++page_nr) {
221 /*
222 * For legacy reasons, physical address range in the legacy ISA
223 * region is tracked as non-RAM. This will allow users of
224 * /dev/mem to map portions of legacy ISA region, even when
225 * some of those portions are listed(or not even listed) with
226 * different e820 types(RAM/reserved/..)
227 */
228 if (page_nr >= (ISA_END_ADDRESS >> PAGE_SHIFT) &&
229 page_is_ram(page_nr))
230 ram_page = 1;
231 else
232 not_rampage = 1;
233
234 if (ram_page == not_rampage)
235 return -1;
236 }
237
238 return ram_page;
239}
240
214/* 241/*
215 * For RAM pages, mark the pages as non WB memory type using 242 * For RAM pages, mark the pages as non WB memory type using
216 * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or 243 * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or
@@ -336,20 +363,12 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
336 if (new_type) 363 if (new_type)
337 *new_type = actual_type; 364 *new_type = actual_type;
338 365
339 /* 366 is_range_ram = pat_pagerange_is_ram(start, end);
340 * For legacy reasons, some parts of the physical address range in the 367 if (is_range_ram == 1)
341 * legacy 1MB region is treated as non-RAM (even when listed as RAM in 368 return reserve_ram_pages_type(start, end, req_type,
342 * the e820 tables). So we will track the memory attributes of this 369 new_type);
343 * legacy 1MB region using the linear memtype_list always. 370 else if (is_range_ram < 0)
344 */ 371 return -EINVAL;
345 if (end >= ISA_END_ADDRESS) {
346 is_range_ram = pagerange_is_ram(start, end);
347 if (is_range_ram == 1)
348 return reserve_ram_pages_type(start, end, req_type,
349 new_type);
350 else if (is_range_ram < 0)
351 return -EINVAL;
352 }
353 372
354 new = kmalloc(sizeof(struct memtype), GFP_KERNEL); 373 new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
355 if (!new) 374 if (!new)
@@ -446,19 +465,11 @@ int free_memtype(u64 start, u64 end)
446 if (is_ISA_range(start, end - 1)) 465 if (is_ISA_range(start, end - 1))
447 return 0; 466 return 0;
448 467
449 /* 468 is_range_ram = pat_pagerange_is_ram(start, end);
450 * For legacy reasons, some parts of the physical address range in the 469 if (is_range_ram == 1)
451 * legacy 1MB region is treated as non-RAM (even when listed as RAM in 470 return free_ram_pages_type(start, end);
452 * the e820 tables). So we will track the memory attributes of this 471 else if (is_range_ram < 0)
453 * legacy 1MB region using the linear memtype_list always. 472 return -EINVAL;
454 */
455 if (end >= ISA_END_ADDRESS) {
456 is_range_ram = pagerange_is_ram(start, end);
457 if (is_range_ram == 1)
458 return free_ram_pages_type(start, end);
459 else if (is_range_ram < 0)
460 return -EINVAL;
461 }
462 473
463 spin_lock(&memtype_lock); 474 spin_lock(&memtype_lock);
464 list_for_each_entry(entry, &memtype_list, nd) { 475 list_for_each_entry(entry, &memtype_list, nd) {
@@ -626,17 +637,13 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
626 unsigned long flags; 637 unsigned long flags;
627 unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK); 638 unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK);
628 639
629 is_ram = pagerange_is_ram(paddr, paddr + size); 640 is_ram = pat_pagerange_is_ram(paddr, paddr + size);
630 641
631 if (is_ram != 0) { 642 /*
632 /* 643 * reserve_pfn_range() doesn't support RAM pages.
633 * For mapping RAM pages, drivers need to call 644 */
634 * set_memory_[uc|wc|wb] directly, for reserve and free, before 645 if (is_ram != 0)
635 * setting up the PTE. 646 return -EINVAL;
636 */
637 WARN_ON_ONCE(1);
638 return 0;
639 }
640 647
641 ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); 648 ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
642 if (ret) 649 if (ret)
@@ -693,7 +700,7 @@ static void free_pfn_range(u64 paddr, unsigned long size)
693{ 700{
694 int is_ram; 701 int is_ram;
695 702
696 is_ram = pagerange_is_ram(paddr, paddr + size); 703 is_ram = pat_pagerange_is_ram(paddr, paddr + size);
697 if (is_ram == 0) 704 if (is_ram == 0)
698 free_memtype(paddr, paddr + size); 705 free_memtype(paddr, paddr + size);
699} 706}