Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/include/asm/io.h                  |  1
-rw-r--r--  arch/x86/kernel/apb_timer.c                |  2
-rw-r--r--  arch/x86/kernel/apic/io_apic.c             |  3
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.c  |  3
-rw-r--r--  arch/x86/kernel/cpu/intel.c                | 21
-rw-r--r--  arch/x86/kernel/cpu/vmware.c               |  2
-rw-r--r--  arch/x86/kernel/kprobes.c                  | 27
-rw-r--r--  arch/x86/kernel/process_64.c               |  4
-rw-r--r--  arch/x86/kvm/mmu.c                         | 11
-rw-r--r--  arch/x86/kvm/svm.c                         | 25
-rw-r--r--  arch/x86/kvm/vmx.c                         | 24
-rw-r--r--  arch/x86/kvm/x86.c                         | 48
-rw-r--r--  arch/x86/lib/rwsem_64.S                    |  2
-rw-r--r--  arch/x86/mm/ioremap.c                      | 14
-rw-r--r--  arch/x86/mm/pgtable_32.c                   |  2
-rw-r--r--  arch/x86/pci/acpi.c                        | 84
-rw-r--r--  arch/x86/pci/i386.c                        |  3
17 files changed, 157 insertions, 119 deletions
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index a1dcfa3ab17d..30a3e9776123 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -347,6 +347,7 @@ extern void __iomem *early_ioremap(resource_size_t phys_addr,
 extern void __iomem *early_memremap(resource_size_t phys_addr,
                                     unsigned long size);
 extern void early_iounmap(void __iomem *addr, unsigned long size);
+extern void fixup_early_ioremap(void);
 
 #define IO_SPACE_LIMIT 0xffff
 
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index ff469e470059..a35347501d36 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -429,7 +429,7 @@ static int apbt_cpuhp_notify(struct notifier_block *n,
 
 static __init int apbt_late_init(void)
 {
-        if (disable_apbt_percpu)
+        if (disable_apbt_percpu || !apb_timer_block_enabled)
                 return 0;
         /* This notifier should be called after workqueue is ready */
         hotcpu_notifier(apbt_cpuhp_notify, -20);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 127b8718abfb..eb2789c3f721 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2545,6 +2545,9 @@ void irq_force_complete_move(int irq)
         struct irq_desc *desc = irq_to_desc(irq);
         struct irq_cfg *cfg = desc->chip_data;
 
+        if (!cfg)
+                return;
+
         __irq_complete_move(&desc, cfg->vector);
 }
 #else
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index d360b56e9825..b6215b9798e2 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -929,7 +929,8 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data,
                 powernow_table[i].index = index;
 
                 /* Frequency may be rounded for these */
-                if (boot_cpu_data.x86 == 0x10 || boot_cpu_data.x86 == 0x11) {
+                if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10)
+                        || boot_cpu_data.x86 == 0x11) {
                         powernow_table[i].frequency =
                                 freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7);
                 } else
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 7e1cca13af35..1366c7cfd483 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -47,6 +47,27 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
             (c->x86 == 0x6 && c->x86_model >= 0x0e))
                 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 
+        /*
+         * Atom erratum AAE44/AAF40/AAG38/AAH41:
+         *
+         * A race condition between speculative fetches and invalidating
+         * a large page. This is worked around in microcode, but we
+         * need the microcode to have already been loaded... so if it is
+         * not, recommend a BIOS update and disable large pages.
+         */
+        if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2) {
+                u32 ucode, junk;
+
+                wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+                sync_core();
+                rdmsr(MSR_IA32_UCODE_REV, junk, ucode);
+
+                if (ucode < 0x20e) {
+                        printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n");
+                        clear_cpu_cap(c, X86_FEATURE_PSE);
+                }
+        }
+
 #ifdef CONFIG_X86_64
         set_cpu_cap(c, X86_FEATURE_SYSENTER32);
 #else
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 1cbed97b59cf..dfdb4dba2320 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -22,6 +22,7 @@
  */
 
 #include <linux/dmi.h>
+#include <linux/module.h>
 #include <asm/div64.h>
 #include <asm/vmware.h>
 #include <asm/x86_init.h>
@@ -101,6 +102,7 @@ int vmware_platform(void)
 
         return 0;
 }
+EXPORT_SYMBOL(vmware_platform);
 
 /*
  * VMware hypervisor takes care of exporting a reliable TSC to the guest.
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index b43bbaebe2c0..1658efdfb4e5 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -534,20 +534,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
         struct kprobe_ctlblk *kcb;
 
         addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
-        if (*addr != BREAKPOINT_INSTRUCTION) {
-                /*
-                 * The breakpoint instruction was removed right
-                 * after we hit it. Another cpu has removed
-                 * either a probepoint or a debugger breakpoint
-                 * at this address. In either case, no further
-                 * handling of this interrupt is appropriate.
-                 * Back up over the (now missing) int3 and run
-                 * the original instruction.
-                 */
-                regs->ip = (unsigned long)addr;
-                return 1;
-        }
-
         /*
          * We don't want to be preempted for the entire
          * duration of kprobe processing. We conditionally
@@ -579,6 +565,19 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
                         setup_singlestep(p, regs, kcb, 0);
                         return 1;
                 }
+        } else if (*addr != BREAKPOINT_INSTRUCTION) {
+                /*
+                 * The breakpoint instruction was removed right
+                 * after we hit it. Another cpu has removed
+                 * either a probepoint or a debugger breakpoint
+                 * at this address. In either case, no further
+                 * handling of this interrupt is appropriate.
+                 * Back up over the (now missing) int3 and run
+                 * the original instruction.
+                 */
+                regs->ip = (unsigned long)addr;
+                preempt_enable_no_resched();
+                return 1;
         } else if (kprobe_running()) {
                 p = __get_cpu_var(current_kprobe);
                 if (p->break_handler && p->break_handler(p, regs)) {
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index dc9690b4c4cc..17cb3295cbf7 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -276,12 +276,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 
         set_tsk_thread_flag(p, TIF_FORK);
 
-        p->thread.fs = me->thread.fs;
-        p->thread.gs = me->thread.gs;
         p->thread.io_bitmap_ptr = NULL;
 
         savesegment(gs, p->thread.gsindex);
+        p->thread.gs = p->thread.gsindex ? 0 : me->thread.gs;
         savesegment(fs, p->thread.fsindex);
+        p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs;
         savesegment(es, p->thread.es);
         savesegment(ds, p->thread.ds);
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 48aeee8eefb0..19a8906bcaa2 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1490,8 +1490,8 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
                 for_each_sp(pages, sp, parents, i) {
                         kvm_mmu_zap_page(kvm, sp);
                         mmu_pages_clear_parents(&parents);
+                        zapped++;
                 }
-                zapped += pages.nr;
                 kvm_mmu_pages_init(parent, &parents, &pages);
         }
 
@@ -1542,14 +1542,16 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
          */
 
         if (used_pages > kvm_nr_mmu_pages) {
-                while (used_pages > kvm_nr_mmu_pages) {
+                while (used_pages > kvm_nr_mmu_pages &&
+                        !list_empty(&kvm->arch.active_mmu_pages)) {
                         struct kvm_mmu_page *page;
 
                         page = container_of(kvm->arch.active_mmu_pages.prev,
                                             struct kvm_mmu_page, link);
-                        kvm_mmu_zap_page(kvm, page);
+                        used_pages -= kvm_mmu_zap_page(kvm, page);
                         used_pages--;
                 }
+                kvm_nr_mmu_pages = used_pages;
                 kvm->arch.n_free_mmu_pages = 0;
         }
         else
@@ -1596,7 +1598,8 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
                     && !sp->role.invalid) {
                         pgprintk("%s: zap %lx %x\n",
                                  __func__, gfn, sp->role.word);
-                        kvm_mmu_zap_page(kvm, sp);
+                        if (kvm_mmu_zap_page(kvm, sp))
+                                nn = bucket->first;
                 }
         }
 }
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 445c59411ed0..2ba58206812a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -706,29 +706,28 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
         if (err)
                 goto free_svm;
 
+        err = -ENOMEM;
         page = alloc_page(GFP_KERNEL);
-        if (!page) {
-                err = -ENOMEM;
+        if (!page)
                 goto uninit;
-        }
 
-        err = -ENOMEM;
         msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
         if (!msrpm_pages)
-                goto uninit;
+                goto free_page1;
 
         nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
         if (!nested_msrpm_pages)
-                goto uninit;
-
-        svm->msrpm = page_address(msrpm_pages);
-        svm_vcpu_init_msrpm(svm->msrpm);
+                goto free_page2;
 
         hsave_page = alloc_page(GFP_KERNEL);
         if (!hsave_page)
-                goto uninit;
+                goto free_page3;
+
         svm->nested.hsave = page_address(hsave_page);
 
+        svm->msrpm = page_address(msrpm_pages);
+        svm_vcpu_init_msrpm(svm->msrpm);
+
         svm->nested.msrpm = page_address(nested_msrpm_pages);
 
         svm->vmcb = page_address(page);
@@ -744,6 +743,12 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 
         return &svm->vcpu;
 
+free_page3:
+        __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
+free_page2:
+        __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
+free_page1:
+        __free_page(page);
 uninit:
         kvm_vcpu_uninit(&svm->vcpu);
 free_svm:
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 686492ed3079..bc933cfb4e66 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -77,6 +77,8 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
 #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
 #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
 
+#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
+
 /*
  * These 2 parameters are used to config the controls for Pause-Loop Exiting:
  * ple_gap: upper bound on the amount of time between two successive
@@ -131,7 +133,7 @@ struct vcpu_vmx {
         } host_state;
         struct {
                 int vm86_active;
-                u8 save_iopl;
+                ulong save_rflags;
                 struct kvm_save_segment {
                         u16 selector;
                         unsigned long base;
@@ -818,18 +820,23 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
 
 static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
 {
-        unsigned long rflags;
+        unsigned long rflags, save_rflags;
 
         rflags = vmcs_readl(GUEST_RFLAGS);
-        if (to_vmx(vcpu)->rmode.vm86_active)
-                rflags &= ~(unsigned long)(X86_EFLAGS_IOPL | X86_EFLAGS_VM);
+        if (to_vmx(vcpu)->rmode.vm86_active) {
+                rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
+                save_rflags = to_vmx(vcpu)->rmode.save_rflags;
+                rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
+        }
         return rflags;
 }
 
 static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 {
-        if (to_vmx(vcpu)->rmode.vm86_active)
+        if (to_vmx(vcpu)->rmode.vm86_active) {
+                to_vmx(vcpu)->rmode.save_rflags = rflags;
                 rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
+        }
         vmcs_writel(GUEST_RFLAGS, rflags);
 }
 
@@ -1483,8 +1490,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
         vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar);
 
         flags = vmcs_readl(GUEST_RFLAGS);
-        flags &= ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM);
-        flags |= (vmx->rmode.save_iopl << IOPL_SHIFT);
+        flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
+        flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
         vmcs_writel(GUEST_RFLAGS, flags);
 
         vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
@@ -1557,8 +1564,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
         vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
 
         flags = vmcs_readl(GUEST_RFLAGS);
-        vmx->rmode.save_iopl
-                = (flags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
+        vmx->rmode.save_rflags = flags;
 
         flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 24cd0ee896e9..3c4ca98ad27f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -433,8 +433,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 
 #ifdef CONFIG_X86_64
         if (cr0 & 0xffffffff00000000UL) {
-                printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
-                       cr0, kvm_read_cr0(vcpu));
                 kvm_inject_gp(vcpu, 0);
                 return;
         }
@@ -443,14 +441,11 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
         cr0 &= ~CR0_RESERVED_BITS;
 
         if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
-                printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
                 kvm_inject_gp(vcpu, 0);
                 return;
         }
 
         if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
-                printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
-                       "and a clear PE flag\n");
                 kvm_inject_gp(vcpu, 0);
                 return;
         }
@@ -461,15 +456,11 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
                         int cs_db, cs_l;
 
                         if (!is_pae(vcpu)) {
-                                printk(KERN_DEBUG "set_cr0: #GP, start paging "
-                                       "in long mode while PAE is disabled\n");
                                 kvm_inject_gp(vcpu, 0);
                                 return;
                         }
                         kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
                         if (cs_l) {
-                                printk(KERN_DEBUG "set_cr0: #GP, start paging "
-                                       "in long mode while CS.L == 1\n");
                                 kvm_inject_gp(vcpu, 0);
                                 return;
 
@@ -477,8 +468,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
                         }
                 } else
 #endif
         if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
-                printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
-                       "reserved bits\n");
                 kvm_inject_gp(vcpu, 0);
                 return;
         }
@@ -505,28 +494,23 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
         unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
 
         if (cr4 & CR4_RESERVED_BITS) {
-                printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
                 kvm_inject_gp(vcpu, 0);
                 return;
         }
 
         if (is_long_mode(vcpu)) {
                 if (!(cr4 & X86_CR4_PAE)) {
-                        printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
-                               "in long mode\n");
                         kvm_inject_gp(vcpu, 0);
                         return;
                 }
         } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
                    && ((cr4 ^ old_cr4) & pdptr_bits)
                    && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
-                printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
                 kvm_inject_gp(vcpu, 0);
                 return;
         }
 
         if (cr4 & X86_CR4_VMXE) {
-                printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
                 kvm_inject_gp(vcpu, 0);
                 return;
         }
@@ -547,21 +531,16 @@ void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 
         if (is_long_mode(vcpu)) {
                 if (cr3 & CR3_L_MODE_RESERVED_BITS) {
-                        printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
                         kvm_inject_gp(vcpu, 0);
                         return;
                 }
         } else {
                 if (is_pae(vcpu)) {
                         if (cr3 & CR3_PAE_RESERVED_BITS) {
-                                printk(KERN_DEBUG
-                                       "set_cr3: #GP, reserved bits\n");
                                 kvm_inject_gp(vcpu, 0);
                                 return;
                         }
                         if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
-                                printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
-                                       "reserved bits\n");
                                 kvm_inject_gp(vcpu, 0);
                                 return;
                         }
@@ -593,7 +572,6 @@ EXPORT_SYMBOL_GPL(kvm_set_cr3);
 void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
 {
         if (cr8 & CR8_RESERVED_BITS) {
-                printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
                 kvm_inject_gp(vcpu, 0);
                 return;
         }
@@ -649,15 +627,12 @@ static u32 emulated_msrs[] = {
 static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
 {
         if (efer & efer_reserved_bits) {
-                printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
-                       efer);
                 kvm_inject_gp(vcpu, 0);
                 return;
         }
 
         if (is_paging(vcpu)
             && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) {
-                printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
                 kvm_inject_gp(vcpu, 0);
                 return;
         }
@@ -667,7 +642,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
 
                 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
                 if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) {
-                        printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n");
                         kvm_inject_gp(vcpu, 0);
                         return;
                 }
@@ -678,7 +652,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
 
                 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
                 if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) {
-                        printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n");
                         kvm_inject_gp(vcpu, 0);
                         return;
                 }
@@ -967,9 +940,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
         if (msr >= MSR_IA32_MC0_CTL &&
             msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
                 u32 offset = msr - MSR_IA32_MC0_CTL;
-                /* only 0 or all 1s can be written to IA32_MCi_CTL */
+                /* only 0 or all 1s can be written to IA32_MCi_CTL
+                 * some Linux kernels though clear bit 10 in bank 4 to
+                 * workaround a BIOS/GART TBL issue on AMD K8s, ignore
+                 * this to avoid an uncatched #GP in the guest
+                 */
                 if ((offset & 0x3) == 0 &&
-                    data != 0 && data != ~(u64)0)
+                    data != 0 && (data | (1 << 10)) != ~(u64)0)
                         return -1;
                 vcpu->arch.mce_banks[offset] = data;
                 break;
@@ -2635,8 +2612,9 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                                       struct kvm_dirty_log *log)
 {
-        int r, n, i;
+        int r, i;
         struct kvm_memory_slot *memslot;
+        unsigned long n;
         unsigned long is_dirty = 0;
         unsigned long *dirty_bitmap = NULL;
 
@@ -2651,7 +2629,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
         if (!memslot->dirty_bitmap)
                 goto out;
 
-        n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+        n = kvm_dirty_bitmap_bytes(memslot);
 
         r = -ENOMEM;
         dirty_bitmap = vmalloc(n);
@@ -4483,7 +4461,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                 kvm_set_cr8(vcpu, kvm_run->cr8);
 
         if (vcpu->arch.pio.cur_count) {
+                vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
                 r = complete_pio(vcpu);
+                srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
                 if (r)
                         goto out;
         }
@@ -5146,6 +5126,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
         int ret = 0;
         u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
         u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
+        u32 desc_limit;
 
         old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL);
 
@@ -5168,7 +5149,10 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
                 }
         }
 
-        if (!nseg_desc.p || get_desc_limit(&nseg_desc) < 0x67) {
+        desc_limit = get_desc_limit(&nseg_desc);
+        if (!nseg_desc.p ||
+            ((desc_limit < 0x67 && (nseg_desc.type & 8)) ||
+             desc_limit < 0x2b)) {
                 kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
                 return 1;
         }
diff --git a/arch/x86/lib/rwsem_64.S b/arch/x86/lib/rwsem_64.S
index 15acecf0d7aa..41fcf00e49df 100644
--- a/arch/x86/lib/rwsem_64.S
+++ b/arch/x86/lib/rwsem_64.S
@@ -60,7 +60,7 @@ ENTRY(call_rwsem_down_write_failed)
         ENDPROC(call_rwsem_down_write_failed)
 
 ENTRY(call_rwsem_wake)
-        decw %dx /* do nothing if still outstanding active readers */
+        decl %edx /* do nothing if still outstanding active readers */
         jnz 1f
         save_common_regs
         movq %rax,%rdi
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 5eb1ba74a3a9..12e4d2d3c110 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -448,6 +448,20 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx)
 static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata;
 static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata;
 
+void __init fixup_early_ioremap(void)
+{
+        int i;
+
+        for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
+                if (prev_map[i]) {
+                        WARN_ON(1);
+                        break;
+                }
+        }
+
+        early_ioremap_init();
+}
+
 static int __init check_early_ioremap_leak(void)
 {
         int count = 0;
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 1a8faf09afed..792854003ed3 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -18,6 +18,7 @@
 #include <asm/e820.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
+#include <asm/io.h>
 
 unsigned int __VMALLOC_RESERVE = 128 << 20;
 
@@ -128,6 +129,7 @@ static int __init parse_reservetop(char *arg)
 
         address = memparse(arg, &arg);
         reserve_top_address(address);
+        fixup_early_ioremap();
         return 0;
 }
 early_param("reservetop", parse_reservetop);
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index c7b1ebfb7da7..31930fd30ea9 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -66,14 +66,44 @@ resource_to_addr(struct acpi_resource *resource,
66 struct acpi_resource_address64 *addr) 66 struct acpi_resource_address64 *addr)
67{ 67{
68 acpi_status status; 68 acpi_status status;
69 69 struct acpi_resource_memory24 *memory24;
70 status = acpi_resource_to_address64(resource, addr); 70 struct acpi_resource_memory32 *memory32;
71 if (ACPI_SUCCESS(status) && 71 struct acpi_resource_fixed_memory32 *fixed_memory32;
72 (addr->resource_type == ACPI_MEMORY_RANGE || 72
73 addr->resource_type == ACPI_IO_RANGE) && 73 memset(addr, 0, sizeof(*addr));
74 addr->address_length > 0 && 74 switch (resource->type) {
75 addr->producer_consumer == ACPI_PRODUCER) { 75 case ACPI_RESOURCE_TYPE_MEMORY24:
76 memory24 = &resource->data.memory24;
77 addr->resource_type = ACPI_MEMORY_RANGE;
78 addr->minimum = memory24->minimum;
79 addr->address_length = memory24->address_length;
80 addr->maximum = addr->minimum + addr->address_length - 1;
81 return AE_OK;
82 case ACPI_RESOURCE_TYPE_MEMORY32:
83 memory32 = &resource->data.memory32;
84 addr->resource_type = ACPI_MEMORY_RANGE;
85 addr->minimum = memory32->minimum;
86 addr->address_length = memory32->address_length;
87 addr->maximum = addr->minimum + addr->address_length - 1;
76 return AE_OK; 88 return AE_OK;
89 case ACPI_RESOURCE_TYPE_FIXED_MEMORY32:
90 fixed_memory32 = &resource->data.fixed_memory32;
91 addr->resource_type = ACPI_MEMORY_RANGE;
92 addr->minimum = fixed_memory32->address;
93 addr->address_length = fixed_memory32->address_length;
94 addr->maximum = addr->minimum + addr->address_length - 1;
95 return AE_OK;
96 case ACPI_RESOURCE_TYPE_ADDRESS16:
97 case ACPI_RESOURCE_TYPE_ADDRESS32:
98 case ACPI_RESOURCE_TYPE_ADDRESS64:
99 status = acpi_resource_to_address64(resource, addr);
100 if (ACPI_SUCCESS(status) &&
101 (addr->resource_type == ACPI_MEMORY_RANGE ||
102 addr->resource_type == ACPI_IO_RANGE) &&
103 addr->address_length > 0) {
104 return AE_OK;
105 }
106 break;
77 } 107 }
78 return AE_ERROR; 108 return AE_ERROR;
79} 109}
@@ -91,30 +121,6 @@ count_resource(struct acpi_resource *acpi_res, void *data)
         return AE_OK;
 }
 
-static void
-align_resource(struct acpi_device *bridge, struct resource *res)
-{
-        int align = (res->flags & IORESOURCE_MEM) ? 16 : 4;
-
-        /*
-         * Host bridge windows are not BARs, but the decoders on the PCI side
-         * that claim this address space have starting alignment and length
-         * constraints, so fix any obvious BIOS goofs.
-         */
-        if (!IS_ALIGNED(res->start, align)) {
-                dev_printk(KERN_DEBUG, &bridge->dev,
-                           "host bridge window %pR invalid; "
-                           "aligning start to %d-byte boundary\n", res, align);
-                res->start &= ~(align - 1);
-        }
-        if (!IS_ALIGNED(res->end + 1, align)) {
-                dev_printk(KERN_DEBUG, &bridge->dev,
-                           "host bridge window %pR invalid; "
-                           "aligning end to %d-byte boundary\n", res, align);
-                res->end = ALIGN(res->end, align) - 1;
-        }
-}
-
 static acpi_status
 setup_resource(struct acpi_resource *acpi_res, void *data)
 {
@@ -124,7 +130,7 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
         acpi_status status;
         unsigned long flags;
         struct resource *root, *conflict;
-        u64 start, end, max_len;
+        u64 start, end;
 
         status = resource_to_addr(acpi_res, &addr);
         if (!ACPI_SUCCESS(status))
@@ -141,19 +147,8 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
         } else
                 return AE_OK;
 
-        max_len = addr.maximum - addr.minimum + 1;
-        if (addr.address_length > max_len) {
-                dev_printk(KERN_DEBUG, &info->bridge->dev,
-                           "host bridge window length %#llx doesn't fit in "
-                           "%#llx-%#llx, trimming\n",
-                           (unsigned long long) addr.address_length,
-                           (unsigned long long) addr.minimum,
-                           (unsigned long long) addr.maximum);
-                addr.address_length = max_len;
-        }
-
         start = addr.minimum + addr.translation_offset;
-        end = start + addr.address_length - 1;
+        end = addr.maximum + addr.translation_offset;
 
         res = &info->res[info->res_num];
         res->name = info->name;
@@ -161,7 +156,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
         res->start = start;
         res->end = end;
         res->child = NULL;
-        align_resource(info->bridge, res);
 
         if (!pci_use_crs) {
                 dev_printk(KERN_DEBUG, &info->bridge->dev,
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 46fd43f79103..97da2ba9344b 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -72,6 +72,9 @@ pcibios_align_resource(void *data, const struct resource *res,
                         return start;
                 if (start & 0x300)
                         start = (start + 0x3ff) & ~0x3ff;
+        } else if (res->flags & IORESOURCE_MEM) {
+                if (start < BIOS_END)
+                        start = BIOS_END;
         }
         return start;
 }