aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/boot/compressed/misc.c2
-rw-r--r--arch/x86/include/asm/e820.h3
-rw-r--r--arch/x86/include/asm/kvm_host.h2
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/apic/apic.c8
-rw-r--r--arch/x86/kernel/apic/io_apic.c4
-rw-r--r--arch/x86/kernel/apic/probe_64.c7
-rw-r--r--arch/x86/kernel/head_32.S16
-rw-r--r--arch/x86/kernel/hpet.c26
-rw-r--r--arch/x86/kernel/resource.c48
-rw-r--r--arch/x86/kernel/setup.c1
-rw-r--r--arch/x86/kernel/xsave.c3
-rw-r--r--arch/x86/kvm/svm.c4
-rw-r--r--arch/x86/kvm/vmx.c5
-rw-r--r--arch/x86/kvm/x86.c11
-rw-r--r--arch/x86/kvm/x86.h5
-rw-r--r--arch/x86/lguest/boot.c16
-rw-r--r--arch/x86/lguest/i386_head.S105
-rw-r--r--arch/x86/pci/i386.c18
-rw-r--r--arch/x86/vdso/Makefile4
20 files changed, 225 insertions, 64 deletions
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 23f315c9f215..325c05294fc4 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -355,7 +355,7 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
355 if (heap > 0x3fffffffffffUL) 355 if (heap > 0x3fffffffffffUL)
356 error("Destination address too large"); 356 error("Destination address too large");
357#else 357#else
358 if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff)) 358 if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff))
359 error("Destination address too large"); 359 error("Destination address too large");
360#endif 360#endif
361#ifndef CONFIG_RELOCATABLE 361#ifndef CONFIG_RELOCATABLE
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 5be1542fbfaf..e99d55d74df5 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -72,6 +72,9 @@ struct e820map {
72#define BIOS_BEGIN 0x000a0000 72#define BIOS_BEGIN 0x000a0000
73#define BIOS_END 0x00100000 73#define BIOS_END 0x00100000
74 74
75#define BIOS_ROM_BASE 0xffe00000
76#define BIOS_ROM_END 0xffffffff
77
75#ifdef __KERNEL__ 78#ifdef __KERNEL__
76/* see comment in arch/x86/kernel/e820.c */ 79/* see comment in arch/x86/kernel/e820.c */
77extern struct e820map e820; 80extern struct e820map e820;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9e6fe391094e..f702f82aa1eb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -79,7 +79,7 @@
79#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT) 79#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
80#define KVM_MIN_FREE_MMU_PAGES 5 80#define KVM_MIN_FREE_MMU_PAGES 5
81#define KVM_REFILL_PAGES 25 81#define KVM_REFILL_PAGES 25
82#define KVM_MAX_CPUID_ENTRIES 40 82#define KVM_MAX_CPUID_ENTRIES 80
83#define KVM_NR_FIXED_MTRR_REGION 88 83#define KVM_NR_FIXED_MTRR_REGION 88
84#define KVM_NR_VAR_MTRR 8 84#define KVM_NR_VAR_MTRR 8
85 85
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 9e13763b6092..1e994754d323 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -45,6 +45,7 @@ obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
45obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o 45obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
46obj-y += tsc.o io_delay.o rtc.o 46obj-y += tsc.o io_delay.o rtc.o
47obj-y += pci-iommu_table.o 47obj-y += pci-iommu_table.o
48obj-y += resource.o
48 49
49obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o 50obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
50obj-y += process.o 51obj-y += process.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 3f838d537392..78218135b48e 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1389,6 +1389,14 @@ void __cpuinit end_local_APIC_setup(void)
1389 1389
1390 setup_apic_nmi_watchdog(NULL); 1390 setup_apic_nmi_watchdog(NULL);
1391 apic_pm_activate(); 1391 apic_pm_activate();
1392
1393 /*
1394 * Now that local APIC setup is completed for BP, configure the fault
1395 * handling for interrupt remapping.
1396 */
1397 if (!smp_processor_id() && intr_remapping_enabled)
1398 enable_drhd_fault_handling();
1399
1392} 1400}
1393 1401
1394#ifdef CONFIG_X86_X2APIC 1402#ifdef CONFIG_X86_X2APIC
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 7cc0a721f628..fadcd743a74f 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2430,13 +2430,12 @@ static void ack_apic_level(struct irq_data *data)
2430{ 2430{
2431 struct irq_cfg *cfg = data->chip_data; 2431 struct irq_cfg *cfg = data->chip_data;
2432 int i, do_unmask_irq = 0, irq = data->irq; 2432 int i, do_unmask_irq = 0, irq = data->irq;
2433 struct irq_desc *desc = irq_to_desc(irq);
2434 unsigned long v; 2433 unsigned long v;
2435 2434
2436 irq_complete_move(cfg); 2435 irq_complete_move(cfg);
2437#ifdef CONFIG_GENERIC_PENDING_IRQ 2436#ifdef CONFIG_GENERIC_PENDING_IRQ
2438 /* If we are moving the irq we need to mask it */ 2437 /* If we are moving the irq we need to mask it */
2439 if (unlikely(desc->status & IRQ_MOVE_PENDING)) { 2438 if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
2440 do_unmask_irq = 1; 2439 do_unmask_irq = 1;
2441 mask_ioapic(cfg); 2440 mask_ioapic(cfg);
2442 } 2441 }
@@ -3413,6 +3412,7 @@ dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
3413 msg.data |= MSI_DATA_VECTOR(cfg->vector); 3412 msg.data |= MSI_DATA_VECTOR(cfg->vector);
3414 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; 3413 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
3415 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3414 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3415 msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest);
3416 3416
3417 dmar_msi_write(irq, &msg); 3417 dmar_msi_write(irq, &msg);
3418 3418
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index f9e4e6a54073..d8c4a6feb286 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -79,13 +79,6 @@ void __init default_setup_apic_routing(void)
79 /* need to update phys_pkg_id */ 79 /* need to update phys_pkg_id */
80 apic->phys_pkg_id = apicid_phys_pkg_id; 80 apic->phys_pkg_id = apicid_phys_pkg_id;
81 } 81 }
82
83 /*
84 * Now that apic routing model is selected, configure the
85 * fault handling for intr remapping.
86 */
87 if (intr_remapping_enabled)
88 enable_drhd_fault_handling();
89} 82}
90 83
91/* Same for both flat and physical. */ 84/* Same for both flat and physical. */
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index bcece91dd311..c0dbd9ac24f0 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -60,16 +60,18 @@
60#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) 60#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
61#endif 61#endif
62 62
63/* Number of possible pages in the lowmem region */
64LOWMEM_PAGES = (((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT)
65
63/* Enough space to fit pagetables for the low memory linear map */ 66/* Enough space to fit pagetables for the low memory linear map */
64MAPPING_BEYOND_END = \ 67MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT
65 PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT
66 68
67/* 69/*
68 * Worst-case size of the kernel mapping we need to make: 70 * Worst-case size of the kernel mapping we need to make:
69 * the worst-case size of the kernel itself, plus the extra we need 71 * a relocatable kernel can live anywhere in lowmem, so we need to be able
70 * to map for the linear map. 72 * to map all of lowmem.
71 */ 73 */
72KERNEL_PAGES = (KERNEL_IMAGE_SIZE + MAPPING_BEYOND_END)>>PAGE_SHIFT 74KERNEL_PAGES = LOWMEM_PAGES
73 75
74INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm 76INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm
75RESERVE_BRK(pagetables, INIT_MAP_SIZE) 77RESERVE_BRK(pagetables, INIT_MAP_SIZE)
@@ -620,13 +622,13 @@ ENTRY(initial_code)
620__PAGE_ALIGNED_BSS 622__PAGE_ALIGNED_BSS
621 .align PAGE_SIZE_asm 623 .align PAGE_SIZE_asm
622#ifdef CONFIG_X86_PAE 624#ifdef CONFIG_X86_PAE
623initial_pg_pmd: 625ENTRY(initial_pg_pmd)
624 .fill 1024*KPMDS,4,0 626 .fill 1024*KPMDS,4,0
625#else 627#else
626ENTRY(initial_page_table) 628ENTRY(initial_page_table)
627 .fill 1024,4,0 629 .fill 1024,4,0
628#endif 630#endif
629initial_pg_fixmap: 631ENTRY(initial_pg_fixmap)
630 .fill 1024,4,0 632 .fill 1024,4,0
631ENTRY(empty_zero_page) 633ENTRY(empty_zero_page)
632 .fill 4096,1,0 634 .fill 4096,1,0
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ae03cab4352e..4ff5968f12d2 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -27,6 +27,9 @@
27#define HPET_DEV_FSB_CAP 0x1000 27#define HPET_DEV_FSB_CAP 0x1000
28#define HPET_DEV_PERI_CAP 0x2000 28#define HPET_DEV_PERI_CAP 0x2000
29 29
30#define HPET_MIN_CYCLES 128
31#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1))
32
30#define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt) 33#define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt)
31 34
32/* 35/*
@@ -299,8 +302,9 @@ static void hpet_legacy_clockevent_register(void)
299 /* Calculate the min / max delta */ 302 /* Calculate the min / max delta */
300 hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, 303 hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
301 &hpet_clockevent); 304 &hpet_clockevent);
302 /* 5 usec minimum reprogramming delta. */ 305 /* Setup minimum reprogramming delta. */
303 hpet_clockevent.min_delta_ns = 5000; 306 hpet_clockevent.min_delta_ns = clockevent_delta2ns(HPET_MIN_PROG_DELTA,
307 &hpet_clockevent);
304 308
305 /* 309 /*
306 * Start hpet with the boot cpu mask and make it 310 * Start hpet with the boot cpu mask and make it
@@ -393,22 +397,24 @@ static int hpet_next_event(unsigned long delta,
393 * the wraparound into account) nor a simple count down event 397 * the wraparound into account) nor a simple count down event
394 * mode. Further the write to the comparator register is 398 * mode. Further the write to the comparator register is
395 * delayed internally up to two HPET clock cycles in certain 399 * delayed internally up to two HPET clock cycles in certain
396 * chipsets (ATI, ICH9,10). We worked around that by reading 400 * chipsets (ATI, ICH9,10). Some newer AMD chipsets have even
397 * back the compare register, but that required another 401 * longer delays. We worked around that by reading back the
398 * workaround for ICH9,10 chips where the first readout after 402 * compare register, but that required another workaround for
399 * write can return the old stale value. We already have a 403 * ICH9,10 chips where the first readout after write can
400 * minimum delta of 5us enforced, but a NMI or SMI hitting 404 * return the old stale value. We already had a minimum
405 * programming delta of 5us enforced, but a NMI or SMI hitting
401 * between the counter readout and the comparator write can 406 * between the counter readout and the comparator write can
402 * move us behind that point easily. Now instead of reading 407 * move us behind that point easily. Now instead of reading
403 * the compare register back several times, we make the ETIME 408 * the compare register back several times, we make the ETIME
404 * decision based on the following: Return ETIME if the 409 * decision based on the following: Return ETIME if the
405 * counter value after the write is less than 8 HPET cycles 410 * counter value after the write is less than HPET_MIN_CYCLES
406 * away from the event or if the counter is already ahead of 411 * away from the event or if the counter is already ahead of
407 * the event. 412 * the event. The minimum programming delta for the generic
413 * clockevents code is set to 1.5 * HPET_MIN_CYCLES.
408 */ 414 */
409 res = (s32)(cnt - hpet_readl(HPET_COUNTER)); 415 res = (s32)(cnt - hpet_readl(HPET_COUNTER));
410 416
411 return res < 8 ? -ETIME : 0; 417 return res < HPET_MIN_CYCLES ? -ETIME : 0;
412} 418}
413 419
414static void hpet_legacy_set_mode(enum clock_event_mode mode, 420static void hpet_legacy_set_mode(enum clock_event_mode mode,
diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c
new file mode 100644
index 000000000000..2a26819bb6a8
--- /dev/null
+++ b/arch/x86/kernel/resource.c
@@ -0,0 +1,48 @@
1#include <linux/ioport.h>
2#include <asm/e820.h>
3
4static void resource_clip(struct resource *res, resource_size_t start,
5 resource_size_t end)
6{
7 resource_size_t low = 0, high = 0;
8
9 if (res->end < start || res->start > end)
10 return; /* no conflict */
11
12 if (res->start < start)
13 low = start - res->start;
14
15 if (res->end > end)
16 high = res->end - end;
17
18 /* Keep the area above or below the conflict, whichever is larger */
19 if (low > high)
20 res->end = start - 1;
21 else
22 res->start = end + 1;
23}
24
25static void remove_e820_regions(struct resource *avail)
26{
27 int i;
28 struct e820entry *entry;
29
30 for (i = 0; i < e820.nr_map; i++) {
31 entry = &e820.map[i];
32
33 resource_clip(avail, entry->addr,
34 entry->addr + entry->size - 1);
35 }
36}
37
38void arch_remove_reservations(struct resource *avail)
39{
40 /* Trim out BIOS areas (low 1MB and high 2MB) and E820 regions */
41 if (avail->flags & IORESOURCE_MEM) {
42 if (avail->start < BIOS_END)
43 avail->start = BIOS_END;
44 resource_clip(avail, BIOS_ROM_BASE, BIOS_ROM_END);
45
46 remove_e820_regions(avail);
47 }
48}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 21c6746338af..85268f8eadf6 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -769,7 +769,6 @@ void __init setup_arch(char **cmdline_p)
769 769
770 x86_init.oem.arch_setup(); 770 x86_init.oem.arch_setup();
771 771
772 resource_alloc_from_bottom = 0;
773 iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1; 772 iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
774 setup_memory_map(); 773 setup_memory_map();
775 parse_setup_data(); 774 parse_setup_data();
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 9c253bd65e24..547128546cc3 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -394,7 +394,8 @@ static void __init setup_xstate_init(void)
394 * Setup init_xstate_buf to represent the init state of 394 * Setup init_xstate_buf to represent the init state of
395 * all the features managed by the xsave 395 * all the features managed by the xsave
396 */ 396 */
397 init_xstate_buf = alloc_bootmem(xstate_size); 397 init_xstate_buf = alloc_bootmem_align(xstate_size,
398 __alignof__(struct xsave_struct));
398 init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; 399 init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
399 400
400 clts(); 401 clts();
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1ca12298ffc7..b81a9b7c2ca4 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3494,6 +3494,10 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu)
3494static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) 3494static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
3495{ 3495{
3496 switch (func) { 3496 switch (func) {
3497 case 0x00000001:
3498 /* Mask out xsave bit as long as it is not supported by SVM */
3499 entry->ecx &= ~(bit(X86_FEATURE_XSAVE));
3500 break;
3497 case 0x80000001: 3501 case 0x80000001:
3498 if (nested) 3502 if (nested)
3499 entry->ecx |= (1 << 2); /* Set SVM bit */ 3503 entry->ecx |= (1 << 2); /* Set SVM bit */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ff21fdda0c53..81fcbe9515c5 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4227,11 +4227,6 @@ static int vmx_get_lpage_level(void)
4227 return PT_PDPE_LEVEL; 4227 return PT_PDPE_LEVEL;
4228} 4228}
4229 4229
4230static inline u32 bit(int bitno)
4231{
4232 return 1 << (bitno & 31);
4233}
4234
4235static void vmx_cpuid_update(struct kvm_vcpu *vcpu) 4230static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
4236{ 4231{
4237 struct kvm_cpuid_entry2 *best; 4232 struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cdac9e592aa5..b989e1f1e5d3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -155,11 +155,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
155 155
156u64 __read_mostly host_xcr0; 156u64 __read_mostly host_xcr0;
157 157
158static inline u32 bit(int bitno)
159{
160 return 1 << (bitno & 31);
161}
162
163static void kvm_on_user_return(struct user_return_notifier *urn) 158static void kvm_on_user_return(struct user_return_notifier *urn)
164{ 159{
165 unsigned slot; 160 unsigned slot;
@@ -4569,9 +4564,11 @@ static void kvm_timer_init(void)
4569#ifdef CONFIG_CPU_FREQ 4564#ifdef CONFIG_CPU_FREQ
4570 struct cpufreq_policy policy; 4565 struct cpufreq_policy policy;
4571 memset(&policy, 0, sizeof(policy)); 4566 memset(&policy, 0, sizeof(policy));
4572 cpufreq_get_policy(&policy, get_cpu()); 4567 cpu = get_cpu();
4568 cpufreq_get_policy(&policy, cpu);
4573 if (policy.cpuinfo.max_freq) 4569 if (policy.cpuinfo.max_freq)
4574 max_tsc_khz = policy.cpuinfo.max_freq; 4570 max_tsc_khz = policy.cpuinfo.max_freq;
4571 put_cpu();
4575#endif 4572#endif
4576 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, 4573 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
4577 CPUFREQ_TRANSITION_NOTIFIER); 4574 CPUFREQ_TRANSITION_NOTIFIER);
@@ -5522,6 +5519,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
5522 5519
5523 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; 5520 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
5524 kvm_x86_ops->set_cr4(vcpu, sregs->cr4); 5521 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
5522 if (sregs->cr4 & X86_CR4_OSXSAVE)
5523 update_cpuid(vcpu);
5525 if (!is_long_mode(vcpu) && is_pae(vcpu)) { 5524 if (!is_long_mode(vcpu) && is_pae(vcpu)) {
5526 load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3); 5525 load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3);
5527 mmu_reset_needed = 1; 5526 mmu_reset_needed = 1;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 2cea414489f3..c600da830ce0 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -70,6 +70,11 @@ static inline int is_paging(struct kvm_vcpu *vcpu)
70 return kvm_read_cr0_bits(vcpu, X86_CR0_PG); 70 return kvm_read_cr0_bits(vcpu, X86_CR0_PG);
71} 71}
72 72
73static inline u32 bit(int bitno)
74{
75 return 1 << (bitno & 31);
76}
77
73void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); 78void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
74void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); 79void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
75int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq); 80int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq);
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 73b1e1a1f489..4996cf5f73a0 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -531,7 +531,10 @@ static void lguest_write_cr3(unsigned long cr3)
531{ 531{
532 lguest_data.pgdir = cr3; 532 lguest_data.pgdir = cr3;
533 lazy_hcall1(LHCALL_NEW_PGTABLE, cr3); 533 lazy_hcall1(LHCALL_NEW_PGTABLE, cr3);
534 cr3_changed = true; 534
535 /* These two page tables are simple, linear, and used during boot */
536 if (cr3 != __pa(swapper_pg_dir) && cr3 != __pa(initial_page_table))
537 cr3_changed = true;
535} 538}
536 539
537static unsigned long lguest_read_cr3(void) 540static unsigned long lguest_read_cr3(void)
@@ -703,9 +706,9 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
703 * to forget all of them. Fortunately, this is very rare. 706 * to forget all of them. Fortunately, this is very rare.
704 * 707 *
705 * ... except in early boot when the kernel sets up the initial pagetables, 708 * ... except in early boot when the kernel sets up the initial pagetables,
706 * which makes booting astonishingly slow: 1.83 seconds! So we don't even tell 709 * which makes booting astonishingly slow: 48 seconds! So we don't even tell
707 * the Host anything changed until we've done the first page table switch, 710 * the Host anything changed until we've done the first real page table switch,
708 * which brings boot back to 0.25 seconds. 711 * which brings boot back to 4.3 seconds.
709 */ 712 */
710static void lguest_set_pte(pte_t *ptep, pte_t pteval) 713static void lguest_set_pte(pte_t *ptep, pte_t pteval)
711{ 714{
@@ -1002,7 +1005,7 @@ static void lguest_time_init(void)
1002 clockevents_register_device(&lguest_clockevent); 1005 clockevents_register_device(&lguest_clockevent);
1003 1006
1004 /* Finally, we unblock the timer interrupt. */ 1007 /* Finally, we unblock the timer interrupt. */
1005 enable_lguest_irq(0); 1008 clear_bit(0, lguest_data.blocked_interrupts);
1006} 1009}
1007 1010
1008/* 1011/*
@@ -1349,9 +1352,6 @@ __init void lguest_init(void)
1349 */ 1352 */
1350 switch_to_new_gdt(0); 1353 switch_to_new_gdt(0);
1351 1354
1352 /* We actually boot with all memory mapped, but let's say 128MB. */
1353 max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT;
1354
1355 /* 1355 /*
1356 * The Host<->Guest Switcher lives at the top of our address space, and 1356 * The Host<->Guest Switcher lives at the top of our address space, and
1357 * the Host told us how big it is when we made LGUEST_INIT hypercall: 1357 * the Host told us how big it is when we made LGUEST_INIT hypercall:
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S
index 4f420c2f2d55..e7d5382ef263 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
@@ -4,6 +4,7 @@
4#include <asm/asm-offsets.h> 4#include <asm/asm-offsets.h>
5#include <asm/thread_info.h> 5#include <asm/thread_info.h>
6#include <asm/processor-flags.h> 6#include <asm/processor-flags.h>
7#include <asm/pgtable.h>
7 8
8/*G:020 9/*G:020
9 * Our story starts with the kernel booting into startup_32 in 10 * Our story starts with the kernel booting into startup_32 in
@@ -37,9 +38,113 @@ ENTRY(lguest_entry)
37 /* Set up the initial stack so we can run C code. */ 38 /* Set up the initial stack so we can run C code. */
38 movl $(init_thread_union+THREAD_SIZE),%esp 39 movl $(init_thread_union+THREAD_SIZE),%esp
39 40
41 call init_pagetables
42
40 /* Jumps are relative: we're running __PAGE_OFFSET too low. */ 43 /* Jumps are relative: we're running __PAGE_OFFSET too low. */
41 jmp lguest_init+__PAGE_OFFSET 44 jmp lguest_init+__PAGE_OFFSET
42 45
46/*
47 * Initialize page tables. This creates the PDEs and a set of page tables
48 * starting at __brk_base. _brk_end is then set to the first address past
49 * those tables, and max_pfn_mapped to the first page frame left unmapped.
50 * Mappings are created both at virtual address 0 (identity mapping)
51 * and PAGE_OFFSET, up to _end + MAPPING_BEYOND_END.
52 *
53 * FIXME: This code is copied verbatim from arch/x86/kernel/head_32.S, which
54 * has no stack at that point and so cannot offer it via call and ret.
55 */
56init_pagetables:
57#if PTRS_PER_PMD > 1
58#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
59#else
60#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
61#endif
62#define pa(X) ((X) - __PAGE_OFFSET)
63
64/* Enough space to fit pagetables for the low memory linear map */
65MAPPING_BEYOND_END = \
66 PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT
67#ifdef CONFIG_X86_PAE
68
69 /*
70 * In PAE mode initial_page_table is statically defined to contain
71 * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
72 * entries). The identity mapping is handled by pointing two PGD entries
73 * to the first kernel PMD.
74 *
75 * Note the upper half of each PMD or PTE are always zero at this stage.
76 */
77
78#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
79
80 xorl %ebx,%ebx /* %ebx is kept at zero */
81
82 movl $pa(__brk_base), %edi /* %edi: where the next page table is written */
83 movl $pa(initial_pg_pmd), %edx /* %edx: next PMD entry to fill */
84 movl $PTE_IDENT_ATTR, %eax /* %eax: next PTE value (address 0 + attributes) */
8510:
86 leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */
87 movl %ecx,(%edx) /* Store PMD entry */
88 /* Upper half already zero */
89 addl $8,%edx
90 movl $512,%ecx /* 512 eight-byte PTEs are written per table */
9111:
92 stosl /* low half of the PTE */
93 xchgl %eax,%ebx
94 stosl /* high half of the PTE: zero, taken from %ebx */
95 xchgl %eax,%ebx
96 addl $0x1000,%eax /* advance to the next 4K page */
97 loop 11b
98
99 /*
100 * End condition: we must map up to the end + MAPPING_BEYOND_END.
101 */
102 movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
103 cmpl %ebp,%eax
104 jb 10b
1051:
106 addl $__PAGE_OFFSET, %edi /* physical -> virtual */
107 movl %edi, pa(_brk_end) /* first address past the tables just built */
108 shrl $12, %eax /* drop the low 12 bits: page frame number */
109 movl %eax, pa(max_pfn_mapped)
110
111 /* Do early initialization of the fixmap area */
112 movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
113 movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8)
114#else /* Not PAE */
115
116page_pde_offset = (__PAGE_OFFSET >> 20);
117
118 movl $pa(__brk_base), %edi /* %edi: where the next page table is written */
119 movl $pa(initial_page_table), %edx /* %edx: next PDE to fill */
120 movl $PTE_IDENT_ATTR, %eax /* %eax: next PTE value (address 0 + attributes) */
12110:
122 leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */
123 movl %ecx,(%edx) /* Store identity PDE entry */
124 movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */
125 addl $4,%edx
126 movl $1024, %ecx /* 1024 four-byte PTEs are written per table */
12711:
128 stosl /* store the PTE and advance %edi */
129 addl $0x1000,%eax /* advance to the next 4K page */
130 loop 11b
131 /*
132 * End condition: we must map up to the end + MAPPING_BEYOND_END.
133 */
134 movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
135 cmpl %ebp,%eax
136 jb 10b
137 addl $__PAGE_OFFSET, %edi /* physical -> virtual */
138 movl %edi, pa(_brk_end) /* first address past the tables just built */
139 shrl $12, %eax /* drop the low 12 bits: page frame number */
140 movl %eax, pa(max_pfn_mapped)
141
142 /* Do early initialization of the fixmap area */
143 movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
144 movl %eax,pa(initial_page_table+0xffc) /* last slot of the page directory */
145#endif
146 ret
147
43/*G:055 148/*G:055
44 * We create a macro which puts the assembler code between lgstart_ and lgend_ 149 * We create a macro which puts the assembler code between lgstart_ and lgend_
45 * markers. These templates are put in the .text section: they can't be 150 * markers. These templates are put in the .text section: they can't be
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index c4bb261c106e..b1805b78842f 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -65,21 +65,13 @@ pcibios_align_resource(void *data, const struct resource *res,
65 resource_size_t size, resource_size_t align) 65 resource_size_t size, resource_size_t align)
66{ 66{
67 struct pci_dev *dev = data; 67 struct pci_dev *dev = data;
68 resource_size_t start = round_down(res->end - size + 1, align); 68 resource_size_t start = res->start;
69 69
70 if (res->flags & IORESOURCE_IO) { 70 if (res->flags & IORESOURCE_IO) {
71 71 if (skip_isa_ioresource_align(dev))
72 /* 72 return start;
73 * If we're avoiding ISA aliases, the largest contiguous I/O 73 if (start & 0x300)
74 * port space is 256 bytes. Clearing bits 9 and 10 preserves 74 start = (start + 0x3ff) & ~0x3ff;
75 * all 256-byte and smaller alignments, so the result will
76 * still be correctly aligned.
77 */
78 if (!skip_isa_ioresource_align(dev))
79 start &= ~0x300;
80 } else if (res->flags & IORESOURCE_MEM) {
81 if (start < BIOS_END)
82 start = res->end; /* fail; no space */
83 } 75 }
84 return start; 76 return start;
85} 77}
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 4a2afa1bac51..b6552b189bcd 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -25,7 +25,7 @@ targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y)
25 25
26export CPPFLAGS_vdso.lds += -P -C 26export CPPFLAGS_vdso.lds += -P -C
27 27
28VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -Wl,-soname=linux-vdso.so.1 \ 28VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
29 -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 29 -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
30 30
31$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so 31$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so
@@ -69,7 +69,7 @@ vdso32.so-$(VDSO32-y) += sysenter
69vdso32-images = $(vdso32.so-y:%=vdso32-%.so) 69vdso32-images = $(vdso32.so-y:%=vdso32-%.so)
70 70
71CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) 71CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
72VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -Wl,-soname=linux-gate.so.1 72VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-soname=linux-gate.so.1
73 73
74# This makes sure the $(obj) subdirectory exists even though vdso32/ 74# This makes sure the $(obj) subdirectory exists even though vdso32/
75# is not a kbuild sub-make subdirectory. 75# is not a kbuild sub-make subdirectory.