Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/boot/compressed/misc.c            |   2
-rw-r--r--  arch/x86/crypto/ghash-clmulni-intel_glue.c |   1
-rw-r--r--  arch/x86/include/asm/e820.h                |   3
-rw-r--r--  arch/x86/include/asm/kvm_host.h            |   2
-rw-r--r--  arch/x86/include/asm/pvclock.h             |   1
-rw-r--r--  arch/x86/kernel/Makefile                   |   1
-rw-r--r--  arch/x86/kernel/apic/apic.c                |   8
-rw-r--r--  arch/x86/kernel/apic/io_apic.c             |   4
-rw-r--r--  arch/x86/kernel/apic/probe_64.c            |   7
-rw-r--r--  arch/x86/kernel/head_32.S                  |  16
-rw-r--r--  arch/x86/kernel/hpet.c                     |  26
-rw-r--r--  arch/x86/kernel/microcode_intel.c          |  16
-rw-r--r--  arch/x86/kernel/pvclock.c                  |   5
-rw-r--r--  arch/x86/kernel/resource.c                 |  48
-rw-r--r--  arch/x86/kernel/setup.c                    |  18
-rw-r--r--  arch/x86/kernel/xsave.c                    |   3
-rw-r--r--  arch/x86/kvm/svm.c                         |   4
-rw-r--r--  arch/x86/kvm/vmx.c                         |   5
-rw-r--r--  arch/x86/kvm/x86.c                         |  11
-rw-r--r--  arch/x86/kvm/x86.h                         |   5
-rw-r--r--  arch/x86/lguest/boot.c                     |  16
-rw-r--r--  arch/x86/lguest/i386_head.S                | 105
-rw-r--r--  arch/x86/pci/i386.c                        |  18
-rw-r--r--  arch/x86/pci/xen.c                         |  27
-rw-r--r--  arch/x86/vdso/Makefile                     |   4
-rw-r--r--  arch/x86/xen/enlighten.c                   |   4
-rw-r--r--  arch/x86/xen/mmu.c                         |   2
-rw-r--r--  arch/x86/xen/platform-pci-unplug.c         |   2
-rw-r--r--  arch/x86/xen/setup.c                       |  41
-rw-r--r--  arch/x86/xen/suspend.c                     |   1
-rw-r--r--  arch/x86/xen/time.c                        |   2
-rw-r--r--  arch/x86/xen/xen-ops.h                     |   2
32 files changed, 290 insertions(+), 120 deletions(-)
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 23f315c9f215..325c05294fc4 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -355,7 +355,7 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
 	if (heap > 0x3fffffffffffUL)
 		error("Destination address too large");
 #else
-	if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff))
+	if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff))
 		error("Destination address too large");
 #endif
 #ifndef CONFIG_RELOCATABLE
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index cbcc8d8ea93a..7a6e68e4f748 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -10,6 +10,7 @@
  * by the Free Software Foundation.
  */
 
+#include <linux/err.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 5be1542fbfaf..e99d55d74df5 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -72,6 +72,9 @@ struct e820map {
 #define BIOS_BEGIN		0x000a0000
 #define BIOS_END		0x00100000
 
+#define BIOS_ROM_BASE		0xffe00000
+#define BIOS_ROM_END		0xffffffff
+
 #ifdef __KERNEL__
 /* see comment in arch/x86/kernel/e820.c */
 extern struct e820map e820;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9e6fe391094e..f702f82aa1eb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -79,7 +79,7 @@
 #define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
 #define KVM_MIN_FREE_MMU_PAGES 5
 #define KVM_REFILL_PAGES 25
-#define KVM_MAX_CPUID_ENTRIES 40
+#define KVM_MAX_CPUID_ENTRIES 80
 #define KVM_NR_FIXED_MTRR_REGION 88
 #define KVM_NR_VAR_MTRR 8
 
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 7f7e577a0e39..31d84acc1512 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -11,6 +11,7 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src);
 void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
 			    struct pvclock_vcpu_time_info *vcpu,
 			    struct timespec *ts);
+void pvclock_resume(void);
 
 /*
  * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 9e13763b6092..1e994754d323 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -45,6 +45,7 @@ obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
 obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
 obj-y += tsc.o io_delay.o rtc.o
 obj-y += pci-iommu_table.o
+obj-y += resource.o
 
 obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
 obj-y += process.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 3f838d537392..78218135b48e 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1389,6 +1389,14 @@ void __cpuinit end_local_APIC_setup(void)
 
 	setup_apic_nmi_watchdog(NULL);
 	apic_pm_activate();
+
+	/*
+	 * Now that local APIC setup is completed for BP, configure the fault
+	 * handling for interrupt remapping.
+	 */
+	if (!smp_processor_id() && intr_remapping_enabled)
+		enable_drhd_fault_handling();
+
 }
 
 #ifdef CONFIG_X86_X2APIC
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 7cc0a721f628..fadcd743a74f 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2430,13 +2430,12 @@ static void ack_apic_level(struct irq_data *data)
 {
 	struct irq_cfg *cfg = data->chip_data;
 	int i, do_unmask_irq = 0, irq = data->irq;
-	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long v;
 
 	irq_complete_move(cfg);
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	/* If we are moving the irq we need to mask it */
-	if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
+	if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
 		do_unmask_irq = 1;
 		mask_ioapic(cfg);
 	}
@@ -3413,6 +3412,7 @@ dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
 	msg.data |= MSI_DATA_VECTOR(cfg->vector);
 	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+	msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest);
 
 	dmar_msi_write(irq, &msg);
 
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index f9e4e6a54073..d8c4a6feb286 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -79,13 +79,6 @@ void __init default_setup_apic_routing(void)
 		/* need to update phys_pkg_id */
 		apic->phys_pkg_id = apicid_phys_pkg_id;
 	}
-
-	/*
-	 * Now that apic routing model is selected, configure the
-	 * fault handling for intr remapping.
-	 */
-	if (intr_remapping_enabled)
-		enable_drhd_fault_handling();
 }
 
 /* Same for both flat and physical. */
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index bcece91dd311..c0dbd9ac24f0 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -60,16 +60,18 @@
 #define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
 #endif
 
+/* Number of possible pages in the lowmem region */
+LOWMEM_PAGES = (((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT)
+
 /* Enough space to fit pagetables for the low memory linear map */
-MAPPING_BEYOND_END = \
-	PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT
+MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT
 
 /*
  * Worst-case size of the kernel mapping we need to make:
- * the worst-case size of the kernel itself, plus the extra we need
- * to map for the linear map.
+ * a relocatable kernel can live anywhere in lowmem, so we need to be able
+ * to map all of lowmem.
  */
-KERNEL_PAGES = (KERNEL_IMAGE_SIZE + MAPPING_BEYOND_END)>>PAGE_SHIFT
+KERNEL_PAGES = LOWMEM_PAGES
 
 INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm
 RESERVE_BRK(pagetables, INIT_MAP_SIZE)
@@ -620,13 +622,13 @@ ENTRY(initial_code)
 __PAGE_ALIGNED_BSS
 	.align PAGE_SIZE_asm
 #ifdef CONFIG_X86_PAE
-initial_pg_pmd:
+ENTRY(initial_pg_pmd)
 	.fill 1024*KPMDS,4,0
 #else
 ENTRY(initial_page_table)
 	.fill 1024,4,0
 #endif
-initial_pg_fixmap:
+ENTRY(initial_pg_fixmap)
 	.fill 1024,4,0
 ENTRY(empty_zero_page)
 	.fill 4096,1,0
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ae03cab4352e..4ff5968f12d2 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -27,6 +27,9 @@
 #define HPET_DEV_FSB_CAP		0x1000
 #define HPET_DEV_PERI_CAP		0x2000
 
+#define HPET_MIN_CYCLES			128
+#define HPET_MIN_PROG_DELTA		(HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1))
+
 #define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt)
 
 /*
@@ -299,8 +302,9 @@ static void hpet_legacy_clockevent_register(void)
 	/* Calculate the min / max delta */
 	hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
 							   &hpet_clockevent);
-	/* 5 usec minimum reprogramming delta. */
-	hpet_clockevent.min_delta_ns = 5000;
+	/* Setup minimum reprogramming delta. */
+	hpet_clockevent.min_delta_ns = clockevent_delta2ns(HPET_MIN_PROG_DELTA,
+							   &hpet_clockevent);
 
 	/*
 	 * Start hpet with the boot cpu mask and make it
@@ -393,22 +397,24 @@ static int hpet_next_event(unsigned long delta,
 	 * the wraparound into account) nor a simple count down event
 	 * mode. Further the write to the comparator register is
 	 * delayed internally up to two HPET clock cycles in certain
-	 * chipsets (ATI, ICH9,10). We worked around that by reading
-	 * back the compare register, but that required another
-	 * workaround for ICH9,10 chips where the first readout after
-	 * write can return the old stale value. We already have a
-	 * minimum delta of 5us enforced, but a NMI or SMI hitting
+	 * chipsets (ATI, ICH9,10). Some newer AMD chipsets have even
+	 * longer delays. We worked around that by reading back the
+	 * compare register, but that required another workaround for
+	 * ICH9,10 chips where the first readout after write can
+	 * return the old stale value. We already had a minimum
+	 * programming delta of 5us enforced, but a NMI or SMI hitting
 	 * between the counter readout and the comparator write can
 	 * move us behind that point easily. Now instead of reading
 	 * the compare register back several times, we make the ETIME
 	 * decision based on the following: Return ETIME if the
-	 * counter value after the write is less than 8 HPET cycles
+	 * counter value after the write is less than HPET_MIN_CYCLES
 	 * away from the event or if the counter is already ahead of
-	 * the event.
+	 * the event. The minimum programming delta for the generic
+	 * clockevents code is set to 1.5 * HPET_MIN_CYCLES.
 	 */
 	res = (s32)(cnt - hpet_readl(HPET_COUNTER));
 
-	return res < 8 ? -ETIME : 0;
+	return res < HPET_MIN_CYCLES ? -ETIME : 0;
 }
 
 static void hpet_legacy_set_mode(enum clock_event_mode mode,
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index dcb65cc0a053..1a1b606d3e92 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -364,8 +364,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 
 		/* For performance reasons, reuse mc area when possible */
 		if (!mc || mc_size > curr_mc_size) {
-			if (mc)
-				vfree(mc);
+			vfree(mc);
 			mc = vmalloc(mc_size);
 			if (!mc)
 				break;
@@ -374,13 +373,11 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 
 		if (get_ucode_data(mc, ucode_ptr, mc_size) ||
 		    microcode_sanity_check(mc) < 0) {
-			vfree(mc);
 			break;
 		}
 
 		if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) {
-			if (new_mc)
-				vfree(new_mc);
+			vfree(new_mc);
 			new_rev = mc_header.rev;
 			new_mc = mc;
 			mc = NULL;	/* trigger new vmalloc */
@@ -390,12 +387,10 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 		leftover -= mc_size;
 	}
 
-	if (mc)
-		vfree(mc);
+	vfree(mc);
 
 	if (leftover) {
-		if (new_mc)
-			vfree(new_mc);
+		vfree(new_mc);
 		state = UCODE_ERROR;
 		goto out;
 	}
@@ -405,8 +400,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 		goto out;
 	}
 
-	if (uci->mc)
-		vfree(uci->mc);
+	vfree(uci->mc);
 	uci->mc = (struct microcode_intel *)new_mc;
 
 	pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 008b91eefa18..42eb3300dfc6 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -83,6 +83,11 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
 
 static atomic64_t last_value = ATOMIC64_INIT(0);
 
+void pvclock_resume(void)
+{
+	atomic64_set(&last_value, 0);
+}
+
 cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
 {
 	struct pvclock_shadow_time shadow;
diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c
new file mode 100644
index 000000000000..2a26819bb6a8
--- /dev/null
+++ b/arch/x86/kernel/resource.c
@@ -0,0 +1,48 @@
+#include <linux/ioport.h>
+#include <asm/e820.h>
+
+static void resource_clip(struct resource *res, resource_size_t start,
+			  resource_size_t end)
+{
+	resource_size_t low = 0, high = 0;
+
+	if (res->end < start || res->start > end)
+		return;		/* no conflict */
+
+	if (res->start < start)
+		low = start - res->start;
+
+	if (res->end > end)
+		high = res->end - end;
+
+	/* Keep the area above or below the conflict, whichever is larger */
+	if (low > high)
+		res->end = start - 1;
+	else
+		res->start = end + 1;
+}
+
+static void remove_e820_regions(struct resource *avail)
+{
+	int i;
+	struct e820entry *entry;
+
+	for (i = 0; i < e820.nr_map; i++) {
+		entry = &e820.map[i];
+
+		resource_clip(avail, entry->addr,
+			      entry->addr + entry->size - 1);
+	}
+}
+
+void arch_remove_reservations(struct resource *avail)
+{
+	/* Trim out BIOS areas (low 1MB and high 2MB) and E820 regions */
+	if (avail->flags & IORESOURCE_MEM) {
+		if (avail->start < BIOS_END)
+			avail->start = BIOS_END;
+		resource_clip(avail, BIOS_ROM_BASE, BIOS_ROM_END);
+
+		remove_e820_regions(avail);
+	}
+}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 21c6746338af..a0f52af256a0 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -501,7 +501,18 @@ static inline unsigned long long get_total_mem(void)
 	return total << PAGE_SHIFT;
 }
 
-#define DEFAULT_BZIMAGE_ADDR_MAX 0x37FFFFFF
+/*
+ * Keep the crash kernel below this limit. On 32 bits earlier kernels
+ * would limit the kernel to the low 512 MiB due to mapping restrictions.
+ * On 64 bits, kexec-tools currently limits us to 896 MiB; increase this
+ * limit once kexec-tools are fixed.
+ */
+#ifdef CONFIG_X86_32
+# define CRASH_KERNEL_ADDR_MAX	(512 << 20)
+#else
+# define CRASH_KERNEL_ADDR_MAX	(896 << 20)
+#endif
+
 static void __init reserve_crashkernel(void)
 {
 	unsigned long long total_mem;
@@ -520,10 +531,10 @@ static void __init reserve_crashkernel(void)
 		const unsigned long long alignment = 16<<20;	/* 16M */
 
 		/*
-		 * kexec want bzImage is below DEFAULT_BZIMAGE_ADDR_MAX
+		 * kexec want bzImage is below CRASH_KERNEL_ADDR_MAX
 		 */
 		crash_base = memblock_find_in_range(alignment,
-				DEFAULT_BZIMAGE_ADDR_MAX, crash_size, alignment);
+				CRASH_KERNEL_ADDR_MAX, crash_size, alignment);
 
 		if (crash_base == MEMBLOCK_ERROR) {
 			pr_info("crashkernel reservation failed - No suitable area found.\n");
@@ -769,7 +780,6 @@ void __init setup_arch(char **cmdline_p)
 
 	x86_init.oem.arch_setup();
 
-	resource_alloc_from_bottom = 0;
 	iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
 	setup_memory_map();
 	parse_setup_data();
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 9c253bd65e24..547128546cc3 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -394,7 +394,8 @@ static void __init setup_xstate_init(void)
 	 * Setup init_xstate_buf to represent the init state of
 	 * all the features managed by the xsave
 	 */
-	init_xstate_buf = alloc_bootmem(xstate_size);
+	init_xstate_buf = alloc_bootmem_align(xstate_size,
+					      __alignof__(struct xsave_struct));
 	init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
 
 	clts();
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1ca12298ffc7..b81a9b7c2ca4 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3494,6 +3494,10 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu)
 static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
 {
 	switch (func) {
+	case 0x00000001:
+		/* Mask out xsave bit as long as it is not supported by SVM */
+		entry->ecx &= ~(bit(X86_FEATURE_XSAVE));
+		break;
 	case 0x80000001:
 		if (nested)
 			entry->ecx |= (1 << 2); /* Set SVM bit */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ff21fdda0c53..81fcbe9515c5 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4227,11 +4227,6 @@ static int vmx_get_lpage_level(void)
 	return PT_PDPE_LEVEL;
 }
 
-static inline u32 bit(int bitno)
-{
-	return 1 << (bitno & 31);
-}
-
 static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cdac9e592aa5..b989e1f1e5d3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -155,11 +155,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 u64 __read_mostly host_xcr0;
 
-static inline u32 bit(int bitno)
-{
-	return 1 << (bitno & 31);
-}
-
 static void kvm_on_user_return(struct user_return_notifier *urn)
 {
 	unsigned slot;
@@ -4569,9 +4564,11 @@ static void kvm_timer_init(void)
 #ifdef CONFIG_CPU_FREQ
 	struct cpufreq_policy policy;
 	memset(&policy, 0, sizeof(policy));
-	cpufreq_get_policy(&policy, get_cpu());
+	cpu = get_cpu();
+	cpufreq_get_policy(&policy, cpu);
 	if (policy.cpuinfo.max_freq)
 		max_tsc_khz = policy.cpuinfo.max_freq;
+	put_cpu();
 #endif
 	cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
 				  CPUFREQ_TRANSITION_NOTIFIER);
@@ -5522,6 +5519,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 
 	mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
 	kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
+	if (sregs->cr4 & X86_CR4_OSXSAVE)
+		update_cpuid(vcpu);
 	if (!is_long_mode(vcpu) && is_pae(vcpu)) {
 		load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3);
 		mmu_reset_needed = 1;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 2cea414489f3..c600da830ce0 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -70,6 +70,11 @@ static inline int is_paging(struct kvm_vcpu *vcpu)
 	return kvm_read_cr0_bits(vcpu, X86_CR0_PG);
 }
 
+static inline u32 bit(int bitno)
+{
+	return 1 << (bitno & 31);
+}
+
 void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
 void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
 int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq);
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 73b1e1a1f489..4996cf5f73a0 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -531,7 +531,10 @@ static void lguest_write_cr3(unsigned long cr3)
 {
 	lguest_data.pgdir = cr3;
 	lazy_hcall1(LHCALL_NEW_PGTABLE, cr3);
-	cr3_changed = true;
+
+	/* These two page tables are simple, linear, and used during boot */
+	if (cr3 != __pa(swapper_pg_dir) && cr3 != __pa(initial_page_table))
+		cr3_changed = true;
 }
 
 static unsigned long lguest_read_cr3(void)
@@ -703,9 +706,9 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
  * to forget all of them. Fortunately, this is very rare.
  *
  * ... except in early boot when the kernel sets up the initial pagetables,
- * which makes booting astonishingly slow: 1.83 seconds! So we don't even tell
- * the Host anything changed until we've done the first page table switch,
- * which brings boot back to 0.25 seconds.
+ * which makes booting astonishingly slow: 48 seconds! So we don't even tell
+ * the Host anything changed until we've done the first real page table switch,
+ * which brings boot back to 4.3 seconds.
  */
 static void lguest_set_pte(pte_t *ptep, pte_t pteval)
 {
@@ -1002,7 +1005,7 @@ static void lguest_time_init(void)
 	clockevents_register_device(&lguest_clockevent);
 
 	/* Finally, we unblock the timer interrupt. */
-	enable_lguest_irq(0);
+	clear_bit(0, lguest_data.blocked_interrupts);
 }
 
 /*
@@ -1349,9 +1352,6 @@ __init void lguest_init(void)
 	 */
 	switch_to_new_gdt(0);
 
-	/* We actually boot with all memory mapped, but let's say 128MB. */
-	max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT;
-
 	/*
 	 * The Host<->Guest Switcher lives at the top of our address space, and
 	 * the Host told us how big it is when we made LGUEST_INIT hypercall:
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S
index 4f420c2f2d55..e7d5382ef263 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
@@ -4,6 +4,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 #include <asm/processor-flags.h>
+#include <asm/pgtable.h>
 
 /*G:020
  * Our story starts with the kernel booting into startup_32 in
@@ -37,9 +38,113 @@ ENTRY(lguest_entry)
 	/* Set up the initial stack so we can run C code. */
 	movl $(init_thread_union+THREAD_SIZE),%esp
 
+	call init_pagetables
+
 	/* Jumps are relative: we're running __PAGE_OFFSET too low. */
 	jmp lguest_init+__PAGE_OFFSET
 
+/*
+ * Initialize page tables. This creates a PDE and a set of page
+ * tables, which are located immediately beyond __brk_base. The variable
+ * _brk_end is set up to point to the first "safe" location.
+ * Mappings are created both at virtual address 0 (identity mapping)
+ * and PAGE_OFFSET for up to _end.
+ *
+ * FIXME: This code is taken verbatim from arch/x86/kernel/head_32.S: they
+ * don't have a stack at this point, so we can't just use call and ret.
+ */
+init_pagetables:
+#if PTRS_PER_PMD > 1
+#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
+#else
+#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
+#endif
+#define pa(X) ((X) - __PAGE_OFFSET)
+
+/* Enough space to fit pagetables for the low memory linear map */
+MAPPING_BEYOND_END = \
+	PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT
+#ifdef CONFIG_X86_PAE
+
+	/*
+	 * In PAE mode initial_page_table is statically defined to contain
+	 * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
+	 * entries). The identity mapping is handled by pointing two PGD entries
+	 * to the first kernel PMD.
+	 *
+	 * Note the upper half of each PMD or PTE are always zero at this stage.
+	 */
+
+#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
+
+	xorl %ebx,%ebx				/* %ebx is kept at zero */
+
+	movl $pa(__brk_base), %edi
+	movl $pa(initial_pg_pmd), %edx
+	movl $PTE_IDENT_ATTR, %eax
+10:
+	leal PDE_IDENT_ATTR(%edi),%ecx		/* Create PMD entry */
+	movl %ecx,(%edx)			/* Store PMD entry */
+						/* Upper half already zero */
+	addl $8,%edx
+	movl $512,%ecx
+11:
+	stosl
+	xchgl %eax,%ebx
+	stosl
+	xchgl %eax,%ebx
+	addl $0x1000,%eax
+	loop 11b
+
+	/*
+	 * End condition: we must map up to the end + MAPPING_BEYOND_END.
+	 */
+	movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
+	cmpl %ebp,%eax
+	jb 10b
+1:
+	addl $__PAGE_OFFSET, %edi
+	movl %edi, pa(_brk_end)
+	shrl $12, %eax
+	movl %eax, pa(max_pfn_mapped)
+
+	/* Do early initialization of the fixmap area */
+	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
+	movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8)
+#else	/* Not PAE */
+
+page_pde_offset = (__PAGE_OFFSET >> 20);
+
+	movl $pa(__brk_base), %edi
+	movl $pa(initial_page_table), %edx
+	movl $PTE_IDENT_ATTR, %eax
+10:
+	leal PDE_IDENT_ATTR(%edi),%ecx		/* Create PDE entry */
+	movl %ecx,(%edx)			/* Store identity PDE entry */
+	movl %ecx,page_pde_offset(%edx)		/* Store kernel PDE entry */
+	addl $4,%edx
+	movl $1024, %ecx
+11:
+	stosl
+	addl $0x1000,%eax
+	loop 11b
+	/*
+	 * End condition: we must map up to the end + MAPPING_BEYOND_END.
+	 */
+	movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
+	cmpl %ebp,%eax
+	jb 10b
+	addl $__PAGE_OFFSET, %edi
+	movl %edi, pa(_brk_end)
+	shrl $12, %eax
+	movl %eax, pa(max_pfn_mapped)
+
+	/* Do early initialization of the fixmap area */
+	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
+	movl %eax,pa(initial_page_table+0xffc)
+#endif
+	ret
+
 /*G:055
  * We create a macro which puts the assembler code between lgstart_ and lgend_
  * markers. These templates are put in the .text section: they can't be
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index c4bb261c106e..b1805b78842f 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -65,21 +65,13 @@ pcibios_align_resource(void *data, const struct resource *res,
 			resource_size_t size, resource_size_t align)
 {
 	struct pci_dev *dev = data;
-	resource_size_t start = round_down(res->end - size + 1, align);
+	resource_size_t start = res->start;
 
 	if (res->flags & IORESOURCE_IO) {
-
-		/*
-		 * If we're avoiding ISA aliases, the largest contiguous I/O
-		 * port space is 256 bytes. Clearing bits 9 and 10 preserves
-		 * all 256-byte and smaller alignments, so the result will
-		 * still be correctly aligned.
-		 */
-		if (!skip_isa_ioresource_align(dev))
-			start &= ~0x300;
-	} else if (res->flags & IORESOURCE_MEM) {
-		if (start < BIOS_END)
-			start = res->end;	/* fail; no space */
+		if (skip_isa_ioresource_align(dev))
+			return start;
+		if (start & 0x300)
+			start = (start + 0x3ff) & ~0x3ff;
 	}
 	return start;
 }
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index d7b5109f7a9c..25cd4a07d09f 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -70,6 +70,9 @@ static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
 struct xen_pci_frontend_ops *xen_pci_frontend;
 EXPORT_SYMBOL_GPL(xen_pci_frontend);
 
+#define XEN_PIRQ_MSI_DATA	(MSI_DATA_TRIGGER_EDGE | \
+		MSI_DATA_LEVEL_ASSERT | (3 << 8) | MSI_DATA_VECTOR(0))
+
 static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq,
 				struct msi_msg *msg)
 {
@@ -83,12 +86,7 @@ static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq,
 		MSI_ADDR_REDIRECTION_CPU |
 		MSI_ADDR_DEST_ID(pirq);
 
-	msg->data =
-		MSI_DATA_TRIGGER_EDGE |
-		MSI_DATA_LEVEL_ASSERT |
-		/* delivery mode reserved */
-		(3 << 8) |
-		MSI_DATA_VECTOR(0);
+	msg->data = XEN_PIRQ_MSI_DATA;
 }
 
 static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
@@ -98,8 +96,23 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 	struct msi_msg msg;
 
 	list_for_each_entry(msidesc, &dev->msi_list, list) {
+		__read_msi_msg(msidesc, &msg);
+		pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) |
+			((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff);
+		if (xen_irq_from_pirq(pirq) >= 0 && msg.data == XEN_PIRQ_MSI_DATA) {
+			xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ?
+					"msi-x" : "msi", &irq, &pirq, XEN_ALLOC_IRQ);
+			if (irq < 0)
+				goto error;
+			ret = set_irq_msi(irq, msidesc);
+			if (ret < 0)
+				goto error_while;
+			printk(KERN_DEBUG "xen: msi already setup: msi --> irq=%d"
+			       " pirq=%d\n", irq, pirq);
+			return 0;
+		}
 		xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ?
-				"msi-x" : "msi", &irq, &pirq);
+				"msi-x" : "msi", &irq, &pirq, (XEN_ALLOC_IRQ | XEN_ALLOC_PIRQ));
 		if (irq < 0 || pirq < 0)
 			goto error;
 		printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq);
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 4a2afa1bac51..b6552b189bcd 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -25,7 +25,7 @@ targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y)
 
 export CPPFLAGS_vdso.lds += -P -C
 
-VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -Wl,-soname=linux-vdso.so.1 \
+VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
 			-Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
 
 $(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so
@@ -69,7 +69,7 @@ vdso32.so-$(VDSO32-y) += sysenter
 vdso32-images = $(vdso32.so-y:%=vdso32-%.so)
 
 CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
-VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -Wl,-soname=linux-gate.so.1
+VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-soname=linux-gate.so.1
 
 # This makes sure the $(obj) subdirectory exists even though vdso32/
 # is not a kbuild sub-make subdirectory.
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 02c710bebf7a..44dcad43989d 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1021,10 +1021,6 @@ static void xen_reboot(int reason)
 {
 	struct sched_shutdown r = { .reason = reason };
 
-#ifdef CONFIG_SMP
-	stop_other_cpus();
-#endif
-
 	if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
 		BUG();
 }
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index a1feff9e59b6..44924e551fde 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2415,8 +2415,6 @@ void __init xen_init_mmu_ops(void)
 	x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
 	pv_mmu_ops = xen_mmu_ops;
 
-	vmap_lazy_unmap = false;
-
 	memset(dummy_mapping, 0xff, PAGE_SIZE);
 }
 
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
index 0f456386cce5..25c52f94a27c 100644
--- a/arch/x86/xen/platform-pci-unplug.c
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -68,7 +68,7 @@ static int __init check_platform_magic(void)
 	return 0;
 }
 
-void __init xen_unplug_emulated_devices(void)
+void xen_unplug_emulated_devices(void)
 {
 	int r;
 
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 01afd8a94607..b5a7f928234b 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -181,24 +181,21 @@ char * __init xen_memory_setup(void)
 	for (i = 0; i < memmap.nr_entries; i++) {
 		unsigned long long end = map[i].addr + map[i].size;
 
-		if (map[i].type == E820_RAM) {
-			if (map[i].addr < mem_end && end > mem_end) {
-				/* Truncate region to max_mem. */
-				u64 delta = end - mem_end;
+		if (map[i].type == E820_RAM && end > mem_end) {
+			/* RAM off the end - may be partially included */
+			u64 delta = min(map[i].size, end - mem_end);
 
-				map[i].size -= delta;
-				extra_pages += PFN_DOWN(delta);
+			map[i].size -= delta;
+			end -= delta;
 
-				end = mem_end;
-			}
+			extra_pages += PFN_DOWN(delta);
 		}
 
-		if (end > xen_extra_mem_start)
+		if (map[i].size > 0 && end > xen_extra_mem_start)
 			xen_extra_mem_start = end;
 
-		/* If region is non-RAM or below mem_end, add what remains */
-		if ((map[i].type != E820_RAM || map[i].addr < mem_end) &&
-		    map[i].size > 0)
+		/* Add region if any remains */
+		if (map[i].size > 0)
 			e820_add_region(map[i].addr, map[i].size, map[i].type);
 	}
 
@@ -252,20 +249,6 @@ char * __init xen_memory_setup(void)
 	return "Xen";
 }
 
-static void xen_idle(void)
-{
-	local_irq_disable();
-
-	if (need_resched())
-		local_irq_enable();
-	else {
-		current_thread_info()->status &= ~TS_POLLING;
-		smp_mb__after_clear_bit();
-		safe_halt();
-		current_thread_info()->status |= TS_POLLING;
-	}
-}
-
 /*
  * Set the bit indicating "nosegneg" library variants should be used.
  * We only need to bother in pure 32-bit mode; compat 32-bit processes
@@ -362,7 +345,11 @@ void __init xen_arch_setup(void)
 	       MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
 	       COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);
 
-	pm_idle = xen_idle;
+	/* Set up idle, making sure it calls safe_halt() pvop */
+#ifdef CONFIG_X86_32
+	boot_cpu_data.hlt_works_ok = 1;
+#endif
+	pm_idle = default_idle;
 
 	fiddle_vdso();
 }
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 1d789d56877c..9bbd63a129b5 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -31,6 +31,7 @@ void xen_hvm_post_suspend(int suspend_cancelled)
 	int cpu;
 	xen_hvm_init_shared_info();
 	xen_callback_vector();
+	xen_unplug_emulated_devices();
 	if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
 		for_each_online_cpu(cpu) {
 			xen_setup_runstate_info(cpu);
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index b2bb5aa3b054..5da5e53fb94c 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -426,6 +426,8 @@ void xen_timer_resume(void)
 {
 	int cpu;
 
+	pvclock_resume();
+
 	if (xen_clockevent != &xen_vcpuop_clockevent)
 		return;
 
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 64044747348e..9d41bf985757 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -43,7 +43,7 @@ void xen_vcpu_restore(void);
 
 void xen_callback_vector(void);
 void xen_hvm_init_shared_info(void);
-void __init xen_unplug_emulated_devices(void);
+void xen_unplug_emulated_devices(void);
 
 void __init xen_build_dynamic_phys_to_machine(void);
 