aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-01-23 01:00:18 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2014-01-23 01:00:18 -0500
commit84621c9b18d0bb6cb267e3395c7f3131ecf4d39c (patch)
tree28566fe0211798143136b5cd154e2239d38a7b68 /arch/x86
parent7ebd3faa9b5b42caf2d5aa1352a93dcfa0098011 (diff)
parentc9f6e9977e38de15da96b732a8dec0ef56cbf977 (diff)
Merge tag 'stable/for-linus-3.14-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull Xen updates from Konrad Rzeszutek Wilk: "Two major features that Xen community is excited about: The first is event channel scalability by David Vrabel - we switch over from an two-level per-cpu bitmap of events (IRQs) - to an FIFO queue with priorities. This lets us be able to handle more events, have lower latency, and better scalability. Good stuff. The other is PVH by Mukesh Rathor. In short, PV is a mode where the kernel lets the hypervisor program page-tables, segments, etc. With EPT/NPT capabilities in current processors, the overhead of doing this in an HVM (Hardware Virtual Machine) container is much lower than the hypervisor doing it for us. In short we let a PV guest run without doing page-table, segment, syscall, etc updates through the hypervisor - instead it is all done within the guest container. It is a "hybrid" PV - hence the 'PVH' name - a PV guest within an HVM container. The major benefits are less code to deal with - for example we only use one function from the the pv_mmu_ops (which has 39 function calls); faster performance for syscall (no context switches into the hypervisor); less traps on various operations; etc. It is still being baked - the ABI is not yet set in stone. But it is pretty awesome and we are excited about it. Lastly, there are some changes to ARM code - you should get a simple conflict which has been resolved in #linux-next. In short, this pull has awesome features. Features: - FIFO event channels. Key advantages: support for over 100,000 events (2^17), 16 different event priorities, improved fairness in event latency through the use of FIFOs. - Xen PVH support. "It’s a fully PV kernel mode, running with paravirtualized disk and network, paravirtualized interrupts and timers, no emulated devices of any kind (and thus no qemu), no BIOS or legacy boot — but instead of requiring PV MMU, it uses the HVM hardware extensions to virtualize the pagetables, as well as system calls and other privileged operations." (from "The Paravirtualization Spectrum, Part 2: From poles to a spectrum") Bug-fixes: - Fixes in balloon driver (refactor and make it work under ARM) - Allow xenfb to be used in HVM guests. - Allow xen_platform_pci=0 to work properly. - Refactors in event channels" * tag 'stable/for-linus-3.14-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (52 commits) xen/pvh: Set X86_CR0_WP and others in CR0 (v2) MAINTAINERS: add git repository for Xen xen/pvh: Use 'depend' instead of 'select'. xen: delete new instances of __cpuinit usage xen/fb: allow xenfb initialization for hvm guests xen/evtchn_fifo: fix error return code in evtchn_fifo_setup() xen-platform: fix error return code in platform_pci_init() xen/pvh: remove duplicated include from enlighten.c xen/pvh: Fix compile issues with xen_pvh_domain() xen: Use dev_is_pci() to check whether it is pci device xen/grant-table: Force to use v1 of grants. xen/pvh: Support ParaVirtualized Hardware extensions (v3). xen/pvh: Piggyback on PVHVM XenBus. xen/pvh: Piggyback on PVHVM for grant driver (v4) xen/grant: Implement an grant frame array struct (v3). xen/grant-table: Refactor gnttab_init xen/grants: Remove gnttab_max_grant_frames dependency on gnttab_init. xen/pvh: Piggyback on PVHVM for event channels (v2) xen/pvh: Update E820 to work with PVH (v2) xen/pvh: Secondary VCPU bringup (non-bootup CPUs) ...
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/xen/page.h8
-rw-r--r--arch/x86/xen/Kconfig4
-rw-r--r--arch/x86/xen/enlighten.c126
-rw-r--r--arch/x86/xen/grant-table.c63
-rw-r--r--arch/x86/xen/irq.c5
-rw-r--r--arch/x86/xen/mmu.c166
-rw-r--r--arch/x86/xen/p2m.c15
-rw-r--r--arch/x86/xen/platform-pci-unplug.c79
-rw-r--r--arch/x86/xen/setup.c40
-rw-r--r--arch/x86/xen/smp.c49
-rw-r--r--arch/x86/xen/time.c1
-rw-r--r--arch/x86/xen/xen-head.S25
-rw-r--r--arch/x86/xen/xen-ops.h1
13 files changed, 459 insertions, 123 deletions
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index b913915e8e63..3e276eb23d1b 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -167,7 +167,12 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine)
167 */ 167 */
168static inline unsigned long mfn_to_local_pfn(unsigned long mfn) 168static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
169{ 169{
170 unsigned long pfn = mfn_to_pfn(mfn); 170 unsigned long pfn;
171
172 if (xen_feature(XENFEAT_auto_translated_physmap))
173 return mfn;
174
175 pfn = mfn_to_pfn(mfn);
171 if (get_phys_to_machine(pfn) != mfn) 176 if (get_phys_to_machine(pfn) != mfn)
172 return -1; /* force !pfn_valid() */ 177 return -1; /* force !pfn_valid() */
173 return pfn; 178 return pfn;
@@ -222,5 +227,6 @@ void make_lowmem_page_readonly(void *vaddr);
222void make_lowmem_page_readwrite(void *vaddr); 227void make_lowmem_page_readwrite(void *vaddr);
223 228
224#define xen_remap(cookie, size) ioremap((cookie), (size)); 229#define xen_remap(cookie, size) ioremap((cookie), (size));
230#define xen_unmap(cookie) iounmap((cookie))
225 231
226#endif /* _ASM_X86_XEN_PAGE_H */ 232#endif /* _ASM_X86_XEN_PAGE_H */
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 1a3c76505649..01b90261fa38 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -51,3 +51,7 @@ config XEN_DEBUG_FS
51 Enable statistics output and various tuning options in debugfs. 51 Enable statistics output and various tuning options in debugfs.
52 Enabling this option may incur a significant performance overhead. 52 Enabling this option may incur a significant performance overhead.
53 53
54config XEN_PVH
55 bool "Support for running as a PVH guest"
56 depends on X86_64 && XEN && XEN_PVHVM
57 def_bool n
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index fa6ade76ef3f..a4d7b647867f 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -262,8 +262,9 @@ static void __init xen_banner(void)
262 struct xen_extraversion extra; 262 struct xen_extraversion extra;
263 HYPERVISOR_xen_version(XENVER_extraversion, &extra); 263 HYPERVISOR_xen_version(XENVER_extraversion, &extra);
264 264
265 printk(KERN_INFO "Booting paravirtualized kernel on %s\n", 265 pr_info("Booting paravirtualized kernel %son %s\n",
266 pv_info.name); 266 xen_feature(XENFEAT_auto_translated_physmap) ?
267 "with PVH extensions " : "", pv_info.name);
267 printk(KERN_INFO "Xen version: %d.%d%s%s\n", 268 printk(KERN_INFO "Xen version: %d.%d%s%s\n",
268 version >> 16, version & 0xffff, extra.extraversion, 269 version >> 16, version & 0xffff, extra.extraversion,
269 xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); 270 xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
@@ -433,7 +434,7 @@ static void __init xen_init_cpuid_mask(void)
433 434
434 ax = 1; 435 ax = 1;
435 cx = 0; 436 cx = 0;
436 xen_cpuid(&ax, &bx, &cx, &dx); 437 cpuid(1, &ax, &bx, &cx, &dx);
437 438
438 xsave_mask = 439 xsave_mask =
439 (1 << (X86_FEATURE_XSAVE % 32)) | 440 (1 << (X86_FEATURE_XSAVE % 32)) |
@@ -1142,8 +1143,9 @@ void xen_setup_vcpu_info_placement(void)
1142 xen_vcpu_setup(cpu); 1143 xen_vcpu_setup(cpu);
1143 1144
1144 /* xen_vcpu_setup managed to place the vcpu_info within the 1145 /* xen_vcpu_setup managed to place the vcpu_info within the
1145 percpu area for all cpus, so make use of it */ 1146 * percpu area for all cpus, so make use of it. Note that for
1146 if (have_vcpu_info_placement) { 1147 * PVH we want to use native IRQ mechanism. */
1148 if (have_vcpu_info_placement && !xen_pvh_domain()) {
1147 pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); 1149 pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
1148 pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); 1150 pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
1149 pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); 1151 pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
@@ -1407,9 +1409,49 @@ static void __init xen_boot_params_init_edd(void)
1407 * Set up the GDT and segment registers for -fstack-protector. Until 1409 * Set up the GDT and segment registers for -fstack-protector. Until
1408 * we do this, we have to be careful not to call any stack-protected 1410 * we do this, we have to be careful not to call any stack-protected
1409 * function, which is most of the kernel. 1411 * function, which is most of the kernel.
1412 *
1413 * Note, that it is __ref because the only caller of this after init
1414 * is PVH which is not going to use xen_load_gdt_boot or other
1415 * __init functions.
1410 */ 1416 */
1411static void __init xen_setup_stackprotector(void) 1417static void __ref xen_setup_gdt(int cpu)
1412{ 1418{
1419 if (xen_feature(XENFEAT_auto_translated_physmap)) {
1420#ifdef CONFIG_X86_64
1421 unsigned long dummy;
1422
1423 load_percpu_segment(cpu); /* We need to access per-cpu area */
1424 switch_to_new_gdt(cpu); /* GDT and GS set */
1425
1426 /* We are switching of the Xen provided GDT to our HVM mode
1427 * GDT. The new GDT has __KERNEL_CS with CS.L = 1
1428 * and we are jumping to reload it.
1429 */
1430 asm volatile ("pushq %0\n"
1431 "leaq 1f(%%rip),%0\n"
1432 "pushq %0\n"
1433 "lretq\n"
1434 "1:\n"
1435 : "=&r" (dummy) : "0" (__KERNEL_CS));
1436
1437 /*
1438 * While not needed, we also set the %es, %ds, and %fs
1439 * to zero. We don't care about %ss as it is NULL.
1440 * Strictly speaking this is not needed as Xen zeros those
1441 * out (and also MSR_FS_BASE, MSR_GS_BASE, MSR_KERNEL_GS_BASE)
1442 *
1443 * Linux zeros them in cpu_init() and in secondary_startup_64
1444 * (for BSP).
1445 */
1446 loadsegment(es, 0);
1447 loadsegment(ds, 0);
1448 loadsegment(fs, 0);
1449#else
1450 /* PVH: TODO Implement. */
1451 BUG();
1452#endif
1453 return; /* PVH does not need any PV GDT ops. */
1454 }
1413 pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot; 1455 pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot;
1414 pv_cpu_ops.load_gdt = xen_load_gdt_boot; 1456 pv_cpu_ops.load_gdt = xen_load_gdt_boot;
1415 1457
@@ -1420,6 +1462,46 @@ static void __init xen_setup_stackprotector(void)
1420 pv_cpu_ops.load_gdt = xen_load_gdt; 1462 pv_cpu_ops.load_gdt = xen_load_gdt;
1421} 1463}
1422 1464
1465/*
1466 * A PV guest starts with default flags that are not set for PVH, set them
1467 * here asap.
1468 */
1469static void xen_pvh_set_cr_flags(int cpu)
1470{
1471
1472 /* Some of these are setup in 'secondary_startup_64'. The others:
1473 * X86_CR0_TS, X86_CR0_PE, X86_CR0_ET are set by Xen for HVM guests
1474 * (which PVH shared codepaths), while X86_CR0_PG is for PVH. */
1475 write_cr0(read_cr0() | X86_CR0_MP | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM);
1476}
1477
1478/*
1479 * Note, that it is ref - because the only caller of this after init
1480 * is PVH which is not going to use xen_load_gdt_boot or other
1481 * __init functions.
1482 */
1483void __ref xen_pvh_secondary_vcpu_init(int cpu)
1484{
1485 xen_setup_gdt(cpu);
1486 xen_pvh_set_cr_flags(cpu);
1487}
1488
1489static void __init xen_pvh_early_guest_init(void)
1490{
1491 if (!xen_feature(XENFEAT_auto_translated_physmap))
1492 return;
1493
1494 if (!xen_feature(XENFEAT_hvm_callback_vector))
1495 return;
1496
1497 xen_have_vector_callback = 1;
1498 xen_pvh_set_cr_flags(0);
1499
1500#ifdef CONFIG_X86_32
1501 BUG(); /* PVH: Implement proper support. */
1502#endif
1503}
1504
1423/* First C function to be called on Xen boot */ 1505/* First C function to be called on Xen boot */
1424asmlinkage void __init xen_start_kernel(void) 1506asmlinkage void __init xen_start_kernel(void)
1425{ 1507{
@@ -1431,13 +1513,16 @@ asmlinkage void __init xen_start_kernel(void)
1431 1513
1432 xen_domain_type = XEN_PV_DOMAIN; 1514 xen_domain_type = XEN_PV_DOMAIN;
1433 1515
1516 xen_setup_features();
1517 xen_pvh_early_guest_init();
1434 xen_setup_machphys_mapping(); 1518 xen_setup_machphys_mapping();
1435 1519
1436 /* Install Xen paravirt ops */ 1520 /* Install Xen paravirt ops */
1437 pv_info = xen_info; 1521 pv_info = xen_info;
1438 pv_init_ops = xen_init_ops; 1522 pv_init_ops = xen_init_ops;
1439 pv_cpu_ops = xen_cpu_ops;
1440 pv_apic_ops = xen_apic_ops; 1523 pv_apic_ops = xen_apic_ops;
1524 if (!xen_pvh_domain())
1525 pv_cpu_ops = xen_cpu_ops;
1441 1526
1442 x86_init.resources.memory_setup = xen_memory_setup; 1527 x86_init.resources.memory_setup = xen_memory_setup;
1443 x86_init.oem.arch_setup = xen_arch_setup; 1528 x86_init.oem.arch_setup = xen_arch_setup;
@@ -1469,17 +1554,14 @@ asmlinkage void __init xen_start_kernel(void)
1469 /* Work out if we support NX */ 1554 /* Work out if we support NX */
1470 x86_configure_nx(); 1555 x86_configure_nx();
1471 1556
1472 xen_setup_features();
1473
1474 /* Get mfn list */ 1557 /* Get mfn list */
1475 if (!xen_feature(XENFEAT_auto_translated_physmap)) 1558 xen_build_dynamic_phys_to_machine();
1476 xen_build_dynamic_phys_to_machine();
1477 1559
1478 /* 1560 /*
1479 * Set up kernel GDT and segment registers, mainly so that 1561 * Set up kernel GDT and segment registers, mainly so that
1480 * -fstack-protector code can be executed. 1562 * -fstack-protector code can be executed.
1481 */ 1563 */
1482 xen_setup_stackprotector(); 1564 xen_setup_gdt(0);
1483 1565
1484 xen_init_irq_ops(); 1566 xen_init_irq_ops();
1485 xen_init_cpuid_mask(); 1567 xen_init_cpuid_mask();
@@ -1548,14 +1630,18 @@ asmlinkage void __init xen_start_kernel(void)
1548 /* set the limit of our address space */ 1630 /* set the limit of our address space */
1549 xen_reserve_top(); 1631 xen_reserve_top();
1550 1632
1551 /* We used to do this in xen_arch_setup, but that is too late on AMD 1633 /* PVH: runs at default kernel iopl of 0 */
1552 * were early_cpu_init (run before ->arch_setup()) calls early_amd_init 1634 if (!xen_pvh_domain()) {
1553 * which pokes 0xcf8 port. 1635 /*
1554 */ 1636 * We used to do this in xen_arch_setup, but that is too late
1555 set_iopl.iopl = 1; 1637 * on AMD were early_cpu_init (run before ->arch_setup()) calls
1556 rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); 1638 * early_amd_init which pokes 0xcf8 port.
1557 if (rc != 0) 1639 */
1558 xen_raw_printk("physdev_op failed %d\n", rc); 1640 set_iopl.iopl = 1;
1641 rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
1642 if (rc != 0)
1643 xen_raw_printk("physdev_op failed %d\n", rc);
1644 }
1559 1645
1560#ifdef CONFIG_X86_32 1646#ifdef CONFIG_X86_32
1561 /* set up basic CPUID stuff */ 1647 /* set up basic CPUID stuff */
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index 3a5f55d51907..103c93f874b2 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -125,3 +125,66 @@ void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
125 apply_to_page_range(&init_mm, (unsigned long)shared, 125 apply_to_page_range(&init_mm, (unsigned long)shared,
126 PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); 126 PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL);
127} 127}
128#ifdef CONFIG_XEN_PVH
129#include <xen/balloon.h>
130#include <xen/events.h>
131#include <xen/xen.h>
132#include <linux/slab.h>
133static int __init xlated_setup_gnttab_pages(void)
134{
135 struct page **pages;
136 xen_pfn_t *pfns;
137 int rc;
138 unsigned int i;
139 unsigned long nr_grant_frames = gnttab_max_grant_frames();
140
141 BUG_ON(nr_grant_frames == 0);
142 pages = kcalloc(nr_grant_frames, sizeof(pages[0]), GFP_KERNEL);
143 if (!pages)
144 return -ENOMEM;
145
146 pfns = kcalloc(nr_grant_frames, sizeof(pfns[0]), GFP_KERNEL);
147 if (!pfns) {
148 kfree(pages);
149 return -ENOMEM;
150 }
151 rc = alloc_xenballooned_pages(nr_grant_frames, pages, 0 /* lowmem */);
152 if (rc) {
153 pr_warn("%s Couldn't balloon alloc %ld pfns rc:%d\n", __func__,
154 nr_grant_frames, rc);
155 kfree(pages);
156 kfree(pfns);
157 return rc;
158 }
159 for (i = 0; i < nr_grant_frames; i++)
160 pfns[i] = page_to_pfn(pages[i]);
161
162 rc = arch_gnttab_map_shared(pfns, nr_grant_frames, nr_grant_frames,
163 &xen_auto_xlat_grant_frames.vaddr);
164
165 kfree(pages);
166 if (rc) {
167 pr_warn("%s Couldn't map %ld pfns rc:%d\n", __func__,
168 nr_grant_frames, rc);
169 free_xenballooned_pages(nr_grant_frames, pages);
170 kfree(pfns);
171 return rc;
172 }
173
174 xen_auto_xlat_grant_frames.pfn = pfns;
175 xen_auto_xlat_grant_frames.count = nr_grant_frames;
176
177 return 0;
178}
179
180static int __init xen_pvh_gnttab_setup(void)
181{
182 if (!xen_pvh_domain())
183 return -ENODEV;
184
185 return xlated_setup_gnttab_pages();
186}
187/* Call it _before_ __gnttab_init as we need to initialize the
188 * xen_auto_xlat_grant_frames first. */
189core_initcall(xen_pvh_gnttab_setup);
190#endif
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 0da7f863056f..76ca326105f7 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -5,6 +5,7 @@
5#include <xen/interface/xen.h> 5#include <xen/interface/xen.h>
6#include <xen/interface/sched.h> 6#include <xen/interface/sched.h>
7#include <xen/interface/vcpu.h> 7#include <xen/interface/vcpu.h>
8#include <xen/features.h>
8#include <xen/events.h> 9#include <xen/events.h>
9 10
10#include <asm/xen/hypercall.h> 11#include <asm/xen/hypercall.h>
@@ -128,6 +129,8 @@ static const struct pv_irq_ops xen_irq_ops __initconst = {
128 129
129void __init xen_init_irq_ops(void) 130void __init xen_init_irq_ops(void)
130{ 131{
131 pv_irq_ops = xen_irq_ops; 132 /* For PVH we use default pv_irq_ops settings. */
133 if (!xen_feature(XENFEAT_hvm_callback_vector))
134 pv_irq_ops = xen_irq_ops;
132 x86_init.irqs.intr_init = xen_init_IRQ; 135 x86_init.irqs.intr_init = xen_init_IRQ;
133} 136}
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index ce563be09cc1..c1d406f35523 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1198,44 +1198,40 @@ static void __init xen_cleanhighmap(unsigned long vaddr,
1198 * instead of somewhere later and be confusing. */ 1198 * instead of somewhere later and be confusing. */
1199 xen_mc_flush(); 1199 xen_mc_flush();
1200} 1200}
1201#endif 1201static void __init xen_pagetable_p2m_copy(void)
1202static void __init xen_pagetable_init(void)
1203{ 1202{
1204#ifdef CONFIG_X86_64
1205 unsigned long size; 1203 unsigned long size;
1206 unsigned long addr; 1204 unsigned long addr;
1207#endif 1205 unsigned long new_mfn_list;
1208 paging_init(); 1206
1209 xen_setup_shared_info(); 1207 if (xen_feature(XENFEAT_auto_translated_physmap))
1210#ifdef CONFIG_X86_64 1208 return;
1211 if (!xen_feature(XENFEAT_auto_translated_physmap)) { 1209
1212 unsigned long new_mfn_list; 1210 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
1213 1211
1214 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); 1212 new_mfn_list = xen_revector_p2m_tree();
1215 1213 /* No memory or already called. */
1216 /* On 32-bit, we get zero so this never gets executed. */ 1214 if (!new_mfn_list || new_mfn_list == xen_start_info->mfn_list)
1217 new_mfn_list = xen_revector_p2m_tree(); 1215 return;
1218 if (new_mfn_list && new_mfn_list != xen_start_info->mfn_list) { 1216
1219 /* using __ka address and sticking INVALID_P2M_ENTRY! */ 1217 /* using __ka address and sticking INVALID_P2M_ENTRY! */
1220 memset((void *)xen_start_info->mfn_list, 0xff, size); 1218 memset((void *)xen_start_info->mfn_list, 0xff, size);
1221 1219
1222 /* We should be in __ka space. */ 1220 /* We should be in __ka space. */
1223 BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map); 1221 BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map);
1224 addr = xen_start_info->mfn_list; 1222 addr = xen_start_info->mfn_list;
1225 /* We roundup to the PMD, which means that if anybody at this stage is 1223 /* We roundup to the PMD, which means that if anybody at this stage is
1226 * using the __ka address of xen_start_info or xen_start_info->shared_info 1224 * using the __ka address of xen_start_info or xen_start_info->shared_info
1227 * they are in going to crash. Fortunatly we have already revectored 1225 * they are in going to crash. Fortunatly we have already revectored
1228 * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */ 1226 * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */
1229 size = roundup(size, PMD_SIZE); 1227 size = roundup(size, PMD_SIZE);
1230 xen_cleanhighmap(addr, addr + size); 1228 xen_cleanhighmap(addr, addr + size);
1231 1229
1232 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); 1230 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
1233 memblock_free(__pa(xen_start_info->mfn_list), size); 1231 memblock_free(__pa(xen_start_info->mfn_list), size);
1234 /* And revector! Bye bye old array */ 1232 /* And revector! Bye bye old array */
1235 xen_start_info->mfn_list = new_mfn_list; 1233 xen_start_info->mfn_list = new_mfn_list;
1236 } else 1234
1237 goto skip;
1238 }
1239 /* At this stage, cleanup_highmap has already cleaned __ka space 1235 /* At this stage, cleanup_highmap has already cleaned __ka space
1240 * from _brk_limit way up to the max_pfn_mapped (which is the end of 1236 * from _brk_limit way up to the max_pfn_mapped (which is the end of
1241 * the ramdisk). We continue on, erasing PMD entries that point to page 1237 * the ramdisk). We continue on, erasing PMD entries that point to page
@@ -1255,7 +1251,15 @@ static void __init xen_pagetable_init(void)
1255 * anything at this stage. */ 1251 * anything at this stage. */
1256 xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1); 1252 xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
1257#endif 1253#endif
1258skip: 1254}
1255#endif
1256
1257static void __init xen_pagetable_init(void)
1258{
1259 paging_init();
1260 xen_setup_shared_info();
1261#ifdef CONFIG_X86_64
1262 xen_pagetable_p2m_copy();
1259#endif 1263#endif
1260 xen_post_allocator_init(); 1264 xen_post_allocator_init();
1261} 1265}
@@ -1753,6 +1757,10 @@ static void set_page_prot_flags(void *addr, pgprot_t prot, unsigned long flags)
1753 unsigned long pfn = __pa(addr) >> PAGE_SHIFT; 1757 unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
1754 pte_t pte = pfn_pte(pfn, prot); 1758 pte_t pte = pfn_pte(pfn, prot);
1755 1759
1760 /* For PVH no need to set R/O or R/W to pin them or unpin them. */
1761 if (xen_feature(XENFEAT_auto_translated_physmap))
1762 return;
1763
1756 if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags)) 1764 if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags))
1757 BUG(); 1765 BUG();
1758} 1766}
@@ -1863,6 +1871,7 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
1863 * but that's enough to get __va working. We need to fill in the rest 1871 * but that's enough to get __va working. We need to fill in the rest
1864 * of the physical mapping once some sort of allocator has been set 1872 * of the physical mapping once some sort of allocator has been set
1865 * up. 1873 * up.
1874 * NOTE: for PVH, the page tables are native.
1866 */ 1875 */
1867void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) 1876void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
1868{ 1877{
@@ -1884,17 +1893,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
1884 /* Zap identity mapping */ 1893 /* Zap identity mapping */
1885 init_level4_pgt[0] = __pgd(0); 1894 init_level4_pgt[0] = __pgd(0);
1886 1895
1887 /* Pre-constructed entries are in pfn, so convert to mfn */ 1896 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1888 /* L4[272] -> level3_ident_pgt 1897 /* Pre-constructed entries are in pfn, so convert to mfn */
1889 * L4[511] -> level3_kernel_pgt */ 1898 /* L4[272] -> level3_ident_pgt
1890 convert_pfn_mfn(init_level4_pgt); 1899 * L4[511] -> level3_kernel_pgt */
1891 1900 convert_pfn_mfn(init_level4_pgt);
1892 /* L3_i[0] -> level2_ident_pgt */ 1901
1893 convert_pfn_mfn(level3_ident_pgt); 1902 /* L3_i[0] -> level2_ident_pgt */
1894 /* L3_k[510] -> level2_kernel_pgt 1903 convert_pfn_mfn(level3_ident_pgt);
1895 * L3_i[511] -> level2_fixmap_pgt */ 1904 /* L3_k[510] -> level2_kernel_pgt
1896 convert_pfn_mfn(level3_kernel_pgt); 1905 * L3_i[511] -> level2_fixmap_pgt */
1897 1906 convert_pfn_mfn(level3_kernel_pgt);
1907 }
1898 /* We get [511][511] and have Xen's version of level2_kernel_pgt */ 1908 /* We get [511][511] and have Xen's version of level2_kernel_pgt */
1899 l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); 1909 l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
1900 l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); 1910 l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
@@ -1918,31 +1928,33 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
1918 copy_page(level2_fixmap_pgt, l2); 1928 copy_page(level2_fixmap_pgt, l2);
1919 /* Note that we don't do anything with level1_fixmap_pgt which 1929 /* Note that we don't do anything with level1_fixmap_pgt which
1920 * we don't need. */ 1930 * we don't need. */
1931 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1932 /* Make pagetable pieces RO */
1933 set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
1934 set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
1935 set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
1936 set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
1937 set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
1938 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
1939 set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
1940
1941 /* Pin down new L4 */
1942 pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
1943 PFN_DOWN(__pa_symbol(init_level4_pgt)));
1944
1945 /* Unpin Xen-provided one */
1946 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
1921 1947
1922 /* Make pagetable pieces RO */ 1948 /*
1923 set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); 1949 * At this stage there can be no user pgd, and no page
1924 set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); 1950 * structure to attach it to, so make sure we just set kernel
1925 set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); 1951 * pgd.
1926 set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); 1952 */
1927 set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); 1953 xen_mc_batch();
1928 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); 1954 __xen_write_cr3(true, __pa(init_level4_pgt));
1929 set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); 1955 xen_mc_issue(PARAVIRT_LAZY_CPU);
1930 1956 } else
1931 /* Pin down new L4 */ 1957 native_write_cr3(__pa(init_level4_pgt));
1932 pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
1933 PFN_DOWN(__pa_symbol(init_level4_pgt)));
1934
1935 /* Unpin Xen-provided one */
1936 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
1937
1938 /*
1939 * At this stage there can be no user pgd, and no page
1940 * structure to attach it to, so make sure we just set kernel
1941 * pgd.
1942 */
1943 xen_mc_batch();
1944 __xen_write_cr3(true, __pa(init_level4_pgt));
1945 xen_mc_issue(PARAVIRT_LAZY_CPU);
1946 1958
1947 /* We can't that easily rip out L3 and L2, as the Xen pagetables are 1959 /* We can't that easily rip out L3 and L2, as the Xen pagetables are
1948 * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for 1960 * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for
@@ -2103,6 +2115,9 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
2103 2115
2104static void __init xen_post_allocator_init(void) 2116static void __init xen_post_allocator_init(void)
2105{ 2117{
2118 if (xen_feature(XENFEAT_auto_translated_physmap))
2119 return;
2120
2106 pv_mmu_ops.set_pte = xen_set_pte; 2121 pv_mmu_ops.set_pte = xen_set_pte;
2107 pv_mmu_ops.set_pmd = xen_set_pmd; 2122 pv_mmu_ops.set_pmd = xen_set_pmd;
2108 pv_mmu_ops.set_pud = xen_set_pud; 2123 pv_mmu_ops.set_pud = xen_set_pud;
@@ -2207,6 +2222,15 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
2207void __init xen_init_mmu_ops(void) 2222void __init xen_init_mmu_ops(void)
2208{ 2223{
2209 x86_init.paging.pagetable_init = xen_pagetable_init; 2224 x86_init.paging.pagetable_init = xen_pagetable_init;
2225
2226 /* Optimization - we can use the HVM one but it has no idea which
2227 * VCPUs are descheduled - which means that it will needlessly IPI
2228 * them. Xen knows so let it do the job.
2229 */
2230 if (xen_feature(XENFEAT_auto_translated_physmap)) {
2231 pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others;
2232 return;
2233 }
2210 pv_mmu_ops = xen_mmu_ops; 2234 pv_mmu_ops = xen_mmu_ops;
2211 2235
2212 memset(dummy_mapping, 0xff, PAGE_SIZE); 2236 memset(dummy_mapping, 0xff, PAGE_SIZE);
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 2ae8699e8767..696c694986d0 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -280,6 +280,9 @@ void __ref xen_build_mfn_list_list(void)
280{ 280{
281 unsigned long pfn; 281 unsigned long pfn;
282 282
283 if (xen_feature(XENFEAT_auto_translated_physmap))
284 return;
285
283 /* Pre-initialize p2m_top_mfn to be completely missing */ 286 /* Pre-initialize p2m_top_mfn to be completely missing */
284 if (p2m_top_mfn == NULL) { 287 if (p2m_top_mfn == NULL) {
285 p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); 288 p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
@@ -336,6 +339,9 @@ void __ref xen_build_mfn_list_list(void)
336 339
337void xen_setup_mfn_list_list(void) 340void xen_setup_mfn_list_list(void)
338{ 341{
342 if (xen_feature(XENFEAT_auto_translated_physmap))
343 return;
344
339 BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); 345 BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
340 346
341 HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = 347 HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
@@ -346,10 +352,15 @@ void xen_setup_mfn_list_list(void)
346/* Set up p2m_top to point to the domain-builder provided p2m pages */ 352/* Set up p2m_top to point to the domain-builder provided p2m pages */
347void __init xen_build_dynamic_phys_to_machine(void) 353void __init xen_build_dynamic_phys_to_machine(void)
348{ 354{
349 unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; 355 unsigned long *mfn_list;
350 unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); 356 unsigned long max_pfn;
351 unsigned long pfn; 357 unsigned long pfn;
352 358
359 if (xen_feature(XENFEAT_auto_translated_physmap))
360 return;
361
362 mfn_list = (unsigned long *)xen_start_info->mfn_list;
363 max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
353 xen_max_p2m_pfn = max_pfn; 364 xen_max_p2m_pfn = max_pfn;
354 365
355 p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); 366 p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
index 0a7852483ffe..a8261716d58d 100644
--- a/arch/x86/xen/platform-pci-unplug.c
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -30,10 +30,9 @@
30#define XEN_PLATFORM_ERR_PROTOCOL -2 30#define XEN_PLATFORM_ERR_PROTOCOL -2
31#define XEN_PLATFORM_ERR_BLACKLIST -3 31#define XEN_PLATFORM_ERR_BLACKLIST -3
32 32
33/* store the value of xen_emul_unplug after the unplug is done */
34int xen_platform_pci_unplug;
35EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
36#ifdef CONFIG_XEN_PVHVM 33#ifdef CONFIG_XEN_PVHVM
34/* store the value of xen_emul_unplug after the unplug is done */
35static int xen_platform_pci_unplug;
37static int xen_emul_unplug; 36static int xen_emul_unplug;
38 37
39static int check_platform_magic(void) 38static int check_platform_magic(void)
@@ -69,6 +68,80 @@ static int check_platform_magic(void)
69 return 0; 68 return 0;
70} 69}
71 70
71bool xen_has_pv_devices()
72{
73 if (!xen_domain())
74 return false;
75
76 /* PV domains always have them. */
77 if (xen_pv_domain())
78 return true;
79
80 /* And user has xen_platform_pci=0 set in guest config as
81 * driver did not modify the value. */
82 if (xen_platform_pci_unplug == 0)
83 return false;
84
85 if (xen_platform_pci_unplug & XEN_UNPLUG_NEVER)
86 return false;
87
88 if (xen_platform_pci_unplug & XEN_UNPLUG_ALL)
89 return true;
90
91 /* This is an odd one - we are going to run legacy
92 * and PV drivers at the same time. */
93 if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY)
94 return true;
95
96 /* And the caller has to follow with xen_pv_{disk,nic}_devices
97 * to be certain which driver can load. */
98 return false;
99}
100EXPORT_SYMBOL_GPL(xen_has_pv_devices);
101
102static bool __xen_has_pv_device(int state)
103{
104 /* HVM domains might or might not */
105 if (xen_hvm_domain() && (xen_platform_pci_unplug & state))
106 return true;
107
108 return xen_has_pv_devices();
109}
110
111bool xen_has_pv_nic_devices(void)
112{
113 return __xen_has_pv_device(XEN_UNPLUG_ALL_NICS | XEN_UNPLUG_ALL);
114}
115EXPORT_SYMBOL_GPL(xen_has_pv_nic_devices);
116
117bool xen_has_pv_disk_devices(void)
118{
119 return __xen_has_pv_device(XEN_UNPLUG_ALL_IDE_DISKS |
120 XEN_UNPLUG_AUX_IDE_DISKS | XEN_UNPLUG_ALL);
121}
122EXPORT_SYMBOL_GPL(xen_has_pv_disk_devices);
123
124/*
125 * This one is odd - it determines whether you want to run PV _and_
126 * legacy (IDE) drivers together. This combination is only possible
127 * under HVM.
128 */
129bool xen_has_pv_and_legacy_disk_devices(void)
130{
131 if (!xen_domain())
132 return false;
133
134 /* N.B. This is only ever used in HVM mode */
135 if (xen_pv_domain())
136 return false;
137
138 if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY)
139 return true;
140
141 return false;
142}
143EXPORT_SYMBOL_GPL(xen_has_pv_and_legacy_disk_devices);
144
72void xen_unplug_emulated_devices(void) 145void xen_unplug_emulated_devices(void)
73{ 146{
74 int r; 147 int r;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 68c054f59de6..dd5f905e33d5 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -27,6 +27,7 @@
27#include <xen/interface/memory.h> 27#include <xen/interface/memory.h>
28#include <xen/interface/physdev.h> 28#include <xen/interface/physdev.h>
29#include <xen/features.h> 29#include <xen/features.h>
30#include "mmu.h"
30#include "xen-ops.h" 31#include "xen-ops.h"
31#include "vdso.h" 32#include "vdso.h"
32 33
@@ -81,6 +82,9 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
81 82
82 memblock_reserve(start, size); 83 memblock_reserve(start, size);
83 84
85 if (xen_feature(XENFEAT_auto_translated_physmap))
86 return;
87
84 xen_max_p2m_pfn = PFN_DOWN(start + size); 88 xen_max_p2m_pfn = PFN_DOWN(start + size);
85 for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) { 89 for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
86 unsigned long mfn = pfn_to_mfn(pfn); 90 unsigned long mfn = pfn_to_mfn(pfn);
@@ -103,6 +107,7 @@ static unsigned long __init xen_do_chunk(unsigned long start,
103 .domid = DOMID_SELF 107 .domid = DOMID_SELF
104 }; 108 };
105 unsigned long len = 0; 109 unsigned long len = 0;
110 int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap);
106 unsigned long pfn; 111 unsigned long pfn;
107 int ret; 112 int ret;
108 113
@@ -116,7 +121,7 @@ static unsigned long __init xen_do_chunk(unsigned long start,
116 continue; 121 continue;
117 frame = mfn; 122 frame = mfn;
118 } else { 123 } else {
119 if (mfn != INVALID_P2M_ENTRY) 124 if (!xlated_phys && mfn != INVALID_P2M_ENTRY)
120 continue; 125 continue;
121 frame = pfn; 126 frame = pfn;
122 } 127 }
@@ -154,6 +159,13 @@ static unsigned long __init xen_do_chunk(unsigned long start,
154static unsigned long __init xen_release_chunk(unsigned long start, 159static unsigned long __init xen_release_chunk(unsigned long start,
155 unsigned long end) 160 unsigned long end)
156{ 161{
162 /*
163 * Xen already ballooned out the E820 non RAM regions for us
164 * and set them up properly in EPT.
165 */
166 if (xen_feature(XENFEAT_auto_translated_physmap))
167 return end - start;
168
157 return xen_do_chunk(start, end, true); 169 return xen_do_chunk(start, end, true);
158} 170}
159 171
@@ -222,7 +234,13 @@ static void __init xen_set_identity_and_release_chunk(
222 * (except for the ISA region which must be 1:1 mapped) to 234 * (except for the ISA region which must be 1:1 mapped) to
223 * release the refcounts (in Xen) on the original frames. 235 * release the refcounts (in Xen) on the original frames.
224 */ 236 */
225 for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) { 237
238 /*
239 * PVH E820 matches the hypervisor's P2M which means we need to
240 * account for the proper values of *release and *identity.
241 */
242 for (pfn = start_pfn; !xen_feature(XENFEAT_auto_translated_physmap) &&
243 pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) {
226 pte_t pte = __pte_ma(0); 244 pte_t pte = __pte_ma(0);
227 245
228 if (pfn < PFN_UP(ISA_END_ADDRESS)) 246 if (pfn < PFN_UP(ISA_END_ADDRESS))
@@ -563,16 +581,13 @@ void xen_enable_nmi(void)
563 BUG(); 581 BUG();
564#endif 582#endif
565} 583}
566void __init xen_arch_setup(void) 584void __init xen_pvmmu_arch_setup(void)
567{ 585{
568 xen_panic_handler_init();
569
570 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); 586 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
571 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); 587 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
572 588
573 if (!xen_feature(XENFEAT_auto_translated_physmap)) 589 HYPERVISOR_vm_assist(VMASST_CMD_enable,
574 HYPERVISOR_vm_assist(VMASST_CMD_enable, 590 VMASST_TYPE_pae_extended_cr3);
575 VMASST_TYPE_pae_extended_cr3);
576 591
577 if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) || 592 if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
578 register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) 593 register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
@@ -581,6 +596,15 @@ void __init xen_arch_setup(void)
581 xen_enable_sysenter(); 596 xen_enable_sysenter();
582 xen_enable_syscall(); 597 xen_enable_syscall();
583 xen_enable_nmi(); 598 xen_enable_nmi();
599}
600
601/* This function is not called for HVM domains */
602void __init xen_arch_setup(void)
603{
604 xen_panic_handler_init();
605 if (!xen_feature(XENFEAT_auto_translated_physmap))
606 xen_pvmmu_arch_setup();
607
584#ifdef CONFIG_ACPI 608#ifdef CONFIG_ACPI
585 if (!(xen_start_info->flags & SIF_INITDOMAIN)) { 609 if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
586 printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); 610 printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index c36b325abd83..a18eadd8bb40 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -73,9 +73,11 @@ static void cpu_bringup(void)
73 touch_softlockup_watchdog(); 73 touch_softlockup_watchdog();
74 preempt_disable(); 74 preempt_disable();
75 75
76 xen_enable_sysenter(); 76 /* PVH runs in ring 0 and allows us to do native syscalls. Yay! */
77 xen_enable_syscall(); 77 if (!xen_feature(XENFEAT_supervisor_mode_kernel)) {
78 78 xen_enable_sysenter();
79 xen_enable_syscall();
80 }
79 cpu = smp_processor_id(); 81 cpu = smp_processor_id();
80 smp_store_cpu_info(cpu); 82 smp_store_cpu_info(cpu);
81 cpu_data(cpu).x86_max_cores = 1; 83 cpu_data(cpu).x86_max_cores = 1;
@@ -97,8 +99,14 @@ static void cpu_bringup(void)
97 wmb(); /* make sure everything is out */ 99 wmb(); /* make sure everything is out */
98} 100}
99 101
100static void cpu_bringup_and_idle(void) 102/* Note: cpu parameter is only relevant for PVH */
103static void cpu_bringup_and_idle(int cpu)
101{ 104{
105#ifdef CONFIG_X86_64
106 if (xen_feature(XENFEAT_auto_translated_physmap) &&
107 xen_feature(XENFEAT_supervisor_mode_kernel))
108 xen_pvh_secondary_vcpu_init(cpu);
109#endif
102 cpu_bringup(); 110 cpu_bringup();
103 cpu_startup_entry(CPUHP_ONLINE); 111 cpu_startup_entry(CPUHP_ONLINE);
104} 112}
@@ -274,9 +282,10 @@ static void __init xen_smp_prepare_boot_cpu(void)
274 native_smp_prepare_boot_cpu(); 282 native_smp_prepare_boot_cpu();
275 283
276 if (xen_pv_domain()) { 284 if (xen_pv_domain()) {
277 /* We've switched to the "real" per-cpu gdt, so make sure the 285 if (!xen_feature(XENFEAT_writable_page_tables))
278 old memory can be recycled */ 286 /* We've switched to the "real" per-cpu gdt, so make
279 make_lowmem_page_readwrite(xen_initial_gdt); 287 * sure the old memory can be recycled. */
288 make_lowmem_page_readwrite(xen_initial_gdt);
280 289
281#ifdef CONFIG_X86_32 290#ifdef CONFIG_X86_32
282 /* 291 /*
@@ -360,22 +369,21 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
360 369
361 gdt = get_cpu_gdt_table(cpu); 370 gdt = get_cpu_gdt_table(cpu);
362 371
363 ctxt->flags = VGCF_IN_KERNEL;
364 ctxt->user_regs.ss = __KERNEL_DS;
365#ifdef CONFIG_X86_32 372#ifdef CONFIG_X86_32
373 /* Note: PVH is not yet supported on x86_32. */
366 ctxt->user_regs.fs = __KERNEL_PERCPU; 374 ctxt->user_regs.fs = __KERNEL_PERCPU;
367 ctxt->user_regs.gs = __KERNEL_STACK_CANARY; 375 ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
368#else
369 ctxt->gs_base_kernel = per_cpu_offset(cpu);
370#endif 376#endif
371 ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; 377 ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
372 378
373 memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); 379 memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
374 380
375 { 381 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
382 ctxt->flags = VGCF_IN_KERNEL;
376 ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ 383 ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
377 ctxt->user_regs.ds = __USER_DS; 384 ctxt->user_regs.ds = __USER_DS;
378 ctxt->user_regs.es = __USER_DS; 385 ctxt->user_regs.es = __USER_DS;
386 ctxt->user_regs.ss = __KERNEL_DS;
379 387
380 xen_copy_trap_info(ctxt->trap_ctxt); 388 xen_copy_trap_info(ctxt->trap_ctxt);
381 389
@@ -396,18 +404,27 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
396#ifdef CONFIG_X86_32 404#ifdef CONFIG_X86_32
397 ctxt->event_callback_cs = __KERNEL_CS; 405 ctxt->event_callback_cs = __KERNEL_CS;
398 ctxt->failsafe_callback_cs = __KERNEL_CS; 406 ctxt->failsafe_callback_cs = __KERNEL_CS;
407#else
408 ctxt->gs_base_kernel = per_cpu_offset(cpu);
399#endif 409#endif
400 ctxt->event_callback_eip = 410 ctxt->event_callback_eip =
401 (unsigned long)xen_hypervisor_callback; 411 (unsigned long)xen_hypervisor_callback;
402 ctxt->failsafe_callback_eip = 412 ctxt->failsafe_callback_eip =
403 (unsigned long)xen_failsafe_callback; 413 (unsigned long)xen_failsafe_callback;
414 ctxt->user_regs.cs = __KERNEL_CS;
415 per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
416#ifdef CONFIG_X86_32
404 } 417 }
405 ctxt->user_regs.cs = __KERNEL_CS; 418#else
419 } else
420 /* N.B. The user_regs.eip (cpu_bringup_and_idle) is called with
421 * %rdi having the cpu number - which means are passing in
422 * as the first parameter the cpu. Subtle!
423 */
424 ctxt->user_regs.rdi = cpu;
425#endif
406 ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); 426 ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
407
408 per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
409 ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); 427 ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
410
411 if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)) 428 if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
412 BUG(); 429 BUG();
413 430
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 12a1ca707b94..7b78f88c1707 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -446,6 +446,7 @@ void xen_setup_timer(int cpu)
446 IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER| 446 IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER|
447 IRQF_FORCE_RESUME, 447 IRQF_FORCE_RESUME,
448 name, NULL); 448 name, NULL);
449 (void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX);
449 450
450 memcpy(evt, xen_clockevent, sizeof(*evt)); 451 memcpy(evt, xen_clockevent, sizeof(*evt));
451 452
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 7faed5869e5b..485b69585540 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -11,8 +11,28 @@
11#include <asm/page_types.h> 11#include <asm/page_types.h>
12 12
13#include <xen/interface/elfnote.h> 13#include <xen/interface/elfnote.h>
14#include <xen/interface/features.h>
14#include <asm/xen/interface.h> 15#include <asm/xen/interface.h>
15 16
17#ifdef CONFIG_XEN_PVH
18#define PVH_FEATURES_STR "|writable_descriptor_tables|auto_translated_physmap|supervisor_mode_kernel"
19/* Note the lack of 'hvm_callback_vector'. Older hypervisor will
20 * balk at this being part of XEN_ELFNOTE_FEATURES, so we put it in
21 * XEN_ELFNOTE_SUPPORTED_FEATURES which older hypervisors will ignore.
22 */
23#define PVH_FEATURES ((1 << XENFEAT_writable_page_tables) | \
24 (1 << XENFEAT_auto_translated_physmap) | \
25 (1 << XENFEAT_supervisor_mode_kernel) | \
26 (1 << XENFEAT_hvm_callback_vector))
27/* The XENFEAT_writable_page_tables is not stricly neccessary as we set that
28 * up regardless whether this CONFIG option is enabled or not, but it
29 * clarifies what the right flags need to be.
30 */
31#else
32#define PVH_FEATURES_STR ""
33#define PVH_FEATURES (0)
34#endif
35
16 __INIT 36 __INIT
17ENTRY(startup_xen) 37ENTRY(startup_xen)
18 cld 38 cld
@@ -95,7 +115,10 @@ NEXT_HYPERCALL(arch_6)
95#endif 115#endif
96 ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen) 116 ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen)
97 ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page) 117 ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page)
98 ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") 118 ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .ascii "!writable_page_tables|pae_pgdir_above_4gb"; .asciz PVH_FEATURES_STR)
119 ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES, .long (PVH_FEATURES) |
120 (1 << XENFEAT_writable_page_tables) |
121 (1 << XENFEAT_dom0))
99 ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") 122 ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes")
100 ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") 123 ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
101 ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, 124 ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 95f8c6142328..1cb6f4c37300 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -123,4 +123,5 @@ __visible void xen_adjust_exception_frame(void);
123 123
124extern int xen_panic_handler_init(void); 124extern int xen_panic_handler_init(void);
125 125
126void xen_pvh_secondary_vcpu_init(int cpu);
126#endif /* XEN_OPS_H */ 127#endif /* XEN_OPS_H */