diff options
Diffstat (limited to 'arch/x86/xen')
-rw-r--r-- | arch/x86/xen/Kconfig | 4 | ||||
-rw-r--r-- | arch/x86/xen/enlighten.c | 126 | ||||
-rw-r--r-- | arch/x86/xen/grant-table.c | 63 | ||||
-rw-r--r-- | arch/x86/xen/irq.c | 5 | ||||
-rw-r--r-- | arch/x86/xen/mmu.c | 166 | ||||
-rw-r--r-- | arch/x86/xen/p2m.c | 15 | ||||
-rw-r--r-- | arch/x86/xen/platform-pci-unplug.c | 79 | ||||
-rw-r--r-- | arch/x86/xen/setup.c | 40 | ||||
-rw-r--r-- | arch/x86/xen/smp.c | 49 | ||||
-rw-r--r-- | arch/x86/xen/time.c | 1 | ||||
-rw-r--r-- | arch/x86/xen/xen-head.S | 25 | ||||
-rw-r--r-- | arch/x86/xen/xen-ops.h | 1 |
12 files changed, 452 insertions, 122 deletions
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 1a3c76505649..01b90261fa38 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig | |||
@@ -51,3 +51,7 @@ config XEN_DEBUG_FS | |||
51 | Enable statistics output and various tuning options in debugfs. | 51 | Enable statistics output and various tuning options in debugfs. |
52 | Enabling this option may incur a significant performance overhead. | 52 | Enabling this option may incur a significant performance overhead. |
53 | 53 | ||
54 | config XEN_PVH | ||
55 | bool "Support for running as a PVH guest" | ||
56 | depends on X86_64 && XEN && XEN_PVHVM | ||
57 | def_bool n | ||
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index fa6ade76ef3f..a4d7b647867f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -262,8 +262,9 @@ static void __init xen_banner(void) | |||
262 | struct xen_extraversion extra; | 262 | struct xen_extraversion extra; |
263 | HYPERVISOR_xen_version(XENVER_extraversion, &extra); | 263 | HYPERVISOR_xen_version(XENVER_extraversion, &extra); |
264 | 264 | ||
265 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", | 265 | pr_info("Booting paravirtualized kernel %son %s\n", |
266 | pv_info.name); | 266 | xen_feature(XENFEAT_auto_translated_physmap) ? |
267 | "with PVH extensions " : "", pv_info.name); | ||
267 | printk(KERN_INFO "Xen version: %d.%d%s%s\n", | 268 | printk(KERN_INFO "Xen version: %d.%d%s%s\n", |
268 | version >> 16, version & 0xffff, extra.extraversion, | 269 | version >> 16, version & 0xffff, extra.extraversion, |
269 | xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); | 270 | xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); |
@@ -433,7 +434,7 @@ static void __init xen_init_cpuid_mask(void) | |||
433 | 434 | ||
434 | ax = 1; | 435 | ax = 1; |
435 | cx = 0; | 436 | cx = 0; |
436 | xen_cpuid(&ax, &bx, &cx, &dx); | 437 | cpuid(1, &ax, &bx, &cx, &dx); |
437 | 438 | ||
438 | xsave_mask = | 439 | xsave_mask = |
439 | (1 << (X86_FEATURE_XSAVE % 32)) | | 440 | (1 << (X86_FEATURE_XSAVE % 32)) | |
@@ -1142,8 +1143,9 @@ void xen_setup_vcpu_info_placement(void) | |||
1142 | xen_vcpu_setup(cpu); | 1143 | xen_vcpu_setup(cpu); |
1143 | 1144 | ||
1144 | /* xen_vcpu_setup managed to place the vcpu_info within the | 1145 | /* xen_vcpu_setup managed to place the vcpu_info within the |
1145 | percpu area for all cpus, so make use of it */ | 1146 | * percpu area for all cpus, so make use of it. Note that for |
1146 | if (have_vcpu_info_placement) { | 1147 | * PVH we want to use native IRQ mechanism. */ |
1148 | if (have_vcpu_info_placement && !xen_pvh_domain()) { | ||
1147 | pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); | 1149 | pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); |
1148 | pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); | 1150 | pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); |
1149 | pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); | 1151 | pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); |
@@ -1407,9 +1409,49 @@ static void __init xen_boot_params_init_edd(void) | |||
1407 | * Set up the GDT and segment registers for -fstack-protector. Until | 1409 | * Set up the GDT and segment registers for -fstack-protector. Until |
1408 | * we do this, we have to be careful not to call any stack-protected | 1410 | * we do this, we have to be careful not to call any stack-protected |
1409 | * function, which is most of the kernel. | 1411 | * function, which is most of the kernel. |
1412 | * | ||
1413 | * Note, that it is __ref because the only caller of this after init | ||
1414 | * is PVH which is not going to use xen_load_gdt_boot or other | ||
1415 | * __init functions. | ||
1410 | */ | 1416 | */ |
1411 | static void __init xen_setup_stackprotector(void) | 1417 | static void __ref xen_setup_gdt(int cpu) |
1412 | { | 1418 | { |
1419 | if (xen_feature(XENFEAT_auto_translated_physmap)) { | ||
1420 | #ifdef CONFIG_X86_64 | ||
1421 | unsigned long dummy; | ||
1422 | |||
1423 | load_percpu_segment(cpu); /* We need to access per-cpu area */ | ||
1424 | switch_to_new_gdt(cpu); /* GDT and GS set */ | ||
1425 | |||
1426 | /* We are switching of the Xen provided GDT to our HVM mode | ||
1427 | * GDT. The new GDT has __KERNEL_CS with CS.L = 1 | ||
1428 | * and we are jumping to reload it. | ||
1429 | */ | ||
1430 | asm volatile ("pushq %0\n" | ||
1431 | "leaq 1f(%%rip),%0\n" | ||
1432 | "pushq %0\n" | ||
1433 | "lretq\n" | ||
1434 | "1:\n" | ||
1435 | : "=&r" (dummy) : "0" (__KERNEL_CS)); | ||
1436 | |||
1437 | /* | ||
1438 | * While not needed, we also set the %es, %ds, and %fs | ||
1439 | * to zero. We don't care about %ss as it is NULL. | ||
1440 | * Strictly speaking this is not needed as Xen zeros those | ||
1441 | * out (and also MSR_FS_BASE, MSR_GS_BASE, MSR_KERNEL_GS_BASE) | ||
1442 | * | ||
1443 | * Linux zeros them in cpu_init() and in secondary_startup_64 | ||
1444 | * (for BSP). | ||
1445 | */ | ||
1446 | loadsegment(es, 0); | ||
1447 | loadsegment(ds, 0); | ||
1448 | loadsegment(fs, 0); | ||
1449 | #else | ||
1450 | /* PVH: TODO Implement. */ | ||
1451 | BUG(); | ||
1452 | #endif | ||
1453 | return; /* PVH does not need any PV GDT ops. */ | ||
1454 | } | ||
1413 | pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot; | 1455 | pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot; |
1414 | pv_cpu_ops.load_gdt = xen_load_gdt_boot; | 1456 | pv_cpu_ops.load_gdt = xen_load_gdt_boot; |
1415 | 1457 | ||
@@ -1420,6 +1462,46 @@ static void __init xen_setup_stackprotector(void) | |||
1420 | pv_cpu_ops.load_gdt = xen_load_gdt; | 1462 | pv_cpu_ops.load_gdt = xen_load_gdt; |
1421 | } | 1463 | } |
1422 | 1464 | ||
1465 | /* | ||
1466 | * A PV guest starts with default flags that are not set for PVH, set them | ||
1467 | * here asap. | ||
1468 | */ | ||
1469 | static void xen_pvh_set_cr_flags(int cpu) | ||
1470 | { | ||
1471 | |||
1472 | /* Some of these are setup in 'secondary_startup_64'. The others: | ||
1473 | * X86_CR0_TS, X86_CR0_PE, X86_CR0_ET are set by Xen for HVM guests | ||
1474 | * (which PVH shared codepaths), while X86_CR0_PG is for PVH. */ | ||
1475 | write_cr0(read_cr0() | X86_CR0_MP | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM); | ||
1476 | } | ||
1477 | |||
1478 | /* | ||
1479 | * Note, that it is ref - because the only caller of this after init | ||
1480 | * is PVH which is not going to use xen_load_gdt_boot or other | ||
1481 | * __init functions. | ||
1482 | */ | ||
1483 | void __ref xen_pvh_secondary_vcpu_init(int cpu) | ||
1484 | { | ||
1485 | xen_setup_gdt(cpu); | ||
1486 | xen_pvh_set_cr_flags(cpu); | ||
1487 | } | ||
1488 | |||
1489 | static void __init xen_pvh_early_guest_init(void) | ||
1490 | { | ||
1491 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | ||
1492 | return; | ||
1493 | |||
1494 | if (!xen_feature(XENFEAT_hvm_callback_vector)) | ||
1495 | return; | ||
1496 | |||
1497 | xen_have_vector_callback = 1; | ||
1498 | xen_pvh_set_cr_flags(0); | ||
1499 | |||
1500 | #ifdef CONFIG_X86_32 | ||
1501 | BUG(); /* PVH: Implement proper support. */ | ||
1502 | #endif | ||
1503 | } | ||
1504 | |||
1423 | /* First C function to be called on Xen boot */ | 1505 | /* First C function to be called on Xen boot */ |
1424 | asmlinkage void __init xen_start_kernel(void) | 1506 | asmlinkage void __init xen_start_kernel(void) |
1425 | { | 1507 | { |
@@ -1431,13 +1513,16 @@ asmlinkage void __init xen_start_kernel(void) | |||
1431 | 1513 | ||
1432 | xen_domain_type = XEN_PV_DOMAIN; | 1514 | xen_domain_type = XEN_PV_DOMAIN; |
1433 | 1515 | ||
1516 | xen_setup_features(); | ||
1517 | xen_pvh_early_guest_init(); | ||
1434 | xen_setup_machphys_mapping(); | 1518 | xen_setup_machphys_mapping(); |
1435 | 1519 | ||
1436 | /* Install Xen paravirt ops */ | 1520 | /* Install Xen paravirt ops */ |
1437 | pv_info = xen_info; | 1521 | pv_info = xen_info; |
1438 | pv_init_ops = xen_init_ops; | 1522 | pv_init_ops = xen_init_ops; |
1439 | pv_cpu_ops = xen_cpu_ops; | ||
1440 | pv_apic_ops = xen_apic_ops; | 1523 | pv_apic_ops = xen_apic_ops; |
1524 | if (!xen_pvh_domain()) | ||
1525 | pv_cpu_ops = xen_cpu_ops; | ||
1441 | 1526 | ||
1442 | x86_init.resources.memory_setup = xen_memory_setup; | 1527 | x86_init.resources.memory_setup = xen_memory_setup; |
1443 | x86_init.oem.arch_setup = xen_arch_setup; | 1528 | x86_init.oem.arch_setup = xen_arch_setup; |
@@ -1469,17 +1554,14 @@ asmlinkage void __init xen_start_kernel(void) | |||
1469 | /* Work out if we support NX */ | 1554 | /* Work out if we support NX */ |
1470 | x86_configure_nx(); | 1555 | x86_configure_nx(); |
1471 | 1556 | ||
1472 | xen_setup_features(); | ||
1473 | |||
1474 | /* Get mfn list */ | 1557 | /* Get mfn list */ |
1475 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | 1558 | xen_build_dynamic_phys_to_machine(); |
1476 | xen_build_dynamic_phys_to_machine(); | ||
1477 | 1559 | ||
1478 | /* | 1560 | /* |
1479 | * Set up kernel GDT and segment registers, mainly so that | 1561 | * Set up kernel GDT and segment registers, mainly so that |
1480 | * -fstack-protector code can be executed. | 1562 | * -fstack-protector code can be executed. |
1481 | */ | 1563 | */ |
1482 | xen_setup_stackprotector(); | 1564 | xen_setup_gdt(0); |
1483 | 1565 | ||
1484 | xen_init_irq_ops(); | 1566 | xen_init_irq_ops(); |
1485 | xen_init_cpuid_mask(); | 1567 | xen_init_cpuid_mask(); |
@@ -1548,14 +1630,18 @@ asmlinkage void __init xen_start_kernel(void) | |||
1548 | /* set the limit of our address space */ | 1630 | /* set the limit of our address space */ |
1549 | xen_reserve_top(); | 1631 | xen_reserve_top(); |
1550 | 1632 | ||
1551 | /* We used to do this in xen_arch_setup, but that is too late on AMD | 1633 | /* PVH: runs at default kernel iopl of 0 */ |
1552 | * were early_cpu_init (run before ->arch_setup()) calls early_amd_init | 1634 | if (!xen_pvh_domain()) { |
1553 | * which pokes 0xcf8 port. | 1635 | /* |
1554 | */ | 1636 | * We used to do this in xen_arch_setup, but that is too late |
1555 | set_iopl.iopl = 1; | 1637 | * on AMD were early_cpu_init (run before ->arch_setup()) calls |
1556 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); | 1638 | * early_amd_init which pokes 0xcf8 port. |
1557 | if (rc != 0) | 1639 | */ |
1558 | xen_raw_printk("physdev_op failed %d\n", rc); | 1640 | set_iopl.iopl = 1; |
1641 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); | ||
1642 | if (rc != 0) | ||
1643 | xen_raw_printk("physdev_op failed %d\n", rc); | ||
1644 | } | ||
1559 | 1645 | ||
1560 | #ifdef CONFIG_X86_32 | 1646 | #ifdef CONFIG_X86_32 |
1561 | /* set up basic CPUID stuff */ | 1647 | /* set up basic CPUID stuff */ |
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c index 3a5f55d51907..103c93f874b2 100644 --- a/arch/x86/xen/grant-table.c +++ b/arch/x86/xen/grant-table.c | |||
@@ -125,3 +125,66 @@ void arch_gnttab_unmap(void *shared, unsigned long nr_gframes) | |||
125 | apply_to_page_range(&init_mm, (unsigned long)shared, | 125 | apply_to_page_range(&init_mm, (unsigned long)shared, |
126 | PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); | 126 | PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); |
127 | } | 127 | } |
128 | #ifdef CONFIG_XEN_PVH | ||
129 | #include <xen/balloon.h> | ||
130 | #include <xen/events.h> | ||
131 | #include <xen/xen.h> | ||
132 | #include <linux/slab.h> | ||
133 | static int __init xlated_setup_gnttab_pages(void) | ||
134 | { | ||
135 | struct page **pages; | ||
136 | xen_pfn_t *pfns; | ||
137 | int rc; | ||
138 | unsigned int i; | ||
139 | unsigned long nr_grant_frames = gnttab_max_grant_frames(); | ||
140 | |||
141 | BUG_ON(nr_grant_frames == 0); | ||
142 | pages = kcalloc(nr_grant_frames, sizeof(pages[0]), GFP_KERNEL); | ||
143 | if (!pages) | ||
144 | return -ENOMEM; | ||
145 | |||
146 | pfns = kcalloc(nr_grant_frames, sizeof(pfns[0]), GFP_KERNEL); | ||
147 | if (!pfns) { | ||
148 | kfree(pages); | ||
149 | return -ENOMEM; | ||
150 | } | ||
151 | rc = alloc_xenballooned_pages(nr_grant_frames, pages, 0 /* lowmem */); | ||
152 | if (rc) { | ||
153 | pr_warn("%s Couldn't balloon alloc %ld pfns rc:%d\n", __func__, | ||
154 | nr_grant_frames, rc); | ||
155 | kfree(pages); | ||
156 | kfree(pfns); | ||
157 | return rc; | ||
158 | } | ||
159 | for (i = 0; i < nr_grant_frames; i++) | ||
160 | pfns[i] = page_to_pfn(pages[i]); | ||
161 | |||
162 | rc = arch_gnttab_map_shared(pfns, nr_grant_frames, nr_grant_frames, | ||
163 | &xen_auto_xlat_grant_frames.vaddr); | ||
164 | |||
165 | kfree(pages); | ||
166 | if (rc) { | ||
167 | pr_warn("%s Couldn't map %ld pfns rc:%d\n", __func__, | ||
168 | nr_grant_frames, rc); | ||
169 | free_xenballooned_pages(nr_grant_frames, pages); | ||
170 | kfree(pfns); | ||
171 | return rc; | ||
172 | } | ||
173 | |||
174 | xen_auto_xlat_grant_frames.pfn = pfns; | ||
175 | xen_auto_xlat_grant_frames.count = nr_grant_frames; | ||
176 | |||
177 | return 0; | ||
178 | } | ||
179 | |||
180 | static int __init xen_pvh_gnttab_setup(void) | ||
181 | { | ||
182 | if (!xen_pvh_domain()) | ||
183 | return -ENODEV; | ||
184 | |||
185 | return xlated_setup_gnttab_pages(); | ||
186 | } | ||
187 | /* Call it _before_ __gnttab_init as we need to initialize the | ||
188 | * xen_auto_xlat_grant_frames first. */ | ||
189 | core_initcall(xen_pvh_gnttab_setup); | ||
190 | #endif | ||
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 0da7f863056f..76ca326105f7 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <xen/interface/xen.h> | 5 | #include <xen/interface/xen.h> |
6 | #include <xen/interface/sched.h> | 6 | #include <xen/interface/sched.h> |
7 | #include <xen/interface/vcpu.h> | 7 | #include <xen/interface/vcpu.h> |
8 | #include <xen/features.h> | ||
8 | #include <xen/events.h> | 9 | #include <xen/events.h> |
9 | 10 | ||
10 | #include <asm/xen/hypercall.h> | 11 | #include <asm/xen/hypercall.h> |
@@ -128,6 +129,8 @@ static const struct pv_irq_ops xen_irq_ops __initconst = { | |||
128 | 129 | ||
129 | void __init xen_init_irq_ops(void) | 130 | void __init xen_init_irq_ops(void) |
130 | { | 131 | { |
131 | pv_irq_ops = xen_irq_ops; | 132 | /* For PVH we use default pv_irq_ops settings. */ |
133 | if (!xen_feature(XENFEAT_hvm_callback_vector)) | ||
134 | pv_irq_ops = xen_irq_ops; | ||
132 | x86_init.irqs.intr_init = xen_init_IRQ; | 135 | x86_init.irqs.intr_init = xen_init_IRQ; |
133 | } | 136 | } |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index ce563be09cc1..c1d406f35523 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -1198,44 +1198,40 @@ static void __init xen_cleanhighmap(unsigned long vaddr, | |||
1198 | * instead of somewhere later and be confusing. */ | 1198 | * instead of somewhere later and be confusing. */ |
1199 | xen_mc_flush(); | 1199 | xen_mc_flush(); |
1200 | } | 1200 | } |
1201 | #endif | 1201 | static void __init xen_pagetable_p2m_copy(void) |
1202 | static void __init xen_pagetable_init(void) | ||
1203 | { | 1202 | { |
1204 | #ifdef CONFIG_X86_64 | ||
1205 | unsigned long size; | 1203 | unsigned long size; |
1206 | unsigned long addr; | 1204 | unsigned long addr; |
1207 | #endif | 1205 | unsigned long new_mfn_list; |
1208 | paging_init(); | 1206 | |
1209 | xen_setup_shared_info(); | 1207 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
1210 | #ifdef CONFIG_X86_64 | 1208 | return; |
1211 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { | 1209 | |
1212 | unsigned long new_mfn_list; | 1210 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); |
1213 | 1211 | ||
1214 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); | 1212 | new_mfn_list = xen_revector_p2m_tree(); |
1215 | 1213 | /* No memory or already called. */ | |
1216 | /* On 32-bit, we get zero so this never gets executed. */ | 1214 | if (!new_mfn_list || new_mfn_list == xen_start_info->mfn_list) |
1217 | new_mfn_list = xen_revector_p2m_tree(); | 1215 | return; |
1218 | if (new_mfn_list && new_mfn_list != xen_start_info->mfn_list) { | 1216 | |
1219 | /* using __ka address and sticking INVALID_P2M_ENTRY! */ | 1217 | /* using __ka address and sticking INVALID_P2M_ENTRY! */ |
1220 | memset((void *)xen_start_info->mfn_list, 0xff, size); | 1218 | memset((void *)xen_start_info->mfn_list, 0xff, size); |
1221 | 1219 | ||
1222 | /* We should be in __ka space. */ | 1220 | /* We should be in __ka space. */ |
1223 | BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map); | 1221 | BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map); |
1224 | addr = xen_start_info->mfn_list; | 1222 | addr = xen_start_info->mfn_list; |
1225 | /* We roundup to the PMD, which means that if anybody at this stage is | 1223 | /* We roundup to the PMD, which means that if anybody at this stage is |
1226 | * using the __ka address of xen_start_info or xen_start_info->shared_info | 1224 | * using the __ka address of xen_start_info or xen_start_info->shared_info |
1227 | * they are in going to crash. Fortunatly we have already revectored | 1225 | * they are in going to crash. Fortunatly we have already revectored |
1228 | * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */ | 1226 | * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */ |
1229 | size = roundup(size, PMD_SIZE); | 1227 | size = roundup(size, PMD_SIZE); |
1230 | xen_cleanhighmap(addr, addr + size); | 1228 | xen_cleanhighmap(addr, addr + size); |
1231 | 1229 | ||
1232 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); | 1230 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); |
1233 | memblock_free(__pa(xen_start_info->mfn_list), size); | 1231 | memblock_free(__pa(xen_start_info->mfn_list), size); |
1234 | /* And revector! Bye bye old array */ | 1232 | /* And revector! Bye bye old array */ |
1235 | xen_start_info->mfn_list = new_mfn_list; | 1233 | xen_start_info->mfn_list = new_mfn_list; |
1236 | } else | 1234 | |
1237 | goto skip; | ||
1238 | } | ||
1239 | /* At this stage, cleanup_highmap has already cleaned __ka space | 1235 | /* At this stage, cleanup_highmap has already cleaned __ka space |
1240 | * from _brk_limit way up to the max_pfn_mapped (which is the end of | 1236 | * from _brk_limit way up to the max_pfn_mapped (which is the end of |
1241 | * the ramdisk). We continue on, erasing PMD entries that point to page | 1237 | * the ramdisk). We continue on, erasing PMD entries that point to page |
@@ -1255,7 +1251,15 @@ static void __init xen_pagetable_init(void) | |||
1255 | * anything at this stage. */ | 1251 | * anything at this stage. */ |
1256 | xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1); | 1252 | xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1); |
1257 | #endif | 1253 | #endif |
1258 | skip: | 1254 | } |
1255 | #endif | ||
1256 | |||
1257 | static void __init xen_pagetable_init(void) | ||
1258 | { | ||
1259 | paging_init(); | ||
1260 | xen_setup_shared_info(); | ||
1261 | #ifdef CONFIG_X86_64 | ||
1262 | xen_pagetable_p2m_copy(); | ||
1259 | #endif | 1263 | #endif |
1260 | xen_post_allocator_init(); | 1264 | xen_post_allocator_init(); |
1261 | } | 1265 | } |
@@ -1753,6 +1757,10 @@ static void set_page_prot_flags(void *addr, pgprot_t prot, unsigned long flags) | |||
1753 | unsigned long pfn = __pa(addr) >> PAGE_SHIFT; | 1757 | unsigned long pfn = __pa(addr) >> PAGE_SHIFT; |
1754 | pte_t pte = pfn_pte(pfn, prot); | 1758 | pte_t pte = pfn_pte(pfn, prot); |
1755 | 1759 | ||
1760 | /* For PVH no need to set R/O or R/W to pin them or unpin them. */ | ||
1761 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
1762 | return; | ||
1763 | |||
1756 | if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags)) | 1764 | if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags)) |
1757 | BUG(); | 1765 | BUG(); |
1758 | } | 1766 | } |
@@ -1863,6 +1871,7 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end, | |||
1863 | * but that's enough to get __va working. We need to fill in the rest | 1871 | * but that's enough to get __va working. We need to fill in the rest |
1864 | * of the physical mapping once some sort of allocator has been set | 1872 | * of the physical mapping once some sort of allocator has been set |
1865 | * up. | 1873 | * up. |
1874 | * NOTE: for PVH, the page tables are native. | ||
1866 | */ | 1875 | */ |
1867 | void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) | 1876 | void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) |
1868 | { | 1877 | { |
@@ -1884,17 +1893,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) | |||
1884 | /* Zap identity mapping */ | 1893 | /* Zap identity mapping */ |
1885 | init_level4_pgt[0] = __pgd(0); | 1894 | init_level4_pgt[0] = __pgd(0); |
1886 | 1895 | ||
1887 | /* Pre-constructed entries are in pfn, so convert to mfn */ | 1896 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { |
1888 | /* L4[272] -> level3_ident_pgt | 1897 | /* Pre-constructed entries are in pfn, so convert to mfn */ |
1889 | * L4[511] -> level3_kernel_pgt */ | 1898 | /* L4[272] -> level3_ident_pgt |
1890 | convert_pfn_mfn(init_level4_pgt); | 1899 | * L4[511] -> level3_kernel_pgt */ |
1891 | 1900 | convert_pfn_mfn(init_level4_pgt); | |
1892 | /* L3_i[0] -> level2_ident_pgt */ | 1901 | |
1893 | convert_pfn_mfn(level3_ident_pgt); | 1902 | /* L3_i[0] -> level2_ident_pgt */ |
1894 | /* L3_k[510] -> level2_kernel_pgt | 1903 | convert_pfn_mfn(level3_ident_pgt); |
1895 | * L3_i[511] -> level2_fixmap_pgt */ | 1904 | /* L3_k[510] -> level2_kernel_pgt |
1896 | convert_pfn_mfn(level3_kernel_pgt); | 1905 | * L3_i[511] -> level2_fixmap_pgt */ |
1897 | 1906 | convert_pfn_mfn(level3_kernel_pgt); | |
1907 | } | ||
1898 | /* We get [511][511] and have Xen's version of level2_kernel_pgt */ | 1908 | /* We get [511][511] and have Xen's version of level2_kernel_pgt */ |
1899 | l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); | 1909 | l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); |
1900 | l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); | 1910 | l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); |
@@ -1918,31 +1928,33 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) | |||
1918 | copy_page(level2_fixmap_pgt, l2); | 1928 | copy_page(level2_fixmap_pgt, l2); |
1919 | /* Note that we don't do anything with level1_fixmap_pgt which | 1929 | /* Note that we don't do anything with level1_fixmap_pgt which |
1920 | * we don't need. */ | 1930 | * we don't need. */ |
1931 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { | ||
1932 | /* Make pagetable pieces RO */ | ||
1933 | set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); | ||
1934 | set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); | ||
1935 | set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); | ||
1936 | set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); | ||
1937 | set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); | ||
1938 | set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); | ||
1939 | set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); | ||
1940 | |||
1941 | /* Pin down new L4 */ | ||
1942 | pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, | ||
1943 | PFN_DOWN(__pa_symbol(init_level4_pgt))); | ||
1944 | |||
1945 | /* Unpin Xen-provided one */ | ||
1946 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); | ||
1921 | 1947 | ||
1922 | /* Make pagetable pieces RO */ | 1948 | /* |
1923 | set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); | 1949 | * At this stage there can be no user pgd, and no page |
1924 | set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); | 1950 | * structure to attach it to, so make sure we just set kernel |
1925 | set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); | 1951 | * pgd. |
1926 | set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); | 1952 | */ |
1927 | set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); | 1953 | xen_mc_batch(); |
1928 | set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); | 1954 | __xen_write_cr3(true, __pa(init_level4_pgt)); |
1929 | set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); | 1955 | xen_mc_issue(PARAVIRT_LAZY_CPU); |
1930 | 1956 | } else | |
1931 | /* Pin down new L4 */ | 1957 | native_write_cr3(__pa(init_level4_pgt)); |
1932 | pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, | ||
1933 | PFN_DOWN(__pa_symbol(init_level4_pgt))); | ||
1934 | |||
1935 | /* Unpin Xen-provided one */ | ||
1936 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); | ||
1937 | |||
1938 | /* | ||
1939 | * At this stage there can be no user pgd, and no page | ||
1940 | * structure to attach it to, so make sure we just set kernel | ||
1941 | * pgd. | ||
1942 | */ | ||
1943 | xen_mc_batch(); | ||
1944 | __xen_write_cr3(true, __pa(init_level4_pgt)); | ||
1945 | xen_mc_issue(PARAVIRT_LAZY_CPU); | ||
1946 | 1958 | ||
1947 | /* We can't that easily rip out L3 and L2, as the Xen pagetables are | 1959 | /* We can't that easily rip out L3 and L2, as the Xen pagetables are |
1948 | * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for | 1960 | * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for |
@@ -2103,6 +2115,9 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) | |||
2103 | 2115 | ||
2104 | static void __init xen_post_allocator_init(void) | 2116 | static void __init xen_post_allocator_init(void) |
2105 | { | 2117 | { |
2118 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
2119 | return; | ||
2120 | |||
2106 | pv_mmu_ops.set_pte = xen_set_pte; | 2121 | pv_mmu_ops.set_pte = xen_set_pte; |
2107 | pv_mmu_ops.set_pmd = xen_set_pmd; | 2122 | pv_mmu_ops.set_pmd = xen_set_pmd; |
2108 | pv_mmu_ops.set_pud = xen_set_pud; | 2123 | pv_mmu_ops.set_pud = xen_set_pud; |
@@ -2207,6 +2222,15 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { | |||
2207 | void __init xen_init_mmu_ops(void) | 2222 | void __init xen_init_mmu_ops(void) |
2208 | { | 2223 | { |
2209 | x86_init.paging.pagetable_init = xen_pagetable_init; | 2224 | x86_init.paging.pagetable_init = xen_pagetable_init; |
2225 | |||
2226 | /* Optimization - we can use the HVM one but it has no idea which | ||
2227 | * VCPUs are descheduled - which means that it will needlessly IPI | ||
2228 | * them. Xen knows so let it do the job. | ||
2229 | */ | ||
2230 | if (xen_feature(XENFEAT_auto_translated_physmap)) { | ||
2231 | pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others; | ||
2232 | return; | ||
2233 | } | ||
2210 | pv_mmu_ops = xen_mmu_ops; | 2234 | pv_mmu_ops = xen_mmu_ops; |
2211 | 2235 | ||
2212 | memset(dummy_mapping, 0xff, PAGE_SIZE); | 2236 | memset(dummy_mapping, 0xff, PAGE_SIZE); |
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 2ae8699e8767..696c694986d0 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c | |||
@@ -280,6 +280,9 @@ void __ref xen_build_mfn_list_list(void) | |||
280 | { | 280 | { |
281 | unsigned long pfn; | 281 | unsigned long pfn; |
282 | 282 | ||
283 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
284 | return; | ||
285 | |||
283 | /* Pre-initialize p2m_top_mfn to be completely missing */ | 286 | /* Pre-initialize p2m_top_mfn to be completely missing */ |
284 | if (p2m_top_mfn == NULL) { | 287 | if (p2m_top_mfn == NULL) { |
285 | p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); | 288 | p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); |
@@ -336,6 +339,9 @@ void __ref xen_build_mfn_list_list(void) | |||
336 | 339 | ||
337 | void xen_setup_mfn_list_list(void) | 340 | void xen_setup_mfn_list_list(void) |
338 | { | 341 | { |
342 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
343 | return; | ||
344 | |||
339 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); | 345 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); |
340 | 346 | ||
341 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = | 347 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = |
@@ -346,10 +352,15 @@ void xen_setup_mfn_list_list(void) | |||
346 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ | 352 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ |
347 | void __init xen_build_dynamic_phys_to_machine(void) | 353 | void __init xen_build_dynamic_phys_to_machine(void) |
348 | { | 354 | { |
349 | unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; | 355 | unsigned long *mfn_list; |
350 | unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); | 356 | unsigned long max_pfn; |
351 | unsigned long pfn; | 357 | unsigned long pfn; |
352 | 358 | ||
359 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
360 | return; | ||
361 | |||
362 | mfn_list = (unsigned long *)xen_start_info->mfn_list; | ||
363 | max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); | ||
353 | xen_max_p2m_pfn = max_pfn; | 364 | xen_max_p2m_pfn = max_pfn; |
354 | 365 | ||
355 | p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); | 366 | p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); |
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index 0a7852483ffe..a8261716d58d 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c | |||
@@ -30,10 +30,9 @@ | |||
30 | #define XEN_PLATFORM_ERR_PROTOCOL -2 | 30 | #define XEN_PLATFORM_ERR_PROTOCOL -2 |
31 | #define XEN_PLATFORM_ERR_BLACKLIST -3 | 31 | #define XEN_PLATFORM_ERR_BLACKLIST -3 |
32 | 32 | ||
33 | /* store the value of xen_emul_unplug after the unplug is done */ | ||
34 | int xen_platform_pci_unplug; | ||
35 | EXPORT_SYMBOL_GPL(xen_platform_pci_unplug); | ||
36 | #ifdef CONFIG_XEN_PVHVM | 33 | #ifdef CONFIG_XEN_PVHVM |
34 | /* store the value of xen_emul_unplug after the unplug is done */ | ||
35 | static int xen_platform_pci_unplug; | ||
37 | static int xen_emul_unplug; | 36 | static int xen_emul_unplug; |
38 | 37 | ||
39 | static int check_platform_magic(void) | 38 | static int check_platform_magic(void) |
@@ -69,6 +68,80 @@ static int check_platform_magic(void) | |||
69 | return 0; | 68 | return 0; |
70 | } | 69 | } |
71 | 70 | ||
71 | bool xen_has_pv_devices() | ||
72 | { | ||
73 | if (!xen_domain()) | ||
74 | return false; | ||
75 | |||
76 | /* PV domains always have them. */ | ||
77 | if (xen_pv_domain()) | ||
78 | return true; | ||
79 | |||
 80 | /* And the user has xen_platform_pci=0 set in guest config as | ||
81 | * driver did not modify the value. */ | ||
82 | if (xen_platform_pci_unplug == 0) | ||
83 | return false; | ||
84 | |||
85 | if (xen_platform_pci_unplug & XEN_UNPLUG_NEVER) | ||
86 | return false; | ||
87 | |||
88 | if (xen_platform_pci_unplug & XEN_UNPLUG_ALL) | ||
89 | return true; | ||
90 | |||
91 | /* This is an odd one - we are going to run legacy | ||
92 | * and PV drivers at the same time. */ | ||
93 | if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) | ||
94 | return true; | ||
95 | |||
96 | /* And the caller has to follow with xen_pv_{disk,nic}_devices | ||
97 | * to be certain which driver can load. */ | ||
98 | return false; | ||
99 | } | ||
100 | EXPORT_SYMBOL_GPL(xen_has_pv_devices); | ||
101 | |||
102 | static bool __xen_has_pv_device(int state) | ||
103 | { | ||
104 | /* HVM domains might or might not */ | ||
105 | if (xen_hvm_domain() && (xen_platform_pci_unplug & state)) | ||
106 | return true; | ||
107 | |||
108 | return xen_has_pv_devices(); | ||
109 | } | ||
110 | |||
111 | bool xen_has_pv_nic_devices(void) | ||
112 | { | ||
113 | return __xen_has_pv_device(XEN_UNPLUG_ALL_NICS | XEN_UNPLUG_ALL); | ||
114 | } | ||
115 | EXPORT_SYMBOL_GPL(xen_has_pv_nic_devices); | ||
116 | |||
117 | bool xen_has_pv_disk_devices(void) | ||
118 | { | ||
119 | return __xen_has_pv_device(XEN_UNPLUG_ALL_IDE_DISKS | | ||
120 | XEN_UNPLUG_AUX_IDE_DISKS | XEN_UNPLUG_ALL); | ||
121 | } | ||
122 | EXPORT_SYMBOL_GPL(xen_has_pv_disk_devices); | ||
123 | |||
124 | /* | ||
125 | * This one is odd - it determines whether you want to run PV _and_ | ||
126 | * legacy (IDE) drivers together. This combination is only possible | ||
127 | * under HVM. | ||
128 | */ | ||
129 | bool xen_has_pv_and_legacy_disk_devices(void) | ||
130 | { | ||
131 | if (!xen_domain()) | ||
132 | return false; | ||
133 | |||
134 | /* N.B. This is only ever used in HVM mode */ | ||
135 | if (xen_pv_domain()) | ||
136 | return false; | ||
137 | |||
138 | if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) | ||
139 | return true; | ||
140 | |||
141 | return false; | ||
142 | } | ||
143 | EXPORT_SYMBOL_GPL(xen_has_pv_and_legacy_disk_devices); | ||
144 | |||
72 | void xen_unplug_emulated_devices(void) | 145 | void xen_unplug_emulated_devices(void) |
73 | { | 146 | { |
74 | int r; | 147 | int r; |
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 68c054f59de6..dd5f905e33d5 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <xen/interface/memory.h> | 27 | #include <xen/interface/memory.h> |
28 | #include <xen/interface/physdev.h> | 28 | #include <xen/interface/physdev.h> |
29 | #include <xen/features.h> | 29 | #include <xen/features.h> |
30 | #include "mmu.h" | ||
30 | #include "xen-ops.h" | 31 | #include "xen-ops.h" |
31 | #include "vdso.h" | 32 | #include "vdso.h" |
32 | 33 | ||
@@ -81,6 +82,9 @@ static void __init xen_add_extra_mem(u64 start, u64 size) | |||
81 | 82 | ||
82 | memblock_reserve(start, size); | 83 | memblock_reserve(start, size); |
83 | 84 | ||
85 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
86 | return; | ||
87 | |||
84 | xen_max_p2m_pfn = PFN_DOWN(start + size); | 88 | xen_max_p2m_pfn = PFN_DOWN(start + size); |
85 | for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) { | 89 | for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) { |
86 | unsigned long mfn = pfn_to_mfn(pfn); | 90 | unsigned long mfn = pfn_to_mfn(pfn); |
@@ -103,6 +107,7 @@ static unsigned long __init xen_do_chunk(unsigned long start, | |||
103 | .domid = DOMID_SELF | 107 | .domid = DOMID_SELF |
104 | }; | 108 | }; |
105 | unsigned long len = 0; | 109 | unsigned long len = 0; |
110 | int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap); | ||
106 | unsigned long pfn; | 111 | unsigned long pfn; |
107 | int ret; | 112 | int ret; |
108 | 113 | ||
@@ -116,7 +121,7 @@ static unsigned long __init xen_do_chunk(unsigned long start, | |||
116 | continue; | 121 | continue; |
117 | frame = mfn; | 122 | frame = mfn; |
118 | } else { | 123 | } else { |
119 | if (mfn != INVALID_P2M_ENTRY) | 124 | if (!xlated_phys && mfn != INVALID_P2M_ENTRY) |
120 | continue; | 125 | continue; |
121 | frame = pfn; | 126 | frame = pfn; |
122 | } | 127 | } |
@@ -154,6 +159,13 @@ static unsigned long __init xen_do_chunk(unsigned long start, | |||
154 | static unsigned long __init xen_release_chunk(unsigned long start, | 159 | static unsigned long __init xen_release_chunk(unsigned long start, |
155 | unsigned long end) | 160 | unsigned long end) |
156 | { | 161 | { |
162 | /* | ||
163 | * Xen already ballooned out the E820 non RAM regions for us | ||
164 | * and set them up properly in EPT. | ||
165 | */ | ||
166 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
167 | return end - start; | ||
168 | |||
157 | return xen_do_chunk(start, end, true); | 169 | return xen_do_chunk(start, end, true); |
158 | } | 170 | } |
159 | 171 | ||
@@ -222,7 +234,13 @@ static void __init xen_set_identity_and_release_chunk( | |||
222 | * (except for the ISA region which must be 1:1 mapped) to | 234 | * (except for the ISA region which must be 1:1 mapped) to |
223 | * release the refcounts (in Xen) on the original frames. | 235 | * release the refcounts (in Xen) on the original frames. |
224 | */ | 236 | */ |
225 | for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) { | 237 | |
238 | /* | ||
239 | * PVH E820 matches the hypervisor's P2M which means we need to | ||
240 | * account for the proper values of *release and *identity. | ||
241 | */ | ||
242 | for (pfn = start_pfn; !xen_feature(XENFEAT_auto_translated_physmap) && | ||
243 | pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) { | ||
226 | pte_t pte = __pte_ma(0); | 244 | pte_t pte = __pte_ma(0); |
227 | 245 | ||
228 | if (pfn < PFN_UP(ISA_END_ADDRESS)) | 246 | if (pfn < PFN_UP(ISA_END_ADDRESS)) |
@@ -563,16 +581,13 @@ void xen_enable_nmi(void) | |||
563 | BUG(); | 581 | BUG(); |
564 | #endif | 582 | #endif |
565 | } | 583 | } |
566 | void __init xen_arch_setup(void) | 584 | void __init xen_pvmmu_arch_setup(void) |
567 | { | 585 | { |
568 | xen_panic_handler_init(); | ||
569 | |||
570 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); | 586 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); |
571 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); | 587 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); |
572 | 588 | ||
573 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | 589 | HYPERVISOR_vm_assist(VMASST_CMD_enable, |
574 | HYPERVISOR_vm_assist(VMASST_CMD_enable, | 590 | VMASST_TYPE_pae_extended_cr3); |
575 | VMASST_TYPE_pae_extended_cr3); | ||
576 | 591 | ||
577 | if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) || | 592 | if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) || |
578 | register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) | 593 | register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) |
@@ -581,6 +596,15 @@ void __init xen_arch_setup(void) | |||
581 | xen_enable_sysenter(); | 596 | xen_enable_sysenter(); |
582 | xen_enable_syscall(); | 597 | xen_enable_syscall(); |
583 | xen_enable_nmi(); | 598 | xen_enable_nmi(); |
599 | } | ||
600 | |||
601 | /* This function is not called for HVM domains */ | ||
602 | void __init xen_arch_setup(void) | ||
603 | { | ||
604 | xen_panic_handler_init(); | ||
605 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | ||
606 | xen_pvmmu_arch_setup(); | ||
607 | |||
584 | #ifdef CONFIG_ACPI | 608 | #ifdef CONFIG_ACPI |
585 | if (!(xen_start_info->flags & SIF_INITDOMAIN)) { | 609 | if (!(xen_start_info->flags & SIF_INITDOMAIN)) { |
586 | printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); | 610 | printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index c36b325abd83..a18eadd8bb40 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -73,9 +73,11 @@ static void cpu_bringup(void) | |||
73 | touch_softlockup_watchdog(); | 73 | touch_softlockup_watchdog(); |
74 | preempt_disable(); | 74 | preempt_disable(); |
75 | 75 | ||
76 | xen_enable_sysenter(); | 76 | /* PVH runs in ring 0 and allows us to do native syscalls. Yay! */ |
77 | xen_enable_syscall(); | 77 | if (!xen_feature(XENFEAT_supervisor_mode_kernel)) { |
78 | 78 | xen_enable_sysenter(); | |
79 | xen_enable_syscall(); | ||
80 | } | ||
79 | cpu = smp_processor_id(); | 81 | cpu = smp_processor_id(); |
80 | smp_store_cpu_info(cpu); | 82 | smp_store_cpu_info(cpu); |
81 | cpu_data(cpu).x86_max_cores = 1; | 83 | cpu_data(cpu).x86_max_cores = 1; |
@@ -97,8 +99,14 @@ static void cpu_bringup(void) | |||
97 | wmb(); /* make sure everything is out */ | 99 | wmb(); /* make sure everything is out */ |
98 | } | 100 | } |
99 | 101 | ||
100 | static void cpu_bringup_and_idle(void) | 102 | /* Note: cpu parameter is only relevant for PVH */ |
103 | static void cpu_bringup_and_idle(int cpu) | ||
101 | { | 104 | { |
105 | #ifdef CONFIG_X86_64 | ||
106 | if (xen_feature(XENFEAT_auto_translated_physmap) && | ||
107 | xen_feature(XENFEAT_supervisor_mode_kernel)) | ||
108 | xen_pvh_secondary_vcpu_init(cpu); | ||
109 | #endif | ||
102 | cpu_bringup(); | 110 | cpu_bringup(); |
103 | cpu_startup_entry(CPUHP_ONLINE); | 111 | cpu_startup_entry(CPUHP_ONLINE); |
104 | } | 112 | } |
@@ -274,9 +282,10 @@ static void __init xen_smp_prepare_boot_cpu(void) | |||
274 | native_smp_prepare_boot_cpu(); | 282 | native_smp_prepare_boot_cpu(); |
275 | 283 | ||
276 | if (xen_pv_domain()) { | 284 | if (xen_pv_domain()) { |
277 | /* We've switched to the "real" per-cpu gdt, so make sure the | 285 | if (!xen_feature(XENFEAT_writable_page_tables)) |
278 | old memory can be recycled */ | 286 | /* We've switched to the "real" per-cpu gdt, so make |
279 | make_lowmem_page_readwrite(xen_initial_gdt); | 287 | * sure the old memory can be recycled. */ |
288 | make_lowmem_page_readwrite(xen_initial_gdt); | ||
280 | 289 | ||
281 | #ifdef CONFIG_X86_32 | 290 | #ifdef CONFIG_X86_32 |
282 | /* | 291 | /* |
@@ -360,22 +369,21 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) | |||
360 | 369 | ||
361 | gdt = get_cpu_gdt_table(cpu); | 370 | gdt = get_cpu_gdt_table(cpu); |
362 | 371 | ||
363 | ctxt->flags = VGCF_IN_KERNEL; | ||
364 | ctxt->user_regs.ss = __KERNEL_DS; | ||
365 | #ifdef CONFIG_X86_32 | 372 | #ifdef CONFIG_X86_32 |
373 | /* Note: PVH is not yet supported on x86_32. */ | ||
366 | ctxt->user_regs.fs = __KERNEL_PERCPU; | 374 | ctxt->user_regs.fs = __KERNEL_PERCPU; |
367 | ctxt->user_regs.gs = __KERNEL_STACK_CANARY; | 375 | ctxt->user_regs.gs = __KERNEL_STACK_CANARY; |
368 | #else | ||
369 | ctxt->gs_base_kernel = per_cpu_offset(cpu); | ||
370 | #endif | 376 | #endif |
371 | ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; | 377 | ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; |
372 | 378 | ||
373 | memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); | 379 | memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); |
374 | 380 | ||
375 | { | 381 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { |
382 | ctxt->flags = VGCF_IN_KERNEL; | ||
376 | ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ | 383 | ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ |
377 | ctxt->user_regs.ds = __USER_DS; | 384 | ctxt->user_regs.ds = __USER_DS; |
378 | ctxt->user_regs.es = __USER_DS; | 385 | ctxt->user_regs.es = __USER_DS; |
386 | ctxt->user_regs.ss = __KERNEL_DS; | ||
379 | 387 | ||
380 | xen_copy_trap_info(ctxt->trap_ctxt); | 388 | xen_copy_trap_info(ctxt->trap_ctxt); |
381 | 389 | ||
@@ -396,18 +404,27 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) | |||
396 | #ifdef CONFIG_X86_32 | 404 | #ifdef CONFIG_X86_32 |
397 | ctxt->event_callback_cs = __KERNEL_CS; | 405 | ctxt->event_callback_cs = __KERNEL_CS; |
398 | ctxt->failsafe_callback_cs = __KERNEL_CS; | 406 | ctxt->failsafe_callback_cs = __KERNEL_CS; |
407 | #else | ||
408 | ctxt->gs_base_kernel = per_cpu_offset(cpu); | ||
399 | #endif | 409 | #endif |
400 | ctxt->event_callback_eip = | 410 | ctxt->event_callback_eip = |
401 | (unsigned long)xen_hypervisor_callback; | 411 | (unsigned long)xen_hypervisor_callback; |
402 | ctxt->failsafe_callback_eip = | 412 | ctxt->failsafe_callback_eip = |
403 | (unsigned long)xen_failsafe_callback; | 413 | (unsigned long)xen_failsafe_callback; |
414 | ctxt->user_regs.cs = __KERNEL_CS; | ||
415 | per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); | ||
416 | #ifdef CONFIG_X86_32 | ||
404 | } | 417 | } |
405 | ctxt->user_regs.cs = __KERNEL_CS; | 418 | #else |
419 | } else | ||
420 | /* N.B. The user_regs.eip (cpu_bringup_and_idle) is called with | ||
 421 | * %rdi having the cpu number - which means we are passing in | ||
422 | * as the first parameter the cpu. Subtle! | ||
423 | */ | ||
424 | ctxt->user_regs.rdi = cpu; | ||
425 | #endif | ||
406 | ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); | 426 | ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); |
407 | |||
408 | per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); | ||
409 | ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); | 427 | ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); |
410 | |||
411 | if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)) | 428 | if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)) |
412 | BUG(); | 429 | BUG(); |
413 | 430 | ||
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 12a1ca707b94..7b78f88c1707 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -446,6 +446,7 @@ void xen_setup_timer(int cpu) | |||
446 | IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER| | 446 | IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER| |
447 | IRQF_FORCE_RESUME, | 447 | IRQF_FORCE_RESUME, |
448 | name, NULL); | 448 | name, NULL); |
449 | (void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX); | ||
449 | 450 | ||
450 | memcpy(evt, xen_clockevent, sizeof(*evt)); | 451 | memcpy(evt, xen_clockevent, sizeof(*evt)); |
451 | 452 | ||
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 7faed5869e5b..485b69585540 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S | |||
@@ -11,8 +11,28 @@ | |||
11 | #include <asm/page_types.h> | 11 | #include <asm/page_types.h> |
12 | 12 | ||
13 | #include <xen/interface/elfnote.h> | 13 | #include <xen/interface/elfnote.h> |
14 | #include <xen/interface/features.h> | ||
14 | #include <asm/xen/interface.h> | 15 | #include <asm/xen/interface.h> |
15 | 16 | ||
17 | #ifdef CONFIG_XEN_PVH | ||
18 | #define PVH_FEATURES_STR "|writable_descriptor_tables|auto_translated_physmap|supervisor_mode_kernel" | ||
 19 | /* Note the lack of 'hvm_callback_vector'. Older hypervisors will | ||
20 | * balk at this being part of XEN_ELFNOTE_FEATURES, so we put it in | ||
21 | * XEN_ELFNOTE_SUPPORTED_FEATURES which older hypervisors will ignore. | ||
22 | */ | ||
23 | #define PVH_FEATURES ((1 << XENFEAT_writable_page_tables) | \ | ||
24 | (1 << XENFEAT_auto_translated_physmap) | \ | ||
25 | (1 << XENFEAT_supervisor_mode_kernel) | \ | ||
26 | (1 << XENFEAT_hvm_callback_vector)) | ||
 27 | /* The XENFEAT_writable_page_tables is not strictly necessary as we set that | ||
 28 | * up regardless of whether this CONFIG option is enabled or not, but it | ||
29 | * clarifies what the right flags need to be. | ||
30 | */ | ||
31 | #else | ||
32 | #define PVH_FEATURES_STR "" | ||
33 | #define PVH_FEATURES (0) | ||
34 | #endif | ||
35 | |||
16 | __INIT | 36 | __INIT |
17 | ENTRY(startup_xen) | 37 | ENTRY(startup_xen) |
18 | cld | 38 | cld |
@@ -95,7 +115,10 @@ NEXT_HYPERCALL(arch_6) | |||
95 | #endif | 115 | #endif |
96 | ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen) | 116 | ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen) |
97 | ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page) | 117 | ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page) |
98 | ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") | 118 | ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .ascii "!writable_page_tables|pae_pgdir_above_4gb"; .asciz PVH_FEATURES_STR) |
119 | ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES, .long (PVH_FEATURES) | | ||
120 | (1 << XENFEAT_writable_page_tables) | | ||
121 | (1 << XENFEAT_dom0)) | ||
99 | ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") | 122 | ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") |
100 | ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") | 123 | ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") |
101 | ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, | 124 | ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 95f8c6142328..1cb6f4c37300 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -123,4 +123,5 @@ __visible void xen_adjust_exception_frame(void); | |||
123 | 123 | ||
124 | extern int xen_panic_handler_init(void); | 124 | extern int xen_panic_handler_init(void); |
125 | 125 | ||
126 | void xen_pvh_secondary_vcpu_init(int cpu); | ||
126 | #endif /* XEN_OPS_H */ | 127 | #endif /* XEN_OPS_H */ |