author	Linus Torvalds <torvalds@linux-foundation.org>	2014-01-23 01:00:18 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-01-23 01:00:18 -0500
commit	84621c9b18d0bb6cb267e3395c7f3131ecf4d39c (patch)
tree	28566fe0211798143136b5cd154e2239d38a7b68
parent	7ebd3faa9b5b42caf2d5aa1352a93dcfa0098011 (diff)
parent	c9f6e9977e38de15da96b732a8dec0ef56cbf977 (diff)
Merge tag 'stable/for-linus-3.14-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull Xen updates from Konrad Rzeszutek Wilk:
 "Two major features that the Xen community is excited about:

  The first is event channel scalability by David Vrabel - we switch
  over from a two-level per-cpu bitmap of events (IRQs) to a FIFO queue
  with priorities.  This lets us handle more events, with lower latency
  and better scalability.  Good stuff.

  The other is PVH by Mukesh Rathor.  In short, PV is a mode where the
  kernel lets the hypervisor program page-tables, segments, etc.  With
  EPT/NPT capabilities in current processors, the overhead of doing this
  in an HVM (Hardware Virtual Machine) container is much lower than the
  hypervisor doing it for us.

  In short we let a PV guest run without doing page-table, segment,
  syscall, etc updates through the hypervisor - instead it is all done
  within the guest container.  It is a "hybrid" PV - hence the 'PVH'
  name - a PV guest within an HVM container.

  The major benefits are less code to deal with - for example we only
  use one function from the pv_mmu_ops (which has 39 function calls);
  faster performance for syscall (no context switches into the
  hypervisor); fewer traps on various operations; etc.

  It is still being baked - the ABI is not yet set in stone.  But it is
  pretty awesome and we are excited about it.

  Lastly, there are some changes to ARM code - you should get a simple
  conflict which has been resolved in #linux-next.

  In short, this pull has awesome features.

  Features:
   - FIFO event channels.  Key advantages: support for over 100,000
     events (2^17), 16 different event priorities, improved fairness in
     event latency through the use of FIFOs.
   - Xen PVH support.  "It's a fully PV kernel mode, running with
     paravirtualized disk and network, paravirtualized interrupts and
     timers, no emulated devices of any kind (and thus no qemu), no BIOS
     or legacy boot - but instead of requiring PV MMU, it uses the HVM
     hardware extensions to virtualize the pagetables, as well as system
     calls and other privileged operations."  (from "The
     Paravirtualization Spectrum, Part 2: From poles to a spectrum")

  Bug-fixes:
   - Fixes in the balloon driver (refactor and make it work under ARM)
   - Allow xenfb to be used in HVM guests.
   - Allow xen_platform_pci=0 to work properly.
   - Refactors in event channels"

* tag 'stable/for-linus-3.14-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (52 commits)
  xen/pvh: Set X86_CR0_WP and others in CR0 (v2)
  MAINTAINERS: add git repository for Xen
  xen/pvh: Use 'depend' instead of 'select'.
  xen: delete new instances of __cpuinit usage
  xen/fb: allow xenfb initialization for hvm guests
  xen/evtchn_fifo: fix error return code in evtchn_fifo_setup()
  xen-platform: fix error return code in platform_pci_init()
  xen/pvh: remove duplicated include from enlighten.c
  xen/pvh: Fix compile issues with xen_pvh_domain()
  xen: Use dev_is_pci() to check whether it is pci device
  xen/grant-table: Force to use v1 of grants.
  xen/pvh: Support ParaVirtualized Hardware extensions (v3).
  xen/pvh: Piggyback on PVHVM XenBus.
  xen/pvh: Piggyback on PVHVM for grant driver (v4)
  xen/grant: Implement an grant frame array struct (v3).
  xen/grant-table: Refactor gnttab_init
  xen/grants: Remove gnttab_max_grant_frames dependency on gnttab_init.
  xen/pvh: Piggyback on PVHVM for event channels (v2)
  xen/pvh: Update E820 to work with PVH (v2)
  xen/pvh: Secondary VCPU bringup (non-bootup CPUs)
  ...
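The recurring mechanism behind the PVH changes in the diff below is a feature check: when the guest is auto-translated (PVH), the EPT/NPT hardware extensions handle address translation, so the PV-specific MMU/p2m setup is simply skipped. As a rough sketch of that guard pattern (xen_feature() and XENFEAT_auto_translated_physmap are the real interfaces used throughout the series; the surrounding function name is hypothetical):

	#include <xen/features.h>	/* xen_feature(), XENFEAT_* flags */

	/* Hypothetical helper, for illustration only: the same early-return
	 * guard appears in xen_build_dynamic_phys_to_machine(), set_page_prot(),
	 * xen_post_allocator_init() and other functions in the hunks below. */
	static void example_pv_only_setup(void)
	{
		/* PVH (auto-translated) guests: the hardware does the
		 * translation, so no PV page-table bookkeeping is needed. */
		if (xen_feature(XENFEAT_auto_translated_physmap))
			return;

		/* ... PV-only p2m / pagetable setup would go here ... */
	}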
-rw-r--r--  MAINTAINERS  1
-rw-r--r--  arch/arm/include/asm/xen/page.h  3
-rw-r--r--  arch/arm/xen/enlighten.c  9
-rw-r--r--  arch/x86/include/asm/xen/page.h  8
-rw-r--r--  arch/x86/xen/Kconfig  4
-rw-r--r--  arch/x86/xen/enlighten.c  126
-rw-r--r--  arch/x86/xen/grant-table.c  63
-rw-r--r--  arch/x86/xen/irq.c  5
-rw-r--r--  arch/x86/xen/mmu.c  166
-rw-r--r--  arch/x86/xen/p2m.c  15
-rw-r--r--  arch/x86/xen/platform-pci-unplug.c  79
-rw-r--r--  arch/x86/xen/setup.c  40
-rw-r--r--  arch/x86/xen/smp.c  49
-rw-r--r--  arch/x86/xen/time.c  1
-rw-r--r--  arch/x86/xen/xen-head.S  25
-rw-r--r--  arch/x86/xen/xen-ops.h  1
-rw-r--r--  drivers/block/xen-blkfront.c  4
-rw-r--r--  drivers/char/tpm/xen-tpmfront.c  4
-rw-r--r--  drivers/input/misc/xen-kbdfront.c  4
-rw-r--r--  drivers/net/xen-netfront.c  2
-rw-r--r--  drivers/pci/xen-pcifront.c  4
-rw-r--r--  drivers/video/xen-fbfront.c  6
-rw-r--r--  drivers/xen/Kconfig  1
-rw-r--r--  drivers/xen/Makefile  3
-rw-r--r--  drivers/xen/balloon.c  9
-rw-r--r--  drivers/xen/dbgp.c  2
-rw-r--r--  drivers/xen/events/Makefile  5
-rw-r--r--  drivers/xen/events/events_base.c (renamed from drivers/xen/events.c)  797
-rw-r--r--  drivers/xen/events/events_2l.c  372
-rw-r--r--  drivers/xen/events/events_fifo.c  428
-rw-r--r--  drivers/xen/events/events_internal.h  150
-rw-r--r--  drivers/xen/evtchn.c  2
-rw-r--r--  drivers/xen/gntdev.c  2
-rw-r--r--  drivers/xen/grant-table.c  90
-rw-r--r--  drivers/xen/pci.c  2
-rw-r--r--  drivers/xen/platform-pci.c  11
-rw-r--r--  drivers/xen/xenbus/xenbus_client.c  3
-rw-r--r--  drivers/xen/xenbus/xenbus_probe_frontend.c  2
-rw-r--r--  include/xen/events.h  9
-rw-r--r--  include/xen/grant_table.h  9
-rw-r--r--  include/xen/interface/elfnote.h  13
-rw-r--r--  include/xen/interface/event_channel.h  68
-rw-r--r--  include/xen/interface/xen.h  6
-rw-r--r--  include/xen/platform_pci.h  25
-rw-r--r--  include/xen/xen.h  14
45 files changed, 1952 insertions(+), 690 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index e945c6380f56..0207c30906ad 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9559,6 +9559,7 @@ M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
9559M: Boris Ostrovsky <boris.ostrovsky@oracle.com> 9559M: Boris Ostrovsky <boris.ostrovsky@oracle.com>
9560M: David Vrabel <david.vrabel@citrix.com> 9560M: David Vrabel <david.vrabel@citrix.com>
9561L: xen-devel@lists.xenproject.org (moderated for non-subscribers) 9561L: xen-devel@lists.xenproject.org (moderated for non-subscribers)
9562T: git git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip.git
9562S: Supported 9563S: Supported
9563F: arch/x86/xen/ 9564F: arch/x86/xen/
9564F: drivers/*/xen-*front.c 9565F: drivers/*/xen-*front.c
diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h
index 3759cacdd7f8..e0965abacb7d 100644
--- a/arch/arm/include/asm/xen/page.h
+++ b/arch/arm/include/asm/xen/page.h
@@ -117,6 +117,7 @@ static inline bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
117 return __set_phys_to_machine(pfn, mfn); 117 return __set_phys_to_machine(pfn, mfn);
118} 118}
119 119
120#define xen_remap(cookie, size) ioremap_cache((cookie), (size)); 120#define xen_remap(cookie, size) ioremap_cache((cookie), (size))
121#define xen_unmap(cookie) iounmap((cookie))
121 122
122#endif /* _ASM_ARM_XEN_PAGE_H */ 123#endif /* _ASM_ARM_XEN_PAGE_H */
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 85501238b425..2162172c0ddc 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -208,6 +208,7 @@ static int __init xen_guest_init(void)
208 const char *version = NULL; 208 const char *version = NULL;
209 const char *xen_prefix = "xen,xen-"; 209 const char *xen_prefix = "xen,xen-";
210 struct resource res; 210 struct resource res;
211 unsigned long grant_frames;
211 212
212 node = of_find_compatible_node(NULL, NULL, "xen,xen"); 213 node = of_find_compatible_node(NULL, NULL, "xen,xen");
213 if (!node) { 214 if (!node) {
@@ -224,10 +225,10 @@ static int __init xen_guest_init(void)
224 } 225 }
225 if (of_address_to_resource(node, GRANT_TABLE_PHYSADDR, &res)) 226 if (of_address_to_resource(node, GRANT_TABLE_PHYSADDR, &res))
226 return 0; 227 return 0;
227 xen_hvm_resume_frames = res.start; 228 grant_frames = res.start;
228 xen_events_irq = irq_of_parse_and_map(node, 0); 229 xen_events_irq = irq_of_parse_and_map(node, 0);
229 pr_info("Xen %s support found, events_irq=%d gnttab_frame_pfn=%lx\n", 230 pr_info("Xen %s support found, events_irq=%d gnttab_frame_pfn=%lx\n",
230 version, xen_events_irq, (xen_hvm_resume_frames >> PAGE_SHIFT)); 231 version, xen_events_irq, (grant_frames >> PAGE_SHIFT));
231 xen_domain_type = XEN_HVM_DOMAIN; 232 xen_domain_type = XEN_HVM_DOMAIN;
232 233
233 xen_setup_features(); 234 xen_setup_features();
@@ -265,6 +266,10 @@ static int __init xen_guest_init(void)
265 if (xen_vcpu_info == NULL) 266 if (xen_vcpu_info == NULL)
266 return -ENOMEM; 267 return -ENOMEM;
267 268
269 if (gnttab_setup_auto_xlat_frames(grant_frames)) {
270 free_percpu(xen_vcpu_info);
271 return -ENOMEM;
272 }
268 gnttab_init(); 273 gnttab_init();
269 if (!xen_initial_domain()) 274 if (!xen_initial_domain())
270 xenbus_probe(NULL); 275 xenbus_probe(NULL);
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index b913915e8e63..3e276eb23d1b 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -167,7 +167,12 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine)
167 */ 167 */
168static inline unsigned long mfn_to_local_pfn(unsigned long mfn) 168static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
169{ 169{
170 unsigned long pfn = mfn_to_pfn(mfn); 170 unsigned long pfn;
171
172 if (xen_feature(XENFEAT_auto_translated_physmap))
173 return mfn;
174
175 pfn = mfn_to_pfn(mfn);
171 if (get_phys_to_machine(pfn) != mfn) 176 if (get_phys_to_machine(pfn) != mfn)
172 return -1; /* force !pfn_valid() */ 177 return -1; /* force !pfn_valid() */
173 return pfn; 178 return pfn;
@@ -222,5 +227,6 @@ void make_lowmem_page_readonly(void *vaddr);
222void make_lowmem_page_readwrite(void *vaddr); 227void make_lowmem_page_readwrite(void *vaddr);
223 228
224#define xen_remap(cookie, size) ioremap((cookie), (size)); 229#define xen_remap(cookie, size) ioremap((cookie), (size));
230#define xen_unmap(cookie) iounmap((cookie))
225 231
226#endif /* _ASM_X86_XEN_PAGE_H */ 232#endif /* _ASM_X86_XEN_PAGE_H */
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 1a3c76505649..01b90261fa38 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -51,3 +51,7 @@ config XEN_DEBUG_FS
51 Enable statistics output and various tuning options in debugfs. 51 Enable statistics output and various tuning options in debugfs.
52 Enabling this option may incur a significant performance overhead. 52 Enabling this option may incur a significant performance overhead.
53 53
54config XEN_PVH
55 bool "Support for running as a PVH guest"
56 depends on X86_64 && XEN && XEN_PVHVM
57 def_bool n
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index fa6ade76ef3f..a4d7b647867f 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -262,8 +262,9 @@ static void __init xen_banner(void)
262 struct xen_extraversion extra; 262 struct xen_extraversion extra;
263 HYPERVISOR_xen_version(XENVER_extraversion, &extra); 263 HYPERVISOR_xen_version(XENVER_extraversion, &extra);
264 264
265 printk(KERN_INFO "Booting paravirtualized kernel on %s\n", 265 pr_info("Booting paravirtualized kernel %son %s\n",
266 pv_info.name); 266 xen_feature(XENFEAT_auto_translated_physmap) ?
267 "with PVH extensions " : "", pv_info.name);
267 printk(KERN_INFO "Xen version: %d.%d%s%s\n", 268 printk(KERN_INFO "Xen version: %d.%d%s%s\n",
268 version >> 16, version & 0xffff, extra.extraversion, 269 version >> 16, version & 0xffff, extra.extraversion,
269 xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); 270 xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
@@ -433,7 +434,7 @@ static void __init xen_init_cpuid_mask(void)
433 434
434 ax = 1; 435 ax = 1;
435 cx = 0; 436 cx = 0;
436 xen_cpuid(&ax, &bx, &cx, &dx); 437 cpuid(1, &ax, &bx, &cx, &dx);
437 438
438 xsave_mask = 439 xsave_mask =
439 (1 << (X86_FEATURE_XSAVE % 32)) | 440 (1 << (X86_FEATURE_XSAVE % 32)) |
@@ -1142,8 +1143,9 @@ void xen_setup_vcpu_info_placement(void)
1142 xen_vcpu_setup(cpu); 1143 xen_vcpu_setup(cpu);
1143 1144
1144 /* xen_vcpu_setup managed to place the vcpu_info within the 1145 /* xen_vcpu_setup managed to place the vcpu_info within the
1145 percpu area for all cpus, so make use of it */ 1146 * percpu area for all cpus, so make use of it. Note that for
1146 if (have_vcpu_info_placement) { 1147 * PVH we want to use native IRQ mechanism. */
1148 if (have_vcpu_info_placement && !xen_pvh_domain()) {
1147 pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); 1149 pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
1148 pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); 1150 pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
1149 pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); 1151 pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
@@ -1407,9 +1409,49 @@ static void __init xen_boot_params_init_edd(void)
1407 * Set up the GDT and segment registers for -fstack-protector. Until 1409 * Set up the GDT and segment registers for -fstack-protector. Until
1408 * we do this, we have to be careful not to call any stack-protected 1410 * we do this, we have to be careful not to call any stack-protected
1409 * function, which is most of the kernel. 1411 * function, which is most of the kernel.
1412 *
1413 * Note, that it is __ref because the only caller of this after init
1414 * is PVH which is not going to use xen_load_gdt_boot or other
1415 * __init functions.
1410 */ 1416 */
1411static void __init xen_setup_stackprotector(void) 1417static void __ref xen_setup_gdt(int cpu)
1412{ 1418{
1419 if (xen_feature(XENFEAT_auto_translated_physmap)) {
1420#ifdef CONFIG_X86_64
1421 unsigned long dummy;
1422
1423 load_percpu_segment(cpu); /* We need to access per-cpu area */
1424 switch_to_new_gdt(cpu); /* GDT and GS set */
1425
1426 /* We are switching of the Xen provided GDT to our HVM mode
1427 * GDT. The new GDT has __KERNEL_CS with CS.L = 1
1428 * and we are jumping to reload it.
1429 */
1430 asm volatile ("pushq %0\n"
1431 "leaq 1f(%%rip),%0\n"
1432 "pushq %0\n"
1433 "lretq\n"
1434 "1:\n"
1435 : "=&r" (dummy) : "0" (__KERNEL_CS));
1436
1437 /*
1438 * While not needed, we also set the %es, %ds, and %fs
1439 * to zero. We don't care about %ss as it is NULL.
1440 * Strictly speaking this is not needed as Xen zeros those
1441 * out (and also MSR_FS_BASE, MSR_GS_BASE, MSR_KERNEL_GS_BASE)
1442 *
1443 * Linux zeros them in cpu_init() and in secondary_startup_64
1444 * (for BSP).
1445 */
1446 loadsegment(es, 0);
1447 loadsegment(ds, 0);
1448 loadsegment(fs, 0);
1449#else
1450 /* PVH: TODO Implement. */
1451 BUG();
1452#endif
1453 return; /* PVH does not need any PV GDT ops. */
1454 }
1413 pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot; 1455 pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot;
1414 pv_cpu_ops.load_gdt = xen_load_gdt_boot; 1456 pv_cpu_ops.load_gdt = xen_load_gdt_boot;
1415 1457
@@ -1420,6 +1462,46 @@ static void __init xen_setup_stackprotector(void)
1420 pv_cpu_ops.load_gdt = xen_load_gdt; 1462 pv_cpu_ops.load_gdt = xen_load_gdt;
1421} 1463}
1422 1464
1465/*
1466 * A PV guest starts with default flags that are not set for PVH, set them
1467 * here asap.
1468 */
1469static void xen_pvh_set_cr_flags(int cpu)
1470{
1471
1472 /* Some of these are setup in 'secondary_startup_64'. The others:
1473 * X86_CR0_TS, X86_CR0_PE, X86_CR0_ET are set by Xen for HVM guests
1474 * (which PVH shared codepaths), while X86_CR0_PG is for PVH. */
1475 write_cr0(read_cr0() | X86_CR0_MP | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM);
1476}
1477
1478/*
1479 * Note, that it is ref - because the only caller of this after init
1480 * is PVH which is not going to use xen_load_gdt_boot or other
1481 * __init functions.
1482 */
1483void __ref xen_pvh_secondary_vcpu_init(int cpu)
1484{
1485 xen_setup_gdt(cpu);
1486 xen_pvh_set_cr_flags(cpu);
1487}
1488
1489static void __init xen_pvh_early_guest_init(void)
1490{
1491 if (!xen_feature(XENFEAT_auto_translated_physmap))
1492 return;
1493
1494 if (!xen_feature(XENFEAT_hvm_callback_vector))
1495 return;
1496
1497 xen_have_vector_callback = 1;
1498 xen_pvh_set_cr_flags(0);
1499
1500#ifdef CONFIG_X86_32
1501 BUG(); /* PVH: Implement proper support. */
1502#endif
1503}
1504
1423/* First C function to be called on Xen boot */ 1505/* First C function to be called on Xen boot */
1424asmlinkage void __init xen_start_kernel(void) 1506asmlinkage void __init xen_start_kernel(void)
1425{ 1507{
@@ -1431,13 +1513,16 @@ asmlinkage void __init xen_start_kernel(void)
1431 1513
1432 xen_domain_type = XEN_PV_DOMAIN; 1514 xen_domain_type = XEN_PV_DOMAIN;
1433 1515
1516 xen_setup_features();
1517 xen_pvh_early_guest_init();
1434 xen_setup_machphys_mapping(); 1518 xen_setup_machphys_mapping();
1435 1519
1436 /* Install Xen paravirt ops */ 1520 /* Install Xen paravirt ops */
1437 pv_info = xen_info; 1521 pv_info = xen_info;
1438 pv_init_ops = xen_init_ops; 1522 pv_init_ops = xen_init_ops;
1439 pv_cpu_ops = xen_cpu_ops;
1440 pv_apic_ops = xen_apic_ops; 1523 pv_apic_ops = xen_apic_ops;
1524 if (!xen_pvh_domain())
1525 pv_cpu_ops = xen_cpu_ops;
1441 1526
1442 x86_init.resources.memory_setup = xen_memory_setup; 1527 x86_init.resources.memory_setup = xen_memory_setup;
1443 x86_init.oem.arch_setup = xen_arch_setup; 1528 x86_init.oem.arch_setup = xen_arch_setup;
@@ -1469,17 +1554,14 @@ asmlinkage void __init xen_start_kernel(void)
1469 /* Work out if we support NX */ 1554 /* Work out if we support NX */
1470 x86_configure_nx(); 1555 x86_configure_nx();
1471 1556
1472 xen_setup_features();
1473
1474 /* Get mfn list */ 1557 /* Get mfn list */
1475 if (!xen_feature(XENFEAT_auto_translated_physmap)) 1558 xen_build_dynamic_phys_to_machine();
1476 xen_build_dynamic_phys_to_machine();
1477 1559
1478 /* 1560 /*
1479 * Set up kernel GDT and segment registers, mainly so that 1561 * Set up kernel GDT and segment registers, mainly so that
1480 * -fstack-protector code can be executed. 1562 * -fstack-protector code can be executed.
1481 */ 1563 */
1482 xen_setup_stackprotector(); 1564 xen_setup_gdt(0);
1483 1565
1484 xen_init_irq_ops(); 1566 xen_init_irq_ops();
1485 xen_init_cpuid_mask(); 1567 xen_init_cpuid_mask();
@@ -1548,14 +1630,18 @@ asmlinkage void __init xen_start_kernel(void)
1548 /* set the limit of our address space */ 1630 /* set the limit of our address space */
1549 xen_reserve_top(); 1631 xen_reserve_top();
1550 1632
1551 /* We used to do this in xen_arch_setup, but that is too late on AMD 1633 /* PVH: runs at default kernel iopl of 0 */
1552 * were early_cpu_init (run before ->arch_setup()) calls early_amd_init 1634 if (!xen_pvh_domain()) {
1553 * which pokes 0xcf8 port. 1635 /*
1554 */ 1636 * We used to do this in xen_arch_setup, but that is too late
1555 set_iopl.iopl = 1; 1637 * on AMD were early_cpu_init (run before ->arch_setup()) calls
1556 rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); 1638 * early_amd_init which pokes 0xcf8 port.
1557 if (rc != 0) 1639 */
1558 xen_raw_printk("physdev_op failed %d\n", rc); 1640 set_iopl.iopl = 1;
1641 rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
1642 if (rc != 0)
1643 xen_raw_printk("physdev_op failed %d\n", rc);
1644 }
1559 1645
1560#ifdef CONFIG_X86_32 1646#ifdef CONFIG_X86_32
1561 /* set up basic CPUID stuff */ 1647 /* set up basic CPUID stuff */
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index 3a5f55d51907..103c93f874b2 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -125,3 +125,66 @@ void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
125 apply_to_page_range(&init_mm, (unsigned long)shared, 125 apply_to_page_range(&init_mm, (unsigned long)shared,
126 PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); 126 PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL);
127} 127}
128#ifdef CONFIG_XEN_PVH
129#include <xen/balloon.h>
130#include <xen/events.h>
131#include <xen/xen.h>
132#include <linux/slab.h>
133static int __init xlated_setup_gnttab_pages(void)
134{
135 struct page **pages;
136 xen_pfn_t *pfns;
137 int rc;
138 unsigned int i;
139 unsigned long nr_grant_frames = gnttab_max_grant_frames();
140
141 BUG_ON(nr_grant_frames == 0);
142 pages = kcalloc(nr_grant_frames, sizeof(pages[0]), GFP_KERNEL);
143 if (!pages)
144 return -ENOMEM;
145
146 pfns = kcalloc(nr_grant_frames, sizeof(pfns[0]), GFP_KERNEL);
147 if (!pfns) {
148 kfree(pages);
149 return -ENOMEM;
150 }
151 rc = alloc_xenballooned_pages(nr_grant_frames, pages, 0 /* lowmem */);
152 if (rc) {
153 pr_warn("%s Couldn't balloon alloc %ld pfns rc:%d\n", __func__,
154 nr_grant_frames, rc);
155 kfree(pages);
156 kfree(pfns);
157 return rc;
158 }
159 for (i = 0; i < nr_grant_frames; i++)
160 pfns[i] = page_to_pfn(pages[i]);
161
162 rc = arch_gnttab_map_shared(pfns, nr_grant_frames, nr_grant_frames,
163 &xen_auto_xlat_grant_frames.vaddr);
164
165 kfree(pages);
166 if (rc) {
167 pr_warn("%s Couldn't map %ld pfns rc:%d\n", __func__,
168 nr_grant_frames, rc);
169 free_xenballooned_pages(nr_grant_frames, pages);
170 kfree(pfns);
171 return rc;
172 }
173
174 xen_auto_xlat_grant_frames.pfn = pfns;
175 xen_auto_xlat_grant_frames.count = nr_grant_frames;
176
177 return 0;
178}
179
180static int __init xen_pvh_gnttab_setup(void)
181{
182 if (!xen_pvh_domain())
183 return -ENODEV;
184
185 return xlated_setup_gnttab_pages();
186}
187/* Call it _before_ __gnttab_init as we need to initialize the
188 * xen_auto_xlat_grant_frames first. */
189core_initcall(xen_pvh_gnttab_setup);
190#endif
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 0da7f863056f..76ca326105f7 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -5,6 +5,7 @@
5#include <xen/interface/xen.h> 5#include <xen/interface/xen.h>
6#include <xen/interface/sched.h> 6#include <xen/interface/sched.h>
7#include <xen/interface/vcpu.h> 7#include <xen/interface/vcpu.h>
8#include <xen/features.h>
8#include <xen/events.h> 9#include <xen/events.h>
9 10
10#include <asm/xen/hypercall.h> 11#include <asm/xen/hypercall.h>
@@ -128,6 +129,8 @@ static const struct pv_irq_ops xen_irq_ops __initconst = {
128 129
129void __init xen_init_irq_ops(void) 130void __init xen_init_irq_ops(void)
130{ 131{
131 pv_irq_ops = xen_irq_ops; 132 /* For PVH we use default pv_irq_ops settings. */
133 if (!xen_feature(XENFEAT_hvm_callback_vector))
134 pv_irq_ops = xen_irq_ops;
132 x86_init.irqs.intr_init = xen_init_IRQ; 135 x86_init.irqs.intr_init = xen_init_IRQ;
133} 136}
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index ce563be09cc1..c1d406f35523 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1198,44 +1198,40 @@ static void __init xen_cleanhighmap(unsigned long vaddr,
1198 * instead of somewhere later and be confusing. */ 1198 * instead of somewhere later and be confusing. */
1199 xen_mc_flush(); 1199 xen_mc_flush();
1200} 1200}
1201#endif 1201static void __init xen_pagetable_p2m_copy(void)
1202static void __init xen_pagetable_init(void)
1203{ 1202{
1204#ifdef CONFIG_X86_64
1205 unsigned long size; 1203 unsigned long size;
1206 unsigned long addr; 1204 unsigned long addr;
1207#endif 1205 unsigned long new_mfn_list;
1208 paging_init(); 1206
1209 xen_setup_shared_info(); 1207 if (xen_feature(XENFEAT_auto_translated_physmap))
1210#ifdef CONFIG_X86_64 1208 return;
1211 if (!xen_feature(XENFEAT_auto_translated_physmap)) { 1209
1212 unsigned long new_mfn_list; 1210 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
1213 1211
1214 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); 1212 new_mfn_list = xen_revector_p2m_tree();
1215 1213 /* No memory or already called. */
1216 /* On 32-bit, we get zero so this never gets executed. */ 1214 if (!new_mfn_list || new_mfn_list == xen_start_info->mfn_list)
1217 new_mfn_list = xen_revector_p2m_tree(); 1215 return;
1218 if (new_mfn_list && new_mfn_list != xen_start_info->mfn_list) { 1216
1219 /* using __ka address and sticking INVALID_P2M_ENTRY! */ 1217 /* using __ka address and sticking INVALID_P2M_ENTRY! */
1220 memset((void *)xen_start_info->mfn_list, 0xff, size); 1218 memset((void *)xen_start_info->mfn_list, 0xff, size);
1221 1219
1222 /* We should be in __ka space. */ 1220 /* We should be in __ka space. */
1223 BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map); 1221 BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map);
1224 addr = xen_start_info->mfn_list; 1222 addr = xen_start_info->mfn_list;
1225 /* We roundup to the PMD, which means that if anybody at this stage is 1223 /* We roundup to the PMD, which means that if anybody at this stage is
1226 * using the __ka address of xen_start_info or xen_start_info->shared_info 1224 * using the __ka address of xen_start_info or xen_start_info->shared_info
1227 * they are in going to crash. Fortunatly we have already revectored 1225 * they are in going to crash. Fortunatly we have already revectored
1228 * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */ 1226 * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */
1229 size = roundup(size, PMD_SIZE); 1227 size = roundup(size, PMD_SIZE);
1230 xen_cleanhighmap(addr, addr + size); 1228 xen_cleanhighmap(addr, addr + size);
1231 1229
1232 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); 1230 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
1233 memblock_free(__pa(xen_start_info->mfn_list), size); 1231 memblock_free(__pa(xen_start_info->mfn_list), size);
1234 /* And revector! Bye bye old array */ 1232 /* And revector! Bye bye old array */
1235 xen_start_info->mfn_list = new_mfn_list; 1233 xen_start_info->mfn_list = new_mfn_list;
1236 } else 1234
1237 goto skip;
1238 }
1239 /* At this stage, cleanup_highmap has already cleaned __ka space 1235 /* At this stage, cleanup_highmap has already cleaned __ka space
1240 * from _brk_limit way up to the max_pfn_mapped (which is the end of 1236 * from _brk_limit way up to the max_pfn_mapped (which is the end of
1241 * the ramdisk). We continue on, erasing PMD entries that point to page 1237 * the ramdisk). We continue on, erasing PMD entries that point to page
@@ -1255,7 +1251,15 @@ static void __init xen_pagetable_init(void)
1255 * anything at this stage. */ 1251 * anything at this stage. */
1256 xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1); 1252 xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
1257#endif 1253#endif
1258skip: 1254}
1255#endif
1256
1257static void __init xen_pagetable_init(void)
1258{
1259 paging_init();
1260 xen_setup_shared_info();
1261#ifdef CONFIG_X86_64
1262 xen_pagetable_p2m_copy();
1259#endif 1263#endif
1260 xen_post_allocator_init(); 1264 xen_post_allocator_init();
1261} 1265}
@@ -1753,6 +1757,10 @@ static void set_page_prot_flags(void *addr, pgprot_t prot, unsigned long flags)
1753 unsigned long pfn = __pa(addr) >> PAGE_SHIFT; 1757 unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
1754 pte_t pte = pfn_pte(pfn, prot); 1758 pte_t pte = pfn_pte(pfn, prot);
1755 1759
1760 /* For PVH no need to set R/O or R/W to pin them or unpin them. */
1761 if (xen_feature(XENFEAT_auto_translated_physmap))
1762 return;
1763
1756 if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags)) 1764 if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags))
1757 BUG(); 1765 BUG();
1758} 1766}
@@ -1863,6 +1871,7 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
1863 * but that's enough to get __va working. We need to fill in the rest 1871 * but that's enough to get __va working. We need to fill in the rest
1864 * of the physical mapping once some sort of allocator has been set 1872 * of the physical mapping once some sort of allocator has been set
1865 * up. 1873 * up.
1874 * NOTE: for PVH, the page tables are native.
1866 */ 1875 */
1867void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) 1876void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
1868{ 1877{
@@ -1884,17 +1893,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
1884 /* Zap identity mapping */ 1893 /* Zap identity mapping */
1885 init_level4_pgt[0] = __pgd(0); 1894 init_level4_pgt[0] = __pgd(0);
1886 1895
1887 /* Pre-constructed entries are in pfn, so convert to mfn */ 1896 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1888 /* L4[272] -> level3_ident_pgt 1897 /* Pre-constructed entries are in pfn, so convert to mfn */
1889 * L4[511] -> level3_kernel_pgt */ 1898 /* L4[272] -> level3_ident_pgt
1890 convert_pfn_mfn(init_level4_pgt); 1899 * L4[511] -> level3_kernel_pgt */
1891 1900 convert_pfn_mfn(init_level4_pgt);
1892 /* L3_i[0] -> level2_ident_pgt */ 1901
1893 convert_pfn_mfn(level3_ident_pgt); 1902 /* L3_i[0] -> level2_ident_pgt */
1894 /* L3_k[510] -> level2_kernel_pgt 1903 convert_pfn_mfn(level3_ident_pgt);
1895 * L3_i[511] -> level2_fixmap_pgt */ 1904 /* L3_k[510] -> level2_kernel_pgt
1896 convert_pfn_mfn(level3_kernel_pgt); 1905 * L3_i[511] -> level2_fixmap_pgt */
1897 1906 convert_pfn_mfn(level3_kernel_pgt);
1907 }
1898 /* We get [511][511] and have Xen's version of level2_kernel_pgt */ 1908 /* We get [511][511] and have Xen's version of level2_kernel_pgt */
1899 l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); 1909 l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
1900 l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); 1910 l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
@@ -1918,31 +1928,33 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
1918 copy_page(level2_fixmap_pgt, l2); 1928 copy_page(level2_fixmap_pgt, l2);
1919 /* Note that we don't do anything with level1_fixmap_pgt which 1929 /* Note that we don't do anything with level1_fixmap_pgt which
1920 * we don't need. */ 1930 * we don't need. */
1931 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1932 /* Make pagetable pieces RO */
1933 set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
1934 set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
1935 set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
1936 set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
1937 set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
1938 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
1939 set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
1940
1941 /* Pin down new L4 */
1942 pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
1943 PFN_DOWN(__pa_symbol(init_level4_pgt)));
1944
1945 /* Unpin Xen-provided one */
1946 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
1921 1947
1922 /* Make pagetable pieces RO */ 1948 /*
1923 set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); 1949 * At this stage there can be no user pgd, and no page
1924 set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); 1950 * structure to attach it to, so make sure we just set kernel
1925 set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); 1951 * pgd.
1926 set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); 1952 */
1927 set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); 1953 xen_mc_batch();
1928 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); 1954 __xen_write_cr3(true, __pa(init_level4_pgt));
1929 set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); 1955 xen_mc_issue(PARAVIRT_LAZY_CPU);
1930 1956 } else
1931 /* Pin down new L4 */ 1957 native_write_cr3(__pa(init_level4_pgt));
1932 pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
1933 PFN_DOWN(__pa_symbol(init_level4_pgt)));
1934
1935 /* Unpin Xen-provided one */
1936 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
1937
1938 /*
1939 * At this stage there can be no user pgd, and no page
1940 * structure to attach it to, so make sure we just set kernel
1941 * pgd.
1942 */
1943 xen_mc_batch();
1944 __xen_write_cr3(true, __pa(init_level4_pgt));
1945 xen_mc_issue(PARAVIRT_LAZY_CPU);
1946 1958
1947 /* We can't that easily rip out L3 and L2, as the Xen pagetables are 1959 /* We can't that easily rip out L3 and L2, as the Xen pagetables are
1948 * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for 1960 * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for
@@ -2103,6 +2115,9 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
2103 2115
2104static void __init xen_post_allocator_init(void) 2116static void __init xen_post_allocator_init(void)
2105{ 2117{
2118 if (xen_feature(XENFEAT_auto_translated_physmap))
2119 return;
2120
2106 pv_mmu_ops.set_pte = xen_set_pte; 2121 pv_mmu_ops.set_pte = xen_set_pte;
2107 pv_mmu_ops.set_pmd = xen_set_pmd; 2122 pv_mmu_ops.set_pmd = xen_set_pmd;
2108 pv_mmu_ops.set_pud = xen_set_pud; 2123 pv_mmu_ops.set_pud = xen_set_pud;
@@ -2207,6 +2222,15 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
2207void __init xen_init_mmu_ops(void) 2222void __init xen_init_mmu_ops(void)
2208{ 2223{
2209 x86_init.paging.pagetable_init = xen_pagetable_init; 2224 x86_init.paging.pagetable_init = xen_pagetable_init;
2225
2226 /* Optimization - we can use the HVM one but it has no idea which
2227 * VCPUs are descheduled - which means that it will needlessly IPI
2228 * them. Xen knows so let it do the job.
2229 */
2230 if (xen_feature(XENFEAT_auto_translated_physmap)) {
2231 pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others;
2232 return;
2233 }
2210 pv_mmu_ops = xen_mmu_ops; 2234 pv_mmu_ops = xen_mmu_ops;
2211 2235
2212 memset(dummy_mapping, 0xff, PAGE_SIZE); 2236 memset(dummy_mapping, 0xff, PAGE_SIZE);
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 2ae8699e8767..696c694986d0 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -280,6 +280,9 @@ void __ref xen_build_mfn_list_list(void)
280{ 280{
281 unsigned long pfn; 281 unsigned long pfn;
282 282
283 if (xen_feature(XENFEAT_auto_translated_physmap))
284 return;
285
283 /* Pre-initialize p2m_top_mfn to be completely missing */ 286 /* Pre-initialize p2m_top_mfn to be completely missing */
284 if (p2m_top_mfn == NULL) { 287 if (p2m_top_mfn == NULL) {
285 p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); 288 p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
@@ -336,6 +339,9 @@ void __ref xen_build_mfn_list_list(void)
336 339
337void xen_setup_mfn_list_list(void) 340void xen_setup_mfn_list_list(void)
338{ 341{
342 if (xen_feature(XENFEAT_auto_translated_physmap))
343 return;
344
339 BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); 345 BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
340 346
341 HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = 347 HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
@@ -346,10 +352,15 @@ void xen_setup_mfn_list_list(void)
346/* Set up p2m_top to point to the domain-builder provided p2m pages */ 352/* Set up p2m_top to point to the domain-builder provided p2m pages */
347void __init xen_build_dynamic_phys_to_machine(void) 353void __init xen_build_dynamic_phys_to_machine(void)
348{ 354{
349 unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; 355 unsigned long *mfn_list;
350 unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); 356 unsigned long max_pfn;
351 unsigned long pfn; 357 unsigned long pfn;
352 358
359 if (xen_feature(XENFEAT_auto_translated_physmap))
360 return;
361
362 mfn_list = (unsigned long *)xen_start_info->mfn_list;
363 max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
353 xen_max_p2m_pfn = max_pfn; 364 xen_max_p2m_pfn = max_pfn;
354 365
355 p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); 366 p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
index 0a7852483ffe..a8261716d58d 100644
--- a/arch/x86/xen/platform-pci-unplug.c
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -30,10 +30,9 @@
30#define XEN_PLATFORM_ERR_PROTOCOL -2 30#define XEN_PLATFORM_ERR_PROTOCOL -2
31#define XEN_PLATFORM_ERR_BLACKLIST -3 31#define XEN_PLATFORM_ERR_BLACKLIST -3
32 32
33/* store the value of xen_emul_unplug after the unplug is done */
34int xen_platform_pci_unplug;
35EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
36#ifdef CONFIG_XEN_PVHVM 33#ifdef CONFIG_XEN_PVHVM
34/* store the value of xen_emul_unplug after the unplug is done */
35static int xen_platform_pci_unplug;
37static int xen_emul_unplug; 36static int xen_emul_unplug;
38 37
39static int check_platform_magic(void) 38static int check_platform_magic(void)
@@ -69,6 +68,80 @@ static int check_platform_magic(void)
69 return 0; 68 return 0;
70} 69}
71 70
71bool xen_has_pv_devices()
72{
73 if (!xen_domain())
74 return false;
75
76 /* PV domains always have them. */
77 if (xen_pv_domain())
78 return true;
79
80 /* And user has xen_platform_pci=0 set in guest config as
81 * driver did not modify the value. */
82 if (xen_platform_pci_unplug == 0)
83 return false;
84
85 if (xen_platform_pci_unplug & XEN_UNPLUG_NEVER)
86 return false;
87
88 if (xen_platform_pci_unplug & XEN_UNPLUG_ALL)
89 return true;
90
91 /* This is an odd one - we are going to run legacy
92 * and PV drivers at the same time. */
93 if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY)
94 return true;
95
96 /* And the caller has to follow with xen_pv_{disk,nic}_devices
97 * to be certain which driver can load. */
98 return false;
99}
100EXPORT_SYMBOL_GPL(xen_has_pv_devices);
101
102static bool __xen_has_pv_device(int state)
103{
104 /* HVM domains might or might not */
105 if (xen_hvm_domain() && (xen_platform_pci_unplug & state))
106 return true;
107
108 return xen_has_pv_devices();
109}
110
111bool xen_has_pv_nic_devices(void)
112{
113 return __xen_has_pv_device(XEN_UNPLUG_ALL_NICS | XEN_UNPLUG_ALL);
114}
115EXPORT_SYMBOL_GPL(xen_has_pv_nic_devices);
116
117bool xen_has_pv_disk_devices(void)
118{
119 return __xen_has_pv_device(XEN_UNPLUG_ALL_IDE_DISKS |
120 XEN_UNPLUG_AUX_IDE_DISKS | XEN_UNPLUG_ALL);
121}
122EXPORT_SYMBOL_GPL(xen_has_pv_disk_devices);
123
124/*
125 * This one is odd - it determines whether you want to run PV _and_
126 * legacy (IDE) drivers together. This combination is only possible
127 * under HVM.
128 */
129bool xen_has_pv_and_legacy_disk_devices(void)
130{
131 if (!xen_domain())
132 return false;
133
134 /* N.B. This is only ever used in HVM mode */
135 if (xen_pv_domain())
136 return false;
137
138 if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY)
139 return true;
140
141 return false;
142}
143EXPORT_SYMBOL_GPL(xen_has_pv_and_legacy_disk_devices);
144
72void xen_unplug_emulated_devices(void) 145void xen_unplug_emulated_devices(void)
73{ 146{
74 int r; 147 int r;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 68c054f59de6..dd5f905e33d5 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -27,6 +27,7 @@
27#include <xen/interface/memory.h> 27#include <xen/interface/memory.h>
28#include <xen/interface/physdev.h> 28#include <xen/interface/physdev.h>
29#include <xen/features.h> 29#include <xen/features.h>
30#include "mmu.h"
30#include "xen-ops.h" 31#include "xen-ops.h"
31#include "vdso.h" 32#include "vdso.h"
32 33
@@ -81,6 +82,9 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
81 82
82 memblock_reserve(start, size); 83 memblock_reserve(start, size);
83 84
85 if (xen_feature(XENFEAT_auto_translated_physmap))
86 return;
87
84 xen_max_p2m_pfn = PFN_DOWN(start + size); 88 xen_max_p2m_pfn = PFN_DOWN(start + size);
85 for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) { 89 for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
86 unsigned long mfn = pfn_to_mfn(pfn); 90 unsigned long mfn = pfn_to_mfn(pfn);
@@ -103,6 +107,7 @@ static unsigned long __init xen_do_chunk(unsigned long start,
103 .domid = DOMID_SELF 107 .domid = DOMID_SELF
104 }; 108 };
105 unsigned long len = 0; 109 unsigned long len = 0;
110 int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap);
106 unsigned long pfn; 111 unsigned long pfn;
107 int ret; 112 int ret;
108 113
@@ -116,7 +121,7 @@ static unsigned long __init xen_do_chunk(unsigned long start,
116 continue; 121 continue;
117 frame = mfn; 122 frame = mfn;
118 } else { 123 } else {
119 if (mfn != INVALID_P2M_ENTRY) 124 if (!xlated_phys && mfn != INVALID_P2M_ENTRY)
120 continue; 125 continue;
121 frame = pfn; 126 frame = pfn;
122 } 127 }
@@ -154,6 +159,13 @@ static unsigned long __init xen_do_chunk(unsigned long start,
154static unsigned long __init xen_release_chunk(unsigned long start, 159static unsigned long __init xen_release_chunk(unsigned long start,
155 unsigned long end) 160 unsigned long end)
156{ 161{
162 /*
163 * Xen already ballooned out the E820 non RAM regions for us
164 * and set them up properly in EPT.
165 */
166 if (xen_feature(XENFEAT_auto_translated_physmap))
167 return end - start;
168
157 return xen_do_chunk(start, end, true); 169 return xen_do_chunk(start, end, true);
158} 170}
159 171
@@ -222,7 +234,13 @@ static void __init xen_set_identity_and_release_chunk(
222 * (except for the ISA region which must be 1:1 mapped) to 234 * (except for the ISA region which must be 1:1 mapped) to
223 * release the refcounts (in Xen) on the original frames. 235 * release the refcounts (in Xen) on the original frames.
224 */ 236 */
225 for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) { 237
238 /*
239 * PVH E820 matches the hypervisor's P2M which means we need to
240 * account for the proper values of *release and *identity.
241 */
242 for (pfn = start_pfn; !xen_feature(XENFEAT_auto_translated_physmap) &&
243 pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) {
226 pte_t pte = __pte_ma(0); 244 pte_t pte = __pte_ma(0);
227 245
228 if (pfn < PFN_UP(ISA_END_ADDRESS)) 246 if (pfn < PFN_UP(ISA_END_ADDRESS))
@@ -563,16 +581,13 @@ void xen_enable_nmi(void)
563 BUG(); 581 BUG();
564#endif 582#endif
565} 583}
566void __init xen_arch_setup(void) 584void __init xen_pvmmu_arch_setup(void)
567{ 585{
568 xen_panic_handler_init();
569
570 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); 586 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
571 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); 587 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
572 588
573 if (!xen_feature(XENFEAT_auto_translated_physmap)) 589 HYPERVISOR_vm_assist(VMASST_CMD_enable,
574 HYPERVISOR_vm_assist(VMASST_CMD_enable, 590 VMASST_TYPE_pae_extended_cr3);
575 VMASST_TYPE_pae_extended_cr3);
576 591
577 if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) || 592 if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
578 register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) 593 register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
@@ -581,6 +596,15 @@ void __init xen_arch_setup(void)
581 xen_enable_sysenter(); 596 xen_enable_sysenter();
582 xen_enable_syscall(); 597 xen_enable_syscall();
583 xen_enable_nmi(); 598 xen_enable_nmi();
599}
600
601/* This function is not called for HVM domains */
602void __init xen_arch_setup(void)
603{
604 xen_panic_handler_init();
605 if (!xen_feature(XENFEAT_auto_translated_physmap))
606 xen_pvmmu_arch_setup();
607
584#ifdef CONFIG_ACPI 608#ifdef CONFIG_ACPI
585 if (!(xen_start_info->flags & SIF_INITDOMAIN)) { 609 if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
586 printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); 610 printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index c36b325abd83..a18eadd8bb40 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -73,9 +73,11 @@ static void cpu_bringup(void)
73 touch_softlockup_watchdog(); 73 touch_softlockup_watchdog();
74 preempt_disable(); 74 preempt_disable();
75 75
76 xen_enable_sysenter(); 76 /* PVH runs in ring 0 and allows us to do native syscalls. Yay! */
77 xen_enable_syscall(); 77 if (!xen_feature(XENFEAT_supervisor_mode_kernel)) {
78 78 xen_enable_sysenter();
79 xen_enable_syscall();
80 }
79 cpu = smp_processor_id(); 81 cpu = smp_processor_id();
80 smp_store_cpu_info(cpu); 82 smp_store_cpu_info(cpu);
81 cpu_data(cpu).x86_max_cores = 1; 83 cpu_data(cpu).x86_max_cores = 1;
@@ -97,8 +99,14 @@ static void cpu_bringup(void)
97 wmb(); /* make sure everything is out */ 99 wmb(); /* make sure everything is out */
98} 100}
99 101
100static void cpu_bringup_and_idle(void) 102/* Note: cpu parameter is only relevant for PVH */
103static void cpu_bringup_and_idle(int cpu)
101{ 104{
105#ifdef CONFIG_X86_64
106 if (xen_feature(XENFEAT_auto_translated_physmap) &&
107 xen_feature(XENFEAT_supervisor_mode_kernel))
108 xen_pvh_secondary_vcpu_init(cpu);
109#endif
102 cpu_bringup(); 110 cpu_bringup();
103 cpu_startup_entry(CPUHP_ONLINE); 111 cpu_startup_entry(CPUHP_ONLINE);
104} 112}
@@ -274,9 +282,10 @@ static void __init xen_smp_prepare_boot_cpu(void)
274 native_smp_prepare_boot_cpu(); 282 native_smp_prepare_boot_cpu();
275 283
276 if (xen_pv_domain()) { 284 if (xen_pv_domain()) {
277 /* We've switched to the "real" per-cpu gdt, so make sure the 285 if (!xen_feature(XENFEAT_writable_page_tables))
278 old memory can be recycled */ 286 /* We've switched to the "real" per-cpu gdt, so make
279 make_lowmem_page_readwrite(xen_initial_gdt); 287 * sure the old memory can be recycled. */
288 make_lowmem_page_readwrite(xen_initial_gdt);
280 289
281#ifdef CONFIG_X86_32 290#ifdef CONFIG_X86_32
282 /* 291 /*
@@ -360,22 +369,21 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
360 369
361 gdt = get_cpu_gdt_table(cpu); 370 gdt = get_cpu_gdt_table(cpu);
362 371
363 ctxt->flags = VGCF_IN_KERNEL;
364 ctxt->user_regs.ss = __KERNEL_DS;
365#ifdef CONFIG_X86_32 372#ifdef CONFIG_X86_32
373 /* Note: PVH is not yet supported on x86_32. */
366 ctxt->user_regs.fs = __KERNEL_PERCPU; 374 ctxt->user_regs.fs = __KERNEL_PERCPU;
367 ctxt->user_regs.gs = __KERNEL_STACK_CANARY; 375 ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
368#else
369 ctxt->gs_base_kernel = per_cpu_offset(cpu);
370#endif 376#endif
371 ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; 377 ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
372 378
373 memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); 379 memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
374 380
375 { 381 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
382 ctxt->flags = VGCF_IN_KERNEL;
376 ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ 383 ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
377 ctxt->user_regs.ds = __USER_DS; 384 ctxt->user_regs.ds = __USER_DS;
378 ctxt->user_regs.es = __USER_DS; 385 ctxt->user_regs.es = __USER_DS;
386 ctxt->user_regs.ss = __KERNEL_DS;
379 387
380 xen_copy_trap_info(ctxt->trap_ctxt); 388 xen_copy_trap_info(ctxt->trap_ctxt);
381 389
@@ -396,18 +404,27 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
396#ifdef CONFIG_X86_32 404#ifdef CONFIG_X86_32
397 ctxt->event_callback_cs = __KERNEL_CS; 405 ctxt->event_callback_cs = __KERNEL_CS;
398 ctxt->failsafe_callback_cs = __KERNEL_CS; 406 ctxt->failsafe_callback_cs = __KERNEL_CS;
407#else
408 ctxt->gs_base_kernel = per_cpu_offset(cpu);
399#endif 409#endif
400 ctxt->event_callback_eip = 410 ctxt->event_callback_eip =
401 (unsigned long)xen_hypervisor_callback; 411 (unsigned long)xen_hypervisor_callback;
402 ctxt->failsafe_callback_eip = 412 ctxt->failsafe_callback_eip =
403 (unsigned long)xen_failsafe_callback; 413 (unsigned long)xen_failsafe_callback;
414 ctxt->user_regs.cs = __KERNEL_CS;
415 per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
416#ifdef CONFIG_X86_32
404 } 417 }
405 ctxt->user_regs.cs = __KERNEL_CS; 418#else
419 } else
420 /* N.B. The user_regs.eip (cpu_bringup_and_idle) is called with
421 * %rdi having the cpu number - which means are passing in
422 * as the first parameter the cpu. Subtle!
423 */
424 ctxt->user_regs.rdi = cpu;
425#endif
406 ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); 426 ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
407
408 per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
409 ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); 427 ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
410
411 if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)) 428 if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
412 BUG(); 429 BUG();
413 430
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 12a1ca707b94..7b78f88c1707 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -446,6 +446,7 @@ void xen_setup_timer(int cpu)
446 IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER| 446 IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER|
447 IRQF_FORCE_RESUME, 447 IRQF_FORCE_RESUME,
448 name, NULL); 448 name, NULL);
449 (void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX);
449 450
450 memcpy(evt, xen_clockevent, sizeof(*evt)); 451 memcpy(evt, xen_clockevent, sizeof(*evt));
451 452
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 7faed5869e5b..485b69585540 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -11,8 +11,28 @@
11#include <asm/page_types.h> 11#include <asm/page_types.h>
12 12
13#include <xen/interface/elfnote.h> 13#include <xen/interface/elfnote.h>
14#include <xen/interface/features.h>
14#include <asm/xen/interface.h> 15#include <asm/xen/interface.h>
15 16
17#ifdef CONFIG_XEN_PVH
18#define PVH_FEATURES_STR "|writable_descriptor_tables|auto_translated_physmap|supervisor_mode_kernel"
19/* Note the lack of 'hvm_callback_vector'. Older hypervisor will
20 * balk at this being part of XEN_ELFNOTE_FEATURES, so we put it in
21 * XEN_ELFNOTE_SUPPORTED_FEATURES which older hypervisors will ignore.
22 */
23#define PVH_FEATURES ((1 << XENFEAT_writable_page_tables) | \
24 (1 << XENFEAT_auto_translated_physmap) | \
25 (1 << XENFEAT_supervisor_mode_kernel) | \
26 (1 << XENFEAT_hvm_callback_vector))
27/* The XENFEAT_writable_page_tables is not stricly neccessary as we set that
28 * up regardless whether this CONFIG option is enabled or not, but it
29 * clarifies what the right flags need to be.
30 */
31#else
32#define PVH_FEATURES_STR ""
33#define PVH_FEATURES (0)
34#endif
35
16 __INIT 36 __INIT
17ENTRY(startup_xen) 37ENTRY(startup_xen)
18 cld 38 cld
@@ -95,7 +115,10 @@ NEXT_HYPERCALL(arch_6)
95#endif 115#endif
96 ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen) 116 ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen)
97 ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page) 117 ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page)
98 ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") 118 ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .ascii "!writable_page_tables|pae_pgdir_above_4gb"; .asciz PVH_FEATURES_STR)
119 ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES, .long (PVH_FEATURES) |
120 (1 << XENFEAT_writable_page_tables) |
121 (1 << XENFEAT_dom0))
99 ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") 122 ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes")
100 ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") 123 ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
101 ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, 124 ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 95f8c6142328..1cb6f4c37300 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -123,4 +123,5 @@ __visible void xen_adjust_exception_frame(void);
123 123
124extern int xen_panic_handler_init(void); 124extern int xen_panic_handler_init(void);
125 125
126void xen_pvh_secondary_vcpu_init(int cpu);
126#endif /* XEN_OPS_H */ 127#endif /* XEN_OPS_H */
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index c4a4c9006288..f9c43f91f03e 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1356,7 +1356,7 @@ static int blkfront_probe(struct xenbus_device *dev,
1356 char *type; 1356 char *type;
1357 int len; 1357 int len;
1358 /* no unplug has been done: do not hook devices != xen vbds */ 1358 /* no unplug has been done: do not hook devices != xen vbds */
1359 if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) { 1359 if (xen_has_pv_and_legacy_disk_devices()) {
1360 int major; 1360 int major;
1361 1361
1362 if (!VDEV_IS_EXTENDED(vdevice)) 1362 if (!VDEV_IS_EXTENDED(vdevice))
@@ -2079,7 +2079,7 @@ static int __init xlblk_init(void)
2079 if (!xen_domain()) 2079 if (!xen_domain())
2080 return -ENODEV; 2080 return -ENODEV;
2081 2081
2082 if (xen_hvm_domain() && !xen_platform_pci_unplug) 2082 if (!xen_has_pv_disk_devices())
2083 return -ENODEV; 2083 return -ENODEV;
2084 2084
2085 if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) { 2085 if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c
index 92b097064df5..2064b4527040 100644
--- a/drivers/char/tpm/xen-tpmfront.c
+++ b/drivers/char/tpm/xen-tpmfront.c
@@ -17,6 +17,7 @@
17#include <xen/xenbus.h> 17#include <xen/xenbus.h>
18#include <xen/page.h> 18#include <xen/page.h>
19#include "tpm.h" 19#include "tpm.h"
20#include <xen/platform_pci.h>
20 21
21struct tpm_private { 22struct tpm_private {
22 struct tpm_chip *chip; 23 struct tpm_chip *chip;
@@ -378,6 +379,9 @@ static int __init xen_tpmfront_init(void)
378 if (!xen_domain()) 379 if (!xen_domain())
379 return -ENODEV; 380 return -ENODEV;
380 381
382 if (!xen_has_pv_devices())
383 return -ENODEV;
384
381 return xenbus_register_frontend(&tpmfront_driver); 385 return xenbus_register_frontend(&tpmfront_driver);
382} 386}
383module_init(xen_tpmfront_init); 387module_init(xen_tpmfront_init);
diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c
index e21c1816a8f9..fbfdc10573be 100644
--- a/drivers/input/misc/xen-kbdfront.c
+++ b/drivers/input/misc/xen-kbdfront.c
@@ -29,6 +29,7 @@
29#include <xen/interface/io/fbif.h> 29#include <xen/interface/io/fbif.h>
30#include <xen/interface/io/kbdif.h> 30#include <xen/interface/io/kbdif.h>
31#include <xen/xenbus.h> 31#include <xen/xenbus.h>
32#include <xen/platform_pci.h>
32 33
33struct xenkbd_info { 34struct xenkbd_info {
34 struct input_dev *kbd; 35 struct input_dev *kbd;
@@ -380,6 +381,9 @@ static int __init xenkbd_init(void)
380 if (xen_initial_domain()) 381 if (xen_initial_domain())
381 return -ENODEV; 382 return -ENODEV;
382 383
384 if (!xen_has_pv_devices())
385 return -ENODEV;
386
383 return xenbus_register_frontend(&xenkbd_driver); 387 return xenbus_register_frontend(&xenkbd_driver);
384} 388}
385 389
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index e59acb1daa23..2ab82fe75ede 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -2115,7 +2115,7 @@ static int __init netif_init(void)
2115 if (!xen_domain()) 2115 if (!xen_domain())
2116 return -ENODEV; 2116 return -ENODEV;
2117 2117
2118 if (xen_hvm_domain() && !xen_platform_pci_unplug) 2118 if (!xen_has_pv_nic_devices())
2119 return -ENODEV; 2119 return -ENODEV;
2120 2120
2121 pr_info("Initialising Xen virtual ethernet driver\n"); 2121 pr_info("Initialising Xen virtual ethernet driver\n");
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index d1cd60f51f87..179b8edc2262 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -20,6 +20,7 @@
20#include <linux/workqueue.h> 20#include <linux/workqueue.h>
21#include <linux/bitops.h> 21#include <linux/bitops.h>
22#include <linux/time.h> 22#include <linux/time.h>
23#include <xen/platform_pci.h>
23 24
24#include <asm/xen/swiotlb-xen.h> 25#include <asm/xen/swiotlb-xen.h>
25#define INVALID_GRANT_REF (0) 26#define INVALID_GRANT_REF (0)
@@ -1146,6 +1147,9 @@ static int __init pcifront_init(void)
1146 if (!xen_pv_domain() || xen_initial_domain()) 1147 if (!xen_pv_domain() || xen_initial_domain())
1147 return -ENODEV; 1148 return -ENODEV;
1148 1149
1150 if (!xen_has_pv_devices())
1151 return -ENODEV;
1152
1149 pci_frontend_registrar(1 /* enable */); 1153 pci_frontend_registrar(1 /* enable */);
1150 1154
1151 return xenbus_register_frontend(&xenpci_driver); 1155 return xenbus_register_frontend(&xenpci_driver);
diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c
index cd005c227a23..901014bbc821 100644
--- a/drivers/video/xen-fbfront.c
+++ b/drivers/video/xen-fbfront.c
@@ -35,6 +35,7 @@
35#include <xen/interface/io/fbif.h> 35#include <xen/interface/io/fbif.h>
36#include <xen/interface/io/protocols.h> 36#include <xen/interface/io/protocols.h>
37#include <xen/xenbus.h> 37#include <xen/xenbus.h>
38#include <xen/platform_pci.h>
38 39
39struct xenfb_info { 40struct xenfb_info {
40 unsigned char *fb; 41 unsigned char *fb;
@@ -692,13 +693,16 @@ static DEFINE_XENBUS_DRIVER(xenfb, ,
692 693
693static int __init xenfb_init(void) 694static int __init xenfb_init(void)
694{ 695{
695 if (!xen_pv_domain()) 696 if (!xen_domain())
696 return -ENODEV; 697 return -ENODEV;
697 698
698 /* Nothing to do if running in dom0. */ 699 /* Nothing to do if running in dom0. */
699 if (xen_initial_domain()) 700 if (xen_initial_domain())
700 return -ENODEV; 701 return -ENODEV;
701 702
703 if (!xen_has_pv_devices())
704 return -ENODEV;
705
702 return xenbus_register_frontend(&xenfb_driver); 706 return xenbus_register_frontend(&xenfb_driver);
703} 707}
704 708
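
The kbdfront, netfront, pcifront and fbfront hunks above share one idea: instead of each frontend open-coding its own guess about whether paravirtualized devices are present (xen_pv_domain(), the xen_platform_pci_unplug flag, and so on), the init paths now ask a common helper, xen_has_pv_devices() (netfront uses the NIC-specific xen_has_pv_nic_devices()), and return -ENODEV otherwise. That is what lets xenfb and xen-kbdfront come up in HVM guests and makes xen_platform_pci=0 behave. What follows is a minimal, userspace-compilable sketch of that gate; the guest structure and the has_pv_devices() policy here are illustrative stand-ins, not the kernel's actual helpers.

/*
 * Minimal model of the "do we have PV devices?" gate used by the
 * frontend init paths above.  Everything here is a stand-in for the
 * real helpers (xen_domain(), xen_pv_domain(), xen_has_pv_devices());
 * it only illustrates the decision, not the kernel implementation.
 */
#include <stdbool.h>
#include <stdio.h>

enum domain_type { NOT_XEN, XEN_PV, XEN_HVM };

struct guest {
	enum domain_type type;
	bool initial_domain;	 /* dom0? */
	bool emulated_unplugged; /* HVM only: qemu devices unplugged? */
};

static bool has_pv_devices(const struct guest *g)
{
	switch (g->type) {
	case NOT_XEN:
		return false;			/* bare metal / other hypervisor */
	case XEN_PV:
		return true;			/* PV guests always use PV I/O */
	case XEN_HVM:
		return g->emulated_unplugged;	/* only once qemu is out of the way */
	}
	return false;
}

/* Shape of xenkbd_init()/xenfb_init() after this series (simplified). */
static int frontend_init(const struct guest *g)
{
	if (g->type == NOT_XEN)
		return -19;			/* -ENODEV */
	if (g->initial_domain)
		return -19;			/* nothing to do in dom0 */
	if (!has_pv_devices(g))
		return -19;
	puts("registering xenbus frontend");
	return 0;
}

int main(void)
{
	struct guest hvm = { XEN_HVM, false, true };

	return frontend_init(&hvm) ? 1 : 0;
}

In a real HVM guest the emulated_unplugged input corresponds to the platform-pci unplug protocol having run; PV guests pass the gate unconditionally.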
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 12ba6db65142..38fb36e1c592 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -3,7 +3,6 @@ menu "Xen driver support"
3 3
4config XEN_BALLOON 4config XEN_BALLOON
5 bool "Xen memory balloon driver" 5 bool "Xen memory balloon driver"
6 depends on !ARM
7 default y 6 default y
8 help 7 help
9 The balloon driver allows the Xen domain to request more memory from 8 The balloon driver allows the Xen domain to request more memory from
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 14fe79d8634a..d75c811bfa56 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -2,7 +2,8 @@ ifeq ($(filter y, $(CONFIG_ARM) $(CONFIG_ARM64)),)
2obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o 2obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
3endif 3endif
4obj-$(CONFIG_X86) += fallback.o 4obj-$(CONFIG_X86) += fallback.o
5obj-y += grant-table.o features.o events.o balloon.o manage.o 5obj-y += grant-table.o features.o balloon.o manage.o
6obj-y += events/
6obj-y += xenbus/ 7obj-y += xenbus/
7 8
8nostackp := $(call cc-option, -fno-stack-protector) 9nostackp := $(call cc-option, -fno-stack-protector)
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 4c02e2b94103..37d06ea624aa 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -157,13 +157,6 @@ static struct page *balloon_retrieve(bool prefer_highmem)
157 return page; 157 return page;
158} 158}
159 159
160static struct page *balloon_first_page(void)
161{
162 if (list_empty(&ballooned_pages))
163 return NULL;
164 return list_entry(ballooned_pages.next, struct page, lru);
165}
166
167static struct page *balloon_next_page(struct page *page) 160static struct page *balloon_next_page(struct page *page)
168{ 161{
169 struct list_head *next = page->lru.next; 162 struct list_head *next = page->lru.next;
@@ -328,7 +321,7 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
328 if (nr_pages > ARRAY_SIZE(frame_list)) 321 if (nr_pages > ARRAY_SIZE(frame_list))
329 nr_pages = ARRAY_SIZE(frame_list); 322 nr_pages = ARRAY_SIZE(frame_list);
330 323
331 page = balloon_first_page(); 324 page = list_first_entry_or_null(&ballooned_pages, struct page, lru);
332 for (i = 0; i < nr_pages; i++) { 325 for (i = 0; i < nr_pages; i++) {
333 if (!page) { 326 if (!page) {
334 nr_pages = i; 327 nr_pages = i;
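
The balloon.c hunk is a straight simplification: the open-coded balloon_first_page() helper goes away in favour of list_first_entry_or_null(), which yields the first entry of a list or NULL when the list is empty. A self-contained sketch of the same idiom is below; the tiny list_head implementation only mimics <linux/list.h> far enough to compile outside the kernel.

/*
 * Stand-alone sketch of the idiom adopted in increase_reservation():
 * "first entry or NULL" instead of a separate empty check.
 */
#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))
#define list_entry(ptr, type, member) container_of(ptr, type, member)
#define list_empty(head) ((head)->next == (head))
/* Same contract as the kernel helper: NULL when the list is empty. */
#define list_first_entry_or_null(head, type, member) \
	(list_empty(head) ? NULL : list_entry((head)->next, type, member))

struct page { unsigned long pfn; struct list_head lru; };

static void list_add_tail(struct list_head *new, struct list_head *head)
{
	new->prev = head->prev;
	new->next = head;
	head->prev->next = new;
	head->prev = new;
}

int main(void)
{
	struct list_head ballooned_pages = { &ballooned_pages, &ballooned_pages };
	struct page p = { .pfn = 42, .lru = { NULL, NULL } };
	struct page *first;

	/* Empty list: the helper simply yields NULL, no separate check. */
	first = list_first_entry_or_null(&ballooned_pages, struct page, lru);
	printf("empty list gives %p\n", (void *)first);

	list_add_tail(&p.lru, &ballooned_pages);
	first = list_first_entry_or_null(&ballooned_pages, struct page, lru);
	printf("first pfn: %lu\n", first->pfn);
	return 0;
}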
diff --git a/drivers/xen/dbgp.c b/drivers/xen/dbgp.c
index f3ccc80a455f..8145a59fd9f6 100644
--- a/drivers/xen/dbgp.c
+++ b/drivers/xen/dbgp.c
@@ -19,7 +19,7 @@ static int xen_dbgp_op(struct usb_hcd *hcd, int op)
19 dbgp.op = op; 19 dbgp.op = op;
20 20
21#ifdef CONFIG_PCI 21#ifdef CONFIG_PCI
22 if (ctrlr->bus == &pci_bus_type) { 22 if (dev_is_pci(ctrlr)) {
23 const struct pci_dev *pdev = to_pci_dev(ctrlr); 23 const struct pci_dev *pdev = to_pci_dev(ctrlr);
24 24
25 dbgp.u.pci.seg = pci_domain_nr(pdev->bus); 25 dbgp.u.pci.seg = pci_domain_nr(pdev->bus);
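
The dbgp.c change is part of the tree-wide switch to dev_is_pci(): rather than comparing a device's bus pointer against &pci_bus_type by hand, callers use the helper from <linux/pci.h>, which wraps that exact comparison. A small userspace model of the pattern, with struct device and the bus types mocked up:

/*
 * Userspace model of the dev_is_pci() cleanup.  struct device,
 * struct bus_type and pci_bus_type are mocked here; in the kernel the
 * helper lives in <linux/pci.h> and performs the same pointer compare.
 */
#include <stdbool.h>
#include <stdio.h>

struct bus_type { const char *name; };
struct device  { const struct bus_type *bus; };

static const struct bus_type pci_bus_type = { "pci" };
static const struct bus_type usb_bus_type = { "usb" };

static bool dev_is_pci(const struct device *dev)
{
	return dev->bus == &pci_bus_type;	/* what callers used to open-code */
}

int main(void)
{
	struct device nic = { &pci_bus_type };
	struct device hub = { &usb_bus_type };

	printf("nic on PCI: %d, hub on PCI: %d\n",
	       dev_is_pci(&nic), dev_is_pci(&hub));
	return 0;
}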
diff --git a/drivers/xen/events/Makefile b/drivers/xen/events/Makefile
new file mode 100644
index 000000000000..62be55cd981d
--- /dev/null
+++ b/drivers/xen/events/Makefile
@@ -0,0 +1,5 @@
1obj-y += events.o
2
3events-y += events_base.o
4events-y += events_2l.o
5events-y += events_fifo.o
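
With this Makefile the old monolithic events.c becomes a small events/ directory: events_base.o keeps the ABI-independent IRQ plumbing, and events_2l.o and events_fifo.o each implement one event-channel ABI; all three are linked into the single events.o object the rest of the tree already expects. The first backend, events_2l.c below, is essentially the existing two-level code moved out of events.c. Its core loop scans a two-level pending bitmap: a per-vCPU selector word says which shared-info words may hold pending events, and each selected word is then scanned bit by bit. The sketch below models only that word/bit arithmetic; it uses plain unsigned long and GCC builtins in place of xen_ulong_t, the sync_* bitops and the per-CPU fairness/restart state of the real handler.

/*
 * Sketch of the two-level pending-event scan that events_2l.c carries
 * over from the old events.c.  The selector word, pending words and
 * mask words are plain arrays here instead of the Xen shared_info
 * page, and fairness/restart state is omitted.
 */
#include <stdio.h>

#define BITS_PER_WORD	(sizeof(unsigned long) * 8)
#define NR_WORDS	4	/* toy size; the 2-level ABI tops out at 4096 events on 64-bit */

static unsigned long pending_sel;		/* level 1: which words to look at */
static unsigned long pending[NR_WORDS];		/* level 2: per-event pending bits */
static unsigned long mask[NR_WORDS];		/* per-event mask bits */

static void set_pending(unsigned port)
{
	pending[port / BITS_PER_WORD] |= 1UL << (port % BITS_PER_WORD);
	pending_sel |= 1UL << (port / BITS_PER_WORD);
}

static void handle_events(void)
{
	/* The real code atomically exchanges the selector with 0
	 * (xchg_xen_ulong); a plain read-and-clear is enough here. */
	unsigned long words = pending_sel;

	pending_sel = 0;

	while (words) {
		unsigned word_idx = __builtin_ctzl(words);
		unsigned long bits = pending[word_idx] & ~mask[word_idx];

		while (bits) {
			unsigned bit_idx = __builtin_ctzl(bits);
			unsigned port = word_idx * BITS_PER_WORD + bit_idx;

			printf("event %u fired\n", port);	/* stands in for generic_handle_irq() */
			pending[word_idx] &= ~(1UL << bit_idx);
			bits &= ~(1UL << bit_idx);
		}
		words &= ~(1UL << word_idx);
	}
}

int main(void)
{
	set_pending(3);
	set_pending(70);
	mask[0] |= 1UL << 5;	/* masked events are skipped */
	set_pending(5);
	handle_events();
	return 0;
}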
diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c
new file mode 100644
index 000000000000..d7ff91757307
--- /dev/null
+++ b/drivers/xen/events/events_2l.c
@@ -0,0 +1,372 @@
1/*
2 * Xen event channels (2-level ABI)
3 *
4 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
5 */
6
7#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
8
9#include <linux/linkage.h>
10#include <linux/interrupt.h>
11#include <linux/irq.h>
12#include <linux/module.h>
13
14#include <asm/sync_bitops.h>
15#include <asm/xen/hypercall.h>
16#include <asm/xen/hypervisor.h>
17
18#include <xen/xen.h>
19#include <xen/xen-ops.h>
20#include <xen/events.h>
21#include <xen/interface/xen.h>
22#include <xen/interface/event_channel.h>
23
24#include "events_internal.h"
25
26/*
27 * Note sizeof(xen_ulong_t) can be more than sizeof(unsigned long). Be
28 * careful to only use bitops which allow for this (e.g
29 * test_bit/find_first_bit and friends but not __ffs) and to pass
30 * BITS_PER_EVTCHN_WORD as the bitmask length.
31 */
32#define BITS_PER_EVTCHN_WORD (sizeof(xen_ulong_t)*8)
33/*
34 * Make a bitmask (i.e. unsigned long *) of a xen_ulong_t
35 * array. Primarily to avoid long lines (hence the terse name).
36 */
37#define BM(x) (unsigned long *)(x)
 39/* Find the first set bit in an evtchn mask */
39#define EVTCHN_FIRST_BIT(w) find_first_bit(BM(&(w)), BITS_PER_EVTCHN_WORD)
40
41static DEFINE_PER_CPU(xen_ulong_t [EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD],
42 cpu_evtchn_mask);
43
44static unsigned evtchn_2l_max_channels(void)
45{
46 return EVTCHN_2L_NR_CHANNELS;
47}
48
49static void evtchn_2l_bind_to_cpu(struct irq_info *info, unsigned cpu)
50{
51 clear_bit(info->evtchn, BM(per_cpu(cpu_evtchn_mask, info->cpu)));
52 set_bit(info->evtchn, BM(per_cpu(cpu_evtchn_mask, cpu)));
53}
54
55static void evtchn_2l_clear_pending(unsigned port)
56{
57 struct shared_info *s = HYPERVISOR_shared_info;
58 sync_clear_bit(port, BM(&s->evtchn_pending[0]));
59}
60
61static void evtchn_2l_set_pending(unsigned port)
62{
63 struct shared_info *s = HYPERVISOR_shared_info;
64 sync_set_bit(port, BM(&s->evtchn_pending[0]));
65}
66
67static bool evtchn_2l_is_pending(unsigned port)
68{
69 struct shared_info *s = HYPERVISOR_shared_info;
70 return sync_test_bit(port, BM(&s->evtchn_pending[0]));
71}
72
73static bool evtchn_2l_test_and_set_mask(unsigned port)
74{
75 struct shared_info *s = HYPERVISOR_shared_info;
76 return sync_test_and_set_bit(port, BM(&s->evtchn_mask[0]));
77}
78
79static void evtchn_2l_mask(unsigned port)
80{
81 struct shared_info *s = HYPERVISOR_shared_info;
82 sync_set_bit(port, BM(&s->evtchn_mask[0]));
83}
84
85static void evtchn_2l_unmask(unsigned port)
86{
87 struct shared_info *s = HYPERVISOR_shared_info;
88 unsigned int cpu = get_cpu();
89 int do_hypercall = 0, evtchn_pending = 0;
90
91 BUG_ON(!irqs_disabled());
92
93 if (unlikely((cpu != cpu_from_evtchn(port))))
94 do_hypercall = 1;
95 else {
96 /*
97 * Need to clear the mask before checking pending to
98 * avoid a race with an event becoming pending.
99 *
100 * EVTCHNOP_unmask will only trigger an upcall if the
101 * mask bit was set, so if a hypercall is needed
102 * remask the event.
103 */
104 sync_clear_bit(port, BM(&s->evtchn_mask[0]));
105 evtchn_pending = sync_test_bit(port, BM(&s->evtchn_pending[0]));
106
107 if (unlikely(evtchn_pending && xen_hvm_domain())) {
108 sync_set_bit(port, BM(&s->evtchn_mask[0]));
109 do_hypercall = 1;
110 }
111 }
112
113 /* Slow path (hypercall) if this is a non-local port or if this is
114 * an hvm domain and an event is pending (hvm domains don't have
115 * their own implementation of irq_enable). */
116 if (do_hypercall) {
117 struct evtchn_unmask unmask = { .port = port };
118 (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
119 } else {
120 struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
121
122 /*
123 * The following is basically the equivalent of
124 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
125 * the interrupt edge' if the channel is masked.
126 */
127 if (evtchn_pending &&
128 !sync_test_and_set_bit(port / BITS_PER_EVTCHN_WORD,
129 BM(&vcpu_info->evtchn_pending_sel)))
130 vcpu_info->evtchn_upcall_pending = 1;
131 }
132
133 put_cpu();
134}
135
136static DEFINE_PER_CPU(unsigned int, current_word_idx);
137static DEFINE_PER_CPU(unsigned int, current_bit_idx);
138
139/*
140 * Mask out the i least significant bits of w
141 */
142#define MASK_LSBS(w, i) (w & ((~((xen_ulong_t)0UL)) << i))
143
144static inline xen_ulong_t active_evtchns(unsigned int cpu,
145 struct shared_info *sh,
146 unsigned int idx)
147{
148 return sh->evtchn_pending[idx] &
149 per_cpu(cpu_evtchn_mask, cpu)[idx] &
150 ~sh->evtchn_mask[idx];
151}
152
153/*
154 * Search the CPU's pending events bitmasks. For each one found, map
155 * the event number to an irq, and feed it into do_IRQ() for handling.
156 *
157 * Xen uses a two-level bitmap to speed searching. The first level is
158 * a bitset of words which contain pending event bits. The second
159 * level is a bitset of pending events themselves.
160 */
161static void evtchn_2l_handle_events(unsigned cpu)
162{
163 int irq;
164 xen_ulong_t pending_words;
165 xen_ulong_t pending_bits;
166 int start_word_idx, start_bit_idx;
167 int word_idx, bit_idx;
168 int i;
169 struct irq_desc *desc;
170 struct shared_info *s = HYPERVISOR_shared_info;
171 struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
172
173 /* Timer interrupt has highest priority. */
174 irq = irq_from_virq(cpu, VIRQ_TIMER);
175 if (irq != -1) {
176 unsigned int evtchn = evtchn_from_irq(irq);
177 word_idx = evtchn / BITS_PER_LONG;
178 bit_idx = evtchn % BITS_PER_LONG;
179 if (active_evtchns(cpu, s, word_idx) & (1ULL << bit_idx)) {
180 desc = irq_to_desc(irq);
181 if (desc)
182 generic_handle_irq_desc(irq, desc);
183 }
184 }
185
186 /*
187 * Master flag must be cleared /before/ clearing
188 * selector flag. xchg_xen_ulong must contain an
189 * appropriate barrier.
190 */
191 pending_words = xchg_xen_ulong(&vcpu_info->evtchn_pending_sel, 0);
192
193 start_word_idx = __this_cpu_read(current_word_idx);
194 start_bit_idx = __this_cpu_read(current_bit_idx);
195
196 word_idx = start_word_idx;
197
198 for (i = 0; pending_words != 0; i++) {
199 xen_ulong_t words;
200
201 words = MASK_LSBS(pending_words, word_idx);
202
203 /*
204 * If we masked out all events, wrap to beginning.
205 */
206 if (words == 0) {
207 word_idx = 0;
208 bit_idx = 0;
209 continue;
210 }
211 word_idx = EVTCHN_FIRST_BIT(words);
212
213 pending_bits = active_evtchns(cpu, s, word_idx);
214 bit_idx = 0; /* usually scan entire word from start */
215 /*
216 * We scan the starting word in two parts.
217 *
218 * 1st time: start in the middle, scanning the
219 * upper bits.
220 *
221 * 2nd time: scan the whole word (not just the
222 * parts skipped in the first pass) -- if an
223 * event in the previously scanned bits is
224 * pending again it would just be scanned on
225 * the next loop anyway.
226 */
227 if (word_idx == start_word_idx) {
228 if (i == 0)
229 bit_idx = start_bit_idx;
230 }
231
232 do {
233 xen_ulong_t bits;
234 int port;
235
236 bits = MASK_LSBS(pending_bits, bit_idx);
237
238 /* If we masked out all events, move on. */
239 if (bits == 0)
240 break;
241
242 bit_idx = EVTCHN_FIRST_BIT(bits);
243
244 /* Process port. */
245 port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx;
246 irq = get_evtchn_to_irq(port);
247
248 if (irq != -1) {
249 desc = irq_to_desc(irq);
250 if (desc)
251 generic_handle_irq_desc(irq, desc);
252 }
253
254 bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD;
255
256 /* Next caller starts at last processed + 1 */
257 __this_cpu_write(current_word_idx,
258 bit_idx ? word_idx :
259 (word_idx+1) % BITS_PER_EVTCHN_WORD);
260 __this_cpu_write(current_bit_idx, bit_idx);
261 } while (bit_idx != 0);
262
263 /* Scan start_l1i twice; all others once. */
264 if ((word_idx != start_word_idx) || (i != 0))
265 pending_words &= ~(1UL << word_idx);
266
267 word_idx = (word_idx + 1) % BITS_PER_EVTCHN_WORD;
268 }
269}
270
271irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
272{
273 struct shared_info *sh = HYPERVISOR_shared_info;
274 int cpu = smp_processor_id();
275 xen_ulong_t *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
276 int i;
277 unsigned long flags;
278 static DEFINE_SPINLOCK(debug_lock);
279 struct vcpu_info *v;
280
281 spin_lock_irqsave(&debug_lock, flags);
282
283 printk("\nvcpu %d\n ", cpu);
284
285 for_each_online_cpu(i) {
286 int pending;
287 v = per_cpu(xen_vcpu, i);
288 pending = (get_irq_regs() && i == cpu)
289 ? xen_irqs_disabled(get_irq_regs())
290 : v->evtchn_upcall_mask;
291 printk("%d: masked=%d pending=%d event_sel %0*"PRI_xen_ulong"\n ", i,
292 pending, v->evtchn_upcall_pending,
293 (int)(sizeof(v->evtchn_pending_sel)*2),
294 v->evtchn_pending_sel);
295 }
296 v = per_cpu(xen_vcpu, cpu);
297
298 printk("\npending:\n ");
299 for (i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--)
300 printk("%0*"PRI_xen_ulong"%s",
301 (int)sizeof(sh->evtchn_pending[0])*2,
302 sh->evtchn_pending[i],
303 i % 8 == 0 ? "\n " : " ");
304 printk("\nglobal mask:\n ");
305 for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
306 printk("%0*"PRI_xen_ulong"%s",
307 (int)(sizeof(sh->evtchn_mask[0])*2),
308 sh->evtchn_mask[i],
309 i % 8 == 0 ? "\n " : " ");
310
311 printk("\nglobally unmasked:\n ");
312 for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
313 printk("%0*"PRI_xen_ulong"%s",
314 (int)(sizeof(sh->evtchn_mask[0])*2),
315 sh->evtchn_pending[i] & ~sh->evtchn_mask[i],
316 i % 8 == 0 ? "\n " : " ");
317
318 printk("\nlocal cpu%d mask:\n ", cpu);
319 for (i = (EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD)-1; i >= 0; i--)
320 printk("%0*"PRI_xen_ulong"%s", (int)(sizeof(cpu_evtchn[0])*2),
321 cpu_evtchn[i],
322 i % 8 == 0 ? "\n " : " ");
323
324 printk("\nlocally unmasked:\n ");
325 for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) {
326 xen_ulong_t pending = sh->evtchn_pending[i]
327 & ~sh->evtchn_mask[i]
328 & cpu_evtchn[i];
329 printk("%0*"PRI_xen_ulong"%s",
330 (int)(sizeof(sh->evtchn_mask[0])*2),
331 pending, i % 8 == 0 ? "\n " : " ");
332 }
333
334 printk("\npending list:\n");
335 for (i = 0; i < EVTCHN_2L_NR_CHANNELS; i++) {
336 if (sync_test_bit(i, BM(sh->evtchn_pending))) {
337 int word_idx = i / BITS_PER_EVTCHN_WORD;
338 printk(" %d: event %d -> irq %d%s%s%s\n",
339 cpu_from_evtchn(i), i,
340 get_evtchn_to_irq(i),
341 sync_test_bit(word_idx, BM(&v->evtchn_pending_sel))
342 ? "" : " l2-clear",
343 !sync_test_bit(i, BM(sh->evtchn_mask))
344 ? "" : " globally-masked",
345 sync_test_bit(i, BM(cpu_evtchn))
346 ? "" : " locally-masked");
347 }
348 }
349
350 spin_unlock_irqrestore(&debug_lock, flags);
351
352 return IRQ_HANDLED;
353}
354
355static const struct evtchn_ops evtchn_ops_2l = {
356 .max_channels = evtchn_2l_max_channels,
357 .nr_channels = evtchn_2l_max_channels,
358 .bind_to_cpu = evtchn_2l_bind_to_cpu,
359 .clear_pending = evtchn_2l_clear_pending,
360 .set_pending = evtchn_2l_set_pending,
361 .is_pending = evtchn_2l_is_pending,
362 .test_and_set_mask = evtchn_2l_test_and_set_mask,
363 .mask = evtchn_2l_mask,
364 .unmask = evtchn_2l_unmask,
365 .handle_events = evtchn_2l_handle_events,
366};
367
368void __init xen_evtchn_2l_init(void)
369{
370 pr_info("Using 2-level ABI\n");
371 evtchn_ops = &evtchn_ops_2l;
372}
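
Everything ABI-specific in events_2l.c is reached through the struct evtchn_ops table registered at the bottom of the file; events_base.c keeps a single evtchn_ops pointer and forwards mask/unmask/handle_events and friends through it (the thin port_* wrappers live in events_internal.h, which is not part of this hunk). A stripped-down model of that dispatch, assuming nothing about the header beyond what the ops table above shows:

/*
 * Stand-alone model of the evtchn_ops dispatch introduced by the
 * events/ split.  The ops structure and the fake backend below are
 * illustrative only; the real table is declared in events_internal.h
 * and filled in by events_2l.c or events_fifo.c.
 */
#include <stdio.h>

struct evtchn_ops {
	unsigned (*max_channels)(void);
	void (*mask)(unsigned port);
	void (*unmask)(unsigned port);
	void (*handle_events)(unsigned cpu);
};

/* --- a pretend 2-level backend --------------------------------------- */
static unsigned twol_max_channels(void)      { return 4096; }
static void twol_mask(unsigned port)         { printf("2l: mask %u\n", port); }
static void twol_unmask(unsigned port)       { printf("2l: unmask %u\n", port); }
static void twol_handle_events(unsigned cpu) { printf("2l: scan cpu %u\n", cpu); }

static const struct evtchn_ops evtchn_ops_2l = {
	.max_channels  = twol_max_channels,
	.mask          = twol_mask,
	.unmask        = twol_unmask,
	.handle_events = twol_handle_events,
};

/* --- what events_base.c does with it --------------------------------- */
static const struct evtchn_ops *evtchn_ops;

static void xen_evtchn_2l_init(void)      { evtchn_ops = &evtchn_ops_2l; }
static void mask_evtchn(unsigned port)    { evtchn_ops->mask(port); }
static void handle_upcall(unsigned cpu)   { evtchn_ops->handle_events(cpu); }

int main(void)
{
	xen_evtchn_2l_init();	/* chosen at boot; the FIFO backend would be tried first */
	mask_evtchn(7);
	handle_upcall(0);
	printf("max channels: %u\n", evtchn_ops->max_channels());
	return 0;
}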
diff --git a/drivers/xen/events.c b/drivers/xen/events/events_base.c
index 4035e833ea26..4672e003c0ad 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events/events_base.c
@@ -59,6 +59,10 @@
59#include <xen/interface/vcpu.h> 59#include <xen/interface/vcpu.h>
60#include <asm/hw_irq.h> 60#include <asm/hw_irq.h>
61 61
62#include "events_internal.h"
63
64const struct evtchn_ops *evtchn_ops;
65
62/* 66/*
63 * This lock protects updates to the following mapping and reference-count 67 * This lock protects updates to the following mapping and reference-count
64 * arrays. The lock does not need to be acquired to read the mapping tables. 68 * arrays. The lock does not need to be acquired to read the mapping tables.
@@ -73,71 +77,15 @@ static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
73/* IRQ <-> IPI mapping */ 77/* IRQ <-> IPI mapping */
74static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1}; 78static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
75 79
76/* Interrupt types. */ 80int **evtchn_to_irq;
77enum xen_irq_type {
78 IRQT_UNBOUND = 0,
79 IRQT_PIRQ,
80 IRQT_VIRQ,
81 IRQT_IPI,
82 IRQT_EVTCHN
83};
84
85/*
86 * Packed IRQ information:
87 * type - enum xen_irq_type
88 * event channel - irq->event channel mapping
89 * cpu - cpu this event channel is bound to
90 * index - type-specific information:
91 * PIRQ - physical IRQ, GSI, flags, and owner domain
92 * VIRQ - virq number
93 * IPI - IPI vector
94 * EVTCHN -
95 */
96struct irq_info {
97 struct list_head list;
98 int refcnt;
99 enum xen_irq_type type; /* type */
100 unsigned irq;
101 unsigned short evtchn; /* event channel */
102 unsigned short cpu; /* cpu bound */
103
104 union {
105 unsigned short virq;
106 enum ipi_vector ipi;
107 struct {
108 unsigned short pirq;
109 unsigned short gsi;
110 unsigned char flags;
111 uint16_t domid;
112 } pirq;
113 } u;
114};
115#define PIRQ_NEEDS_EOI (1 << 0)
116#define PIRQ_SHAREABLE (1 << 1)
117
118static int *evtchn_to_irq;
119#ifdef CONFIG_X86 81#ifdef CONFIG_X86
120static unsigned long *pirq_eoi_map; 82static unsigned long *pirq_eoi_map;
121#endif 83#endif
122static bool (*pirq_needs_eoi)(unsigned irq); 84static bool (*pirq_needs_eoi)(unsigned irq);
123 85
124/* 86#define EVTCHN_ROW(e) (e / (PAGE_SIZE/sizeof(**evtchn_to_irq)))
125 * Note sizeof(xen_ulong_t) can be more than sizeof(unsigned long). Be 87#define EVTCHN_COL(e) (e % (PAGE_SIZE/sizeof(**evtchn_to_irq)))
126 * careful to only use bitops which allow for this (e.g 88#define EVTCHN_PER_ROW (PAGE_SIZE / sizeof(**evtchn_to_irq))
127 * test_bit/find_first_bit and friends but not __ffs) and to pass
128 * BITS_PER_EVTCHN_WORD as the bitmask length.
129 */
130#define BITS_PER_EVTCHN_WORD (sizeof(xen_ulong_t)*8)
131/*
132 * Make a bitmask (i.e. unsigned long *) of a xen_ulong_t
133 * array. Primarily to avoid long lines (hence the terse name).
134 */
135#define BM(x) (unsigned long *)(x)
136/* Find the first set bit in a evtchn mask */
137#define EVTCHN_FIRST_BIT(w) find_first_bit(BM(&(w)), BITS_PER_EVTCHN_WORD)
138
139static DEFINE_PER_CPU(xen_ulong_t [NR_EVENT_CHANNELS/BITS_PER_EVTCHN_WORD],
140 cpu_evtchn_mask);
141 89
142/* Xen will never allocate port zero for any purpose. */ 90/* Xen will never allocate port zero for any purpose. */
143#define VALID_EVTCHN(chn) ((chn) != 0) 91#define VALID_EVTCHN(chn) ((chn) != 0)
@@ -148,19 +96,75 @@ static struct irq_chip xen_pirq_chip;
148static void enable_dynirq(struct irq_data *data); 96static void enable_dynirq(struct irq_data *data);
149static void disable_dynirq(struct irq_data *data); 97static void disable_dynirq(struct irq_data *data);
150 98
99static void clear_evtchn_to_irq_row(unsigned row)
100{
101 unsigned col;
102
103 for (col = 0; col < EVTCHN_PER_ROW; col++)
104 evtchn_to_irq[row][col] = -1;
105}
106
107static void clear_evtchn_to_irq_all(void)
108{
109 unsigned row;
110
111 for (row = 0; row < EVTCHN_ROW(xen_evtchn_max_channels()); row++) {
112 if (evtchn_to_irq[row] == NULL)
113 continue;
114 clear_evtchn_to_irq_row(row);
115 }
116}
117
118static int set_evtchn_to_irq(unsigned evtchn, unsigned irq)
119{
120 unsigned row;
121 unsigned col;
122
123 if (evtchn >= xen_evtchn_max_channels())
124 return -EINVAL;
125
126 row = EVTCHN_ROW(evtchn);
127 col = EVTCHN_COL(evtchn);
128
129 if (evtchn_to_irq[row] == NULL) {
130 /* Unallocated irq entries return -1 anyway */
131 if (irq == -1)
132 return 0;
133
134 evtchn_to_irq[row] = (int *)get_zeroed_page(GFP_KERNEL);
135 if (evtchn_to_irq[row] == NULL)
136 return -ENOMEM;
137
138 clear_evtchn_to_irq_row(row);
139 }
140
141 evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)] = irq;
142 return 0;
143}
144
145int get_evtchn_to_irq(unsigned evtchn)
146{
147 if (evtchn >= xen_evtchn_max_channels())
148 return -1;
149 if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
150 return -1;
151 return evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)];
152}
153
151/* Get info for IRQ */ 154/* Get info for IRQ */
152static struct irq_info *info_for_irq(unsigned irq) 155struct irq_info *info_for_irq(unsigned irq)
153{ 156{
154 return irq_get_handler_data(irq); 157 return irq_get_handler_data(irq);
155} 158}
156 159
157/* Constructors for packed IRQ information. */ 160/* Constructors for packed IRQ information. */
158static void xen_irq_info_common_init(struct irq_info *info, 161static int xen_irq_info_common_setup(struct irq_info *info,
159 unsigned irq, 162 unsigned irq,
160 enum xen_irq_type type, 163 enum xen_irq_type type,
161 unsigned short evtchn, 164 unsigned evtchn,
162 unsigned short cpu) 165 unsigned short cpu)
163{ 166{
167 int ret;
164 168
165 BUG_ON(info->type != IRQT_UNBOUND && info->type != type); 169 BUG_ON(info->type != IRQT_UNBOUND && info->type != type);
166 170
@@ -169,68 +173,78 @@ static void xen_irq_info_common_init(struct irq_info *info,
169 info->evtchn = evtchn; 173 info->evtchn = evtchn;
170 info->cpu = cpu; 174 info->cpu = cpu;
171 175
172 evtchn_to_irq[evtchn] = irq; 176 ret = set_evtchn_to_irq(evtchn, irq);
177 if (ret < 0)
178 return ret;
173 179
174 irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN); 180 irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN);
181
182 return xen_evtchn_port_setup(info);
175} 183}
176 184
177static void xen_irq_info_evtchn_init(unsigned irq, 185static int xen_irq_info_evtchn_setup(unsigned irq,
178 unsigned short evtchn) 186 unsigned evtchn)
179{ 187{
180 struct irq_info *info = info_for_irq(irq); 188 struct irq_info *info = info_for_irq(irq);
181 189
182 xen_irq_info_common_init(info, irq, IRQT_EVTCHN, evtchn, 0); 190 return xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
183} 191}
184 192
185static void xen_irq_info_ipi_init(unsigned cpu, 193static int xen_irq_info_ipi_setup(unsigned cpu,
186 unsigned irq, 194 unsigned irq,
187 unsigned short evtchn, 195 unsigned evtchn,
188 enum ipi_vector ipi) 196 enum ipi_vector ipi)
189{ 197{
190 struct irq_info *info = info_for_irq(irq); 198 struct irq_info *info = info_for_irq(irq);
191 199
192 xen_irq_info_common_init(info, irq, IRQT_IPI, evtchn, 0);
193
194 info->u.ipi = ipi; 200 info->u.ipi = ipi;
195 201
196 per_cpu(ipi_to_irq, cpu)[ipi] = irq; 202 per_cpu(ipi_to_irq, cpu)[ipi] = irq;
203
204 return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0);
197} 205}
198 206
199static void xen_irq_info_virq_init(unsigned cpu, 207static int xen_irq_info_virq_setup(unsigned cpu,
200 unsigned irq, 208 unsigned irq,
201 unsigned short evtchn, 209 unsigned evtchn,
202 unsigned short virq) 210 unsigned virq)
203{ 211{
204 struct irq_info *info = info_for_irq(irq); 212 struct irq_info *info = info_for_irq(irq);
205 213
206 xen_irq_info_common_init(info, irq, IRQT_VIRQ, evtchn, 0);
207
208 info->u.virq = virq; 214 info->u.virq = virq;
209 215
210 per_cpu(virq_to_irq, cpu)[virq] = irq; 216 per_cpu(virq_to_irq, cpu)[virq] = irq;
217
218 return xen_irq_info_common_setup(info, irq, IRQT_VIRQ, evtchn, 0);
211} 219}
212 220
213static void xen_irq_info_pirq_init(unsigned irq, 221static int xen_irq_info_pirq_setup(unsigned irq,
214 unsigned short evtchn, 222 unsigned evtchn,
215 unsigned short pirq, 223 unsigned pirq,
216 unsigned short gsi, 224 unsigned gsi,
217 uint16_t domid, 225 uint16_t domid,
218 unsigned char flags) 226 unsigned char flags)
219{ 227{
220 struct irq_info *info = info_for_irq(irq); 228 struct irq_info *info = info_for_irq(irq);
221 229
222 xen_irq_info_common_init(info, irq, IRQT_PIRQ, evtchn, 0);
223
224 info->u.pirq.pirq = pirq; 230 info->u.pirq.pirq = pirq;
225 info->u.pirq.gsi = gsi; 231 info->u.pirq.gsi = gsi;
226 info->u.pirq.domid = domid; 232 info->u.pirq.domid = domid;
227 info->u.pirq.flags = flags; 233 info->u.pirq.flags = flags;
234
235 return xen_irq_info_common_setup(info, irq, IRQT_PIRQ, evtchn, 0);
236}
237
238static void xen_irq_info_cleanup(struct irq_info *info)
239{
240 set_evtchn_to_irq(info->evtchn, -1);
241 info->evtchn = 0;
228} 242}
229 243
230/* 244/*
231 * Accessors for packed IRQ information. 245 * Accessors for packed IRQ information.
232 */ 246 */
233static unsigned int evtchn_from_irq(unsigned irq) 247unsigned int evtchn_from_irq(unsigned irq)
234{ 248{
235 if (unlikely(WARN(irq < 0 || irq >= nr_irqs, "Invalid irq %d!\n", irq))) 249 if (unlikely(WARN(irq < 0 || irq >= nr_irqs, "Invalid irq %d!\n", irq)))
236 return 0; 250 return 0;
@@ -240,10 +254,15 @@ static unsigned int evtchn_from_irq(unsigned irq)
240 254
241unsigned irq_from_evtchn(unsigned int evtchn) 255unsigned irq_from_evtchn(unsigned int evtchn)
242{ 256{
243 return evtchn_to_irq[evtchn]; 257 return get_evtchn_to_irq(evtchn);
244} 258}
245EXPORT_SYMBOL_GPL(irq_from_evtchn); 259EXPORT_SYMBOL_GPL(irq_from_evtchn);
246 260
261int irq_from_virq(unsigned int cpu, unsigned int virq)
262{
263 return per_cpu(virq_to_irq, cpu)[virq];
264}
265
247static enum ipi_vector ipi_from_irq(unsigned irq) 266static enum ipi_vector ipi_from_irq(unsigned irq)
248{ 267{
249 struct irq_info *info = info_for_irq(irq); 268 struct irq_info *info = info_for_irq(irq);
@@ -279,14 +298,14 @@ static enum xen_irq_type type_from_irq(unsigned irq)
279 return info_for_irq(irq)->type; 298 return info_for_irq(irq)->type;
280} 299}
281 300
282static unsigned cpu_from_irq(unsigned irq) 301unsigned cpu_from_irq(unsigned irq)
283{ 302{
284 return info_for_irq(irq)->cpu; 303 return info_for_irq(irq)->cpu;
285} 304}
286 305
287static unsigned int cpu_from_evtchn(unsigned int evtchn) 306unsigned int cpu_from_evtchn(unsigned int evtchn)
288{ 307{
289 int irq = evtchn_to_irq[evtchn]; 308 int irq = get_evtchn_to_irq(evtchn);
290 unsigned ret = 0; 309 unsigned ret = 0;
291 310
292 if (irq != -1) 311 if (irq != -1)
@@ -310,67 +329,29 @@ static bool pirq_needs_eoi_flag(unsigned irq)
310 return info->u.pirq.flags & PIRQ_NEEDS_EOI; 329 return info->u.pirq.flags & PIRQ_NEEDS_EOI;
311} 330}
312 331
313static inline xen_ulong_t active_evtchns(unsigned int cpu,
314 struct shared_info *sh,
315 unsigned int idx)
316{
317 return sh->evtchn_pending[idx] &
318 per_cpu(cpu_evtchn_mask, cpu)[idx] &
319 ~sh->evtchn_mask[idx];
320}
321
322static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) 332static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
323{ 333{
324 int irq = evtchn_to_irq[chn]; 334 int irq = get_evtchn_to_irq(chn);
335 struct irq_info *info = info_for_irq(irq);
325 336
326 BUG_ON(irq == -1); 337 BUG_ON(irq == -1);
327#ifdef CONFIG_SMP 338#ifdef CONFIG_SMP
328 cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu)); 339 cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu));
329#endif 340#endif
330 341
331 clear_bit(chn, BM(per_cpu(cpu_evtchn_mask, cpu_from_irq(irq)))); 342 xen_evtchn_port_bind_to_cpu(info, cpu);
332 set_bit(chn, BM(per_cpu(cpu_evtchn_mask, cpu)));
333
334 info_for_irq(irq)->cpu = cpu;
335}
336
337static void init_evtchn_cpu_bindings(void)
338{
339 int i;
340#ifdef CONFIG_SMP
341 struct irq_info *info;
342
343 /* By default all event channels notify CPU#0. */
344 list_for_each_entry(info, &xen_irq_list_head, list) {
345 struct irq_desc *desc = irq_to_desc(info->irq);
346 cpumask_copy(desc->irq_data.affinity, cpumask_of(0));
347 }
348#endif
349
350 for_each_possible_cpu(i)
351 memset(per_cpu(cpu_evtchn_mask, i),
352 (i == 0) ? ~0 : 0, NR_EVENT_CHANNELS/8);
353}
354 343
355static inline void clear_evtchn(int port) 344 info->cpu = cpu;
356{
357 struct shared_info *s = HYPERVISOR_shared_info;
358 sync_clear_bit(port, BM(&s->evtchn_pending[0]));
359} 345}
360 346
361static inline void set_evtchn(int port) 347static void xen_evtchn_mask_all(void)
362{ 348{
363 struct shared_info *s = HYPERVISOR_shared_info; 349 unsigned int evtchn;
364 sync_set_bit(port, BM(&s->evtchn_pending[0]));
365}
366 350
367static inline int test_evtchn(int port) 351 for (evtchn = 0; evtchn < xen_evtchn_nr_channels(); evtchn++)
368{ 352 mask_evtchn(evtchn);
369 struct shared_info *s = HYPERVISOR_shared_info;
370 return sync_test_bit(port, BM(&s->evtchn_pending[0]));
371} 353}
372 354
373
374/** 355/**
375 * notify_remote_via_irq - send event to remote end of event channel via irq 356 * notify_remote_via_irq - send event to remote end of event channel via irq
376 * @irq: irq of event channel to send event to 357 * @irq: irq of event channel to send event to
@@ -388,63 +369,6 @@ void notify_remote_via_irq(int irq)
388} 369}
389EXPORT_SYMBOL_GPL(notify_remote_via_irq); 370EXPORT_SYMBOL_GPL(notify_remote_via_irq);
390 371
391static void mask_evtchn(int port)
392{
393 struct shared_info *s = HYPERVISOR_shared_info;
394 sync_set_bit(port, BM(&s->evtchn_mask[0]));
395}
396
397static void unmask_evtchn(int port)
398{
399 struct shared_info *s = HYPERVISOR_shared_info;
400 unsigned int cpu = get_cpu();
401 int do_hypercall = 0, evtchn_pending = 0;
402
403 BUG_ON(!irqs_disabled());
404
405 if (unlikely((cpu != cpu_from_evtchn(port))))
406 do_hypercall = 1;
407 else {
408 /*
409 * Need to clear the mask before checking pending to
410 * avoid a race with an event becoming pending.
411 *
412 * EVTCHNOP_unmask will only trigger an upcall if the
413 * mask bit was set, so if a hypercall is needed
414 * remask the event.
415 */
416 sync_clear_bit(port, BM(&s->evtchn_mask[0]));
417 evtchn_pending = sync_test_bit(port, BM(&s->evtchn_pending[0]));
418
419 if (unlikely(evtchn_pending && xen_hvm_domain())) {
420 sync_set_bit(port, BM(&s->evtchn_mask[0]));
421 do_hypercall = 1;
422 }
423 }
424
425 /* Slow path (hypercall) if this is a non-local port or if this is
426 * an hvm domain and an event is pending (hvm domains don't have
427 * their own implementation of irq_enable). */
428 if (do_hypercall) {
429 struct evtchn_unmask unmask = { .port = port };
430 (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
431 } else {
432 struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
433
434 /*
435 * The following is basically the equivalent of
436 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
437 * the interrupt edge' if the channel is masked.
438 */
439 if (evtchn_pending &&
440 !sync_test_and_set_bit(port / BITS_PER_EVTCHN_WORD,
441 BM(&vcpu_info->evtchn_pending_sel)))
442 vcpu_info->evtchn_upcall_pending = 1;
443 }
444
445 put_cpu();
446}
447
448static void xen_irq_init(unsigned irq) 372static void xen_irq_init(unsigned irq)
449{ 373{
450 struct irq_info *info; 374 struct irq_info *info;
@@ -538,6 +462,18 @@ static void xen_free_irq(unsigned irq)
538 irq_free_desc(irq); 462 irq_free_desc(irq);
539} 463}
540 464
465static void xen_evtchn_close(unsigned int port)
466{
467 struct evtchn_close close;
468
469 close.port = port;
470 if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
471 BUG();
472
473 /* Closed ports are implicitly re-bound to VCPU0. */
474 bind_evtchn_to_cpu(port, 0);
475}
476
541static void pirq_query_unmask(int irq) 477static void pirq_query_unmask(int irq)
542{ 478{
543 struct physdev_irq_status_query irq_status; 479 struct physdev_irq_status_query irq_status;
@@ -610,7 +546,13 @@ static unsigned int __startup_pirq(unsigned int irq)
610 546
611 pirq_query_unmask(irq); 547 pirq_query_unmask(irq);
612 548
613 evtchn_to_irq[evtchn] = irq; 549 rc = set_evtchn_to_irq(evtchn, irq);
550 if (rc != 0) {
551 pr_err("irq%d: Failed to set port to irq mapping (%d)\n",
552 irq, rc);
553 xen_evtchn_close(evtchn);
554 return 0;
555 }
614 bind_evtchn_to_cpu(evtchn, 0); 556 bind_evtchn_to_cpu(evtchn, 0);
615 info->evtchn = evtchn; 557 info->evtchn = evtchn;
616 558
@@ -628,10 +570,9 @@ static unsigned int startup_pirq(struct irq_data *data)
628 570
629static void shutdown_pirq(struct irq_data *data) 571static void shutdown_pirq(struct irq_data *data)
630{ 572{
631 struct evtchn_close close;
632 unsigned int irq = data->irq; 573 unsigned int irq = data->irq;
633 struct irq_info *info = info_for_irq(irq); 574 struct irq_info *info = info_for_irq(irq);
634 int evtchn = evtchn_from_irq(irq); 575 unsigned evtchn = evtchn_from_irq(irq);
635 576
636 BUG_ON(info->type != IRQT_PIRQ); 577 BUG_ON(info->type != IRQT_PIRQ);
637 578
@@ -639,14 +580,8 @@ static void shutdown_pirq(struct irq_data *data)
639 return; 580 return;
640 581
641 mask_evtchn(evtchn); 582 mask_evtchn(evtchn);
642 583 xen_evtchn_close(evtchn);
643 close.port = evtchn; 584 xen_irq_info_cleanup(info);
644 if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
645 BUG();
646
647 bind_evtchn_to_cpu(evtchn, 0);
648 evtchn_to_irq[evtchn] = -1;
649 info->evtchn = 0;
650} 585}
651 586
652static void enable_pirq(struct irq_data *data) 587static void enable_pirq(struct irq_data *data)
@@ -675,6 +610,41 @@ int xen_irq_from_gsi(unsigned gsi)
675} 610}
676EXPORT_SYMBOL_GPL(xen_irq_from_gsi); 611EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
677 612
613static void __unbind_from_irq(unsigned int irq)
614{
615 int evtchn = evtchn_from_irq(irq);
616 struct irq_info *info = irq_get_handler_data(irq);
617
618 if (info->refcnt > 0) {
619 info->refcnt--;
620 if (info->refcnt != 0)
621 return;
622 }
623
624 if (VALID_EVTCHN(evtchn)) {
625 unsigned int cpu = cpu_from_irq(irq);
626
627 xen_evtchn_close(evtchn);
628
629 switch (type_from_irq(irq)) {
630 case IRQT_VIRQ:
631 per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
632 break;
633 case IRQT_IPI:
634 per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1;
635 break;
636 default:
637 break;
638 }
639
640 xen_irq_info_cleanup(info);
641 }
642
643 BUG_ON(info_for_irq(irq)->type == IRQT_UNBOUND);
644
645 xen_free_irq(irq);
646}
647
678/* 648/*
679 * Do not make any assumptions regarding the relationship between the 649 * Do not make any assumptions regarding the relationship between the
680 * IRQ number returned here and the Xen pirq argument. 650 * IRQ number returned here and the Xen pirq argument.
@@ -690,6 +660,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
690{ 660{
691 int irq = -1; 661 int irq = -1;
692 struct physdev_irq irq_op; 662 struct physdev_irq irq_op;
663 int ret;
693 664
694 mutex_lock(&irq_mapping_update_lock); 665 mutex_lock(&irq_mapping_update_lock);
695 666
@@ -717,8 +688,13 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
717 goto out; 688 goto out;
718 } 689 }
719 690
720 xen_irq_info_pirq_init(irq, 0, pirq, gsi, DOMID_SELF, 691 ret = xen_irq_info_pirq_setup(irq, 0, pirq, gsi, DOMID_SELF,
721 shareable ? PIRQ_SHAREABLE : 0); 692 shareable ? PIRQ_SHAREABLE : 0);
693 if (ret < 0) {
694 __unbind_from_irq(irq);
695 irq = ret;
696 goto out;
697 }
722 698
723 pirq_query_unmask(irq); 699 pirq_query_unmask(irq);
724 /* We try to use the handler with the appropriate semantic for the 700 /* We try to use the handler with the appropriate semantic for the
@@ -778,7 +754,9 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
778 irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq, 754 irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq,
779 name); 755 name);
780 756
781 xen_irq_info_pirq_init(irq, 0, pirq, 0, domid, 0); 757 ret = xen_irq_info_pirq_setup(irq, 0, pirq, 0, domid, 0);
758 if (ret < 0)
759 goto error_irq;
782 ret = irq_set_msi_desc(irq, msidesc); 760 ret = irq_set_msi_desc(irq, msidesc);
783 if (ret < 0) 761 if (ret < 0)
784 goto error_irq; 762 goto error_irq;
@@ -786,8 +764,8 @@ out:
786 mutex_unlock(&irq_mapping_update_lock); 764 mutex_unlock(&irq_mapping_update_lock);
787 return irq; 765 return irq;
788error_irq: 766error_irq:
767 __unbind_from_irq(irq);
789 mutex_unlock(&irq_mapping_update_lock); 768 mutex_unlock(&irq_mapping_update_lock);
790 xen_free_irq(irq);
791 return ret; 769 return ret;
792} 770}
793#endif 771#endif
@@ -857,13 +835,18 @@ int xen_pirq_from_irq(unsigned irq)
857 return pirq_from_irq(irq); 835 return pirq_from_irq(irq);
858} 836}
859EXPORT_SYMBOL_GPL(xen_pirq_from_irq); 837EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
838
860int bind_evtchn_to_irq(unsigned int evtchn) 839int bind_evtchn_to_irq(unsigned int evtchn)
861{ 840{
862 int irq; 841 int irq;
842 int ret;
843
844 if (evtchn >= xen_evtchn_max_channels())
845 return -ENOMEM;
863 846
864 mutex_lock(&irq_mapping_update_lock); 847 mutex_lock(&irq_mapping_update_lock);
865 848
866 irq = evtchn_to_irq[evtchn]; 849 irq = get_evtchn_to_irq(evtchn);
867 850
868 if (irq == -1) { 851 if (irq == -1) {
869 irq = xen_allocate_irq_dynamic(); 852 irq = xen_allocate_irq_dynamic();
@@ -873,7 +856,12 @@ int bind_evtchn_to_irq(unsigned int evtchn)
873 irq_set_chip_and_handler_name(irq, &xen_dynamic_chip, 856 irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
874 handle_edge_irq, "event"); 857 handle_edge_irq, "event");
875 858
876 xen_irq_info_evtchn_init(irq, evtchn); 859 ret = xen_irq_info_evtchn_setup(irq, evtchn);
860 if (ret < 0) {
861 __unbind_from_irq(irq);
862 irq = ret;
863 goto out;
864 }
877 } else { 865 } else {
878 struct irq_info *info = info_for_irq(irq); 866 struct irq_info *info = info_for_irq(irq);
879 WARN_ON(info == NULL || info->type != IRQT_EVTCHN); 867 WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
@@ -890,6 +878,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
890{ 878{
891 struct evtchn_bind_ipi bind_ipi; 879 struct evtchn_bind_ipi bind_ipi;
892 int evtchn, irq; 880 int evtchn, irq;
881 int ret;
893 882
894 mutex_lock(&irq_mapping_update_lock); 883 mutex_lock(&irq_mapping_update_lock);
895 884
@@ -909,8 +898,12 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
909 BUG(); 898 BUG();
910 evtchn = bind_ipi.port; 899 evtchn = bind_ipi.port;
911 900
912 xen_irq_info_ipi_init(cpu, irq, evtchn, ipi); 901 ret = xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
913 902 if (ret < 0) {
903 __unbind_from_irq(irq);
904 irq = ret;
905 goto out;
906 }
914 bind_evtchn_to_cpu(evtchn, cpu); 907 bind_evtchn_to_cpu(evtchn, cpu);
915 } else { 908 } else {
916 struct irq_info *info = info_for_irq(irq); 909 struct irq_info *info = info_for_irq(irq);
@@ -943,7 +936,7 @@ static int find_virq(unsigned int virq, unsigned int cpu)
943 int port, rc = -ENOENT; 936 int port, rc = -ENOENT;
944 937
945 memset(&status, 0, sizeof(status)); 938 memset(&status, 0, sizeof(status));
946 for (port = 0; port <= NR_EVENT_CHANNELS; port++) { 939 for (port = 0; port < xen_evtchn_max_channels(); port++) {
947 status.dom = DOMID_SELF; 940 status.dom = DOMID_SELF;
948 status.port = port; 941 status.port = port;
949 rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status); 942 rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
@@ -959,6 +952,19 @@ static int find_virq(unsigned int virq, unsigned int cpu)
959 return rc; 952 return rc;
960} 953}
961 954
955/**
956 * xen_evtchn_nr_channels - number of usable event channel ports
957 *
958 * This may be less than the maximum supported by the current
959 * hypervisor ABI. Use xen_evtchn_max_channels() for the maximum
960 * supported.
961 */
962unsigned xen_evtchn_nr_channels(void)
963{
964 return evtchn_ops->nr_channels();
965}
966EXPORT_SYMBOL_GPL(xen_evtchn_nr_channels);
967
962int bind_virq_to_irq(unsigned int virq, unsigned int cpu) 968int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
963{ 969{
964 struct evtchn_bind_virq bind_virq; 970 struct evtchn_bind_virq bind_virq;
@@ -989,7 +995,12 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
989 evtchn = ret; 995 evtchn = ret;
990 } 996 }
991 997
992 xen_irq_info_virq_init(cpu, irq, evtchn, virq); 998 ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
999 if (ret < 0) {
1000 __unbind_from_irq(irq);
1001 irq = ret;
1002 goto out;
1003 }
993 1004
994 bind_evtchn_to_cpu(evtchn, cpu); 1005 bind_evtchn_to_cpu(evtchn, cpu);
995 } else { 1006 } else {
@@ -1005,50 +1016,8 @@ out:
1005 1016
1006static void unbind_from_irq(unsigned int irq) 1017static void unbind_from_irq(unsigned int irq)
1007{ 1018{
1008 struct evtchn_close close;
1009 int evtchn = evtchn_from_irq(irq);
1010 struct irq_info *info = irq_get_handler_data(irq);
1011
1012 if (WARN_ON(!info))
1013 return;
1014
1015 mutex_lock(&irq_mapping_update_lock); 1019 mutex_lock(&irq_mapping_update_lock);
1016 1020 __unbind_from_irq(irq);
1017 if (info->refcnt > 0) {
1018 info->refcnt--;
1019 if (info->refcnt != 0)
1020 goto done;
1021 }
1022
1023 if (VALID_EVTCHN(evtchn)) {
1024 close.port = evtchn;
1025 if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
1026 BUG();
1027
1028 switch (type_from_irq(irq)) {
1029 case IRQT_VIRQ:
1030 per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
1031 [virq_from_irq(irq)] = -1;
1032 break;
1033 case IRQT_IPI:
1034 per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn))
1035 [ipi_from_irq(irq)] = -1;
1036 break;
1037 default:
1038 break;
1039 }
1040
1041 /* Closed ports are implicitly re-bound to VCPU0. */
1042 bind_evtchn_to_cpu(evtchn, 0);
1043
1044 evtchn_to_irq[evtchn] = -1;
1045 }
1046
1047 BUG_ON(info_for_irq(irq)->type == IRQT_UNBOUND);
1048
1049 xen_free_irq(irq);
1050
1051 done:
1052 mutex_unlock(&irq_mapping_update_lock); 1021 mutex_unlock(&irq_mapping_update_lock);
1053} 1022}
1054 1023
@@ -1148,9 +1117,26 @@ void unbind_from_irqhandler(unsigned int irq, void *dev_id)
1148} 1117}
1149EXPORT_SYMBOL_GPL(unbind_from_irqhandler); 1118EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
1150 1119
1120/**
1121 * xen_set_irq_priority() - set an event channel priority.
 1122 * @irq: irq bound to an event channel.
1123 * @priority: priority between XEN_IRQ_PRIORITY_MAX and XEN_IRQ_PRIORITY_MIN.
1124 */
1125int xen_set_irq_priority(unsigned irq, unsigned priority)
1126{
1127 struct evtchn_set_priority set_priority;
1128
1129 set_priority.port = evtchn_from_irq(irq);
1130 set_priority.priority = priority;
1131
1132 return HYPERVISOR_event_channel_op(EVTCHNOP_set_priority,
1133 &set_priority);
1134}
1135EXPORT_SYMBOL_GPL(xen_set_irq_priority);
1136
1151int evtchn_make_refcounted(unsigned int evtchn) 1137int evtchn_make_refcounted(unsigned int evtchn)
1152{ 1138{
1153 int irq = evtchn_to_irq[evtchn]; 1139 int irq = get_evtchn_to_irq(evtchn);
1154 struct irq_info *info; 1140 struct irq_info *info;
1155 1141
1156 if (irq == -1) 1142 if (irq == -1)
@@ -1175,12 +1161,12 @@ int evtchn_get(unsigned int evtchn)
1175 struct irq_info *info; 1161 struct irq_info *info;
1176 int err = -ENOENT; 1162 int err = -ENOENT;
1177 1163
1178 if (evtchn >= NR_EVENT_CHANNELS) 1164 if (evtchn >= xen_evtchn_max_channels())
1179 return -EINVAL; 1165 return -EINVAL;
1180 1166
1181 mutex_lock(&irq_mapping_update_lock); 1167 mutex_lock(&irq_mapping_update_lock);
1182 1168
1183 irq = evtchn_to_irq[evtchn]; 1169 irq = get_evtchn_to_irq(evtchn);
1184 if (irq == -1) 1170 if (irq == -1)
1185 goto done; 1171 goto done;
1186 1172
@@ -1204,7 +1190,7 @@ EXPORT_SYMBOL_GPL(evtchn_get);
1204 1190
1205void evtchn_put(unsigned int evtchn) 1191void evtchn_put(unsigned int evtchn)
1206{ 1192{
1207 int irq = evtchn_to_irq[evtchn]; 1193 int irq = get_evtchn_to_irq(evtchn);
1208 if (WARN_ON(irq == -1)) 1194 if (WARN_ON(irq == -1))
1209 return; 1195 return;
1210 unbind_from_irq(irq); 1196 unbind_from_irq(irq);
@@ -1228,222 +1214,21 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
1228 notify_remote_via_irq(irq); 1214 notify_remote_via_irq(irq);
1229} 1215}
1230 1216
1231irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
1232{
1233 struct shared_info *sh = HYPERVISOR_shared_info;
1234 int cpu = smp_processor_id();
1235 xen_ulong_t *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
1236 int i;
1237 unsigned long flags;
1238 static DEFINE_SPINLOCK(debug_lock);
1239 struct vcpu_info *v;
1240
1241 spin_lock_irqsave(&debug_lock, flags);
1242
1243 printk("\nvcpu %d\n ", cpu);
1244
1245 for_each_online_cpu(i) {
1246 int pending;
1247 v = per_cpu(xen_vcpu, i);
1248 pending = (get_irq_regs() && i == cpu)
1249 ? xen_irqs_disabled(get_irq_regs())
1250 : v->evtchn_upcall_mask;
1251 printk("%d: masked=%d pending=%d event_sel %0*"PRI_xen_ulong"\n ", i,
1252 pending, v->evtchn_upcall_pending,
1253 (int)(sizeof(v->evtchn_pending_sel)*2),
1254 v->evtchn_pending_sel);
1255 }
1256 v = per_cpu(xen_vcpu, cpu);
1257
1258 printk("\npending:\n ");
1259 for (i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--)
1260 printk("%0*"PRI_xen_ulong"%s",
1261 (int)sizeof(sh->evtchn_pending[0])*2,
1262 sh->evtchn_pending[i],
1263 i % 8 == 0 ? "\n " : " ");
1264 printk("\nglobal mask:\n ");
1265 for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
1266 printk("%0*"PRI_xen_ulong"%s",
1267 (int)(sizeof(sh->evtchn_mask[0])*2),
1268 sh->evtchn_mask[i],
1269 i % 8 == 0 ? "\n " : " ");
1270
1271 printk("\nglobally unmasked:\n ");
1272 for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
1273 printk("%0*"PRI_xen_ulong"%s",
1274 (int)(sizeof(sh->evtchn_mask[0])*2),
1275 sh->evtchn_pending[i] & ~sh->evtchn_mask[i],
1276 i % 8 == 0 ? "\n " : " ");
1277
1278 printk("\nlocal cpu%d mask:\n ", cpu);
1279 for (i = (NR_EVENT_CHANNELS/BITS_PER_EVTCHN_WORD)-1; i >= 0; i--)
1280 printk("%0*"PRI_xen_ulong"%s", (int)(sizeof(cpu_evtchn[0])*2),
1281 cpu_evtchn[i],
1282 i % 8 == 0 ? "\n " : " ");
1283
1284 printk("\nlocally unmasked:\n ");
1285 for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) {
1286 xen_ulong_t pending = sh->evtchn_pending[i]
1287 & ~sh->evtchn_mask[i]
1288 & cpu_evtchn[i];
1289 printk("%0*"PRI_xen_ulong"%s",
1290 (int)(sizeof(sh->evtchn_mask[0])*2),
1291 pending, i % 8 == 0 ? "\n " : " ");
1292 }
1293
1294 printk("\npending list:\n");
1295 for (i = 0; i < NR_EVENT_CHANNELS; i++) {
1296 if (sync_test_bit(i, BM(sh->evtchn_pending))) {
1297 int word_idx = i / BITS_PER_EVTCHN_WORD;
1298 printk(" %d: event %d -> irq %d%s%s%s\n",
1299 cpu_from_evtchn(i), i,
1300 evtchn_to_irq[i],
1301 sync_test_bit(word_idx, BM(&v->evtchn_pending_sel))
1302 ? "" : " l2-clear",
1303 !sync_test_bit(i, BM(sh->evtchn_mask))
1304 ? "" : " globally-masked",
1305 sync_test_bit(i, BM(cpu_evtchn))
1306 ? "" : " locally-masked");
1307 }
1308 }
1309
1310 spin_unlock_irqrestore(&debug_lock, flags);
1311
1312 return IRQ_HANDLED;
1313}
1314
1315static DEFINE_PER_CPU(unsigned, xed_nesting_count); 1217static DEFINE_PER_CPU(unsigned, xed_nesting_count);
1316static DEFINE_PER_CPU(unsigned int, current_word_idx);
1317static DEFINE_PER_CPU(unsigned int, current_bit_idx);
1318
1319/*
1320 * Mask out the i least significant bits of w
1321 */
1322#define MASK_LSBS(w, i) (w & ((~((xen_ulong_t)0UL)) << i))
1323 1218
1324/*
1325 * Search the CPUs pending events bitmasks. For each one found, map
1326 * the event number to an irq, and feed it into do_IRQ() for
1327 * handling.
1328 *
1329 * Xen uses a two-level bitmap to speed searching. The first level is
1330 * a bitset of words which contain pending event bits. The second
1331 * level is a bitset of pending events themselves.
1332 */
1333static void __xen_evtchn_do_upcall(void) 1219static void __xen_evtchn_do_upcall(void)
1334{ 1220{
1335 int start_word_idx, start_bit_idx;
1336 int word_idx, bit_idx;
1337 int i, irq;
1338 int cpu = get_cpu();
1339 struct shared_info *s = HYPERVISOR_shared_info;
1340 struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); 1221 struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
1222 int cpu = get_cpu();
1341 unsigned count; 1223 unsigned count;
1342 1224
1343 do { 1225 do {
1344 xen_ulong_t pending_words;
1345 xen_ulong_t pending_bits;
1346 struct irq_desc *desc;
1347
1348 vcpu_info->evtchn_upcall_pending = 0; 1226 vcpu_info->evtchn_upcall_pending = 0;
1349 1227
1350 if (__this_cpu_inc_return(xed_nesting_count) - 1) 1228 if (__this_cpu_inc_return(xed_nesting_count) - 1)
1351 goto out; 1229 goto out;
1352 1230
1353 /* 1231 xen_evtchn_handle_events(cpu);
1354 * Master flag must be cleared /before/ clearing
1355 * selector flag. xchg_xen_ulong must contain an
1356 * appropriate barrier.
1357 */
1358 if ((irq = per_cpu(virq_to_irq, cpu)[VIRQ_TIMER]) != -1) {
1359 int evtchn = evtchn_from_irq(irq);
1360 word_idx = evtchn / BITS_PER_LONG;
1361 pending_bits = evtchn % BITS_PER_LONG;
1362 if (active_evtchns(cpu, s, word_idx) & (1ULL << pending_bits)) {
1363 desc = irq_to_desc(irq);
1364 if (desc)
1365 generic_handle_irq_desc(irq, desc);
1366 }
1367 }
1368
1369 pending_words = xchg_xen_ulong(&vcpu_info->evtchn_pending_sel, 0);
1370
1371 start_word_idx = __this_cpu_read(current_word_idx);
1372 start_bit_idx = __this_cpu_read(current_bit_idx);
1373
1374 word_idx = start_word_idx;
1375
1376 for (i = 0; pending_words != 0; i++) {
1377 xen_ulong_t words;
1378
1379 words = MASK_LSBS(pending_words, word_idx);
1380
1381 /*
1382 * If we masked out all events, wrap to beginning.
1383 */
1384 if (words == 0) {
1385 word_idx = 0;
1386 bit_idx = 0;
1387 continue;
1388 }
1389 word_idx = EVTCHN_FIRST_BIT(words);
1390
1391 pending_bits = active_evtchns(cpu, s, word_idx);
1392 bit_idx = 0; /* usually scan entire word from start */
1393 /*
1394 * We scan the starting word in two parts.
1395 *
1396 * 1st time: start in the middle, scanning the
1397 * upper bits.
1398 *
1399 * 2nd time: scan the whole word (not just the
1400 * parts skipped in the first pass) -- if an
1401 * event in the previously scanned bits is
1402 * pending again it would just be scanned on
1403 * the next loop anyway.
1404 */
1405 if (word_idx == start_word_idx) {
1406 if (i == 0)
1407 bit_idx = start_bit_idx;
1408 }
1409
1410 do {
1411 xen_ulong_t bits;
1412 int port;
1413
1414 bits = MASK_LSBS(pending_bits, bit_idx);
1415
1416 /* If we masked out all events, move on. */
1417 if (bits == 0)
1418 break;
1419
1420 bit_idx = EVTCHN_FIRST_BIT(bits);
1421
1422 /* Process port. */
1423 port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx;
1424 irq = evtchn_to_irq[port];
1425
1426 if (irq != -1) {
1427 desc = irq_to_desc(irq);
1428 if (desc)
1429 generic_handle_irq_desc(irq, desc);
1430 }
1431
1432 bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD;
1433
1434 /* Next caller starts at last processed + 1 */
1435 __this_cpu_write(current_word_idx,
1436 bit_idx ? word_idx :
1437 (word_idx+1) % BITS_PER_EVTCHN_WORD);
1438 __this_cpu_write(current_bit_idx, bit_idx);
1439 } while (bit_idx != 0);
1440
1441 /* Scan start_l1i twice; all others once. */
1442 if ((word_idx != start_word_idx) || (i != 0))
1443 pending_words &= ~(1UL << word_idx);
1444
1445 word_idx = (word_idx + 1) % BITS_PER_EVTCHN_WORD;
1446 }
1447 1232
1448 BUG_ON(!irqs_disabled()); 1233 BUG_ON(!irqs_disabled());
1449 1234
@@ -1492,12 +1277,12 @@ void rebind_evtchn_irq(int evtchn, int irq)
1492 mutex_lock(&irq_mapping_update_lock); 1277 mutex_lock(&irq_mapping_update_lock);
1493 1278
1494 /* After resume the irq<->evtchn mappings are all cleared out */ 1279 /* After resume the irq<->evtchn mappings are all cleared out */
1495 BUG_ON(evtchn_to_irq[evtchn] != -1); 1280 BUG_ON(get_evtchn_to_irq(evtchn) != -1);
1496 /* Expect irq to have been bound before, 1281 /* Expect irq to have been bound before,
1497 so there should be a proper type */ 1282 so there should be a proper type */
1498 BUG_ON(info->type == IRQT_UNBOUND); 1283 BUG_ON(info->type == IRQT_UNBOUND);
1499 1284
1500 xen_irq_info_evtchn_init(irq, evtchn); 1285 (void)xen_irq_info_evtchn_setup(irq, evtchn);
1501 1286
1502 mutex_unlock(&irq_mapping_update_lock); 1287 mutex_unlock(&irq_mapping_update_lock);
1503 1288
@@ -1511,7 +1296,6 @@ void rebind_evtchn_irq(int evtchn, int irq)
1511/* Rebind an evtchn so that it gets delivered to a specific cpu */ 1296/* Rebind an evtchn so that it gets delivered to a specific cpu */
1512static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) 1297static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
1513{ 1298{
1514 struct shared_info *s = HYPERVISOR_shared_info;
1515 struct evtchn_bind_vcpu bind_vcpu; 1299 struct evtchn_bind_vcpu bind_vcpu;
1516 int evtchn = evtchn_from_irq(irq); 1300 int evtchn = evtchn_from_irq(irq);
1517 int masked; 1301 int masked;
@@ -1534,7 +1318,7 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
1534 * Mask the event while changing the VCPU binding to prevent 1318 * Mask the event while changing the VCPU binding to prevent
1535 * it being delivered on an unexpected VCPU. 1319 * it being delivered on an unexpected VCPU.
1536 */ 1320 */
1537 masked = sync_test_and_set_bit(evtchn, BM(s->evtchn_mask)); 1321 masked = test_and_set_mask(evtchn);
1538 1322
1539 /* 1323 /*
1540 * If this fails, it usually just indicates that we're dealing with a 1324 * If this fails, it usually just indicates that we're dealing with a
@@ -1558,22 +1342,26 @@ static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
1558 return rebind_irq_to_cpu(data->irq, tcpu); 1342 return rebind_irq_to_cpu(data->irq, tcpu);
1559} 1343}
1560 1344
1561int resend_irq_on_evtchn(unsigned int irq) 1345static int retrigger_evtchn(int evtchn)
1562{ 1346{
1563 int masked, evtchn = evtchn_from_irq(irq); 1347 int masked;
1564 struct shared_info *s = HYPERVISOR_shared_info;
1565 1348
1566 if (!VALID_EVTCHN(evtchn)) 1349 if (!VALID_EVTCHN(evtchn))
1567 return 1; 1350 return 0;
1568 1351
1569 masked = sync_test_and_set_bit(evtchn, BM(s->evtchn_mask)); 1352 masked = test_and_set_mask(evtchn);
1570 sync_set_bit(evtchn, BM(s->evtchn_pending)); 1353 set_evtchn(evtchn);
1571 if (!masked) 1354 if (!masked)
1572 unmask_evtchn(evtchn); 1355 unmask_evtchn(evtchn);
1573 1356
1574 return 1; 1357 return 1;
1575} 1358}
1576 1359
1360int resend_irq_on_evtchn(unsigned int irq)
1361{
1362 return retrigger_evtchn(evtchn_from_irq(irq));
1363}
1364
1577static void enable_dynirq(struct irq_data *data) 1365static void enable_dynirq(struct irq_data *data)
1578{ 1366{
1579 int evtchn = evtchn_from_irq(data->irq); 1367 int evtchn = evtchn_from_irq(data->irq);
@@ -1608,21 +1396,7 @@ static void mask_ack_dynirq(struct irq_data *data)
1608 1396
1609static int retrigger_dynirq(struct irq_data *data) 1397static int retrigger_dynirq(struct irq_data *data)
1610{ 1398{
1611 int evtchn = evtchn_from_irq(data->irq); 1399 return retrigger_evtchn(evtchn_from_irq(data->irq));
1612 struct shared_info *sh = HYPERVISOR_shared_info;
1613 int ret = 0;
1614
1615 if (VALID_EVTCHN(evtchn)) {
1616 int masked;
1617
1618 masked = sync_test_and_set_bit(evtchn, BM(sh->evtchn_mask));
1619 sync_set_bit(evtchn, BM(sh->evtchn_pending));
1620 if (!masked)
1621 unmask_evtchn(evtchn);
1622 ret = 1;
1623 }
1624
1625 return ret;
1626} 1400}
1627 1401
1628static void restore_pirqs(void) 1402static void restore_pirqs(void)
@@ -1683,7 +1457,7 @@ static void restore_cpu_virqs(unsigned int cpu)
1683 evtchn = bind_virq.port; 1457 evtchn = bind_virq.port;
1684 1458
1685 /* Record the new mapping. */ 1459 /* Record the new mapping. */
1686 xen_irq_info_virq_init(cpu, irq, evtchn, virq); 1460 (void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
1687 bind_evtchn_to_cpu(evtchn, cpu); 1461 bind_evtchn_to_cpu(evtchn, cpu);
1688 } 1462 }
1689} 1463}
@@ -1707,7 +1481,7 @@ static void restore_cpu_ipis(unsigned int cpu)
1707 evtchn = bind_ipi.port; 1481 evtchn = bind_ipi.port;
1708 1482
1709 /* Record the new mapping. */ 1483 /* Record the new mapping. */
1710 xen_irq_info_ipi_init(cpu, irq, evtchn, ipi); 1484 (void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
1711 bind_evtchn_to_cpu(evtchn, cpu); 1485 bind_evtchn_to_cpu(evtchn, cpu);
1712 } 1486 }
1713} 1487}
@@ -1784,21 +1558,18 @@ EXPORT_SYMBOL_GPL(xen_test_irq_shared);
1784 1558
1785void xen_irq_resume(void) 1559void xen_irq_resume(void)
1786{ 1560{
1787 unsigned int cpu, evtchn; 1561 unsigned int cpu;
1788 struct irq_info *info; 1562 struct irq_info *info;
1789 1563
1790 init_evtchn_cpu_bindings();
1791
1792 /* New event-channel space is not 'live' yet. */ 1564 /* New event-channel space is not 'live' yet. */
1793 for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) 1565 xen_evtchn_mask_all();
1794 mask_evtchn(evtchn); 1566 xen_evtchn_resume();
1795 1567
1796 /* No IRQ <-> event-channel mappings. */ 1568 /* No IRQ <-> event-channel mappings. */
1797 list_for_each_entry(info, &xen_irq_list_head, list) 1569 list_for_each_entry(info, &xen_irq_list_head, list)
1798 info->evtchn = 0; /* zap event-channel binding */ 1570 info->evtchn = 0; /* zap event-channel binding */
1799 1571
1800 for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) 1572 clear_evtchn_to_irq_all();
1801 evtchn_to_irq[evtchn] = -1;
1802 1573
1803 for_each_possible_cpu(cpu) { 1574 for_each_possible_cpu(cpu) {
1804 restore_cpu_virqs(cpu); 1575 restore_cpu_virqs(cpu);
@@ -1889,27 +1660,40 @@ void xen_callback_vector(void)
1889void xen_callback_vector(void) {} 1660void xen_callback_vector(void) {}
1890#endif 1661#endif
1891 1662
1663#undef MODULE_PARAM_PREFIX
1664#define MODULE_PARAM_PREFIX "xen."
1665
1666static bool fifo_events = true;
1667module_param(fifo_events, bool, 0);
1668
1892void __init xen_init_IRQ(void) 1669void __init xen_init_IRQ(void)
1893{ 1670{
1894 int i; 1671 int ret = -EINVAL;
1895 1672
1896 evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq), 1673 if (fifo_events)
1897 GFP_KERNEL); 1674 ret = xen_evtchn_fifo_init();
1898 BUG_ON(!evtchn_to_irq); 1675 if (ret < 0)
1899 for (i = 0; i < NR_EVENT_CHANNELS; i++) 1676 xen_evtchn_2l_init();
1900 evtchn_to_irq[i] = -1;
1901 1677
1902 init_evtchn_cpu_bindings(); 1678 evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()),
1679 sizeof(*evtchn_to_irq), GFP_KERNEL);
1680 BUG_ON(!evtchn_to_irq);
1903 1681
1904 /* No event channels are 'live' right now. */ 1682 /* No event channels are 'live' right now. */
1905 for (i = 0; i < NR_EVENT_CHANNELS; i++) 1683 xen_evtchn_mask_all();
1906 mask_evtchn(i);
1907 1684
1908 pirq_needs_eoi = pirq_needs_eoi_flag; 1685 pirq_needs_eoi = pirq_needs_eoi_flag;
1909 1686
1910#ifdef CONFIG_X86 1687#ifdef CONFIG_X86
1911 if (xen_hvm_domain()) { 1688 if (xen_pv_domain()) {
1689 irq_ctx_init(smp_processor_id());
1690 if (xen_initial_domain())
1691 pci_xen_initial_domain();
1692 }
1693 if (xen_feature(XENFEAT_hvm_callback_vector))
1912 xen_callback_vector(); 1694 xen_callback_vector();
1695
1696 if (xen_hvm_domain()) {
1913 native_init_IRQ(); 1697 native_init_IRQ();
1914 /* pci_xen_hvm_init must be called after native_init_IRQ so that 1698 /* pci_xen_hvm_init must be called after native_init_IRQ so that
1915 * __acpi_register_gsi can point at the right function */ 1699 * __acpi_register_gsi can point at the right function */
@@ -1918,13 +1702,10 @@ void __init xen_init_IRQ(void)
1918 int rc; 1702 int rc;
1919 struct physdev_pirq_eoi_gmfn eoi_gmfn; 1703 struct physdev_pirq_eoi_gmfn eoi_gmfn;
1920 1704
1921 irq_ctx_init(smp_processor_id());
1922 if (xen_initial_domain())
1923 pci_xen_initial_domain();
1924
1925 pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO); 1705 pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
1926 eoi_gmfn.gmfn = virt_to_mfn(pirq_eoi_map); 1706 eoi_gmfn.gmfn = virt_to_mfn(pirq_eoi_map);
1927 rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn); 1707 rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
1708 /* TODO: No PVH support for PIRQ EOI */
1928 if (rc != 0) { 1709 if (rc != 0) {
1929 free_page((unsigned long) pirq_eoi_map); 1710 free_page((unsigned long) pirq_eoi_map);
1930 pirq_eoi_map = NULL; 1711 pirq_eoi_map = NULL;
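
For reference, since events_base.c is built into the kernel, module_param() together with MODULE_PARAM_PREFIX "xen." exposes the new switch as a boot parameter: passing xen.fifo_events=0 on the command line skips xen_evtchn_fifo_init() and keeps the 2-level ABI via xen_evtchn_2l_init(); the same fallback happens automatically when the FIFO init fails.
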
diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c
new file mode 100644
index 000000000000..1de2a191b395
--- /dev/null
+++ b/drivers/xen/events/events_fifo.c
@@ -0,0 +1,428 @@
1/*
2 * Xen event channels (FIFO-based ABI)
3 *
4 * Copyright (C) 2013 Citrix Systems R&D ltd.
5 *
6 * This source code is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
10 *
11 * Or, when distributed separately from the Linux kernel or
12 * incorporated into other software packages, subject to the following
13 * license:
14 *
15 * Permission is hereby granted, free of charge, to any person obtaining a copy
16 * of this source file (the "Software"), to deal in the Software without
17 * restriction, including without limitation the rights to use, copy, modify,
18 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
19 * and to permit persons to whom the Software is furnished to do so, subject to
20 * the following conditions:
21 *
22 * The above copyright notice and this permission notice shall be included in
23 * all copies or substantial portions of the Software.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
31 * IN THE SOFTWARE.
32 */
33
34#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
35
36#include <linux/linkage.h>
37#include <linux/interrupt.h>
38#include <linux/irq.h>
39#include <linux/module.h>
40#include <linux/smp.h>
41#include <linux/percpu.h>
42#include <linux/cpu.h>
43
44#include <asm/sync_bitops.h>
45#include <asm/xen/hypercall.h>
46#include <asm/xen/hypervisor.h>
47#include <asm/xen/page.h>
48
49#include <xen/xen.h>
50#include <xen/xen-ops.h>
51#include <xen/events.h>
52#include <xen/interface/xen.h>
53#include <xen/interface/event_channel.h>
54
55#include "events_internal.h"
56
57#define EVENT_WORDS_PER_PAGE (PAGE_SIZE / sizeof(event_word_t))
58#define MAX_EVENT_ARRAY_PAGES (EVTCHN_FIFO_NR_CHANNELS / EVENT_WORDS_PER_PAGE)
59
60struct evtchn_fifo_queue {
61 uint32_t head[EVTCHN_FIFO_MAX_QUEUES];
62};
63
64static DEFINE_PER_CPU(struct evtchn_fifo_control_block *, cpu_control_block);
65static DEFINE_PER_CPU(struct evtchn_fifo_queue, cpu_queue);
66static event_word_t *event_array[MAX_EVENT_ARRAY_PAGES] __read_mostly;
67static unsigned event_array_pages __read_mostly;
68
69#define BM(w) ((unsigned long *)(w))
70
71static inline event_word_t *event_word_from_port(unsigned port)
72{
73 unsigned i = port / EVENT_WORDS_PER_PAGE;
74
75 return event_array[i] + port % EVENT_WORDS_PER_PAGE;
76}
77
78static unsigned evtchn_fifo_max_channels(void)
79{
80 return EVTCHN_FIFO_NR_CHANNELS;
81}
82
83static unsigned evtchn_fifo_nr_channels(void)
84{
85 return event_array_pages * EVENT_WORDS_PER_PAGE;
86}
87
88static void free_unused_array_pages(void)
89{
90 unsigned i;
91
92 for (i = event_array_pages; i < MAX_EVENT_ARRAY_PAGES; i++) {
93 if (!event_array[i])
94 break;
95 free_page((unsigned long)event_array[i]);
96 event_array[i] = NULL;
97 }
98}
99
100static void init_array_page(event_word_t *array_page)
101{
102 unsigned i;
103
104 for (i = 0; i < EVENT_WORDS_PER_PAGE; i++)
105 array_page[i] = 1 << EVTCHN_FIFO_MASKED;
106}
107
108static int evtchn_fifo_setup(struct irq_info *info)
109{
110 unsigned port = info->evtchn;
111 unsigned new_array_pages;
112 int ret;
113
114 new_array_pages = port / EVENT_WORDS_PER_PAGE + 1;
115
116 if (new_array_pages > MAX_EVENT_ARRAY_PAGES)
117 return -EINVAL;
118
119 while (event_array_pages < new_array_pages) {
120 void *array_page;
121 struct evtchn_expand_array expand_array;
122
123 /* Might already have a page if we've resumed. */
124 array_page = event_array[event_array_pages];
125 if (!array_page) {
126 array_page = (void *)__get_free_page(GFP_KERNEL);
127 if (array_page == NULL) {
128 ret = -ENOMEM;
129 goto error;
130 }
131 event_array[event_array_pages] = array_page;
132 }
133
134 /* Mask all events in this page before adding it. */
135 init_array_page(array_page);
136
137 expand_array.array_gfn = virt_to_mfn(array_page);
138
139 ret = HYPERVISOR_event_channel_op(EVTCHNOP_expand_array, &expand_array);
140 if (ret < 0)
141 goto error;
142
143 event_array_pages++;
144 }
145 return 0;
146
147 error:
148 if (event_array_pages == 0)
149 panic("xen: unable to expand event array with initial page (%d)\n", ret);
150 else
151 pr_err("unable to expand event array (%d)\n", ret);
152 free_unused_array_pages();
153 return ret;
154}
155
156static void evtchn_fifo_bind_to_cpu(struct irq_info *info, unsigned cpu)
157{
158 /* no-op */
159}
160
161static void evtchn_fifo_clear_pending(unsigned port)
162{
163 event_word_t *word = event_word_from_port(port);
164 sync_clear_bit(EVTCHN_FIFO_PENDING, BM(word));
165}
166
167static void evtchn_fifo_set_pending(unsigned port)
168{
169 event_word_t *word = event_word_from_port(port);
170 sync_set_bit(EVTCHN_FIFO_PENDING, BM(word));
171}
172
173static bool evtchn_fifo_is_pending(unsigned port)
174{
175 event_word_t *word = event_word_from_port(port);
176 return sync_test_bit(EVTCHN_FIFO_PENDING, BM(word));
177}
178
179static bool evtchn_fifo_test_and_set_mask(unsigned port)
180{
181 event_word_t *word = event_word_from_port(port);
182 return sync_test_and_set_bit(EVTCHN_FIFO_MASKED, BM(word));
183}
184
185static void evtchn_fifo_mask(unsigned port)
186{
187 event_word_t *word = event_word_from_port(port);
188 sync_set_bit(EVTCHN_FIFO_MASKED, BM(word));
189}
190
191/*
192 * Clear MASKED, spinning if BUSY is set.
193 */
194static void clear_masked(volatile event_word_t *word)
195{
196 event_word_t new, old, w;
197
198 w = *word;
199
200 do {
201 old = w & ~(1 << EVTCHN_FIFO_BUSY);
202 new = old & ~(1 << EVTCHN_FIFO_MASKED);
203 w = sync_cmpxchg(word, old, new);
204 } while (w != old);
205}
206
207static void evtchn_fifo_unmask(unsigned port)
208{
209 event_word_t *word = event_word_from_port(port);
210
211 BUG_ON(!irqs_disabled());
212
213 clear_masked(word);
214 if (sync_test_bit(EVTCHN_FIFO_PENDING, BM(word))) {
215 struct evtchn_unmask unmask = { .port = port };
216 (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
217 }
218}
219
220static uint32_t clear_linked(volatile event_word_t *word)
221{
222 event_word_t new, old, w;
223
224 w = *word;
225
226 do {
227 old = w;
228 new = (w & ~((1 << EVTCHN_FIFO_LINKED)
229 | EVTCHN_FIFO_LINK_MASK));
230 } while ((w = sync_cmpxchg(word, old, new)) != old);
231
232 return w & EVTCHN_FIFO_LINK_MASK;
233}
234
235static void handle_irq_for_port(unsigned port)
236{
237 int irq;
238 struct irq_desc *desc;
239
240 irq = get_evtchn_to_irq(port);
241 if (irq != -1) {
242 desc = irq_to_desc(irq);
243 if (desc)
244 generic_handle_irq_desc(irq, desc);
245 }
246}
247
248static void consume_one_event(unsigned cpu,
249 struct evtchn_fifo_control_block *control_block,
250 unsigned priority, uint32_t *ready)
251{
252 struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
253 uint32_t head;
254 unsigned port;
255 event_word_t *word;
256
257 head = q->head[priority];
258
259 /*
260 * Reached the tail last time? Read the new HEAD from the
261 * control block.
262 */
263 if (head == 0) {
264 rmb(); /* Ensure word is up-to-date before reading head. */
265 head = control_block->head[priority];
266 }
267
268 port = head;
269 word = event_word_from_port(port);
270 head = clear_linked(word);
271
272 /*
273 * If the link is non-zero, there are more events in the
274 * queue, otherwise the queue is empty.
275 *
276 * If the queue is empty, clear this priority from our local
277 * copy of the ready word.
278 */
279 if (head == 0)
280 clear_bit(priority, BM(ready));
281
282 if (sync_test_bit(EVTCHN_FIFO_PENDING, BM(word))
283 && !sync_test_bit(EVTCHN_FIFO_MASKED, BM(word)))
284 handle_irq_for_port(port);
285
286 q->head[priority] = head;
287}
288
289static void evtchn_fifo_handle_events(unsigned cpu)
290{
291 struct evtchn_fifo_control_block *control_block;
292 uint32_t ready;
293 unsigned q;
294
295 control_block = per_cpu(cpu_control_block, cpu);
296
297 ready = xchg(&control_block->ready, 0);
298
299 while (ready) {
300 q = find_first_bit(BM(&ready), EVTCHN_FIFO_MAX_QUEUES);
301 consume_one_event(cpu, control_block, q, &ready);
302 ready |= xchg(&control_block->ready, 0);
303 }
304}
305
306static void evtchn_fifo_resume(void)
307{
308 unsigned cpu;
309
310 for_each_possible_cpu(cpu) {
311 void *control_block = per_cpu(cpu_control_block, cpu);
312 struct evtchn_init_control init_control;
313 int ret;
314
315 if (!control_block)
316 continue;
317
318 /*
319 * If this CPU is offline, take the opportunity to
320 * free the control block while it is not being
321 * used.
322 */
323 if (!cpu_online(cpu)) {
324 free_page((unsigned long)control_block);
325 per_cpu(cpu_control_block, cpu) = NULL;
326 continue;
327 }
328
329 init_control.control_gfn = virt_to_mfn(control_block);
330 init_control.offset = 0;
331 init_control.vcpu = cpu;
332
333 ret = HYPERVISOR_event_channel_op(EVTCHNOP_init_control,
334 &init_control);
335 if (ret < 0)
336 BUG();
337 }
338
339 /*
340 * The event array starts out as empty again and is extended
341 * as normal when events are bound. The existing pages will
342 * be reused.
343 */
344 event_array_pages = 0;
345}
346
347static const struct evtchn_ops evtchn_ops_fifo = {
348 .max_channels = evtchn_fifo_max_channels,
349 .nr_channels = evtchn_fifo_nr_channels,
350 .setup = evtchn_fifo_setup,
351 .bind_to_cpu = evtchn_fifo_bind_to_cpu,
352 .clear_pending = evtchn_fifo_clear_pending,
353 .set_pending = evtchn_fifo_set_pending,
354 .is_pending = evtchn_fifo_is_pending,
355 .test_and_set_mask = evtchn_fifo_test_and_set_mask,
356 .mask = evtchn_fifo_mask,
357 .unmask = evtchn_fifo_unmask,
358 .handle_events = evtchn_fifo_handle_events,
359 .resume = evtchn_fifo_resume,
360};
361
362static int evtchn_fifo_init_control_block(unsigned cpu)
363{
364 struct page *control_block = NULL;
365 struct evtchn_init_control init_control;
366 int ret = -ENOMEM;
367
368 control_block = alloc_page(GFP_KERNEL|__GFP_ZERO);
369 if (control_block == NULL)
370 goto error;
371
372 init_control.control_gfn = virt_to_mfn(page_address(control_block));
373 init_control.offset = 0;
374 init_control.vcpu = cpu;
375
376 ret = HYPERVISOR_event_channel_op(EVTCHNOP_init_control, &init_control);
377 if (ret < 0)
378 goto error;
379
380 per_cpu(cpu_control_block, cpu) = page_address(control_block);
381
382 return 0;
383
384 error:
385 __free_page(control_block);
386 return ret;
387}
388
389static int evtchn_fifo_cpu_notification(struct notifier_block *self,
390 unsigned long action,
391 void *hcpu)
392{
393 int cpu = (long)hcpu;
394 int ret = 0;
395
396 switch (action) {
397 case CPU_UP_PREPARE:
398 if (!per_cpu(cpu_control_block, cpu))
399 ret = evtchn_fifo_init_control_block(cpu);
400 break;
401 default:
402 break;
403 }
404 return ret < 0 ? NOTIFY_BAD : NOTIFY_OK;
405}
406
407static struct notifier_block evtchn_fifo_cpu_notifier = {
408 .notifier_call = evtchn_fifo_cpu_notification,
409};
410
411int __init xen_evtchn_fifo_init(void)
412{
413 int cpu = get_cpu();
414 int ret;
415
416 ret = evtchn_fifo_init_control_block(cpu);
417 if (ret < 0)
418 goto out;
419
420 pr_info("Using FIFO-based ABI\n");
421
422 evtchn_ops = &evtchn_ops_fifo;
423
424 register_cpu_notifier(&evtchn_fifo_cpu_notifier);
425out:
426 put_cpu();
427 return ret;
428}
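
The per-port state in this ABI lives in a single 32-bit event word: bits 31..28 carry PENDING, MASKED, LINKED and BUSY, and the low 17 bits are the link to the next port in the queue. A minimal stand-alone sketch of that layout, using the constants from the ABI header added by this series (the helper names are illustrative, not from the patch):

    #include <stdbool.h>
    #include <stdint.h>

    typedef uint32_t event_word_t;

    #define EVTCHN_FIFO_PENDING   31
    #define EVTCHN_FIFO_MASKED    30
    #define EVTCHN_FIFO_LINKED    29
    #define EVTCHN_FIFO_BUSY      28
    #define EVTCHN_FIFO_LINK_BITS 17
    #define EVTCHN_FIFO_LINK_MASK ((1 << EVTCHN_FIFO_LINK_BITS) - 1)

    /* Port of the next event in the queue; 0 means this was the tail. */
    static inline uint32_t fifo_link(event_word_t w)
    {
            return w & EVTCHN_FIFO_LINK_MASK;
    }

    /* consume_one_event() only hands a port to the IRQ layer when it is
     * pending and not masked; this predicate mirrors that test. */
    static inline bool fifo_deliverable(event_word_t w)
    {
            return (w & (1u << EVTCHN_FIFO_PENDING)) &&
                   !(w & (1u << EVTCHN_FIFO_MASKED));
    }
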
diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h
new file mode 100644
index 000000000000..677f41a0fff9
--- /dev/null
+++ b/drivers/xen/events/events_internal.h
@@ -0,0 +1,150 @@
1/*
2 * Xen Event Channels (internal header)
3 *
4 * Copyright (C) 2013 Citrix Systems R&D Ltd.
5 *
6 * This source code is licensed under the GNU General Public License,
7 * Version 2 or later. See the file COPYING for more details.
8 */
9#ifndef __EVENTS_INTERNAL_H__
10#define __EVENTS_INTERNAL_H__
11
12/* Interrupt types. */
13enum xen_irq_type {
14 IRQT_UNBOUND = 0,
15 IRQT_PIRQ,
16 IRQT_VIRQ,
17 IRQT_IPI,
18 IRQT_EVTCHN
19};
20
21/*
22 * Packed IRQ information:
23 * type - enum xen_irq_type
24 * event channel - irq->event channel mapping
25 * cpu - cpu this event channel is bound to
26 * index - type-specific information:
 27 * PIRQ - vector, with MSB being "needs EOI", or physical IRQ of the HVM
28 * guest, or GSI (real passthrough IRQ) of the device.
29 * VIRQ - virq number
30 * IPI - IPI vector
31 * EVTCHN -
32 */
33struct irq_info {
34 struct list_head list;
35 int refcnt;
36 enum xen_irq_type type; /* type */
37 unsigned irq;
38 unsigned int evtchn; /* event channel */
39 unsigned short cpu; /* cpu bound */
40
41 union {
42 unsigned short virq;
43 enum ipi_vector ipi;
44 struct {
45 unsigned short pirq;
46 unsigned short gsi;
47 unsigned char vector;
48 unsigned char flags;
49 uint16_t domid;
50 } pirq;
51 } u;
52};
53
54#define PIRQ_NEEDS_EOI (1 << 0)
55#define PIRQ_SHAREABLE (1 << 1)
56
57struct evtchn_ops {
58 unsigned (*max_channels)(void);
59 unsigned (*nr_channels)(void);
60
61 int (*setup)(struct irq_info *info);
62 void (*bind_to_cpu)(struct irq_info *info, unsigned cpu);
63
64 void (*clear_pending)(unsigned port);
65 void (*set_pending)(unsigned port);
66 bool (*is_pending)(unsigned port);
67 bool (*test_and_set_mask)(unsigned port);
68 void (*mask)(unsigned port);
69 void (*unmask)(unsigned port);
70
71 void (*handle_events)(unsigned cpu);
72 void (*resume)(void);
73};
74
75extern const struct evtchn_ops *evtchn_ops;
76
77extern int **evtchn_to_irq;
78int get_evtchn_to_irq(unsigned int evtchn);
79
80struct irq_info *info_for_irq(unsigned irq);
81unsigned cpu_from_irq(unsigned irq);
82unsigned cpu_from_evtchn(unsigned int evtchn);
83
84static inline unsigned xen_evtchn_max_channels(void)
85{
86 return evtchn_ops->max_channels();
87}
88
89/*
90 * Do any ABI specific setup for a bound event channel before it can
91 * be unmasked and used.
92 */
93static inline int xen_evtchn_port_setup(struct irq_info *info)
94{
95 if (evtchn_ops->setup)
96 return evtchn_ops->setup(info);
97 return 0;
98}
99
100static inline void xen_evtchn_port_bind_to_cpu(struct irq_info *info,
101 unsigned cpu)
102{
103 evtchn_ops->bind_to_cpu(info, cpu);
104}
105
106static inline void clear_evtchn(unsigned port)
107{
108 evtchn_ops->clear_pending(port);
109}
110
111static inline void set_evtchn(unsigned port)
112{
113 evtchn_ops->set_pending(port);
114}
115
116static inline bool test_evtchn(unsigned port)
117{
118 return evtchn_ops->is_pending(port);
119}
120
121static inline bool test_and_set_mask(unsigned port)
122{
123 return evtchn_ops->test_and_set_mask(port);
124}
125
126static inline void mask_evtchn(unsigned port)
127{
128 return evtchn_ops->mask(port);
129}
130
131static inline void unmask_evtchn(unsigned port)
132{
133 return evtchn_ops->unmask(port);
134}
135
136static inline void xen_evtchn_handle_events(unsigned cpu)
137{
138 return evtchn_ops->handle_events(cpu);
139}
140
141static inline void xen_evtchn_resume(void)
142{
143 if (evtchn_ops->resume)
144 evtchn_ops->resume();
145}
146
147void xen_evtchn_2l_init(void);
148int xen_evtchn_fifo_init(void);
149
150#endif /* #ifndef __EVENTS_INTERNAL_H__ */
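
This header turns the event-channel core into a small ops-based abstraction: events_base.c goes through the inline wrappers above, and a backend only has to fill in struct evtchn_ops and assign the global evtchn_ops pointer, as events_fifo.c does with evtchn_ops_fifo. A sketch of what a hypothetical third backend would provide (all names below are invented for illustration):

    #include <linux/init.h>
    #include <linux/types.h>

    #include "events_internal.h"

    static unsigned example_max_channels(void)            { return 1024; }
    static unsigned example_nr_channels(void)             { return 1024; }
    static void example_noop_port(unsigned port)          { }
    static bool example_false_port(unsigned port)         { return false; }
    static void example_bind(struct irq_info *info, unsigned cpu) { }
    static void example_handle(unsigned cpu)              { }

    static const struct evtchn_ops evtchn_ops_example = {
            .max_channels      = example_max_channels,
            .nr_channels       = example_nr_channels,
            /* .setup and .resume may stay NULL: the inline wrappers
             * treat those two hooks as optional. */
            .bind_to_cpu       = example_bind,
            .clear_pending     = example_noop_port,
            .set_pending       = example_noop_port,
            .is_pending        = example_false_port,
            .test_and_set_mask = example_false_port,
            .mask              = example_noop_port,
            .unmask            = example_noop_port,
            .handle_events     = example_handle,
    };

    void __init example_evtchn_init(void)
    {
            evtchn_ops = &evtchn_ops_example;   /* select this ABI */
    }
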
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index 5de2063e16d3..00f40f051d95 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -417,7 +417,7 @@ static long evtchn_ioctl(struct file *file,
417 break; 417 break;
418 418
419 rc = -EINVAL; 419 rc = -EINVAL;
420 if (unbind.port >= NR_EVENT_CHANNELS) 420 if (unbind.port >= xen_evtchn_nr_channels())
421 break; 421 break;
422 422
423 rc = -ENOTCONN; 423 rc = -ENOTCONN;
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index e41c79c986ea..073b4a19a8b0 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -846,7 +846,7 @@ static int __init gntdev_init(void)
846 if (!xen_domain()) 846 if (!xen_domain())
847 return -ENODEV; 847 return -ENODEV;
848 848
849 use_ptemod = xen_pv_domain(); 849 use_ptemod = !xen_feature(XENFEAT_auto_translated_physmap);
850 850
851 err = misc_register(&gntdev_miscdev); 851 err = misc_register(&gntdev_miscdev);
852 if (err != 0) { 852 if (err != 0) {
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index aa846a48f400..1ce1c40331f3 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -62,12 +62,10 @@
62 62
63static grant_ref_t **gnttab_list; 63static grant_ref_t **gnttab_list;
64static unsigned int nr_grant_frames; 64static unsigned int nr_grant_frames;
65static unsigned int boot_max_nr_grant_frames;
66static int gnttab_free_count; 65static int gnttab_free_count;
67static grant_ref_t gnttab_free_head; 66static grant_ref_t gnttab_free_head;
68static DEFINE_SPINLOCK(gnttab_list_lock); 67static DEFINE_SPINLOCK(gnttab_list_lock);
69unsigned long xen_hvm_resume_frames; 68struct grant_frames xen_auto_xlat_grant_frames;
70EXPORT_SYMBOL_GPL(xen_hvm_resume_frames);
71 69
72static union { 70static union {
73 struct grant_entry_v1 *v1; 71 struct grant_entry_v1 *v1;
@@ -827,6 +825,11 @@ static unsigned int __max_nr_grant_frames(void)
827unsigned int gnttab_max_grant_frames(void) 825unsigned int gnttab_max_grant_frames(void)
828{ 826{
829 unsigned int xen_max = __max_nr_grant_frames(); 827 unsigned int xen_max = __max_nr_grant_frames();
828 static unsigned int boot_max_nr_grant_frames;
829
830 /* First time, initialize it properly. */
831 if (!boot_max_nr_grant_frames)
832 boot_max_nr_grant_frames = __max_nr_grant_frames();
830 833
831 if (xen_max > boot_max_nr_grant_frames) 834 if (xen_max > boot_max_nr_grant_frames)
832 return boot_max_nr_grant_frames; 835 return boot_max_nr_grant_frames;
@@ -834,6 +837,51 @@ unsigned int gnttab_max_grant_frames(void)
834} 837}
835EXPORT_SYMBOL_GPL(gnttab_max_grant_frames); 838EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
836 839
840int gnttab_setup_auto_xlat_frames(unsigned long addr)
841{
842 xen_pfn_t *pfn;
843 unsigned int max_nr_gframes = __max_nr_grant_frames();
844 unsigned int i;
845 void *vaddr;
846
847 if (xen_auto_xlat_grant_frames.count)
848 return -EINVAL;
849
850 vaddr = xen_remap(addr, PAGE_SIZE * max_nr_gframes);
851 if (vaddr == NULL) {
852 pr_warn("Failed to ioremap gnttab share frames (addr=0x%08lx)!\n",
853 addr);
854 return -ENOMEM;
855 }
856 pfn = kcalloc(max_nr_gframes, sizeof(pfn[0]), GFP_KERNEL);
857 if (!pfn) {
858 xen_unmap(vaddr);
859 return -ENOMEM;
860 }
861 for (i = 0; i < max_nr_gframes; i++)
862 pfn[i] = PFN_DOWN(addr) + i;
863
864 xen_auto_xlat_grant_frames.vaddr = vaddr;
865 xen_auto_xlat_grant_frames.pfn = pfn;
866 xen_auto_xlat_grant_frames.count = max_nr_gframes;
867
868 return 0;
869}
870EXPORT_SYMBOL_GPL(gnttab_setup_auto_xlat_frames);
871
872void gnttab_free_auto_xlat_frames(void)
873{
874 if (!xen_auto_xlat_grant_frames.count)
875 return;
876 kfree(xen_auto_xlat_grant_frames.pfn);
877 xen_unmap(xen_auto_xlat_grant_frames.vaddr);
878
879 xen_auto_xlat_grant_frames.pfn = NULL;
880 xen_auto_xlat_grant_frames.count = 0;
881 xen_auto_xlat_grant_frames.vaddr = NULL;
882}
883EXPORT_SYMBOL_GPL(gnttab_free_auto_xlat_frames);
884
837/* Handling of paged out grant targets (GNTST_eagain) */ 885/* Handling of paged out grant targets (GNTST_eagain) */
838#define MAX_DELAY 256 886#define MAX_DELAY 256
839static inline void 887static inline void
@@ -1060,10 +1108,11 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
1060 unsigned int nr_gframes = end_idx + 1; 1108 unsigned int nr_gframes = end_idx + 1;
1061 int rc; 1109 int rc;
1062 1110
1063 if (xen_hvm_domain()) { 1111 if (xen_feature(XENFEAT_auto_translated_physmap)) {
1064 struct xen_add_to_physmap xatp; 1112 struct xen_add_to_physmap xatp;
1065 unsigned int i = end_idx; 1113 unsigned int i = end_idx;
1066 rc = 0; 1114 rc = 0;
1115 BUG_ON(xen_auto_xlat_grant_frames.count < nr_gframes);
1067 /* 1116 /*
1068 * Loop backwards, so that the first hypercall has the largest 1117 * Loop backwards, so that the first hypercall has the largest
1069 * index, ensuring that the table will grow only once. 1118 * index, ensuring that the table will grow only once.
@@ -1072,7 +1121,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
1072 xatp.domid = DOMID_SELF; 1121 xatp.domid = DOMID_SELF;
1073 xatp.idx = i; 1122 xatp.idx = i;
1074 xatp.space = XENMAPSPACE_grant_table; 1123 xatp.space = XENMAPSPACE_grant_table;
1075 xatp.gpfn = (xen_hvm_resume_frames >> PAGE_SHIFT) + i; 1124 xatp.gpfn = xen_auto_xlat_grant_frames.pfn[i];
1076 rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp); 1125 rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
1077 if (rc != 0) { 1126 if (rc != 0) {
1078 pr_warn("grant table add_to_physmap failed, err=%d\n", 1127 pr_warn("grant table add_to_physmap failed, err=%d\n",
@@ -1135,10 +1184,8 @@ static void gnttab_request_version(void)
1135 int rc; 1184 int rc;
1136 struct gnttab_set_version gsv; 1185 struct gnttab_set_version gsv;
1137 1186
1138 if (xen_hvm_domain()) 1187 gsv.version = 1;
1139 gsv.version = 1; 1188
1140 else
1141 gsv.version = 2;
1142 rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1); 1189 rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
1143 if (rc == 0 && gsv.version == 2) { 1190 if (rc == 0 && gsv.version == 2) {
1144 grant_table_version = 2; 1191 grant_table_version = 2;
@@ -1169,22 +1216,15 @@ static int gnttab_setup(void)
1169 if (max_nr_gframes < nr_grant_frames) 1216 if (max_nr_gframes < nr_grant_frames)
1170 return -ENOSYS; 1217 return -ENOSYS;
1171 1218
1172 if (xen_pv_domain()) 1219 if (xen_feature(XENFEAT_auto_translated_physmap) && gnttab_shared.addr == NULL) {
1173 return gnttab_map(0, nr_grant_frames - 1); 1220 gnttab_shared.addr = xen_auto_xlat_grant_frames.vaddr;
1174
1175 if (gnttab_shared.addr == NULL) {
1176 gnttab_shared.addr = xen_remap(xen_hvm_resume_frames,
1177 PAGE_SIZE * max_nr_gframes);
1178 if (gnttab_shared.addr == NULL) { 1221 if (gnttab_shared.addr == NULL) {
1179 pr_warn("Failed to ioremap gnttab share frames (addr=0x%08lx)!\n", 1222 pr_warn("gnttab share frames (addr=0x%08lx) is not mapped!\n",
1180 xen_hvm_resume_frames); 1223 (unsigned long)xen_auto_xlat_grant_frames.vaddr);
1181 return -ENOMEM; 1224 return -ENOMEM;
1182 } 1225 }
1183 } 1226 }
1184 1227 return gnttab_map(0, nr_grant_frames - 1);
1185 gnttab_map(0, nr_grant_frames - 1);
1186
1187 return 0;
1188} 1228}
1189 1229
1190int gnttab_resume(void) 1230int gnttab_resume(void)
@@ -1227,13 +1267,12 @@ int gnttab_init(void)
1227 1267
1228 gnttab_request_version(); 1268 gnttab_request_version();
1229 nr_grant_frames = 1; 1269 nr_grant_frames = 1;
1230 boot_max_nr_grant_frames = __max_nr_grant_frames();
1231 1270
1232 /* Determine the maximum number of frames required for the 1271 /* Determine the maximum number of frames required for the
1233 * grant reference free list on the current hypervisor. 1272 * grant reference free list on the current hypervisor.
1234 */ 1273 */
1235 BUG_ON(grefs_per_grant_frame == 0); 1274 BUG_ON(grefs_per_grant_frame == 0);
1236 max_nr_glist_frames = (boot_max_nr_grant_frames * 1275 max_nr_glist_frames = (gnttab_max_grant_frames() *
1237 grefs_per_grant_frame / RPP); 1276 grefs_per_grant_frame / RPP);
1238 1277
1239 gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *), 1278 gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
@@ -1286,5 +1325,6 @@ static int __gnttab_init(void)
1286 1325
1287 return gnttab_init(); 1326 return gnttab_init();
1288} 1327}
1289 1328/* Starts after core_initcall so that xen_pvh_gnttab_setup can be called
1290core_initcall(__gnttab_init); 1329 * beforehand to initialize xen_auto_xlat_grant_frames. */
1330core_initcall_sync(__gnttab_init);
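
The switch from core_initcall to core_initcall_sync matters for ordering: *_initcall_sync entries run after all plain initcalls of the same level, so the xen_pvh_gnttab_setup mentioned in the comment (presumably registered as a core_initcall elsewhere in this series) gets a chance to populate xen_auto_xlat_grant_frames before __gnttab_init() runs.
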
diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
index 188825122aae..dd9c249ea311 100644
--- a/drivers/xen/pci.c
+++ b/drivers/xen/pci.c
@@ -26,7 +26,9 @@
26#include <asm/xen/hypervisor.h> 26#include <asm/xen/hypervisor.h>
27#include <asm/xen/hypercall.h> 27#include <asm/xen/hypercall.h>
28#include "../pci/pci.h" 28#include "../pci/pci.h"
29#ifdef CONFIG_PCI_MMCONFIG
29#include <asm/pci_x86.h> 30#include <asm/pci_x86.h>
31#endif
30 32
31static bool __read_mostly pci_seg_supported = true; 33static bool __read_mostly pci_seg_supported = true;
32 34
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
index 2f3528e93cb9..a1361c312c06 100644
--- a/drivers/xen/platform-pci.c
+++ b/drivers/xen/platform-pci.c
@@ -108,6 +108,7 @@ static int platform_pci_init(struct pci_dev *pdev,
108 long ioaddr; 108 long ioaddr;
109 long mmio_addr, mmio_len; 109 long mmio_addr, mmio_len;
110 unsigned int max_nr_gframes; 110 unsigned int max_nr_gframes;
111 unsigned long grant_frames;
111 112
112 if (!xen_domain()) 113 if (!xen_domain())
113 return -ENODEV; 114 return -ENODEV;
@@ -154,13 +155,17 @@ static int platform_pci_init(struct pci_dev *pdev,
154 } 155 }
155 156
156 max_nr_gframes = gnttab_max_grant_frames(); 157 max_nr_gframes = gnttab_max_grant_frames();
157 xen_hvm_resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes); 158 grant_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
158 ret = gnttab_init(); 159 ret = gnttab_setup_auto_xlat_frames(grant_frames);
159 if (ret) 160 if (ret)
160 goto out; 161 goto out;
162 ret = gnttab_init();
163 if (ret)
164 goto grant_out;
161 xenbus_probe(NULL); 165 xenbus_probe(NULL);
162 return 0; 166 return 0;
163 167grant_out:
168 gnttab_free_auto_xlat_frames();
164out: 169out:
165 pci_release_region(pdev, 0); 170 pci_release_region(pdev, 0);
166mem_out: 171mem_out:
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index ec097d6f964d..01d59e66565d 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -45,6 +45,7 @@
45#include <xen/grant_table.h> 45#include <xen/grant_table.h>
46#include <xen/xenbus.h> 46#include <xen/xenbus.h>
47#include <xen/xen.h> 47#include <xen/xen.h>
48#include <xen/features.h>
48 49
49#include "xenbus_probe.h" 50#include "xenbus_probe.h"
50 51
@@ -743,7 +744,7 @@ static const struct xenbus_ring_ops ring_ops_hvm = {
743 744
744void __init xenbus_ring_ops_init(void) 745void __init xenbus_ring_ops_init(void)
745{ 746{
746 if (xen_pv_domain()) 747 if (!xen_feature(XENFEAT_auto_translated_physmap))
747 ring_ops = &ring_ops_pv; 748 ring_ops = &ring_ops_pv;
748 else 749 else
749 ring_ops = &ring_ops_hvm; 750 ring_ops = &ring_ops_hvm;
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index 129bf84c19ec..cb385c10d2b1 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -496,7 +496,7 @@ subsys_initcall(xenbus_probe_frontend_init);
496#ifndef MODULE 496#ifndef MODULE
497static int __init boot_wait_for_devices(void) 497static int __init boot_wait_for_devices(void)
498{ 498{
499 if (xen_hvm_domain() && !xen_platform_pci_unplug) 499 if (!xen_has_pv_devices())
500 return -ENODEV; 500 return -ENODEV;
501 501
502 ready_to_wait_for_devices = 1; 502 ready_to_wait_for_devices = 1;
diff --git a/include/xen/events.h b/include/xen/events.h
index c9ea10ee2273..c9c85cf84895 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -7,6 +7,8 @@
7#include <asm/xen/hypercall.h> 7#include <asm/xen/hypercall.h>
8#include <asm/xen/events.h> 8#include <asm/xen/events.h>
9 9
10unsigned xen_evtchn_nr_channels(void);
11
10int bind_evtchn_to_irq(unsigned int evtchn); 12int bind_evtchn_to_irq(unsigned int evtchn);
11int bind_evtchn_to_irqhandler(unsigned int evtchn, 13int bind_evtchn_to_irqhandler(unsigned int evtchn,
12 irq_handler_t handler, 14 irq_handler_t handler,
@@ -37,6 +39,11 @@ int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
37 */ 39 */
38void unbind_from_irqhandler(unsigned int irq, void *dev_id); 40void unbind_from_irqhandler(unsigned int irq, void *dev_id);
39 41
42#define XEN_IRQ_PRIORITY_MAX EVTCHN_FIFO_PRIORITY_MAX
43#define XEN_IRQ_PRIORITY_DEFAULT EVTCHN_FIFO_PRIORITY_DEFAULT
44#define XEN_IRQ_PRIORITY_MIN EVTCHN_FIFO_PRIORITY_MIN
45int xen_set_irq_priority(unsigned irq, unsigned priority);
46
40/* 47/*
41 * Allow extra references to event channels exposed to userspace by evtchn 48 * Allow extra references to event channels exposed to userspace by evtchn
42 */ 49 */
@@ -73,6 +80,8 @@ void xen_poll_irq_timeout(int irq, u64 timeout);
73 80
74/* Determine the IRQ which is bound to an event channel */ 81/* Determine the IRQ which is bound to an event channel */
75unsigned irq_from_evtchn(unsigned int evtchn); 82unsigned irq_from_evtchn(unsigned int evtchn);
83int irq_from_virq(unsigned int cpu, unsigned int virq);
84unsigned int evtchn_from_irq(unsigned irq);
76 85
77/* Xen HVM evtchn vector callback */ 86/* Xen HVM evtchn vector callback */
78void xen_hvm_callback_vector(void); 87void xen_hvm_callback_vector(void);
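
The new exports let a driver bound-check ports against whatever ABI is active and request an event priority. A hedged usage sketch (handler and names are placeholders; priorities are only meaningful with the FIFO ABI):

    #include <linux/interrupt.h>
    #include <xen/events.h>

    static irqreturn_t example_handler(int irq, void *dev_id)
    {
            return IRQ_HANDLED;
    }

    static int example_bind(unsigned int evtchn, void *dev_id)
    {
            int irq;

            if (evtchn >= xen_evtchn_nr_channels())
                    return -EINVAL;   /* same check evtchn.c now uses */

            irq = bind_evtchn_to_irqhandler(evtchn, example_handler, 0,
                                            "example", dev_id);
            if (irq < 0)
                    return irq;

            /* Ask for the highest priority; a no-op/error on other ABIs. */
            xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX);
            return irq;
    }
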
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index 694dcaf266e6..5acb1e4ac0d3 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -178,8 +178,15 @@ int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
178 grant_status_t **__shared); 178 grant_status_t **__shared);
179void arch_gnttab_unmap(void *shared, unsigned long nr_gframes); 179void arch_gnttab_unmap(void *shared, unsigned long nr_gframes);
180 180
181extern unsigned long xen_hvm_resume_frames; 181struct grant_frames {
182 xen_pfn_t *pfn;
183 unsigned int count;
184 void *vaddr;
185};
186extern struct grant_frames xen_auto_xlat_grant_frames;
182unsigned int gnttab_max_grant_frames(void); 187unsigned int gnttab_max_grant_frames(void);
188int gnttab_setup_auto_xlat_frames(unsigned long addr);
189void gnttab_free_auto_xlat_frames(void);
183 190
184#define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) 191#define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr))
185 192
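
For auto-translated guests the grant-table core no longer ioremaps xen_hvm_resume_frames itself; the caller provides the frames up front. A sketch of the expected call order, mirroring platform_pci_init() above (the address argument is a placeholder):

    #include <xen/grant_table.h>

    static int example_grant_setup(unsigned long frames_addr)
    {
            int ret;

            /* Map the shared grant frames and record them in
             * xen_auto_xlat_grant_frames... */
            ret = gnttab_setup_auto_xlat_frames(frames_addr);
            if (ret)
                    return ret;

            /* ...so that gnttab_init()/gnttab_setup() can use them. */
            ret = gnttab_init();
            if (ret)
                    gnttab_free_auto_xlat_frames();
            return ret;
    }
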
diff --git a/include/xen/interface/elfnote.h b/include/xen/interface/elfnote.h
index 0360b15f4883..6f4eae328ca7 100644
--- a/include/xen/interface/elfnote.h
+++ b/include/xen/interface/elfnote.h
@@ -140,6 +140,19 @@
140 */ 140 */
141#define XEN_ELFNOTE_SUSPEND_CANCEL 14 141#define XEN_ELFNOTE_SUSPEND_CANCEL 14
142 142
143/*
144 * The features supported by this kernel (numeric).
145 *
146 * Other than XEN_ELFNOTE_FEATURES on pre-4.2 Xen, this note allows a
147 * kernel to specify support for features that older hypervisors don't
148 * know about. The set of features 4.2 and newer hypervisors will
149 * consider supported by the kernel is the combination of the sets
150 * specified through this and the string note.
151 *
152 * LEGACY: FEATURES
153 */
154#define XEN_ELFNOTE_SUPPORTED_FEATURES 17
155
143#endif /* __XEN_PUBLIC_ELFNOTE_H__ */ 156#endif /* __XEN_PUBLIC_ELFNOTE_H__ */
144 157
145/* 158/*
diff --git a/include/xen/interface/event_channel.h b/include/xen/interface/event_channel.h
index f4942921e202..7e6acef5415b 100644
--- a/include/xen/interface/event_channel.h
+++ b/include/xen/interface/event_channel.h
@@ -190,6 +190,39 @@ struct evtchn_reset {
190}; 190};
191typedef struct evtchn_reset evtchn_reset_t; 191typedef struct evtchn_reset evtchn_reset_t;
192 192
193/*
194 * EVTCHNOP_init_control: initialize the control block for the FIFO ABI.
195 */
196#define EVTCHNOP_init_control 11
197struct evtchn_init_control {
198 /* IN parameters. */
199 uint64_t control_gfn;
200 uint32_t offset;
201 uint32_t vcpu;
202 /* OUT parameters. */
203 uint8_t link_bits;
204 uint8_t _pad[7];
205};
206
207/*
208 * EVTCHNOP_expand_array: add an additional page to the event array.
209 */
210#define EVTCHNOP_expand_array 12
211struct evtchn_expand_array {
212 /* IN parameters. */
213 uint64_t array_gfn;
214};
215
216/*
217 * EVTCHNOP_set_priority: set the priority for an event channel.
218 */
219#define EVTCHNOP_set_priority 13
220struct evtchn_set_priority {
221 /* IN parameters. */
222 uint32_t port;
223 uint32_t priority;
224};
225
193struct evtchn_op { 226struct evtchn_op {
194 uint32_t cmd; /* EVTCHNOP_* */ 227 uint32_t cmd; /* EVTCHNOP_* */
195 union { 228 union {
@@ -207,4 +240,39 @@ struct evtchn_op {
207}; 240};
208DEFINE_GUEST_HANDLE_STRUCT(evtchn_op); 241DEFINE_GUEST_HANDLE_STRUCT(evtchn_op);
209 242
243/*
244 * 2-level ABI
245 */
246
247#define EVTCHN_2L_NR_CHANNELS (sizeof(xen_ulong_t) * sizeof(xen_ulong_t) * 64)
248
249/*
250 * FIFO ABI
251 */
252
253/* Events may have priorities from 0 (highest) to 15 (lowest). */
254#define EVTCHN_FIFO_PRIORITY_MAX 0
255#define EVTCHN_FIFO_PRIORITY_DEFAULT 7
256#define EVTCHN_FIFO_PRIORITY_MIN 15
257
258#define EVTCHN_FIFO_MAX_QUEUES (EVTCHN_FIFO_PRIORITY_MIN + 1)
259
260typedef uint32_t event_word_t;
261
262#define EVTCHN_FIFO_PENDING 31
263#define EVTCHN_FIFO_MASKED 30
264#define EVTCHN_FIFO_LINKED 29
265#define EVTCHN_FIFO_BUSY 28
266
267#define EVTCHN_FIFO_LINK_BITS 17
268#define EVTCHN_FIFO_LINK_MASK ((1 << EVTCHN_FIFO_LINK_BITS) - 1)
269
270#define EVTCHN_FIFO_NR_CHANNELS (1 << EVTCHN_FIFO_LINK_BITS)
271
272struct evtchn_fifo_control_block {
273 uint32_t ready;
274 uint32_t _rsvd;
275 event_word_t head[EVTCHN_FIFO_MAX_QUEUES];
276};
277
210#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */ 278#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */
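
For scale: with an 8-byte xen_ulong_t, EVTCHN_2L_NR_CHANNELS works out to 8 * 8 * 64 = 4096 channels (1024 with a 4-byte xen_ulong_t), while EVTCHN_FIFO_NR_CHANNELS is 1 << 17 = 131072. A sketch of how a guest could issue the new EVTCHNOP_set_priority call directly (helper name is illustrative):

    #include <xen/interface/event_channel.h>
    #include <asm/xen/hypercall.h>

    static int example_evtchn_set_priority(uint32_t port, uint32_t priority)
    {
            struct evtchn_set_priority set_priority = {
                    .port     = port,
                    .priority = priority,   /* 0 (highest) .. 15 (lowest) */
            };

            return HYPERVISOR_event_channel_op(EVTCHNOP_set_priority,
                                               &set_priority);
    }
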
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 53ec4167bd0b..0cd5ca333fac 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -281,12 +281,6 @@ struct multicall_entry {
281}; 281};
282DEFINE_GUEST_HANDLE_STRUCT(multicall_entry); 282DEFINE_GUEST_HANDLE_STRUCT(multicall_entry);
283 283
284/*
285 * Event channel endpoints per domain:
286 * 1024 if a long is 32 bits; 4096 if a long is 64 bits.
287 */
288#define NR_EVENT_CHANNELS (sizeof(xen_ulong_t) * sizeof(xen_ulong_t) * 64)
289
290struct vcpu_time_info { 284struct vcpu_time_info {
291 /* 285 /*
292 * Updates to the following values are preceded and followed 286 * Updates to the following values are preceded and followed
diff --git a/include/xen/platform_pci.h b/include/xen/platform_pci.h
index 438c256c274b..5c52b5583917 100644
--- a/include/xen/platform_pci.h
+++ b/include/xen/platform_pci.h
@@ -46,6 +46,27 @@ static inline int xen_must_unplug_disks(void) {
46#endif 46#endif
47} 47}
48 48
49extern int xen_platform_pci_unplug; 49#if defined(CONFIG_XEN_PVHVM)
50 50extern bool xen_has_pv_devices(void);
51extern bool xen_has_pv_disk_devices(void);
52extern bool xen_has_pv_nic_devices(void);
53extern bool xen_has_pv_and_legacy_disk_devices(void);
54#else
55static inline bool xen_has_pv_devices(void)
56{
57 return IS_ENABLED(CONFIG_XEN);
58}
59static inline bool xen_has_pv_disk_devices(void)
60{
61 return IS_ENABLED(CONFIG_XEN);
62}
63static inline bool xen_has_pv_nic_devices(void)
64{
65 return IS_ENABLED(CONFIG_XEN);
66}
67static inline bool xen_has_pv_and_legacy_disk_devices(void)
68{
69 return false;
70}
71#endif
51#endif /* _XEN_PLATFORM_PCI_H */ 72#endif /* _XEN_PLATFORM_PCI_H */
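
Frontends that previously keyed off xen_platform_pci_unplug can now simply ask whether paravirtualized devices are available, as the xenbus_probe_frontend.c hunk above does. A minimal sketch (the init function is a placeholder):

    #include <linux/errno.h>
    #include <linux/init.h>
    #include <xen/platform_pci.h>

    static int __init example_frontend_init(void)
    {
            if (!xen_has_pv_devices())
                    return -ENODEV;   /* no PV devices on this domain */

            /* ... register the frontend driver ... */
            return 0;
    }
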
diff --git a/include/xen/xen.h b/include/xen/xen.h
index a74d4362c4f8..0c0e3ef4c45d 100644
--- a/include/xen/xen.h
+++ b/include/xen/xen.h
@@ -29,4 +29,18 @@ extern enum xen_domain_type xen_domain_type;
29#define xen_initial_domain() (0) 29#define xen_initial_domain() (0)
30#endif /* CONFIG_XEN_DOM0 */ 30#endif /* CONFIG_XEN_DOM0 */
31 31
32#ifdef CONFIG_XEN_PVH
33/* This functionality exists only for x86. The XEN_PVHVM support exists
34 * only in x86 world - hence on ARM it will be always disabled.
35 * N.B. ARM guests are neither PV nor HVM nor PVHVM.
36 * It's a bit like PVH but is different also (it's further towards the H
37 * end of the spectrum than even PVH).
38 */
39#include <xen/features.h>
40#define xen_pvh_domain() (xen_pv_domain() && \
41 xen_feature(XENFEAT_auto_translated_physmap) && \
42 xen_have_vector_callback)
43#else
44#define xen_pvh_domain() (0)
45#endif
32#endif /* _XEN_XEN_H */ 46#endif /* _XEN_XEN_H */
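
As the comment notes, a PVH guest still satisfies xen_pv_domain() but relies on an auto-translated physmap, so the new predicate is mainly useful for carving PVH out of classic-PV code paths. A hypothetical illustration:

    #include <linux/types.h>
    #include <xen/xen.h>

    /* Classic PV guests need the hypervisor to manage page tables;
     * PVH guests (auto-translated physmap) do not. */
    static inline bool example_needs_pv_mmu(void)
    {
            return xen_pv_domain() && !xen_pvh_domain();
    }
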