-rw-r--r--	MAINTAINERS	1
-rw-r--r--	arch/arm/include/asm/xen/page.h	3
-rw-r--r--	arch/arm/xen/enlighten.c	9
-rw-r--r--	arch/x86/include/asm/xen/page.h	8
-rw-r--r--	arch/x86/xen/Kconfig	4
-rw-r--r--	arch/x86/xen/enlighten.c	126
-rw-r--r--	arch/x86/xen/grant-table.c	63
-rw-r--r--	arch/x86/xen/irq.c	5
-rw-r--r--	arch/x86/xen/mmu.c	166
-rw-r--r--	arch/x86/xen/p2m.c	15
-rw-r--r--	arch/x86/xen/platform-pci-unplug.c	79
-rw-r--r--	arch/x86/xen/setup.c	40
-rw-r--r--	arch/x86/xen/smp.c	49
-rw-r--r--	arch/x86/xen/time.c	1
-rw-r--r--	arch/x86/xen/xen-head.S	25
-rw-r--r--	arch/x86/xen/xen-ops.h	1
-rw-r--r--	drivers/block/xen-blkfront.c	4
-rw-r--r--	drivers/char/tpm/xen-tpmfront.c	4
-rw-r--r--	drivers/input/misc/xen-kbdfront.c	4
-rw-r--r--	drivers/net/xen-netfront.c	2
-rw-r--r--	drivers/pci/xen-pcifront.c	4
-rw-r--r--	drivers/video/xen-fbfront.c	6
-rw-r--r--	drivers/xen/Kconfig	1
-rw-r--r--	drivers/xen/Makefile	3
-rw-r--r--	drivers/xen/balloon.c	9
-rw-r--r--	drivers/xen/dbgp.c	2
-rw-r--r--	drivers/xen/events/Makefile	5
-rw-r--r--	drivers/xen/events/events_2l.c	372
-rw-r--r--	drivers/xen/events/events_base.c (renamed from drivers/xen/events.c)	797
-rw-r--r--	drivers/xen/events/events_fifo.c	428
-rw-r--r--	drivers/xen/events/events_internal.h	150
-rw-r--r--	drivers/xen/evtchn.c	2
-rw-r--r--	drivers/xen/gntdev.c	2
-rw-r--r--	drivers/xen/grant-table.c	90
-rw-r--r--	drivers/xen/pci.c	2
-rw-r--r--	drivers/xen/platform-pci.c	11
-rw-r--r--	drivers/xen/xenbus/xenbus_client.c	3
-rw-r--r--	drivers/xen/xenbus/xenbus_probe_frontend.c	2
-rw-r--r--	include/xen/events.h	9
-rw-r--r--	include/xen/grant_table.h	9
-rw-r--r--	include/xen/interface/elfnote.h	13
-rw-r--r--	include/xen/interface/event_channel.h	68
-rw-r--r--	include/xen/interface/xen.h	6
-rw-r--r--	include/xen/platform_pci.h	25
-rw-r--r--	include/xen/xen.h	14
45 files changed, 1952 insertions, 690 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index e945c6380f56..0207c30906ad 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9559,6 +9559,7 @@ M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
 M:	Boris Ostrovsky <boris.ostrovsky@oracle.com>
 M:	David Vrabel <david.vrabel@citrix.com>
 L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip.git
 S:	Supported
 F:	arch/x86/xen/
 F:	drivers/*/xen-*front.c
diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h
index 3759cacdd7f8..e0965abacb7d 100644
--- a/arch/arm/include/asm/xen/page.h
+++ b/arch/arm/include/asm/xen/page.h
@@ -117,6 +117,7 @@ static inline bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 	return __set_phys_to_machine(pfn, mfn);
 }
 
-#define xen_remap(cookie, size) ioremap_cache((cookie), (size));
+#define xen_remap(cookie, size) ioremap_cache((cookie), (size))
+#define xen_unmap(cookie) iounmap((cookie))
 
 #endif /* _ASM_ARM_XEN_PAGE_H */
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 85501238b425..2162172c0ddc 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -208,6 +208,7 @@ static int __init xen_guest_init(void)
 	const char *version = NULL;
 	const char *xen_prefix = "xen,xen-";
 	struct resource res;
+	unsigned long grant_frames;
 
 	node = of_find_compatible_node(NULL, NULL, "xen,xen");
 	if (!node) {
@@ -224,10 +225,10 @@ static int __init xen_guest_init(void)
 	}
 	if (of_address_to_resource(node, GRANT_TABLE_PHYSADDR, &res))
 		return 0;
-	xen_hvm_resume_frames = res.start;
+	grant_frames = res.start;
 	xen_events_irq = irq_of_parse_and_map(node, 0);
 	pr_info("Xen %s support found, events_irq=%d gnttab_frame_pfn=%lx\n",
-		version, xen_events_irq, (xen_hvm_resume_frames >> PAGE_SHIFT));
+		version, xen_events_irq, (grant_frames >> PAGE_SHIFT));
 	xen_domain_type = XEN_HVM_DOMAIN;
 
 	xen_setup_features();
@@ -265,6 +266,10 @@ static int __init xen_guest_init(void)
 	if (xen_vcpu_info == NULL)
 		return -ENOMEM;
 
+	if (gnttab_setup_auto_xlat_frames(grant_frames)) {
+		free_percpu(xen_vcpu_info);
+		return -ENOMEM;
+	}
 	gnttab_init();
 	if (!xen_initial_domain())
 		xenbus_probe(NULL);
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index b913915e8e63..3e276eb23d1b 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -167,7 +167,12 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine)
  */
 static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
 {
-	unsigned long pfn = mfn_to_pfn(mfn);
+	unsigned long pfn;
+
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return mfn;
+
+	pfn = mfn_to_pfn(mfn);
 	if (get_phys_to_machine(pfn) != mfn)
 		return -1; /* force !pfn_valid() */
 	return pfn;
@@ -222,5 +227,6 @@ void make_lowmem_page_readonly(void *vaddr);
 void make_lowmem_page_readwrite(void *vaddr);
 
 #define xen_remap(cookie, size) ioremap((cookie), (size));
+#define xen_unmap(cookie) iounmap((cookie))
 
 #endif /* _ASM_X86_XEN_PAGE_H */
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 1a3c76505649..01b90261fa38 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -51,3 +51,7 @@ config XEN_DEBUG_FS
 	  Enable statistics output and various tuning options in debugfs.
 	  Enabling this option may incur a significant performance overhead.
 
+config XEN_PVH
+	bool "Support for running as a PVH guest"
+	depends on X86_64 && XEN && XEN_PVHVM
+	def_bool n
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index fa6ade76ef3f..a4d7b647867f 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -262,8 +262,9 @@ static void __init xen_banner(void)
 	struct xen_extraversion extra;
 	HYPERVISOR_xen_version(XENVER_extraversion, &extra);
 
-	printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
-	       pv_info.name);
+	pr_info("Booting paravirtualized kernel %son %s\n",
+		xen_feature(XENFEAT_auto_translated_physmap) ?
+		"with PVH extensions " : "", pv_info.name);
 	printk(KERN_INFO "Xen version: %d.%d%s%s\n",
 	       version >> 16, version & 0xffff, extra.extraversion,
 	       xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
@@ -433,7 +434,7 @@ static void __init xen_init_cpuid_mask(void)
 
 	ax = 1;
 	cx = 0;
-	xen_cpuid(&ax, &bx, &cx, &dx);
+	cpuid(1, &ax, &bx, &cx, &dx);
 
 	xsave_mask =
 		(1 << (X86_FEATURE_XSAVE % 32)) |
@@ -1142,8 +1143,9 @@ void xen_setup_vcpu_info_placement(void)
 	xen_vcpu_setup(cpu);
 
 	/* xen_vcpu_setup managed to place the vcpu_info within the
-	   percpu area for all cpus, so make use of it */
-	if (have_vcpu_info_placement) {
+	 * percpu area for all cpus, so make use of it. Note that for
+	 * PVH we want to use native IRQ mechanism. */
+	if (have_vcpu_info_placement && !xen_pvh_domain()) {
 		pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
 		pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
 		pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
@@ -1407,9 +1409,49 @@ static void __init xen_boot_params_init_edd(void)
  * Set up the GDT and segment registers for -fstack-protector. Until
  * we do this, we have to be careful not to call any stack-protected
  * function, which is most of the kernel.
+ *
+ * Note, that it is __ref because the only caller of this after init
+ * is PVH which is not going to use xen_load_gdt_boot or other
+ * __init functions.
  */
-static void __init xen_setup_stackprotector(void)
+static void __ref xen_setup_gdt(int cpu)
 {
+	if (xen_feature(XENFEAT_auto_translated_physmap)) {
+#ifdef CONFIG_X86_64
+		unsigned long dummy;
+
+		load_percpu_segment(cpu); /* We need to access per-cpu area */
+		switch_to_new_gdt(cpu); /* GDT and GS set */
+
+		/* We are switching of the Xen provided GDT to our HVM mode
+		 * GDT. The new GDT has __KERNEL_CS with CS.L = 1
+		 * and we are jumping to reload it.
+		 */
+		asm volatile ("pushq %0\n"
+			      "leaq 1f(%%rip),%0\n"
+			      "pushq %0\n"
+			      "lretq\n"
+			      "1:\n"
+			      : "=&r" (dummy) : "0" (__KERNEL_CS));
+
+		/*
+		 * While not needed, we also set the %es, %ds, and %fs
+		 * to zero. We don't care about %ss as it is NULL.
+		 * Strictly speaking this is not needed as Xen zeros those
+		 * out (and also MSR_FS_BASE, MSR_GS_BASE, MSR_KERNEL_GS_BASE)
+		 *
+		 * Linux zeros them in cpu_init() and in secondary_startup_64
+		 * (for BSP).
+		 */
+		loadsegment(es, 0);
+		loadsegment(ds, 0);
+		loadsegment(fs, 0);
+#else
+		/* PVH: TODO Implement. */
+		BUG();
+#endif
+		return; /* PVH does not need any PV GDT ops. */
+	}
 	pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot;
 	pv_cpu_ops.load_gdt = xen_load_gdt_boot;
 
@@ -1420,6 +1462,46 @@ static void __init xen_setup_stackprotector(void)
 	pv_cpu_ops.load_gdt = xen_load_gdt;
 }
 
+/*
+ * A PV guest starts with default flags that are not set for PVH, set them
+ * here asap.
+ */
+static void xen_pvh_set_cr_flags(int cpu)
+{
+
+	/* Some of these are setup in 'secondary_startup_64'. The others:
+	 * X86_CR0_TS, X86_CR0_PE, X86_CR0_ET are set by Xen for HVM guests
+	 * (which PVH shared codepaths), while X86_CR0_PG is for PVH. */
+	write_cr0(read_cr0() | X86_CR0_MP | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM);
+}
+
+/*
+ * Note, that it is ref - because the only caller of this after init
+ * is PVH which is not going to use xen_load_gdt_boot or other
+ * __init functions.
+ */
+void __ref xen_pvh_secondary_vcpu_init(int cpu)
+{
+	xen_setup_gdt(cpu);
+	xen_pvh_set_cr_flags(cpu);
+}
+
+static void __init xen_pvh_early_guest_init(void)
+{
+	if (!xen_feature(XENFEAT_auto_translated_physmap))
+		return;
+
+	if (!xen_feature(XENFEAT_hvm_callback_vector))
+		return;
+
+	xen_have_vector_callback = 1;
+	xen_pvh_set_cr_flags(0);
+
+#ifdef CONFIG_X86_32
+	BUG(); /* PVH: Implement proper support. */
+#endif
+}
+
 /* First C function to be called on Xen boot */
 asmlinkage void __init xen_start_kernel(void)
 {
@@ -1431,13 +1513,16 @@ asmlinkage void __init xen_start_kernel(void)
 
 	xen_domain_type = XEN_PV_DOMAIN;
 
+	xen_setup_features();
+	xen_pvh_early_guest_init();
 	xen_setup_machphys_mapping();
 
 	/* Install Xen paravirt ops */
 	pv_info = xen_info;
 	pv_init_ops = xen_init_ops;
-	pv_cpu_ops = xen_cpu_ops;
 	pv_apic_ops = xen_apic_ops;
+	if (!xen_pvh_domain())
+		pv_cpu_ops = xen_cpu_ops;
 
 	x86_init.resources.memory_setup = xen_memory_setup;
 	x86_init.oem.arch_setup = xen_arch_setup;
@@ -1469,17 +1554,14 @@ asmlinkage void __init xen_start_kernel(void)
 	/* Work out if we support NX */
 	x86_configure_nx();
 
-	xen_setup_features();
-
 	/* Get mfn list */
-	if (!xen_feature(XENFEAT_auto_translated_physmap))
-		xen_build_dynamic_phys_to_machine();
+	xen_build_dynamic_phys_to_machine();
 
 	/*
 	 * Set up kernel GDT and segment registers, mainly so that
 	 * -fstack-protector code can be executed.
 	 */
-	xen_setup_stackprotector();
+	xen_setup_gdt(0);
 
 	xen_init_irq_ops();
 	xen_init_cpuid_mask();
@@ -1548,14 +1630,18 @@ asmlinkage void __init xen_start_kernel(void)
 	/* set the limit of our address space */
 	xen_reserve_top();
 
-	/* We used to do this in xen_arch_setup, but that is too late on AMD
-	 * were early_cpu_init (run before ->arch_setup()) calls early_amd_init
-	 * which pokes 0xcf8 port.
-	 */
-	set_iopl.iopl = 1;
-	rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
-	if (rc != 0)
-		xen_raw_printk("physdev_op failed %d\n", rc);
+	/* PVH: runs at default kernel iopl of 0 */
+	if (!xen_pvh_domain()) {
+		/*
+		 * We used to do this in xen_arch_setup, but that is too late
+		 * on AMD were early_cpu_init (run before ->arch_setup()) calls
+		 * early_amd_init which pokes 0xcf8 port.
+		 */
+		set_iopl.iopl = 1;
+		rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
+		if (rc != 0)
+			xen_raw_printk("physdev_op failed %d\n", rc);
+	}
 
 #ifdef CONFIG_X86_32
 	/* set up basic CPUID stuff */
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index 3a5f55d51907..103c93f874b2 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -125,3 +125,66 @@ void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
 	apply_to_page_range(&init_mm, (unsigned long)shared,
 			    PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL);
 }
+#ifdef CONFIG_XEN_PVH
+#include <xen/balloon.h>
+#include <xen/events.h>
+#include <xen/xen.h>
+#include <linux/slab.h>
+static int __init xlated_setup_gnttab_pages(void)
+{
+	struct page **pages;
+	xen_pfn_t *pfns;
+	int rc;
+	unsigned int i;
+	unsigned long nr_grant_frames = gnttab_max_grant_frames();
+
+	BUG_ON(nr_grant_frames == 0);
+	pages = kcalloc(nr_grant_frames, sizeof(pages[0]), GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+
+	pfns = kcalloc(nr_grant_frames, sizeof(pfns[0]), GFP_KERNEL);
+	if (!pfns) {
+		kfree(pages);
+		return -ENOMEM;
+	}
+	rc = alloc_xenballooned_pages(nr_grant_frames, pages, 0 /* lowmem */);
+	if (rc) {
+		pr_warn("%s Couldn't balloon alloc %ld pfns rc:%d\n", __func__,
+			nr_grant_frames, rc);
+		kfree(pages);
+		kfree(pfns);
+		return rc;
+	}
+	for (i = 0; i < nr_grant_frames; i++)
+		pfns[i] = page_to_pfn(pages[i]);
+
+	rc = arch_gnttab_map_shared(pfns, nr_grant_frames, nr_grant_frames,
+				    &xen_auto_xlat_grant_frames.vaddr);
+
+	kfree(pages);
+	if (rc) {
+		pr_warn("%s Couldn't map %ld pfns rc:%d\n", __func__,
+			nr_grant_frames, rc);
+		free_xenballooned_pages(nr_grant_frames, pages);
+		kfree(pfns);
+		return rc;
+	}
+
+	xen_auto_xlat_grant_frames.pfn = pfns;
+	xen_auto_xlat_grant_frames.count = nr_grant_frames;
+
+	return 0;
+}
+
+static int __init xen_pvh_gnttab_setup(void)
+{
+	if (!xen_pvh_domain())
+		return -ENODEV;
+
+	return xlated_setup_gnttab_pages();
+}
+/* Call it _before_ __gnttab_init as we need to initialize the
+ * xen_auto_xlat_grant_frames first. */
+core_initcall(xen_pvh_gnttab_setup);
+#endif
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 0da7f863056f..76ca326105f7 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -5,6 +5,7 @@
 #include <xen/interface/xen.h>
 #include <xen/interface/sched.h>
 #include <xen/interface/vcpu.h>
+#include <xen/features.h>
 #include <xen/events.h>
 
 #include <asm/xen/hypercall.h>
@@ -128,6 +129,8 @@ static const struct pv_irq_ops xen_irq_ops __initconst = {
 
 void __init xen_init_irq_ops(void)
 {
-	pv_irq_ops = xen_irq_ops;
+	/* For PVH we use default pv_irq_ops settings. */
+	if (!xen_feature(XENFEAT_hvm_callback_vector))
+		pv_irq_ops = xen_irq_ops;
 	x86_init.irqs.intr_init = xen_init_IRQ;
 }
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index ce563be09cc1..c1d406f35523 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1198,44 +1198,40 @@ static void __init xen_cleanhighmap(unsigned long vaddr,
 	 * instead of somewhere later and be confusing. */
 	xen_mc_flush();
 }
-#endif
-static void __init xen_pagetable_init(void)
+static void __init xen_pagetable_p2m_copy(void)
 {
-#ifdef CONFIG_X86_64
 	unsigned long size;
 	unsigned long addr;
-#endif
-	paging_init();
-	xen_setup_shared_info();
-#ifdef CONFIG_X86_64
-	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-		unsigned long new_mfn_list;
+	unsigned long new_mfn_list;
+
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return;
+
+	size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
 
-		size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
-
-		/* On 32-bit, we get zero so this never gets executed. */
-		new_mfn_list = xen_revector_p2m_tree();
-		if (new_mfn_list && new_mfn_list != xen_start_info->mfn_list) {
+	new_mfn_list = xen_revector_p2m_tree();
+	/* No memory or already called. */
+	if (!new_mfn_list || new_mfn_list == xen_start_info->mfn_list)
+		return;
+
 	/* using __ka address and sticking INVALID_P2M_ENTRY! */
 	memset((void *)xen_start_info->mfn_list, 0xff, size);
 
 	/* We should be in __ka space. */
 	BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map);
 	addr = xen_start_info->mfn_list;
 	/* We roundup to the PMD, which means that if anybody at this stage is
 	 * using the __ka address of xen_start_info or xen_start_info->shared_info
 	 * they are in going to crash. Fortunatly we have already revectored
 	 * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */
 	size = roundup(size, PMD_SIZE);
 	xen_cleanhighmap(addr, addr + size);
 
 	size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
 	memblock_free(__pa(xen_start_info->mfn_list), size);
 	/* And revector! Bye bye old array */
 	xen_start_info->mfn_list = new_mfn_list;
-	} else
-		goto skip;
-	}
+
 	/* At this stage, cleanup_highmap has already cleaned __ka space
 	 * from _brk_limit way up to the max_pfn_mapped (which is the end of
 	 * the ramdisk). We continue on, erasing PMD entries that point to page
@@ -1255,7 +1251,15 @@ static void __init xen_pagetable_init(void)
 	 * anything at this stage. */
 	xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
 #endif
-skip:
+}
+#endif
+
+static void __init xen_pagetable_init(void)
+{
+	paging_init();
+	xen_setup_shared_info();
+#ifdef CONFIG_X86_64
+	xen_pagetable_p2m_copy();
 #endif
 	xen_post_allocator_init();
 }
@@ -1753,6 +1757,10 @@ static void set_page_prot_flags(void *addr, pgprot_t prot, unsigned long flags)
 	unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
 	pte_t pte = pfn_pte(pfn, prot);
 
+	/* For PVH no need to set R/O or R/W to pin them or unpin them. */
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return;
+
 	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags))
 		BUG();
 }
@@ -1863,6 +1871,7 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
  * but that's enough to get __va working. We need to fill in the rest
  * of the physical mapping once some sort of allocator has been set
  * up.
+ * NOTE: for PVH, the page tables are native.
  */
 void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 {
@@ -1884,17 +1893,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	/* Zap identity mapping */
 	init_level4_pgt[0] = __pgd(0);
 
-	/* Pre-constructed entries are in pfn, so convert to mfn */
-	/* L4[272] -> level3_ident_pgt
-	 * L4[511] -> level3_kernel_pgt */
-	convert_pfn_mfn(init_level4_pgt);
-
-	/* L3_i[0] -> level2_ident_pgt */
-	convert_pfn_mfn(level3_ident_pgt);
-	/* L3_k[510] -> level2_kernel_pgt
-	 * L3_i[511] -> level2_fixmap_pgt */
-	convert_pfn_mfn(level3_kernel_pgt);
-
+	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+		/* Pre-constructed entries are in pfn, so convert to mfn */
+		/* L4[272] -> level3_ident_pgt
+		 * L4[511] -> level3_kernel_pgt */
+		convert_pfn_mfn(init_level4_pgt);
+
+		/* L3_i[0] -> level2_ident_pgt */
+		convert_pfn_mfn(level3_ident_pgt);
+		/* L3_k[510] -> level2_kernel_pgt
+		 * L3_i[511] -> level2_fixmap_pgt */
+		convert_pfn_mfn(level3_kernel_pgt);
+	}
 	/* We get [511][511] and have Xen's version of level2_kernel_pgt */
 	l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
 	l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
@@ -1918,31 +1928,33 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	copy_page(level2_fixmap_pgt, l2);
 	/* Note that we don't do anything with level1_fixmap_pgt which
 	 * we don't need. */
+	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+		/* Make pagetable pieces RO */
+		set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
+		set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
+		set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
+		set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
+		set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
+		set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
+		set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
+
+		/* Pin down new L4 */
+		pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
+				  PFN_DOWN(__pa_symbol(init_level4_pgt)));
+
+		/* Unpin Xen-provided one */
+		pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
 
-	/* Make pagetable pieces RO */
-	set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
-	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
-	set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
-	set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
-	set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
-	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
-	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
-
-	/* Pin down new L4 */
-	pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
-			  PFN_DOWN(__pa_symbol(init_level4_pgt)));
-
-	/* Unpin Xen-provided one */
-	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
-
-	/*
-	 * At this stage there can be no user pgd, and no page
-	 * structure to attach it to, so make sure we just set kernel
-	 * pgd.
-	 */
-	xen_mc_batch();
-	__xen_write_cr3(true, __pa(init_level4_pgt));
-	xen_mc_issue(PARAVIRT_LAZY_CPU);
+		/*
+		 * At this stage there can be no user pgd, and no page
+		 * structure to attach it to, so make sure we just set kernel
+		 * pgd.
+		 */
+		xen_mc_batch();
+		__xen_write_cr3(true, __pa(init_level4_pgt));
+		xen_mc_issue(PARAVIRT_LAZY_CPU);
+	} else
+		native_write_cr3(__pa(init_level4_pgt));
 
 	/* We can't that easily rip out L3 and L2, as the Xen pagetables are
 	 * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for
@@ -2103,6 +2115,9 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 
 static void __init xen_post_allocator_init(void)
 {
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return;
+
 	pv_mmu_ops.set_pte = xen_set_pte;
 	pv_mmu_ops.set_pmd = xen_set_pmd;
 	pv_mmu_ops.set_pud = xen_set_pud;
@@ -2207,6 +2222,15 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
 void __init xen_init_mmu_ops(void)
 {
 	x86_init.paging.pagetable_init = xen_pagetable_init;
+
+	/* Optimization - we can use the HVM one but it has no idea which
+	 * VCPUs are descheduled - which means that it will needlessly IPI
+	 * them. Xen knows so let it do the job.
+	 */
+	if (xen_feature(XENFEAT_auto_translated_physmap)) {
+		pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others;
+		return;
+	}
 	pv_mmu_ops = xen_mmu_ops;
 
 	memset(dummy_mapping, 0xff, PAGE_SIZE);
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 2ae8699e8767..696c694986d0 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -280,6 +280,9 @@ void __ref xen_build_mfn_list_list(void)
 {
 	unsigned long pfn;
 
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return;
+
 	/* Pre-initialize p2m_top_mfn to be completely missing */
 	if (p2m_top_mfn == NULL) {
 		p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
@@ -336,6 +339,9 @@ void __ref xen_build_mfn_list_list(void)
 
 void xen_setup_mfn_list_list(void)
 {
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return;
+
 	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
 
 	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
@@ -346,10 +352,15 @@ void xen_setup_mfn_list_list(void)
 /* Set up p2m_top to point to the domain-builder provided p2m pages */
 void __init xen_build_dynamic_phys_to_machine(void)
 {
-	unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
-	unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
+	unsigned long *mfn_list;
+	unsigned long max_pfn;
 	unsigned long pfn;
 
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return;
+
+	mfn_list = (unsigned long *)xen_start_info->mfn_list;
+	max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
 	xen_max_p2m_pfn = max_pfn;
 
 	p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
index 0a7852483ffe..a8261716d58d 100644
--- a/arch/x86/xen/platform-pci-unplug.c
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -30,10 +30,9 @@
 #define XEN_PLATFORM_ERR_PROTOCOL -2
 #define XEN_PLATFORM_ERR_BLACKLIST -3
 
-/* store the value of xen_emul_unplug after the unplug is done */
-int xen_platform_pci_unplug;
-EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
 #ifdef CONFIG_XEN_PVHVM
+/* store the value of xen_emul_unplug after the unplug is done */
+static int xen_platform_pci_unplug;
 static int xen_emul_unplug;
 
 static int check_platform_magic(void)
@@ -69,6 +68,80 @@ static int check_platform_magic(void)
 	return 0;
 }
 
+bool xen_has_pv_devices()
+{
+	if (!xen_domain())
+		return false;
+
+	/* PV domains always have them. */
+	if (xen_pv_domain())
+		return true;
+
+	/* And user has xen_platform_pci=0 set in guest config as
+	 * driver did not modify the value. */
+	if (xen_platform_pci_unplug == 0)
+		return false;
+
+	if (xen_platform_pci_unplug & XEN_UNPLUG_NEVER)
+		return false;
+
+	if (xen_platform_pci_unplug & XEN_UNPLUG_ALL)
+		return true;
+
+	/* This is an odd one - we are going to run legacy
+	 * and PV drivers at the same time. */
+	if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY)
+		return true;
+
+	/* And the caller has to follow with xen_pv_{disk,nic}_devices
+	 * to be certain which driver can load. */
+	return false;
+}
+EXPORT_SYMBOL_GPL(xen_has_pv_devices);
+
+static bool __xen_has_pv_device(int state)
+{
+	/* HVM domains might or might not */
+	if (xen_hvm_domain() && (xen_platform_pci_unplug & state))
+		return true;
+
+	return xen_has_pv_devices();
+}
+
+bool xen_has_pv_nic_devices(void)
+{
+	return __xen_has_pv_device(XEN_UNPLUG_ALL_NICS | XEN_UNPLUG_ALL);
+}
+EXPORT_SYMBOL_GPL(xen_has_pv_nic_devices);
+
+bool xen_has_pv_disk_devices(void)
+{
+	return __xen_has_pv_device(XEN_UNPLUG_ALL_IDE_DISKS |
+				   XEN_UNPLUG_AUX_IDE_DISKS | XEN_UNPLUG_ALL);
+}
+EXPORT_SYMBOL_GPL(xen_has_pv_disk_devices);
+
+/*
+ * This one is odd - it determines whether you want to run PV _and_
+ * legacy (IDE) drivers together. This combination is only possible
+ * under HVM.
+ */
+bool xen_has_pv_and_legacy_disk_devices(void)
+{
+	if (!xen_domain())
+		return false;
+
+	/* N.B. This is only ever used in HVM mode */
+	if (xen_pv_domain())
+		return false;
+
+	if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY)
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(xen_has_pv_and_legacy_disk_devices);
+
 void xen_unplug_emulated_devices(void)
 {
 	int r;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 68c054f59de6..dd5f905e33d5 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -27,6 +27,7 @@
 #include <xen/interface/memory.h>
 #include <xen/interface/physdev.h>
 #include <xen/features.h>
+#include "mmu.h"
 #include "xen-ops.h"
 #include "vdso.h"
 
@@ -81,6 +82,9 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
 
 	memblock_reserve(start, size);
 
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return;
+
 	xen_max_p2m_pfn = PFN_DOWN(start + size);
 	for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
 		unsigned long mfn = pfn_to_mfn(pfn);
@@ -103,6 +107,7 @@ static unsigned long __init xen_do_chunk(unsigned long start,
 		.domid = DOMID_SELF
 	};
 	unsigned long len = 0;
+	int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap);
 	unsigned long pfn;
 	int ret;
 
@@ -116,7 +121,7 @@ static unsigned long __init xen_do_chunk(unsigned long start,
 				continue;
 			frame = mfn;
 		} else {
-			if (mfn != INVALID_P2M_ENTRY)
+			if (!xlated_phys && mfn != INVALID_P2M_ENTRY)
 				continue;
 			frame = pfn;
 		}
@@ -154,6 +159,13 @@ static unsigned long __init xen_do_chunk(unsigned long start,
 static unsigned long __init xen_release_chunk(unsigned long start,
 					      unsigned long end)
 {
+	/*
+	 * Xen already ballooned out the E820 non RAM regions for us
+	 * and set them up properly in EPT.
+	 */
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return end - start;
+
 	return xen_do_chunk(start, end, true);
 }
 
@@ -222,7 +234,13 @@ static void __init xen_set_identity_and_release_chunk(
 	 * (except for the ISA region which must be 1:1 mapped) to
 	 * release the refcounts (in Xen) on the original frames.
 	 */
-	for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) {
+
+	/*
+	 * PVH E820 matches the hypervisor's P2M which means we need to
+	 * account for the proper values of *release and *identity.
+	 */
+	for (pfn = start_pfn; !xen_feature(XENFEAT_auto_translated_physmap) &&
+	     pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) {
 		pte_t pte = __pte_ma(0);
 
 		if (pfn < PFN_UP(ISA_END_ADDRESS))
@@ -563,16 +581,13 @@ void xen_enable_nmi(void)
 	BUG();
 #endif
 }
-void __init xen_arch_setup(void)
+void __init xen_pvmmu_arch_setup(void)
 {
-	xen_panic_handler_init();
-
 	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
 	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
 
-	if (!xen_feature(XENFEAT_auto_translated_physmap))
-		HYPERVISOR_vm_assist(VMASST_CMD_enable,
-				     VMASST_TYPE_pae_extended_cr3);
+	HYPERVISOR_vm_assist(VMASST_CMD_enable,
+			     VMASST_TYPE_pae_extended_cr3);
 
 	if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
 	    register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
@@ -581,6 +596,15 @@ void __init xen_arch_setup(void)
 	xen_enable_sysenter();
 	xen_enable_syscall();
 	xen_enable_nmi();
+}
+
+/* This function is not called for HVM domains */
+void __init xen_arch_setup(void)
+{
+	xen_panic_handler_init();
+	if (!xen_feature(XENFEAT_auto_translated_physmap))
+		xen_pvmmu_arch_setup();
+
 #ifdef CONFIG_ACPI
 	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
 		printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index c36b325abd83..a18eadd8bb40 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -73,9 +73,11 @@ static void cpu_bringup(void)
 	touch_softlockup_watchdog();
 	preempt_disable();
 
-	xen_enable_sysenter();
-	xen_enable_syscall();
-
+	/* PVH runs in ring 0 and allows us to do native syscalls. Yay! */
+	if (!xen_feature(XENFEAT_supervisor_mode_kernel)) {
+		xen_enable_sysenter();
+		xen_enable_syscall();
+	}
 	cpu = smp_processor_id();
 	smp_store_cpu_info(cpu);
 	cpu_data(cpu).x86_max_cores = 1;
@@ -97,8 +99,14 @@ static void cpu_bringup(void)
 	wmb(); /* make sure everything is out */
 }
 
-static void cpu_bringup_and_idle(void)
+/* Note: cpu parameter is only relevant for PVH */
+static void cpu_bringup_and_idle(int cpu)
 {
+#ifdef CONFIG_X86_64
+	if (xen_feature(XENFEAT_auto_translated_physmap) &&
+	    xen_feature(XENFEAT_supervisor_mode_kernel))
+		xen_pvh_secondary_vcpu_init(cpu);
+#endif
 	cpu_bringup();
 	cpu_startup_entry(CPUHP_ONLINE);
 }
@@ -274,9 +282,10 @@ static void __init xen_smp_prepare_boot_cpu(void)
 	native_smp_prepare_boot_cpu();
 
 	if (xen_pv_domain()) {
-		/* We've switched to the "real" per-cpu gdt, so make sure the
-		   old memory can be recycled */
-		make_lowmem_page_readwrite(xen_initial_gdt);
+		if (!xen_feature(XENFEAT_writable_page_tables))
+			/* We've switched to the "real" per-cpu gdt, so make
+			 * sure the old memory can be recycled. */
+			make_lowmem_page_readwrite(xen_initial_gdt);
 
 #ifdef CONFIG_X86_32
 		/*
@@ -360,22 +369,21 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 
 	gdt = get_cpu_gdt_table(cpu);
 
-	ctxt->flags = VGCF_IN_KERNEL;
-	ctxt->user_regs.ss = __KERNEL_DS;
 #ifdef CONFIG_X86_32
+	/* Note: PVH is not yet supported on x86_32. */
 	ctxt->user_regs.fs = __KERNEL_PERCPU;
 	ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
-#else
-	ctxt->gs_base_kernel = per_cpu_offset(cpu);
 #endif
 	ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
 
 	memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
 
-	{
+	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+		ctxt->flags = VGCF_IN_KERNEL;
 		ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
 		ctxt->user_regs.ds = __USER_DS;
 		ctxt->user_regs.es = __USER_DS;
+		ctxt->user_regs.ss = __KERNEL_DS;
 
 		xen_copy_trap_info(ctxt->trap_ctxt);
 
@@ -396,18 +404,27 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 #ifdef CONFIG_X86_32
 		ctxt->event_callback_cs = __KERNEL_CS;
 		ctxt->failsafe_callback_cs = __KERNEL_CS;
+#else
+		ctxt->gs_base_kernel = per_cpu_offset(cpu);
 #endif
 		ctxt->event_callback_eip =
 			(unsigned long)xen_hypervisor_callback;
 		ctxt->failsafe_callback_eip =
 			(unsigned long)xen_failsafe_callback;
+		ctxt->user_regs.cs = __KERNEL_CS;
+		per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
+#ifdef CONFIG_X86_32
 	}
-	ctxt->user_regs.cs = __KERNEL_CS;
+#else
+	} else
+		/* N.B. The user_regs.eip (cpu_bringup_and_idle) is called with
+		 * %rdi having the cpu number - which means are passing in
+		 * as the first parameter the cpu. Subtle!
+		 */
+		ctxt->user_regs.rdi = cpu;
+#endif
 	ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
-
-	per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
 	ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
-
 	if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
 		BUG();
 
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 12a1ca707b94..7b78f88c1707 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -446,6 +446,7 @@ void xen_setup_timer(int cpu)
 				      IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER|
 				      IRQF_FORCE_RESUME,
 				      name, NULL);
+	(void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX);
 
 	memcpy(evt, xen_clockevent, sizeof(*evt));
 
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 7faed5869e5b..485b69585540 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -11,8 +11,28 @@
 #include <asm/page_types.h>
 
 #include <xen/interface/elfnote.h>
+#include <xen/interface/features.h>
 #include <asm/xen/interface.h>
 
+#ifdef CONFIG_XEN_PVH
+#define PVH_FEATURES_STR "|writable_descriptor_tables|auto_translated_physmap|supervisor_mode_kernel"
+/* Note the lack of 'hvm_callback_vector'. Older hypervisor will
+ * balk at this being part of XEN_ELFNOTE_FEATURES, so we put it in
+ * XEN_ELFNOTE_SUPPORTED_FEATURES which older hypervisors will ignore.
+ */
+#define PVH_FEATURES ((1 << XENFEAT_writable_page_tables) | \
+		      (1 << XENFEAT_auto_translated_physmap) | \
+		      (1 << XENFEAT_supervisor_mode_kernel) | \
+		      (1 << XENFEAT_hvm_callback_vector))
+/* The XENFEAT_writable_page_tables is not stricly neccessary as we set that
+ * up regardless whether this CONFIG option is enabled or not, but it
+ * clarifies what the right flags need to be.
+ */
+#else
+#define PVH_FEATURES_STR ""
+#define PVH_FEATURES (0)
+#endif
+
 	__INIT
 ENTRY(startup_xen)
 	cld
@@ -95,7 +115,10 @@ NEXT_HYPERCALL(arch_6)
 #endif
 	ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen)
 	ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page)
-	ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb")
+	ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .ascii "!writable_page_tables|pae_pgdir_above_4gb"; .asciz PVH_FEATURES_STR)
+	ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES, .long (PVH_FEATURES) |
+		(1 << XENFEAT_writable_page_tables) |
+		(1 << XENFEAT_dom0))
 	ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes")
 	ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
 	ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 95f8c6142328..1cb6f4c37300 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -123,4 +123,5 @@ __visible void xen_adjust_exception_frame(void);
 
 extern int xen_panic_handler_init(void);
 
+void xen_pvh_secondary_vcpu_init(int cpu);
 #endif /* XEN_OPS_H */
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index c4a4c9006288..f9c43f91f03e 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1356,7 +1356,7 @@ static int blkfront_probe(struct xenbus_device *dev,
 		char *type;
 		int len;
 		/* no unplug has been done: do not hook devices != xen vbds */
-		if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) {
+		if (xen_has_pv_and_legacy_disk_devices()) {
 			int major;
 
 			if (!VDEV_IS_EXTENDED(vdevice))
@@ -2079,7 +2079,7 @@ static int __init xlblk_init(void)
 	if (!xen_domain())
 		return -ENODEV;
 
-	if (xen_hvm_domain() && !xen_platform_pci_unplug)
+	if (!xen_has_pv_disk_devices())
 		return -ENODEV;
 
 	if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c
index 92b097064df5..2064b4527040 100644
--- a/drivers/char/tpm/xen-tpmfront.c
+++ b/drivers/char/tpm/xen-tpmfront.c
@@ -17,6 +17,7 @@
 #include <xen/xenbus.h>
 #include <xen/page.h>
 #include "tpm.h"
+#include <xen/platform_pci.h>
 
 struct tpm_private {
 	struct tpm_chip *chip;
@@ -378,6 +379,9 @@ static int __init xen_tpmfront_init(void)
 	if (!xen_domain())
 		return -ENODEV;
 
+	if (!xen_has_pv_devices())
+		return -ENODEV;
+
 	return xenbus_register_frontend(&tpmfront_driver);
 }
 module_init(xen_tpmfront_init);
diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c
index e21c1816a8f9..fbfdc10573be 100644
--- a/drivers/input/misc/xen-kbdfront.c
+++ b/drivers/input/misc/xen-kbdfront.c
@@ -29,6 +29,7 @@
 #include <xen/interface/io/fbif.h>
 #include <xen/interface/io/kbdif.h>
 #include <xen/xenbus.h>
+#include <xen/platform_pci.h>
 
 struct xenkbd_info {
 	struct input_dev *kbd;
@@ -380,6 +381,9 @@ static int __init xenkbd_init(void)
 	if (xen_initial_domain())
 		return -ENODEV;
 
+	if (!xen_has_pv_devices())
+		return -ENODEV;
+
 	return xenbus_register_frontend(&xenkbd_driver);
 }
 
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index e59acb1daa23..2ab82fe75ede 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -2115,7 +2115,7 @@ static int __init netif_init(void)
 	if (!xen_domain())
 		return -ENODEV;
 
-	if (xen_hvm_domain() && !xen_platform_pci_unplug)
+	if (!xen_has_pv_nic_devices())
 		return -ENODEV;
 
 	pr_info("Initialising Xen virtual ethernet driver\n");
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index d1cd60f51f87..179b8edc2262 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -20,6 +20,7 @@
 #include <linux/workqueue.h>
 #include <linux/bitops.h>
 #include <linux/time.h>
+#include <xen/platform_pci.h>
 
 #include <asm/xen/swiotlb-xen.h>
 #define INVALID_GRANT_REF (0)
@@ -1146,6 +1147,9 @@ static int __init pcifront_init(void)
 	if (!xen_pv_domain() || xen_initial_domain())
 		return -ENODEV;
 
+	if (!xen_has_pv_devices())
+		return -ENODEV;
+
 	pci_frontend_registrar(1 /* enable */);
 
 	return xenbus_register_frontend(&xenpci_driver);
diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c
index cd005c227a23..901014bbc821 100644
--- a/drivers/video/xen-fbfront.c
+++ b/drivers/video/xen-fbfront.c
@@ -35,6 +35,7 @@
 #include <xen/interface/io/fbif.h>
 #include <xen/interface/io/protocols.h>
 #include <xen/xenbus.h>
+#include <xen/platform_pci.h>
 
 struct xenfb_info {
 	unsigned char *fb;
@@ -692,13 +693,16 @@ static DEFINE_XENBUS_DRIVER(xenfb, ,
 
 static int __init xenfb_init(void)
 {
-	if (!xen_pv_domain())
+	if (!xen_domain())
 		return -ENODEV;
 
 	/* Nothing to do if running in dom0. */
 	if (xen_initial_domain())
 		return -ENODEV;
 
+	if (!xen_has_pv_devices())
+		return -ENODEV;
+
 	return xenbus_register_frontend(&xenfb_driver);
 }
 
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 12ba6db65142..38fb36e1c592 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -3,7 +3,6 @@ menu "Xen driver support"
 
 config XEN_BALLOON
 	bool "Xen memory balloon driver"
-	depends on !ARM
 	default y
 	help
 	  The balloon driver allows the Xen domain to request more memory from
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 14fe79d8634a..d75c811bfa56 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -2,7 +2,8 @@ ifeq ($(filter y, $(CONFIG_ARM) $(CONFIG_ARM64)),)
 obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
 endif
 obj-$(CONFIG_X86)		+= fallback.o
-obj-y	+= grant-table.o features.o events.o balloon.o manage.o
+obj-y	+= grant-table.o features.o balloon.o manage.o
+obj-y	+= events/
 obj-y	+= xenbus/
 
 nostackp := $(call cc-option, -fno-stack-protector)
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 4c02e2b94103..37d06ea624aa 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -157,13 +157,6 @@ static struct page *balloon_retrieve(bool prefer_highmem)
 	return page;
 }
 
-static struct page *balloon_first_page(void)
-{
-	if (list_empty(&ballooned_pages))
-		return NULL;
-	return list_entry(ballooned_pages.next, struct page, lru);
-}
-
 static struct page *balloon_next_page(struct page *page)
 {
 	struct list_head *next = page->lru.next;
@@ -328,7 +321,7 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
 	if (nr_pages > ARRAY_SIZE(frame_list))
 		nr_pages = ARRAY_SIZE(frame_list);
 
-	page = balloon_first_page();
+	page = list_first_entry_or_null(&ballooned_pages, struct page, lru);
 	for (i = 0; i < nr_pages; i++) {
 		if (!page) {
 			nr_pages = i;
diff --git a/drivers/xen/dbgp.c b/drivers/xen/dbgp.c
index f3ccc80a455f..8145a59fd9f6 100644
--- a/drivers/xen/dbgp.c
+++ b/drivers/xen/dbgp.c
@@ -19,7 +19,7 @@ static int xen_dbgp_op(struct usb_hcd *hcd, int op)
 	dbgp.op = op;
 
 #ifdef CONFIG_PCI
-	if (ctrlr->bus == &pci_bus_type) {
+	if (dev_is_pci(ctrlr)) {
 		const struct pci_dev *pdev = to_pci_dev(ctrlr);
 
 		dbgp.u.pci.seg = pci_domain_nr(pdev->bus);
diff --git a/drivers/xen/events/Makefile b/drivers/xen/events/Makefile
new file mode 100644
index 000000000000..62be55cd981d
--- /dev/null
+++ b/drivers/xen/events/Makefile
@@ -0,0 +1,5 @@
+obj-y += events.o
+
+events-y += events_base.o
+events-y += events_2l.o
+events-y += events_fifo.o
diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c
new file mode 100644
index 000000000000..d7ff91757307
--- /dev/null
+++ b/drivers/xen/events/events_2l.c
@@ -0,0 +1,372 @@
1/*
2 * Xen event channels (2-level ABI)
3 *
4 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
5 */
6
7#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
8
9#include <linux/linkage.h>
10#include <linux/interrupt.h>
11#include <linux/irq.h>
12#include <linux/module.h>
13
14#include <asm/sync_bitops.h>
15#include <asm/xen/hypercall.h>
16#include <asm/xen/hypervisor.h>
17
18#include <xen/xen.h>
19#include <xen/xen-ops.h>
20#include <xen/events.h>
21#include <xen/interface/xen.h>
22#include <xen/interface/event_channel.h>
23
24#include "events_internal.h"
25
26/*
27 * Note sizeof(xen_ulong_t) can be more than sizeof(unsigned long). Be
28 * careful to only use bitops which allow for this (e.g
29 * test_bit/find_first_bit and friends but not __ffs) and to pass
30 * BITS_PER_EVTCHN_WORD as the bitmask length.
31 */
32#define BITS_PER_EVTCHN_WORD (sizeof(xen_ulong_t)*8)
33/*
34 * Make a bitmask (i.e. unsigned long *) of a xen_ulong_t
35 * array. Primarily to avoid long lines (hence the terse name).
36 */
37#define BM(x) (unsigned long *)(x)
 38/* Find the first set bit in an evtchn mask */
39#define EVTCHN_FIRST_BIT(w) find_first_bit(BM(&(w)), BITS_PER_EVTCHN_WORD)
40
41static DEFINE_PER_CPU(xen_ulong_t [EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD],
42 cpu_evtchn_mask);
43
44static unsigned evtchn_2l_max_channels(void)
45{
46 return EVTCHN_2L_NR_CHANNELS;
47}
48
49static void evtchn_2l_bind_to_cpu(struct irq_info *info, unsigned cpu)
50{
51 clear_bit(info->evtchn, BM(per_cpu(cpu_evtchn_mask, info->cpu)));
52 set_bit(info->evtchn, BM(per_cpu(cpu_evtchn_mask, cpu)));
53}
54
55static void evtchn_2l_clear_pending(unsigned port)
56{
57 struct shared_info *s = HYPERVISOR_shared_info;
58 sync_clear_bit(port, BM(&s->evtchn_pending[0]));
59}
60
61static void evtchn_2l_set_pending(unsigned port)
62{
63 struct shared_info *s = HYPERVISOR_shared_info;
64 sync_set_bit(port, BM(&s->evtchn_pending[0]));
65}
66
67static bool evtchn_2l_is_pending(unsigned port)
68{
69 struct shared_info *s = HYPERVISOR_shared_info;
70 return sync_test_bit(port, BM(&s->evtchn_pending[0]));
71}
72
73static bool evtchn_2l_test_and_set_mask(unsigned port)
74{
75 struct shared_info *s = HYPERVISOR_shared_info;
76 return sync_test_and_set_bit(port, BM(&s->evtchn_mask[0]));
77}
78
79static void evtchn_2l_mask(unsigned port)
80{
81 struct shared_info *s = HYPERVISOR_shared_info;
82 sync_set_bit(port, BM(&s->evtchn_mask[0]));
83}
84
85static void evtchn_2l_unmask(unsigned port)
86{
87 struct shared_info *s = HYPERVISOR_shared_info;
88 unsigned int cpu = get_cpu();
89 int do_hypercall = 0, evtchn_pending = 0;
90
91 BUG_ON(!irqs_disabled());
92
93 if (unlikely((cpu != cpu_from_evtchn(port))))
94 do_hypercall = 1;
95 else {
96 /*
97 * Need to clear the mask before checking pending to
98 * avoid a race with an event becoming pending.
99 *
100 * EVTCHNOP_unmask will only trigger an upcall if the
101 * mask bit was set, so if a hypercall is needed
102 * remask the event.
103 */
104 sync_clear_bit(port, BM(&s->evtchn_mask[0]));
105 evtchn_pending = sync_test_bit(port, BM(&s->evtchn_pending[0]));
106
107 if (unlikely(evtchn_pending && xen_hvm_domain())) {
108 sync_set_bit(port, BM(&s->evtchn_mask[0]));
109 do_hypercall = 1;
110 }
111 }
112
113 /* Slow path (hypercall) if this is a non-local port or if this is
114 * an hvm domain and an event is pending (hvm domains don't have
115 * their own implementation of irq_enable). */
116 if (do_hypercall) {
117 struct evtchn_unmask unmask = { .port = port };
118 (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
119 } else {
120 struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
121
122 /*
123 * The following is basically the equivalent of
124 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
125 * the interrupt edge' if the channel is masked.
126 */
127 if (evtchn_pending &&
128 !sync_test_and_set_bit(port / BITS_PER_EVTCHN_WORD,
129 BM(&vcpu_info->evtchn_pending_sel)))
130 vcpu_info->evtchn_upcall_pending = 1;
131 }
132
133 put_cpu();
134}
135
136static DEFINE_PER_CPU(unsigned int, current_word_idx);
137static DEFINE_PER_CPU(unsigned int, current_bit_idx);
138
139/*
140 * Mask out the i least significant bits of w
141 */
142#define MASK_LSBS(w, i) (w & ((~((xen_ulong_t)0UL)) << i))
143
144static inline xen_ulong_t active_evtchns(unsigned int cpu,
145 struct shared_info *sh,
146 unsigned int idx)
147{
148 return sh->evtchn_pending[idx] &
149 per_cpu(cpu_evtchn_mask, cpu)[idx] &
150 ~sh->evtchn_mask[idx];
151}
152
153/*
154 * Search the CPU's pending events bitmasks. For each one found, map
155 * the event number to an irq, and feed it into do_IRQ() for handling.
156 *
157 * Xen uses a two-level bitmap to speed searching. The first level is
158 * a bitset of words which contain pending event bits. The second
159 * level is a bitset of pending events themselves.
160 */
161static void evtchn_2l_handle_events(unsigned cpu)
162{
163 int irq;
164 xen_ulong_t pending_words;
165 xen_ulong_t pending_bits;
166 int start_word_idx, start_bit_idx;
167 int word_idx, bit_idx;
168 int i;
169 struct irq_desc *desc;
170 struct shared_info *s = HYPERVISOR_shared_info;
171 struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
172
173 /* Timer interrupt has highest priority. */
174 irq = irq_from_virq(cpu, VIRQ_TIMER);
175 if (irq != -1) {
176 unsigned int evtchn = evtchn_from_irq(irq);
177 word_idx = evtchn / BITS_PER_LONG;
178 bit_idx = evtchn % BITS_PER_LONG;
179 if (active_evtchns(cpu, s, word_idx) & (1ULL << bit_idx)) {
180 desc = irq_to_desc(irq);
181 if (desc)
182 generic_handle_irq_desc(irq, desc);
183 }
184 }
185
186 /*
187 * Master flag must be cleared /before/ clearing
188 * selector flag. xchg_xen_ulong must contain an
189 * appropriate barrier.
190 */
191 pending_words = xchg_xen_ulong(&vcpu_info->evtchn_pending_sel, 0);
192
193 start_word_idx = __this_cpu_read(current_word_idx);
194 start_bit_idx = __this_cpu_read(current_bit_idx);
195
196 word_idx = start_word_idx;
197
198 for (i = 0; pending_words != 0; i++) {
199 xen_ulong_t words;
200
201 words = MASK_LSBS(pending_words, word_idx);
202
203 /*
204 * If we masked out all events, wrap to beginning.
205 */
206 if (words == 0) {
207 word_idx = 0;
208 bit_idx = 0;
209 continue;
210 }
211 word_idx = EVTCHN_FIRST_BIT(words);
212
213 pending_bits = active_evtchns(cpu, s, word_idx);
214 bit_idx = 0; /* usually scan entire word from start */
215 /*
216 * We scan the starting word in two parts.
217 *
218 * 1st time: start in the middle, scanning the
219 * upper bits.
220 *
221 * 2nd time: scan the whole word (not just the
222 * parts skipped in the first pass) -- if an
223 * event in the previously scanned bits is
224 * pending again it would just be scanned on
225 * the next loop anyway.
226 */
227 if (word_idx == start_word_idx) {
228 if (i == 0)
229 bit_idx = start_bit_idx;
230 }
231
232 do {
233 xen_ulong_t bits;
234 int port;
235
236 bits = MASK_LSBS(pending_bits, bit_idx);
237
238 /* If we masked out all events, move on. */
239 if (bits == 0)
240 break;
241
242 bit_idx = EVTCHN_FIRST_BIT(bits);
243
244 /* Process port. */
245 port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx;
246 irq = get_evtchn_to_irq(port);
247
248 if (irq != -1) {
249 desc = irq_to_desc(irq);
250 if (desc)
251 generic_handle_irq_desc(irq, desc);
252 }
253
254 bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD;
255
256 /* Next caller starts at last processed + 1 */
257 __this_cpu_write(current_word_idx,
258 bit_idx ? word_idx :
259 (word_idx+1) % BITS_PER_EVTCHN_WORD);
260 __this_cpu_write(current_bit_idx, bit_idx);
261 } while (bit_idx != 0);
262
263 /* Scan start_l1i twice; all others once. */
264 if ((word_idx != start_word_idx) || (i != 0))
265 pending_words &= ~(1UL << word_idx);
266
267 word_idx = (word_idx + 1) % BITS_PER_EVTCHN_WORD;
268 }
269}
270
271irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
272{
273 struct shared_info *sh = HYPERVISOR_shared_info;
274 int cpu = smp_processor_id();
275 xen_ulong_t *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
276 int i;
277 unsigned long flags;
278 static DEFINE_SPINLOCK(debug_lock);
279 struct vcpu_info *v;
280
281 spin_lock_irqsave(&debug_lock, flags);
282
283 printk("\nvcpu %d\n ", cpu);
284
285 for_each_online_cpu(i) {
286 int pending;
287 v = per_cpu(xen_vcpu, i);
288 pending = (get_irq_regs() && i == cpu)
289 ? xen_irqs_disabled(get_irq_regs())
290 : v->evtchn_upcall_mask;
291 printk("%d: masked=%d pending=%d event_sel %0*"PRI_xen_ulong"\n ", i,
292 pending, v->evtchn_upcall_pending,
293 (int)(sizeof(v->evtchn_pending_sel)*2),
294 v->evtchn_pending_sel);
295 }
296 v = per_cpu(xen_vcpu, cpu);
297
298 printk("\npending:\n ");
299 for (i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--)
300 printk("%0*"PRI_xen_ulong"%s",
301 (int)sizeof(sh->evtchn_pending[0])*2,
302 sh->evtchn_pending[i],
303 i % 8 == 0 ? "\n " : " ");
304 printk("\nglobal mask:\n ");
305 for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
306 printk("%0*"PRI_xen_ulong"%s",
307 (int)(sizeof(sh->evtchn_mask[0])*2),
308 sh->evtchn_mask[i],
309 i % 8 == 0 ? "\n " : " ");
310
311 printk("\nglobally unmasked:\n ");
312 for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
313 printk("%0*"PRI_xen_ulong"%s",
314 (int)(sizeof(sh->evtchn_mask[0])*2),
315 sh->evtchn_pending[i] & ~sh->evtchn_mask[i],
316 i % 8 == 0 ? "\n " : " ");
317
318 printk("\nlocal cpu%d mask:\n ", cpu);
319 for (i = (EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD)-1; i >= 0; i--)
320 printk("%0*"PRI_xen_ulong"%s", (int)(sizeof(cpu_evtchn[0])*2),
321 cpu_evtchn[i],
322 i % 8 == 0 ? "\n " : " ");
323
324 printk("\nlocally unmasked:\n ");
325 for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) {
326 xen_ulong_t pending = sh->evtchn_pending[i]
327 & ~sh->evtchn_mask[i]
328 & cpu_evtchn[i];
329 printk("%0*"PRI_xen_ulong"%s",
330 (int)(sizeof(sh->evtchn_mask[0])*2),
331 pending, i % 8 == 0 ? "\n " : " ");
332 }
333
334 printk("\npending list:\n");
335 for (i = 0; i < EVTCHN_2L_NR_CHANNELS; i++) {
336 if (sync_test_bit(i, BM(sh->evtchn_pending))) {
337 int word_idx = i / BITS_PER_EVTCHN_WORD;
338 printk(" %d: event %d -> irq %d%s%s%s\n",
339 cpu_from_evtchn(i), i,
340 get_evtchn_to_irq(i),
341 sync_test_bit(word_idx, BM(&v->evtchn_pending_sel))
342 ? "" : " l2-clear",
343 !sync_test_bit(i, BM(sh->evtchn_mask))
344 ? "" : " globally-masked",
345 sync_test_bit(i, BM(cpu_evtchn))
346 ? "" : " locally-masked");
347 }
348 }
349
350 spin_unlock_irqrestore(&debug_lock, flags);
351
352 return IRQ_HANDLED;
353}
354
355static const struct evtchn_ops evtchn_ops_2l = {
356 .max_channels = evtchn_2l_max_channels,
357 .nr_channels = evtchn_2l_max_channels,
358 .bind_to_cpu = evtchn_2l_bind_to_cpu,
359 .clear_pending = evtchn_2l_clear_pending,
360 .set_pending = evtchn_2l_set_pending,
361 .is_pending = evtchn_2l_is_pending,
362 .test_and_set_mask = evtchn_2l_test_and_set_mask,
363 .mask = evtchn_2l_mask,
364 .unmask = evtchn_2l_unmask,
365 .handle_events = evtchn_2l_handle_events,
366};
367
368void __init xen_evtchn_2l_init(void)
369{
370 pr_info("Using 2-level ABI\n");
371 evtchn_ops = &evtchn_ops_2l;
372}
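events_2l.c carries the 2-level ABI state over from the old events.c: each xen_ulong_t bitmap word covers BITS_PER_EVTCHN_WORD ports, and a port is only delivered when it is pending, unmasked and bound to the local CPU (the active_evtchns() expression above). The following self-contained userspace sketch shows that word/bit arithmetic, assuming a 64-bit xen_ulong_t; the sample port number is made up for illustration.

/* Userspace model of the 2-level per-port bookkeeping. */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t xen_ulong_t;                   /* assume a 64-bit guest */
#define BITS_PER_EVTCHN_WORD (sizeof(xen_ulong_t) * 8)
#define NR_CHANNELS          (BITS_PER_EVTCHN_WORD * BITS_PER_EVTCHN_WORD)

static xen_ulong_t evtchn_pending[NR_CHANNELS / BITS_PER_EVTCHN_WORD];
static xen_ulong_t evtchn_mask[NR_CHANNELS / BITS_PER_EVTCHN_WORD];
static xen_ulong_t cpu_evtchn_mask[NR_CHANNELS / BITS_PER_EVTCHN_WORD];

static void set_pending(unsigned port)
{
        evtchn_pending[port / BITS_PER_EVTCHN_WORD] |=
                (xen_ulong_t)1 << (port % BITS_PER_EVTCHN_WORD);
}

/* Same selection as active_evtchns(): pending, bound here, not masked. */
static xen_ulong_t active_evtchns(unsigned idx)
{
        return evtchn_pending[idx] & cpu_evtchn_mask[idx] & ~evtchn_mask[idx];
}

int main(void)
{
        unsigned port = 131;                    /* arbitrary example port */
        unsigned word = port / BITS_PER_EVTCHN_WORD;
        unsigned bit  = port % BITS_PER_EVTCHN_WORD;

        cpu_evtchn_mask[word] = ~(xen_ulong_t)0; /* bind everything locally */
        set_pending(port);

        printf("port %u -> word %u, bit %u\n", port, word, bit);
        printf("active word %u = %#llx\n", word,
               (unsigned long long)active_evtchns(word));
        return 0;
}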
diff --git a/drivers/xen/events.c b/drivers/xen/events/events_base.c
index 4035e833ea26..4672e003c0ad 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events/events_base.c
@@ -59,6 +59,10 @@
59#include <xen/interface/vcpu.h> 59#include <xen/interface/vcpu.h>
60#include <asm/hw_irq.h> 60#include <asm/hw_irq.h>
61 61
62#include "events_internal.h"
63
64const struct evtchn_ops *evtchn_ops;
65
62/* 66/*
63 * This lock protects updates to the following mapping and reference-count 67 * This lock protects updates to the following mapping and reference-count
64 * arrays. The lock does not need to be acquired to read the mapping tables. 68 * arrays. The lock does not need to be acquired to read the mapping tables.
@@ -73,71 +77,15 @@ static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
73/* IRQ <-> IPI mapping */ 77/* IRQ <-> IPI mapping */
74static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1}; 78static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
75 79
76/* Interrupt types. */ 80int **evtchn_to_irq;
77enum xen_irq_type {
78 IRQT_UNBOUND = 0,
79 IRQT_PIRQ,
80 IRQT_VIRQ,
81 IRQT_IPI,
82 IRQT_EVTCHN
83};
84
85/*
86 * Packed IRQ information:
87 * type - enum xen_irq_type
88 * event channel - irq->event channel mapping
89 * cpu - cpu this event channel is bound to
90 * index - type-specific information:
91 * PIRQ - physical IRQ, GSI, flags, and owner domain
92 * VIRQ - virq number
93 * IPI - IPI vector
94 * EVTCHN -
95 */
96struct irq_info {
97 struct list_head list;
98 int refcnt;
99 enum xen_irq_type type; /* type */
100 unsigned irq;
101 unsigned short evtchn; /* event channel */
102 unsigned short cpu; /* cpu bound */
103
104 union {
105 unsigned short virq;
106 enum ipi_vector ipi;
107 struct {
108 unsigned short pirq;
109 unsigned short gsi;
110 unsigned char flags;
111 uint16_t domid;
112 } pirq;
113 } u;
114};
115#define PIRQ_NEEDS_EOI (1 << 0)
116#define PIRQ_SHAREABLE (1 << 1)
117
118static int *evtchn_to_irq;
119#ifdef CONFIG_X86 81#ifdef CONFIG_X86
120static unsigned long *pirq_eoi_map; 82static unsigned long *pirq_eoi_map;
121#endif 83#endif
122static bool (*pirq_needs_eoi)(unsigned irq); 84static bool (*pirq_needs_eoi)(unsigned irq);
123 85
124/* 86#define EVTCHN_ROW(e) (e / (PAGE_SIZE/sizeof(**evtchn_to_irq)))
125 * Note sizeof(xen_ulong_t) can be more than sizeof(unsigned long). Be 87#define EVTCHN_COL(e) (e % (PAGE_SIZE/sizeof(**evtchn_to_irq)))
126 * careful to only use bitops which allow for this (e.g 88#define EVTCHN_PER_ROW (PAGE_SIZE / sizeof(**evtchn_to_irq))
127 * test_bit/find_first_bit and friends but not __ffs) and to pass
128 * BITS_PER_EVTCHN_WORD as the bitmask length.
129 */
130#define BITS_PER_EVTCHN_WORD (sizeof(xen_ulong_t)*8)
131/*
132 * Make a bitmask (i.e. unsigned long *) of a xen_ulong_t
133 * array. Primarily to avoid long lines (hence the terse name).
134 */
135#define BM(x) (unsigned long *)(x)
136/* Find the first set bit in a evtchn mask */
137#define EVTCHN_FIRST_BIT(w) find_first_bit(BM(&(w)), BITS_PER_EVTCHN_WORD)
138
139static DEFINE_PER_CPU(xen_ulong_t [NR_EVENT_CHANNELS/BITS_PER_EVTCHN_WORD],
140 cpu_evtchn_mask);
141 89
142/* Xen will never allocate port zero for any purpose. */ 90/* Xen will never allocate port zero for any purpose. */
143#define VALID_EVTCHN(chn) ((chn) != 0) 91#define VALID_EVTCHN(chn) ((chn) != 0)
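Because the FIFO ABI allows far more ports than the old NR_EVENT_CHANNELS, the flat evtchn_to_irq[] array becomes a two-level table here: a top-level array of page-sized rows, allocated lazily by set_evtchn_to_irq() and indexed with EVTCHN_ROW()/EVTCHN_COL(). A userspace sketch of that layout follows, assuming a 4096-byte PAGE_SIZE, a 4-byte int and the FIFO limit of 2^17 channels; these values are used only for illustration.

/* Userspace model of the two-level evtchn -> irq table. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE        4096
#define MAX_CHANNELS     (1 << 17)              /* FIFO ABI channel limit */
#define EVTCHN_PER_ROW   (PAGE_SIZE / sizeof(int))
#define EVTCHN_ROW(e)    ((e) / EVTCHN_PER_ROW)
#define EVTCHN_COL(e)    ((e) % EVTCHN_PER_ROW)

static int *evtchn_to_irq[MAX_CHANNELS / EVTCHN_PER_ROW];

static int set_evtchn_to_irq(unsigned evtchn, int irq)
{
        unsigned row = EVTCHN_ROW(evtchn), col = EVTCHN_COL(evtchn);

        if (!evtchn_to_irq[row]) {
                /* Rows are allocated on demand, one page at a time, and
                 * start out as "no irq bound" (-1), as in the kernel code. */
                evtchn_to_irq[row] = malloc(PAGE_SIZE);
                if (!evtchn_to_irq[row])
                        return -1;
                memset(evtchn_to_irq[row], 0xff, PAGE_SIZE);
        }
        evtchn_to_irq[row][col] = irq;
        return 0;
}

static int get_evtchn_to_irq(unsigned evtchn)
{
        unsigned row = EVTCHN_ROW(evtchn);

        return evtchn_to_irq[row] ? evtchn_to_irq[row][EVTCHN_COL(evtchn)] : -1;
}

int main(void)
{
        set_evtchn_to_irq(5000, 73);
        printf("port 5000 -> row %lu col %lu -> irq %d\n",
               (unsigned long)EVTCHN_ROW(5000), (unsigned long)EVTCHN_COL(5000),
               get_evtchn_to_irq(5000));
        printf("unbound port 9 -> %d\n", get_evtchn_to_irq(9));
        return 0;
}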
@@ -148,19 +96,75 @@ static struct irq_chip xen_pirq_chip;
148static void enable_dynirq(struct irq_data *data); 96static void enable_dynirq(struct irq_data *data);
149static void disable_dynirq(struct irq_data *data); 97static void disable_dynirq(struct irq_data *data);
150 98
99static void clear_evtchn_to_irq_row(unsigned row)
100{
101 unsigned col;
102
103 for (col = 0; col < EVTCHN_PER_ROW; col++)
104 evtchn_to_irq[row][col] = -1;
105}
106
107static void clear_evtchn_to_irq_all(void)
108{
109 unsigned row;
110
111 for (row = 0; row < EVTCHN_ROW(xen_evtchn_max_channels()); row++) {
112 if (evtchn_to_irq[row] == NULL)
113 continue;
114 clear_evtchn_to_irq_row(row);
115 }
116}
117
118static int set_evtchn_to_irq(unsigned evtchn, unsigned irq)
119{
120 unsigned row;
121 unsigned col;
122
123 if (evtchn >= xen_evtchn_max_channels())
124 return -EINVAL;
125
126 row = EVTCHN_ROW(evtchn);
127 col = EVTCHN_COL(evtchn);
128
129 if (evtchn_to_irq[row] == NULL) {
130 /* Unallocated irq entries return -1 anyway */
131 if (irq == -1)
132 return 0;
133
134 evtchn_to_irq[row] = (int *)get_zeroed_page(GFP_KERNEL);
135 if (evtchn_to_irq[row] == NULL)
136 return -ENOMEM;
137
138 clear_evtchn_to_irq_row(row);
139 }
140
141 evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)] = irq;
142 return 0;
143}
144
145int get_evtchn_to_irq(unsigned evtchn)
146{
147 if (evtchn >= xen_evtchn_max_channels())
148 return -1;
149 if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
150 return -1;
151 return evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)];
152}
153
151/* Get info for IRQ */ 154/* Get info for IRQ */
152static struct irq_info *info_for_irq(unsigned irq) 155struct irq_info *info_for_irq(unsigned irq)
153{ 156{
154 return irq_get_handler_data(irq); 157 return irq_get_handler_data(irq);
155} 158}
156 159
157/* Constructors for packed IRQ information. */ 160/* Constructors for packed IRQ information. */
158static void xen_irq_info_common_init(struct irq_info *info, 161static int xen_irq_info_common_setup(struct irq_info *info,
159 unsigned irq, 162 unsigned irq,
160 enum xen_irq_type type, 163 enum xen_irq_type type,
161 unsigned short evtchn, 164 unsigned evtchn,
162 unsigned short cpu) 165 unsigned short cpu)
163{ 166{
167 int ret;
164 168
165 BUG_ON(info->type != IRQT_UNBOUND && info->type != type); 169 BUG_ON(info->type != IRQT_UNBOUND && info->type != type);
166 170
@@ -169,68 +173,78 @@ static void xen_irq_info_common_init(struct irq_info *info,
169 info->evtchn = evtchn; 173 info->evtchn = evtchn;
170 info->cpu = cpu; 174 info->cpu = cpu;
171 175
172 evtchn_to_irq[evtchn] = irq; 176 ret = set_evtchn_to_irq(evtchn, irq);
177 if (ret < 0)
178 return ret;
173 179
174 irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN); 180 irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN);
181
182 return xen_evtchn_port_setup(info);
175} 183}
176 184
177static void xen_irq_info_evtchn_init(unsigned irq, 185static int xen_irq_info_evtchn_setup(unsigned irq,
178 unsigned short evtchn) 186 unsigned evtchn)
179{ 187{
180 struct irq_info *info = info_for_irq(irq); 188 struct irq_info *info = info_for_irq(irq);
181 189
182 xen_irq_info_common_init(info, irq, IRQT_EVTCHN, evtchn, 0); 190 return xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
183} 191}
184 192
185static void xen_irq_info_ipi_init(unsigned cpu, 193static int xen_irq_info_ipi_setup(unsigned cpu,
186 unsigned irq, 194 unsigned irq,
187 unsigned short evtchn, 195 unsigned evtchn,
188 enum ipi_vector ipi) 196 enum ipi_vector ipi)
189{ 197{
190 struct irq_info *info = info_for_irq(irq); 198 struct irq_info *info = info_for_irq(irq);
191 199
192 xen_irq_info_common_init(info, irq, IRQT_IPI, evtchn, 0);
193
194 info->u.ipi = ipi; 200 info->u.ipi = ipi;
195 201
196 per_cpu(ipi_to_irq, cpu)[ipi] = irq; 202 per_cpu(ipi_to_irq, cpu)[ipi] = irq;
203
204 return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0);
197} 205}
198 206
199static void xen_irq_info_virq_init(unsigned cpu, 207static int xen_irq_info_virq_setup(unsigned cpu,
200 unsigned irq, 208 unsigned irq,
201 unsigned short evtchn, 209 unsigned evtchn,
202 unsigned short virq) 210 unsigned virq)
203{ 211{
204 struct irq_info *info = info_for_irq(irq); 212 struct irq_info *info = info_for_irq(irq);
205 213
206 xen_irq_info_common_init(info, irq, IRQT_VIRQ, evtchn, 0);
207
208 info->u.virq = virq; 214 info->u.virq = virq;
209 215
210 per_cpu(virq_to_irq, cpu)[virq] = irq; 216 per_cpu(virq_to_irq, cpu)[virq] = irq;
217
218 return xen_irq_info_common_setup(info, irq, IRQT_VIRQ, evtchn, 0);
211} 219}
212 220
213static void xen_irq_info_pirq_init(unsigned irq, 221static int xen_irq_info_pirq_setup(unsigned irq,
214 unsigned short evtchn, 222 unsigned evtchn,
215 unsigned short pirq, 223 unsigned pirq,
216 unsigned short gsi, 224 unsigned gsi,
217 uint16_t domid, 225 uint16_t domid,
218 unsigned char flags) 226 unsigned char flags)
219{ 227{
220 struct irq_info *info = info_for_irq(irq); 228 struct irq_info *info = info_for_irq(irq);
221 229
222 xen_irq_info_common_init(info, irq, IRQT_PIRQ, evtchn, 0);
223
224 info->u.pirq.pirq = pirq; 230 info->u.pirq.pirq = pirq;
225 info->u.pirq.gsi = gsi; 231 info->u.pirq.gsi = gsi;
226 info->u.pirq.domid = domid; 232 info->u.pirq.domid = domid;
227 info->u.pirq.flags = flags; 233 info->u.pirq.flags = flags;
234
235 return xen_irq_info_common_setup(info, irq, IRQT_PIRQ, evtchn, 0);
236}
237
238static void xen_irq_info_cleanup(struct irq_info *info)
239{
240 set_evtchn_to_irq(info->evtchn, -1);
241 info->evtchn = 0;
228} 242}
229 243
230/* 244/*
231 * Accessors for packed IRQ information. 245 * Accessors for packed IRQ information.
232 */ 246 */
233static unsigned int evtchn_from_irq(unsigned irq) 247unsigned int evtchn_from_irq(unsigned irq)
234{ 248{
235 if (unlikely(WARN(irq < 0 || irq >= nr_irqs, "Invalid irq %d!\n", irq))) 249 if (unlikely(WARN(irq < 0 || irq >= nr_irqs, "Invalid irq %d!\n", irq)))
236 return 0; 250 return 0;
@@ -240,10 +254,15 @@ static unsigned int evtchn_from_irq(unsigned irq)
240 254
241unsigned irq_from_evtchn(unsigned int evtchn) 255unsigned irq_from_evtchn(unsigned int evtchn)
242{ 256{
243 return evtchn_to_irq[evtchn]; 257 return get_evtchn_to_irq(evtchn);
244} 258}
245EXPORT_SYMBOL_GPL(irq_from_evtchn); 259EXPORT_SYMBOL_GPL(irq_from_evtchn);
246 260
261int irq_from_virq(unsigned int cpu, unsigned int virq)
262{
263 return per_cpu(virq_to_irq, cpu)[virq];
264}
265
247static enum ipi_vector ipi_from_irq(unsigned irq) 266static enum ipi_vector ipi_from_irq(unsigned irq)
248{ 267{
249 struct irq_info *info = info_for_irq(irq); 268 struct irq_info *info = info_for_irq(irq);
@@ -279,14 +298,14 @@ static enum xen_irq_type type_from_irq(unsigned irq)
279 return info_for_irq(irq)->type; 298 return info_for_irq(irq)->type;
280} 299}
281 300
282static unsigned cpu_from_irq(unsigned irq) 301unsigned cpu_from_irq(unsigned irq)
283{ 302{
284 return info_for_irq(irq)->cpu; 303 return info_for_irq(irq)->cpu;
285} 304}
286 305
287static unsigned int cpu_from_evtchn(unsigned int evtchn) 306unsigned int cpu_from_evtchn(unsigned int evtchn)
288{ 307{
289 int irq = evtchn_to_irq[evtchn]; 308 int irq = get_evtchn_to_irq(evtchn);
290 unsigned ret = 0; 309 unsigned ret = 0;
291 310
292 if (irq != -1) 311 if (irq != -1)
@@ -310,67 +329,29 @@ static bool pirq_needs_eoi_flag(unsigned irq)
310 return info->u.pirq.flags & PIRQ_NEEDS_EOI; 329 return info->u.pirq.flags & PIRQ_NEEDS_EOI;
311} 330}
312 331
313static inline xen_ulong_t active_evtchns(unsigned int cpu,
314 struct shared_info *sh,
315 unsigned int idx)
316{
317 return sh->evtchn_pending[idx] &
318 per_cpu(cpu_evtchn_mask, cpu)[idx] &
319 ~sh->evtchn_mask[idx];
320}
321
322static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) 332static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
323{ 333{
324 int irq = evtchn_to_irq[chn]; 334 int irq = get_evtchn_to_irq(chn);
335 struct irq_info *info = info_for_irq(irq);
325 336
326 BUG_ON(irq == -1); 337 BUG_ON(irq == -1);
327#ifdef CONFIG_SMP 338#ifdef CONFIG_SMP
328 cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu)); 339 cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu));
329#endif 340#endif
330 341
331 clear_bit(chn, BM(per_cpu(cpu_evtchn_mask, cpu_from_irq(irq)))); 342 xen_evtchn_port_bind_to_cpu(info, cpu);
332 set_bit(chn, BM(per_cpu(cpu_evtchn_mask, cpu)));
333
334 info_for_irq(irq)->cpu = cpu;
335}
336
337static void init_evtchn_cpu_bindings(void)
338{
339 int i;
340#ifdef CONFIG_SMP
341 struct irq_info *info;
342
343 /* By default all event channels notify CPU#0. */
344 list_for_each_entry(info, &xen_irq_list_head, list) {
345 struct irq_desc *desc = irq_to_desc(info->irq);
346 cpumask_copy(desc->irq_data.affinity, cpumask_of(0));
347 }
348#endif
349
350 for_each_possible_cpu(i)
351 memset(per_cpu(cpu_evtchn_mask, i),
352 (i == 0) ? ~0 : 0, NR_EVENT_CHANNELS/8);
353}
354 343
355static inline void clear_evtchn(int port) 344 info->cpu = cpu;
356{
357 struct shared_info *s = HYPERVISOR_shared_info;
358 sync_clear_bit(port, BM(&s->evtchn_pending[0]));
359} 345}
360 346
361static inline void set_evtchn(int port) 347static void xen_evtchn_mask_all(void)
362{ 348{
363 struct shared_info *s = HYPERVISOR_shared_info; 349 unsigned int evtchn;
364 sync_set_bit(port, BM(&s->evtchn_pending[0]));
365}
366 350
367static inline int test_evtchn(int port) 351 for (evtchn = 0; evtchn < xen_evtchn_nr_channels(); evtchn++)
368{ 352 mask_evtchn(evtchn);
369 struct shared_info *s = HYPERVISOR_shared_info;
370 return sync_test_bit(port, BM(&s->evtchn_pending[0]));
371} 353}
372 354
373
374/** 355/**
375 * notify_remote_via_irq - send event to remote end of event channel via irq 356 * notify_remote_via_irq - send event to remote end of event channel via irq
376 * @irq: irq of event channel to send event to 357 * @irq: irq of event channel to send event to
@@ -388,63 +369,6 @@ void notify_remote_via_irq(int irq)
388} 369}
389EXPORT_SYMBOL_GPL(notify_remote_via_irq); 370EXPORT_SYMBOL_GPL(notify_remote_via_irq);
390 371
391static void mask_evtchn(int port)
392{
393 struct shared_info *s = HYPERVISOR_shared_info;
394 sync_set_bit(port, BM(&s->evtchn_mask[0]));
395}
396
397static void unmask_evtchn(int port)
398{
399 struct shared_info *s = HYPERVISOR_shared_info;
400 unsigned int cpu = get_cpu();
401 int do_hypercall = 0, evtchn_pending = 0;
402
403 BUG_ON(!irqs_disabled());
404
405 if (unlikely((cpu != cpu_from_evtchn(port))))
406 do_hypercall = 1;
407 else {
408 /*
409 * Need to clear the mask before checking pending to
410 * avoid a race with an event becoming pending.
411 *
412 * EVTCHNOP_unmask will only trigger an upcall if the
413 * mask bit was set, so if a hypercall is needed
414 * remask the event.
415 */
416 sync_clear_bit(port, BM(&s->evtchn_mask[0]));
417 evtchn_pending = sync_test_bit(port, BM(&s->evtchn_pending[0]));
418
419 if (unlikely(evtchn_pending && xen_hvm_domain())) {
420 sync_set_bit(port, BM(&s->evtchn_mask[0]));
421 do_hypercall = 1;
422 }
423 }
424
425 /* Slow path (hypercall) if this is a non-local port or if this is
426 * an hvm domain and an event is pending (hvm domains don't have
427 * their own implementation of irq_enable). */
428 if (do_hypercall) {
429 struct evtchn_unmask unmask = { .port = port };
430 (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
431 } else {
432 struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
433
434 /*
435 * The following is basically the equivalent of
436 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
437 * the interrupt edge' if the channel is masked.
438 */
439 if (evtchn_pending &&
440 !sync_test_and_set_bit(port / BITS_PER_EVTCHN_WORD,
441 BM(&vcpu_info->evtchn_pending_sel)))
442 vcpu_info->evtchn_upcall_pending = 1;
443 }
444
445 put_cpu();
446}
447
448static void xen_irq_init(unsigned irq) 372static void xen_irq_init(unsigned irq)
449{ 373{
450 struct irq_info *info; 374 struct irq_info *info;
@@ -538,6 +462,18 @@ static void xen_free_irq(unsigned irq)
538 irq_free_desc(irq); 462 irq_free_desc(irq);
539} 463}
540 464
465static void xen_evtchn_close(unsigned int port)
466{
467 struct evtchn_close close;
468
469 close.port = port;
470 if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
471 BUG();
472
473 /* Closed ports are implicitly re-bound to VCPU0. */
474 bind_evtchn_to_cpu(port, 0);
475}
476
541static void pirq_query_unmask(int irq) 477static void pirq_query_unmask(int irq)
542{ 478{
543 struct physdev_irq_status_query irq_status; 479 struct physdev_irq_status_query irq_status;
@@ -610,7 +546,13 @@ static unsigned int __startup_pirq(unsigned int irq)
610 546
611 pirq_query_unmask(irq); 547 pirq_query_unmask(irq);
612 548
613 evtchn_to_irq[evtchn] = irq; 549 rc = set_evtchn_to_irq(evtchn, irq);
550 if (rc != 0) {
551 pr_err("irq%d: Failed to set port to irq mapping (%d)\n",
552 irq, rc);
553 xen_evtchn_close(evtchn);
554 return 0;
555 }
614 bind_evtchn_to_cpu(evtchn, 0); 556 bind_evtchn_to_cpu(evtchn, 0);
615 info->evtchn = evtchn; 557 info->evtchn = evtchn;
616 558
@@ -628,10 +570,9 @@ static unsigned int startup_pirq(struct irq_data *data)
628 570
629static void shutdown_pirq(struct irq_data *data) 571static void shutdown_pirq(struct irq_data *data)
630{ 572{
631 struct evtchn_close close;
632 unsigned int irq = data->irq; 573 unsigned int irq = data->irq;
633 struct irq_info *info = info_for_irq(irq); 574 struct irq_info *info = info_for_irq(irq);
634 int evtchn = evtchn_from_irq(irq); 575 unsigned evtchn = evtchn_from_irq(irq);
635 576
636 BUG_ON(info->type != IRQT_PIRQ); 577 BUG_ON(info->type != IRQT_PIRQ);
637 578
@@ -639,14 +580,8 @@ static void shutdown_pirq(struct irq_data *data)
639 return; 580 return;
640 581
641 mask_evtchn(evtchn); 582 mask_evtchn(evtchn);
642 583 xen_evtchn_close(evtchn);
643 close.port = evtchn; 584 xen_irq_info_cleanup(info);
644 if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
645 BUG();
646
647 bind_evtchn_to_cpu(evtchn, 0);
648 evtchn_to_irq[evtchn] = -1;
649 info->evtchn = 0;
650} 585}
651 586
652static void enable_pirq(struct irq_data *data) 587static void enable_pirq(struct irq_data *data)
@@ -675,6 +610,41 @@ int xen_irq_from_gsi(unsigned gsi)
675} 610}
676EXPORT_SYMBOL_GPL(xen_irq_from_gsi); 611EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
677 612
613static void __unbind_from_irq(unsigned int irq)
614{
615 int evtchn = evtchn_from_irq(irq);
616 struct irq_info *info = irq_get_handler_data(irq);
617
618 if (info->refcnt > 0) {
619 info->refcnt--;
620 if (info->refcnt != 0)
621 return;
622 }
623
624 if (VALID_EVTCHN(evtchn)) {
625 unsigned int cpu = cpu_from_irq(irq);
626
627 xen_evtchn_close(evtchn);
628
629 switch (type_from_irq(irq)) {
630 case IRQT_VIRQ:
631 per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
632 break;
633 case IRQT_IPI:
634 per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1;
635 break;
636 default:
637 break;
638 }
639
640 xen_irq_info_cleanup(info);
641 }
642
643 BUG_ON(info_for_irq(irq)->type == IRQT_UNBOUND);
644
645 xen_free_irq(irq);
646}
647
678/* 648/*
679 * Do not make any assumptions regarding the relationship between the 649 * Do not make any assumptions regarding the relationship between the
680 * IRQ number returned here and the Xen pirq argument. 650 * IRQ number returned here and the Xen pirq argument.
@@ -690,6 +660,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
690{ 660{
691 int irq = -1; 661 int irq = -1;
692 struct physdev_irq irq_op; 662 struct physdev_irq irq_op;
663 int ret;
693 664
694 mutex_lock(&irq_mapping_update_lock); 665 mutex_lock(&irq_mapping_update_lock);
695 666
@@ -717,8 +688,13 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
717 goto out; 688 goto out;
718 } 689 }
719 690
720 xen_irq_info_pirq_init(irq, 0, pirq, gsi, DOMID_SELF, 691 ret = xen_irq_info_pirq_setup(irq, 0, pirq, gsi, DOMID_SELF,
721 shareable ? PIRQ_SHAREABLE : 0); 692 shareable ? PIRQ_SHAREABLE : 0);
693 if (ret < 0) {
694 __unbind_from_irq(irq);
695 irq = ret;
696 goto out;
697 }
722 698
723 pirq_query_unmask(irq); 699 pirq_query_unmask(irq);
724 /* We try to use the handler with the appropriate semantic for the 700 /* We try to use the handler with the appropriate semantic for the
@@ -778,7 +754,9 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
778 irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq, 754 irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq,
779 name); 755 name);
780 756
781 xen_irq_info_pirq_init(irq, 0, pirq, 0, domid, 0); 757 ret = xen_irq_info_pirq_setup(irq, 0, pirq, 0, domid, 0);
758 if (ret < 0)
759 goto error_irq;
782 ret = irq_set_msi_desc(irq, msidesc); 760 ret = irq_set_msi_desc(irq, msidesc);
783 if (ret < 0) 761 if (ret < 0)
784 goto error_irq; 762 goto error_irq;
@@ -786,8 +764,8 @@ out:
786 mutex_unlock(&irq_mapping_update_lock); 764 mutex_unlock(&irq_mapping_update_lock);
787 return irq; 765 return irq;
788error_irq: 766error_irq:
767 __unbind_from_irq(irq);
789 mutex_unlock(&irq_mapping_update_lock); 768 mutex_unlock(&irq_mapping_update_lock);
790 xen_free_irq(irq);
791 return ret; 769 return ret;
792} 770}
793#endif 771#endif
@@ -857,13 +835,18 @@ int xen_pirq_from_irq(unsigned irq)
857 return pirq_from_irq(irq); 835 return pirq_from_irq(irq);
858} 836}
859EXPORT_SYMBOL_GPL(xen_pirq_from_irq); 837EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
838
860int bind_evtchn_to_irq(unsigned int evtchn) 839int bind_evtchn_to_irq(unsigned int evtchn)
861{ 840{
862 int irq; 841 int irq;
842 int ret;
843
844 if (evtchn >= xen_evtchn_max_channels())
845 return -ENOMEM;
863 846
864 mutex_lock(&irq_mapping_update_lock); 847 mutex_lock(&irq_mapping_update_lock);
865 848
866 irq = evtchn_to_irq[evtchn]; 849 irq = get_evtchn_to_irq(evtchn);
867 850
868 if (irq == -1) { 851 if (irq == -1) {
869 irq = xen_allocate_irq_dynamic(); 852 irq = xen_allocate_irq_dynamic();
@@ -873,7 +856,12 @@ int bind_evtchn_to_irq(unsigned int evtchn)
873 irq_set_chip_and_handler_name(irq, &xen_dynamic_chip, 856 irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
874 handle_edge_irq, "event"); 857 handle_edge_irq, "event");
875 858
876 xen_irq_info_evtchn_init(irq, evtchn); 859 ret = xen_irq_info_evtchn_setup(irq, evtchn);
860 if (ret < 0) {
861 __unbind_from_irq(irq);
862 irq = ret;
863 goto out;
864 }
877 } else { 865 } else {
878 struct irq_info *info = info_for_irq(irq); 866 struct irq_info *info = info_for_irq(irq);
879 WARN_ON(info == NULL || info->type != IRQT_EVTCHN); 867 WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
@@ -890,6 +878,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
890{ 878{
891 struct evtchn_bind_ipi bind_ipi; 879 struct evtchn_bind_ipi bind_ipi;
892 int evtchn, irq; 880 int evtchn, irq;
881 int ret;
893 882
894 mutex_lock(&irq_mapping_update_lock); 883 mutex_lock(&irq_mapping_update_lock);
895 884
@@ -909,8 +898,12 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
909 BUG(); 898 BUG();
910 evtchn = bind_ipi.port; 899 evtchn = bind_ipi.port;
911 900
912 xen_irq_info_ipi_init(cpu, irq, evtchn, ipi); 901 ret = xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
913 902 if (ret < 0) {
903 __unbind_from_irq(irq);
904 irq = ret;
905 goto out;
906 }
914 bind_evtchn_to_cpu(evtchn, cpu); 907 bind_evtchn_to_cpu(evtchn, cpu);
915 } else { 908 } else {
916 struct irq_info *info = info_for_irq(irq); 909 struct irq_info *info = info_for_irq(irq);
@@ -943,7 +936,7 @@ static int find_virq(unsigned int virq, unsigned int cpu)
943 int port, rc = -ENOENT; 936 int port, rc = -ENOENT;
944 937
945 memset(&status, 0, sizeof(status)); 938 memset(&status, 0, sizeof(status));
946 for (port = 0; port <= NR_EVENT_CHANNELS; port++) { 939 for (port = 0; port < xen_evtchn_max_channels(); port++) {
947 status.dom = DOMID_SELF; 940 status.dom = DOMID_SELF;
948 status.port = port; 941 status.port = port;
949 rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status); 942 rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
@@ -959,6 +952,19 @@ static int find_virq(unsigned int virq, unsigned int cpu)
959 return rc; 952 return rc;
960} 953}
961 954
955/**
956 * xen_evtchn_nr_channels - number of usable event channel ports
957 *
958 * This may be less than the maximum supported by the current
959 * hypervisor ABI. Use xen_evtchn_max_channels() for the maximum
960 * supported.
961 */
962unsigned xen_evtchn_nr_channels(void)
963{
964 return evtchn_ops->nr_channels();
965}
966EXPORT_SYMBOL_GPL(xen_evtchn_nr_channels);
967
962int bind_virq_to_irq(unsigned int virq, unsigned int cpu) 968int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
963{ 969{
964 struct evtchn_bind_virq bind_virq; 970 struct evtchn_bind_virq bind_virq;
@@ -989,7 +995,12 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
989 evtchn = ret; 995 evtchn = ret;
990 } 996 }
991 997
992 xen_irq_info_virq_init(cpu, irq, evtchn, virq); 998 ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
999 if (ret < 0) {
1000 __unbind_from_irq(irq);
1001 irq = ret;
1002 goto out;
1003 }
993 1004
994 bind_evtchn_to_cpu(evtchn, cpu); 1005 bind_evtchn_to_cpu(evtchn, cpu);
995 } else { 1006 } else {
@@ -1005,50 +1016,8 @@ out:
1005 1016
1006static void unbind_from_irq(unsigned int irq) 1017static void unbind_from_irq(unsigned int irq)
1007{ 1018{
1008 struct evtchn_close close;
1009 int evtchn = evtchn_from_irq(irq);
1010 struct irq_info *info = irq_get_handler_data(irq);
1011
1012 if (WARN_ON(!info))
1013 return;
1014
1015 mutex_lock(&irq_mapping_update_lock); 1019 mutex_lock(&irq_mapping_update_lock);
1016 1020 __unbind_from_irq(irq);
1017 if (info->refcnt > 0) {
1018 info->refcnt--;
1019 if (info->refcnt != 0)
1020 goto done;
1021 }
1022
1023 if (VALID_EVTCHN(evtchn)) {
1024 close.port = evtchn;
1025 if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
1026 BUG();
1027
1028 switch (type_from_irq(irq)) {
1029 case IRQT_VIRQ:
1030 per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
1031 [virq_from_irq(irq)] = -1;
1032 break;
1033 case IRQT_IPI:
1034 per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn))
1035 [ipi_from_irq(irq)] = -1;
1036 break;
1037 default:
1038 break;
1039 }
1040
1041 /* Closed ports are implicitly re-bound to VCPU0. */
1042 bind_evtchn_to_cpu(evtchn, 0);
1043
1044 evtchn_to_irq[evtchn] = -1;
1045 }
1046
1047 BUG_ON(info_for_irq(irq)->type == IRQT_UNBOUND);
1048
1049 xen_free_irq(irq);
1050
1051 done:
1052 mutex_unlock(&irq_mapping_update_lock); 1021 mutex_unlock(&irq_mapping_update_lock);
1053} 1022}
1054 1023
@@ -1148,9 +1117,26 @@ void unbind_from_irqhandler(unsigned int irq, void *dev_id)
1148} 1117}
1149EXPORT_SYMBOL_GPL(unbind_from_irqhandler); 1118EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
1150 1119
1120/**
1121 * xen_set_irq_priority() - set an event channel priority.
1122 * @irq:irq bound to an event channel.
1123 * @priority: priority between XEN_IRQ_PRIORITY_MAX and XEN_IRQ_PRIORITY_MIN.
1124 */
1125int xen_set_irq_priority(unsigned irq, unsigned priority)
1126{
1127 struct evtchn_set_priority set_priority;
1128
1129 set_priority.port = evtchn_from_irq(irq);
1130 set_priority.priority = priority;
1131
1132 return HYPERVISOR_event_channel_op(EVTCHNOP_set_priority,
1133 &set_priority);
1134}
1135EXPORT_SYMBOL_GPL(xen_set_irq_priority);
1136
1151int evtchn_make_refcounted(unsigned int evtchn) 1137int evtchn_make_refcounted(unsigned int evtchn)
1152{ 1138{
1153 int irq = evtchn_to_irq[evtchn]; 1139 int irq = get_evtchn_to_irq(evtchn);
1154 struct irq_info *info; 1140 struct irq_info *info;
1155 1141
1156 if (irq == -1) 1142 if (irq == -1)
@@ -1175,12 +1161,12 @@ int evtchn_get(unsigned int evtchn)
1175 struct irq_info *info; 1161 struct irq_info *info;
1176 int err = -ENOENT; 1162 int err = -ENOENT;
1177 1163
1178 if (evtchn >= NR_EVENT_CHANNELS) 1164 if (evtchn >= xen_evtchn_max_channels())
1179 return -EINVAL; 1165 return -EINVAL;
1180 1166
1181 mutex_lock(&irq_mapping_update_lock); 1167 mutex_lock(&irq_mapping_update_lock);
1182 1168
1183 irq = evtchn_to_irq[evtchn]; 1169 irq = get_evtchn_to_irq(evtchn);
1184 if (irq == -1) 1170 if (irq == -1)
1185 goto done; 1171 goto done;
1186 1172
@@ -1204,7 +1190,7 @@ EXPORT_SYMBOL_GPL(evtchn_get);
1204 1190
1205void evtchn_put(unsigned int evtchn) 1191void evtchn_put(unsigned int evtchn)
1206{ 1192{
1207 int irq = evtchn_to_irq[evtchn]; 1193 int irq = get_evtchn_to_irq(evtchn);
1208 if (WARN_ON(irq == -1)) 1194 if (WARN_ON(irq == -1))
1209 return; 1195 return;
1210 unbind_from_irq(irq); 1196 unbind_from_irq(irq);
@@ -1228,222 +1214,21 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
1228 notify_remote_via_irq(irq); 1214 notify_remote_via_irq(irq);
1229} 1215}
1230 1216
1231irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
1232{
1233 struct shared_info *sh = HYPERVISOR_shared_info;
1234 int cpu = smp_processor_id();
1235 xen_ulong_t *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
1236 int i;
1237 unsigned long flags;
1238 static DEFINE_SPINLOCK(debug_lock);
1239 struct vcpu_info *v;
1240
1241 spin_lock_irqsave(&debug_lock, flags);
1242
1243 printk("\nvcpu %d\n ", cpu);
1244
1245 for_each_online_cpu(i) {
1246 int pending;
1247 v = per_cpu(xen_vcpu, i);
1248 pending = (get_irq_regs() && i == cpu)
1249 ? xen_irqs_disabled(get_irq_regs())
1250 : v->evtchn_upcall_mask;
1251 printk("%d: masked=%d pending=%d event_sel %0*"PRI_xen_ulong"\n ", i,
1252 pending, v->evtchn_upcall_pending,
1253 (int)(sizeof(v->evtchn_pending_sel)*2),
1254 v->evtchn_pending_sel);
1255 }
1256 v = per_cpu(xen_vcpu, cpu);
1257
1258 printk("\npending:\n ");
1259 for (i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--)
1260 printk("%0*"PRI_xen_ulong"%s",
1261 (int)sizeof(sh->evtchn_pending[0])*2,
1262 sh->evtchn_pending[i],
1263 i % 8 == 0 ? "\n " : " ");
1264 printk("\nglobal mask:\n ");
1265 for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
1266 printk("%0*"PRI_xen_ulong"%s",
1267 (int)(sizeof(sh->evtchn_mask[0])*2),
1268 sh->evtchn_mask[i],
1269 i % 8 == 0 ? "\n " : " ");
1270
1271 printk("\nglobally unmasked:\n ");
1272 for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
1273 printk("%0*"PRI_xen_ulong"%s",
1274 (int)(sizeof(sh->evtchn_mask[0])*2),
1275 sh->evtchn_pending[i] & ~sh->evtchn_mask[i],
1276 i % 8 == 0 ? "\n " : " ");
1277
1278 printk("\nlocal cpu%d mask:\n ", cpu);
1279 for (i = (NR_EVENT_CHANNELS/BITS_PER_EVTCHN_WORD)-1; i >= 0; i--)
1280 printk("%0*"PRI_xen_ulong"%s", (int)(sizeof(cpu_evtchn[0])*2),
1281 cpu_evtchn[i],
1282 i % 8 == 0 ? "\n " : " ");
1283
1284 printk("\nlocally unmasked:\n ");
1285 for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) {
1286 xen_ulong_t pending = sh->evtchn_pending[i]
1287 & ~sh->evtchn_mask[i]
1288 & cpu_evtchn[i];
1289 printk("%0*"PRI_xen_ulong"%s",
1290 (int)(sizeof(sh->evtchn_mask[0])*2),
1291 pending, i % 8 == 0 ? "\n " : " ");
1292 }
1293
1294 printk("\npending list:\n");
1295 for (i = 0; i < NR_EVENT_CHANNELS; i++) {
1296 if (sync_test_bit(i, BM(sh->evtchn_pending))) {
1297 int word_idx = i / BITS_PER_EVTCHN_WORD;
1298 printk(" %d: event %d -> irq %d%s%s%s\n",
1299 cpu_from_evtchn(i), i,
1300 evtchn_to_irq[i],
1301 sync_test_bit(word_idx, BM(&v->evtchn_pending_sel))
1302 ? "" : " l2-clear",
1303 !sync_test_bit(i, BM(sh->evtchn_mask))
1304 ? "" : " globally-masked",
1305 sync_test_bit(i, BM(cpu_evtchn))
1306 ? "" : " locally-masked");
1307 }
1308 }
1309
1310 spin_unlock_irqrestore(&debug_lock, flags);
1311
1312 return IRQ_HANDLED;
1313}
1314
1315static DEFINE_PER_CPU(unsigned, xed_nesting_count); 1217static DEFINE_PER_CPU(unsigned, xed_nesting_count);
1316static DEFINE_PER_CPU(unsigned int, current_word_idx);
1317static DEFINE_PER_CPU(unsigned int, current_bit_idx);
1318
1319/*
1320 * Mask out the i least significant bits of w
1321 */
1322#define MASK_LSBS(w, i) (w & ((~((xen_ulong_t)0UL)) << i))
1323 1218
1324/*
1325 * Search the CPUs pending events bitmasks. For each one found, map
1326 * the event number to an irq, and feed it into do_IRQ() for
1327 * handling.
1328 *
1329 * Xen uses a two-level bitmap to speed searching. The first level is
1330 * a bitset of words which contain pending event bits. The second
1331 * level is a bitset of pending events themselves.
1332 */
1333static void __xen_evtchn_do_upcall(void) 1219static void __xen_evtchn_do_upcall(void)
1334{ 1220{
1335 int start_word_idx, start_bit_idx;
1336 int word_idx, bit_idx;
1337 int i, irq;
1338 int cpu = get_cpu();
1339 struct shared_info *s = HYPERVISOR_shared_info;
1340 struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); 1221 struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
1222 int cpu = get_cpu();
1341 unsigned count; 1223 unsigned count;
1342 1224
1343 do { 1225 do {
1344 xen_ulong_t pending_words;
1345 xen_ulong_t pending_bits;
1346 struct irq_desc *desc;
1347
1348 vcpu_info->evtchn_upcall_pending = 0; 1226 vcpu_info->evtchn_upcall_pending = 0;
1349 1227
1350 if (__this_cpu_inc_return(xed_nesting_count) - 1) 1228 if (__this_cpu_inc_return(xed_nesting_count) - 1)
1351 goto out; 1229 goto out;
1352 1230
1353 /* 1231 xen_evtchn_handle_events(cpu);
1354 * Master flag must be cleared /before/ clearing
1355 * selector flag. xchg_xen_ulong must contain an
1356 * appropriate barrier.
1357 */
1358 if ((irq = per_cpu(virq_to_irq, cpu)[VIRQ_TIMER]) != -1) {
1359 int evtchn = evtchn_from_irq(irq);
1360 word_idx = evtchn / BITS_PER_LONG;
1361 pending_bits = evtchn % BITS_PER_LONG;
1362 if (active_evtchns(cpu, s, word_idx) & (1ULL << pending_bits)) {
1363 desc = irq_to_desc(irq);
1364 if (desc)
1365 generic_handle_irq_desc(irq, desc);
1366 }
1367 }
1368
1369 pending_words = xchg_xen_ulong(&vcpu_info->evtchn_pending_sel, 0);
1370
1371 start_word_idx = __this_cpu_read(current_word_idx);
1372 start_bit_idx = __this_cpu_read(current_bit_idx);
1373
1374 word_idx = start_word_idx;
1375
1376 for (i = 0; pending_words != 0; i++) {
1377 xen_ulong_t words;
1378
1379 words = MASK_LSBS(pending_words, word_idx);
1380
1381 /*
1382 * If we masked out all events, wrap to beginning.
1383 */
1384 if (words == 0) {
1385 word_idx = 0;
1386 bit_idx = 0;
1387 continue;
1388 }
1389 word_idx = EVTCHN_FIRST_BIT(words);
1390
1391 pending_bits = active_evtchns(cpu, s, word_idx);
1392 bit_idx = 0; /* usually scan entire word from start */
1393 /*
1394 * We scan the starting word in two parts.
1395 *
1396 * 1st time: start in the middle, scanning the
1397 * upper bits.
1398 *
1399 * 2nd time: scan the whole word (not just the
1400 * parts skipped in the first pass) -- if an
1401 * event in the previously scanned bits is
1402 * pending again it would just be scanned on
1403 * the next loop anyway.
1404 */
1405 if (word_idx == start_word_idx) {
1406 if (i == 0)
1407 bit_idx = start_bit_idx;
1408 }
1409
1410 do {
1411 xen_ulong_t bits;
1412 int port;
1413
1414 bits = MASK_LSBS(pending_bits, bit_idx);
1415
1416 /* If we masked out all events, move on. */
1417 if (bits == 0)
1418 break;
1419
1420 bit_idx = EVTCHN_FIRST_BIT(bits);
1421
1422 /* Process port. */
1423 port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx;
1424 irq = evtchn_to_irq[port];
1425
1426 if (irq != -1) {
1427 desc = irq_to_desc(irq);
1428 if (desc)
1429 generic_handle_irq_desc(irq, desc);
1430 }
1431
1432 bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD;
1433
1434 /* Next caller starts at last processed + 1 */
1435 __this_cpu_write(current_word_idx,
1436 bit_idx ? word_idx :
1437 (word_idx+1) % BITS_PER_EVTCHN_WORD);
1438 __this_cpu_write(current_bit_idx, bit_idx);
1439 } while (bit_idx != 0);
1440
1441 /* Scan start_l1i twice; all others once. */
1442 if ((word_idx != start_word_idx) || (i != 0))
1443 pending_words &= ~(1UL << word_idx);
1444
1445 word_idx = (word_idx + 1) % BITS_PER_EVTCHN_WORD;
1446 }
1447 1232
1448 BUG_ON(!irqs_disabled()); 1233 BUG_ON(!irqs_disabled());
1449 1234
@@ -1492,12 +1277,12 @@ void rebind_evtchn_irq(int evtchn, int irq)
1492 mutex_lock(&irq_mapping_update_lock); 1277 mutex_lock(&irq_mapping_update_lock);
1493 1278
1494 /* After resume the irq<->evtchn mappings are all cleared out */ 1279 /* After resume the irq<->evtchn mappings are all cleared out */
1495 BUG_ON(evtchn_to_irq[evtchn] != -1); 1280 BUG_ON(get_evtchn_to_irq(evtchn) != -1);
1496 /* Expect irq to have been bound before, 1281 /* Expect irq to have been bound before,
1497 so there should be a proper type */ 1282 so there should be a proper type */
1498 BUG_ON(info->type == IRQT_UNBOUND); 1283 BUG_ON(info->type == IRQT_UNBOUND);
1499 1284
1500 xen_irq_info_evtchn_init(irq, evtchn); 1285 (void)xen_irq_info_evtchn_setup(irq, evtchn);
1501 1286
1502 mutex_unlock(&irq_mapping_update_lock); 1287 mutex_unlock(&irq_mapping_update_lock);
1503 1288
@@ -1511,7 +1296,6 @@ void rebind_evtchn_irq(int evtchn, int irq)
1511/* Rebind an evtchn so that it gets delivered to a specific cpu */ 1296/* Rebind an evtchn so that it gets delivered to a specific cpu */
1512static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) 1297static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
1513{ 1298{
1514 struct shared_info *s = HYPERVISOR_shared_info;
1515 struct evtchn_bind_vcpu bind_vcpu; 1299 struct evtchn_bind_vcpu bind_vcpu;
1516 int evtchn = evtchn_from_irq(irq); 1300 int evtchn = evtchn_from_irq(irq);
1517 int masked; 1301 int masked;
@@ -1534,7 +1318,7 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
1534 * Mask the event while changing the VCPU binding to prevent 1318 * Mask the event while changing the VCPU binding to prevent
1535 * it being delivered on an unexpected VCPU. 1319 * it being delivered on an unexpected VCPU.
1536 */ 1320 */
1537 masked = sync_test_and_set_bit(evtchn, BM(s->evtchn_mask)); 1321 masked = test_and_set_mask(evtchn);
1538 1322
1539 /* 1323 /*
1540 * If this fails, it usually just indicates that we're dealing with a 1324 * If this fails, it usually just indicates that we're dealing with a
@@ -1558,22 +1342,26 @@ static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
1558 return rebind_irq_to_cpu(data->irq, tcpu); 1342 return rebind_irq_to_cpu(data->irq, tcpu);
1559} 1343}
1560 1344
1561int resend_irq_on_evtchn(unsigned int irq) 1345static int retrigger_evtchn(int evtchn)
1562{ 1346{
1563 int masked, evtchn = evtchn_from_irq(irq); 1347 int masked;
1564 struct shared_info *s = HYPERVISOR_shared_info;
1565 1348
1566 if (!VALID_EVTCHN(evtchn)) 1349 if (!VALID_EVTCHN(evtchn))
1567 return 1; 1350 return 0;
1568 1351
1569 masked = sync_test_and_set_bit(evtchn, BM(s->evtchn_mask)); 1352 masked = test_and_set_mask(evtchn);
1570 sync_set_bit(evtchn, BM(s->evtchn_pending)); 1353 set_evtchn(evtchn);
1571 if (!masked) 1354 if (!masked)
1572 unmask_evtchn(evtchn); 1355 unmask_evtchn(evtchn);
1573 1356
1574 return 1; 1357 return 1;
1575} 1358}
1576 1359
1360int resend_irq_on_evtchn(unsigned int irq)
1361{
1362 return retrigger_evtchn(evtchn_from_irq(irq));
1363}
1364
1577static void enable_dynirq(struct irq_data *data) 1365static void enable_dynirq(struct irq_data *data)
1578{ 1366{
1579 int evtchn = evtchn_from_irq(data->irq); 1367 int evtchn = evtchn_from_irq(data->irq);
@@ -1608,21 +1396,7 @@ static void mask_ack_dynirq(struct irq_data *data)
1608 1396
1609static int retrigger_dynirq(struct irq_data *data) 1397static int retrigger_dynirq(struct irq_data *data)
1610{ 1398{
1611 int evtchn = evtchn_from_irq(data->irq); 1399 return retrigger_evtchn(evtchn_from_irq(data->irq));
1612 struct shared_info *sh = HYPERVISOR_shared_info;
1613 int ret = 0;
1614
1615 if (VALID_EVTCHN(evtchn)) {
1616 int masked;
1617
1618 masked = sync_test_and_set_bit(evtchn, BM(sh->evtchn_mask));
1619 sync_set_bit(evtchn, BM(sh->evtchn_pending));
1620 if (!masked)
1621 unmask_evtchn(evtchn);
1622 ret = 1;
1623 }
1624
1625 return ret;
1626} 1400}
1627 1401
1628static void restore_pirqs(void) 1402static void restore_pirqs(void)
@@ -1683,7 +1457,7 @@ static void restore_cpu_virqs(unsigned int cpu)
1683 evtchn = bind_virq.port; 1457 evtchn = bind_virq.port;
1684 1458
1685 /* Record the new mapping. */ 1459 /* Record the new mapping. */
1686 xen_irq_info_virq_init(cpu, irq, evtchn, virq); 1460 (void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
1687 bind_evtchn_to_cpu(evtchn, cpu); 1461 bind_evtchn_to_cpu(evtchn, cpu);
1688 } 1462 }
1689} 1463}
@@ -1707,7 +1481,7 @@ static void restore_cpu_ipis(unsigned int cpu)
1707 evtchn = bind_ipi.port; 1481 evtchn = bind_ipi.port;
1708 1482
1709 /* Record the new mapping. */ 1483 /* Record the new mapping. */
1710 xen_irq_info_ipi_init(cpu, irq, evtchn, ipi); 1484 (void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
1711 bind_evtchn_to_cpu(evtchn, cpu); 1485 bind_evtchn_to_cpu(evtchn, cpu);
1712 } 1486 }
1713} 1487}
@@ -1784,21 +1558,18 @@ EXPORT_SYMBOL_GPL(xen_test_irq_shared);
1784 1558
1785void xen_irq_resume(void) 1559void xen_irq_resume(void)
1786{ 1560{
1787 unsigned int cpu, evtchn; 1561 unsigned int cpu;
1788 struct irq_info *info; 1562 struct irq_info *info;
1789 1563
1790 init_evtchn_cpu_bindings();
1791
1792 /* New event-channel space is not 'live' yet. */ 1564 /* New event-channel space is not 'live' yet. */
1793 for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) 1565 xen_evtchn_mask_all();
1794 mask_evtchn(evtchn); 1566 xen_evtchn_resume();
1795 1567
1796 /* No IRQ <-> event-channel mappings. */ 1568 /* No IRQ <-> event-channel mappings. */
1797 list_for_each_entry(info, &xen_irq_list_head, list) 1569 list_for_each_entry(info, &xen_irq_list_head, list)
1798 info->evtchn = 0; /* zap event-channel binding */ 1570 info->evtchn = 0; /* zap event-channel binding */
1799 1571
1800 for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) 1572 clear_evtchn_to_irq_all();
1801 evtchn_to_irq[evtchn] = -1;
1802 1573
1803 for_each_possible_cpu(cpu) { 1574 for_each_possible_cpu(cpu) {
1804 restore_cpu_virqs(cpu); 1575 restore_cpu_virqs(cpu);
@@ -1889,27 +1660,40 @@ void xen_callback_vector(void)
1889void xen_callback_vector(void) {} 1660void xen_callback_vector(void) {}
1890#endif 1661#endif
1891 1662
1663#undef MODULE_PARAM_PREFIX
1664#define MODULE_PARAM_PREFIX "xen."
1665
1666static bool fifo_events = true;
1667module_param(fifo_events, bool, 0);
1668
1892void __init xen_init_IRQ(void) 1669void __init xen_init_IRQ(void)
1893{ 1670{
1894 int i; 1671 int ret = -EINVAL;
1895 1672
1896 evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq), 1673 if (fifo_events)
1897 GFP_KERNEL); 1674 ret = xen_evtchn_fifo_init();
1898 BUG_ON(!evtchn_to_irq); 1675 if (ret < 0)
1899 for (i = 0; i < NR_EVENT_CHANNELS; i++) 1676 xen_evtchn_2l_init();
1900 evtchn_to_irq[i] = -1;
1901 1677
1902 init_evtchn_cpu_bindings(); 1678 evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()),
1679 sizeof(*evtchn_to_irq), GFP_KERNEL);
1680 BUG_ON(!evtchn_to_irq);
1903 1681
1904 /* No event channels are 'live' right now. */ 1682 /* No event channels are 'live' right now. */
1905 for (i = 0; i < NR_EVENT_CHANNELS; i++) 1683 xen_evtchn_mask_all();
1906 mask_evtchn(i);
1907 1684
1908 pirq_needs_eoi = pirq_needs_eoi_flag; 1685 pirq_needs_eoi = pirq_needs_eoi_flag;
1909 1686
1910#ifdef CONFIG_X86 1687#ifdef CONFIG_X86
1911 if (xen_hvm_domain()) { 1688 if (xen_pv_domain()) {
1689 irq_ctx_init(smp_processor_id());
1690 if (xen_initial_domain())
1691 pci_xen_initial_domain();
1692 }
1693 if (xen_feature(XENFEAT_hvm_callback_vector))
1912 xen_callback_vector(); 1694 xen_callback_vector();
1695
1696 if (xen_hvm_domain()) {
1913 native_init_IRQ(); 1697 native_init_IRQ();
1914 /* pci_xen_hvm_init must be called after native_init_IRQ so that 1698 /* pci_xen_hvm_init must be called after native_init_IRQ so that
1915 * __acpi_register_gsi can point at the right function */ 1699 * __acpi_register_gsi can point at the right function */
@@ -1918,13 +1702,10 @@ void __init xen_init_IRQ(void)
1918 int rc; 1702 int rc;
1919 struct physdev_pirq_eoi_gmfn eoi_gmfn; 1703 struct physdev_pirq_eoi_gmfn eoi_gmfn;
1920 1704
1921 irq_ctx_init(smp_processor_id());
1922 if (xen_initial_domain())
1923 pci_xen_initial_domain();
1924
1925 pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO); 1705 pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
1926 eoi_gmfn.gmfn = virt_to_mfn(pirq_eoi_map); 1706 eoi_gmfn.gmfn = virt_to_mfn(pirq_eoi_map);
1927 rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn); 1707 rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
1708 /* TODO: No PVH support for PIRQ EOI */
1928 if (rc != 0) { 1709 if (rc != 0) {
1929 free_page((unsigned long) pirq_eoi_map); 1710 free_page((unsigned long) pirq_eoi_map);
1930 pirq_eoi_map = NULL; 1711 pirq_eoi_map = NULL;
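The net effect of the events_base.c changes is that every per-port operation now dispatches through the evtchn_ops pointer (the structure itself lives in events_internal.h), filled in once at init: xen_init_IRQ() tries the FIFO ABI first, gated by the new xen.fifo_events module parameter, and falls back to the 2-level ABI if that fails. The userspace sketch below shows only this selection pattern; the ops bodies and the stubbed-out FIFO init standing in for the real EVTCHNOP_init_control negotiation are illustrative, not the kernel functions.

/* Sketch of the evtchn_ops indirection and ABI selection. */
#include <stdio.h>

struct evtchn_ops {
        unsigned (*max_channels)(void);
        void (*mask)(unsigned port);
        void (*unmask)(unsigned port);
};

static unsigned fifo_max(void)  { return 1u << 17; }
static unsigned twol_max(void)  { return 4096; }   /* 64-bit 2-level limit */
static void stub_mask(unsigned port)   { (void)port; }
static void stub_unmask(unsigned port) { (void)port; }

static const struct evtchn_ops ops_fifo = { fifo_max, stub_mask, stub_unmask };
static const struct evtchn_ops ops_2l   = { twol_max, stub_mask, stub_unmask };

static const struct evtchn_ops *evtchn_ops;

/* Pretend the hypervisor rejected the FIFO ABI (e.g. an older Xen). */
static int xen_evtchn_fifo_init(void) { return -1; }

int main(void)
{
        int fifo_events = 1;    /* mirrors the "xen.fifo_events" parameter */
        int ret = -1;

        if (fifo_events)
                ret = xen_evtchn_fifo_init();
        if (ret < 0)
                evtchn_ops = &ops_2l;   /* fallback: xen_evtchn_2l_init() */
        else
                evtchn_ops = &ops_fifo;

        printf("max channels: %u\n", evtchn_ops->max_channels());
        return 0;
}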
diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c
new file mode 100644
index 000000000000..1de2a191b395
--- /dev/null
+++ b/drivers/xen/events/events_fifo.c
@@ -0,0 +1,428 @@
1/*
2 * Xen event channels (FIFO-based ABI)
3 *
4 * Copyright (C) 2013 Citrix Systems R&D ltd.
5 *
6 * This source code is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
10 *
11 * Or, when distributed separately from the Linux kernel or
12 * incorporated into other software packages, subject to the following
13 * license:
14 *
15 * Permission is hereby granted, free of charge, to any person obtaining a copy
16 * of this source file (the "Software"), to deal in the Software without
17 * restriction, including without limitation the rights to use, copy, modify,
18 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
19 * and to permit persons to whom the Software is furnished to do so, subject to
20 * the following conditions:
21 *
22 * The above copyright notice and this permission notice shall be included in
23 * all copies or substantial portions of the Software.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
31 * IN THE SOFTWARE.
32 */
33
34#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
35
36#include <linux/linkage.h>
37#include <linux/interrupt.h>
38#include <linux/irq.h>
39#include <linux/module.h>
40#include <linux/smp.h>
41#include <linux/percpu.h>
42#include <linux/cpu.h>
43
44#include <asm/sync_bitops.h>
45#include <asm/xen/hypercall.h>
46#include <asm/xen/hypervisor.h>
47#include <asm/xen/page.h>
48
49#include <xen/xen.h>
50#include <xen/xen-ops.h>
51#include <xen/events.h>
52#include <xen/interface/xen.h>
53#include <xen/interface/event_channel.h>
54
55#include "events_internal.h"
56
57#define EVENT_WORDS_PER_PAGE (PAGE_SIZE / sizeof(event_word_t))
58#define MAX_EVENT_ARRAY_PAGES (EVTCHN_FIFO_NR_CHANNELS / EVENT_WORDS_PER_PAGE)
59
60struct evtchn_fifo_queue {
61 uint32_t head[EVTCHN_FIFO_MAX_QUEUES];
62};
63
64static DEFINE_PER_CPU(struct evtchn_fifo_control_block *, cpu_control_block);
65static DEFINE_PER_CPU(struct evtchn_fifo_queue, cpu_queue);
66static event_word_t *event_array[MAX_EVENT_ARRAY_PAGES] __read_mostly;
67static unsigned event_array_pages __read_mostly;
68
69#define BM(w) ((unsigned long *)(w))
70
71static inline event_word_t *event_word_from_port(unsigned port)
72{
73 unsigned i = port / EVENT_WORDS_PER_PAGE;
74
75 return event_array[i] + port % EVENT_WORDS_PER_PAGE;
76}
77
78static unsigned evtchn_fifo_max_channels(void)
79{
80 return EVTCHN_FIFO_NR_CHANNELS;
81}
82
83static unsigned evtchn_fifo_nr_channels(void)
84{
85 return event_array_pages * EVENT_WORDS_PER_PAGE;
86}
87
88static void free_unused_array_pages(void)
89{
90 unsigned i;
91
92 for (i = event_array_pages; i < MAX_EVENT_ARRAY_PAGES; i++) {
93 if (!event_array[i])
94 break;
95 free_page((unsigned long)event_array[i]);
96 event_array[i] = NULL;
97 }
98}
99
100static void init_array_page(event_word_t *array_page)
101{
102 unsigned i;
103
104 for (i = 0; i < EVENT_WORDS_PER_PAGE; i++)
105 array_page[i] = 1 << EVTCHN_FIFO_MASKED;
106}
107
108static int evtchn_fifo_setup(struct irq_info *info)
109{
110 unsigned port = info->evtchn;
111 unsigned new_array_pages;
112 int ret;
113
114 new_array_pages = port / EVENT_WORDS_PER_PAGE + 1;
115
116 if (new_array_pages > MAX_EVENT_ARRAY_PAGES)
117 return -EINVAL;
118
119 while (event_array_pages < new_array_pages) {
120 void *array_page;
121 struct evtchn_expand_array expand_array;
122
123 /* Might already have a page if we've resumed. */
124 array_page = event_array[event_array_pages];
125 if (!array_page) {
126 array_page = (void *)__get_free_page(GFP_KERNEL);
127 if (array_page == NULL) {
128 ret = -ENOMEM;
129 goto error;
130 }
131 event_array[event_array_pages] = array_page;
132 }
133
134 /* Mask all events in this page before adding it. */
135 init_array_page(array_page);
136
137 expand_array.array_gfn = virt_to_mfn(array_page);
138
139 ret = HYPERVISOR_event_channel_op(EVTCHNOP_expand_array, &expand_array);
140 if (ret < 0)
141 goto error;
142
143 event_array_pages++;
144 }
145 return 0;
146
147 error:
148 if (event_array_pages == 0)
149 panic("xen: unable to expand event array with initial page (%d)\n", ret);
150 else
151 pr_err("unable to expand event array (%d)\n", ret);
152 free_unused_array_pages();
153 return ret;
154}
155
156static void evtchn_fifo_bind_to_cpu(struct irq_info *info, unsigned cpu)
157{
158 /* no-op */
159}
160
161static void evtchn_fifo_clear_pending(unsigned port)
162{
163 event_word_t *word = event_word_from_port(port);
164 sync_clear_bit(EVTCHN_FIFO_PENDING, BM(word));
165}
166
167static void evtchn_fifo_set_pending(unsigned port)
168{
169 event_word_t *word = event_word_from_port(port);
170 sync_set_bit(EVTCHN_FIFO_PENDING, BM(word));
171}
172
173static bool evtchn_fifo_is_pending(unsigned port)
174{
175 event_word_t *word = event_word_from_port(port);
176 return sync_test_bit(EVTCHN_FIFO_PENDING, BM(word));
177}
178
179static bool evtchn_fifo_test_and_set_mask(unsigned port)
180{
181 event_word_t *word = event_word_from_port(port);
182 return sync_test_and_set_bit(EVTCHN_FIFO_MASKED, BM(word));
183}
184
185static void evtchn_fifo_mask(unsigned port)
186{
187 event_word_t *word = event_word_from_port(port);
188 sync_set_bit(EVTCHN_FIFO_MASKED, BM(word));
189}
190
191/*
192 * Clear MASKED, spinning if BUSY is set.
193 */
194static void clear_masked(volatile event_word_t *word)
195{
196 event_word_t new, old, w;
197
198 w = *word;
199
200 do {
201 old = w & ~(1 << EVTCHN_FIFO_BUSY);
202 new = old & ~(1 << EVTCHN_FIFO_MASKED);
203 w = sync_cmpxchg(word, old, new);
204 } while (w != old);
205}
206
207static void evtchn_fifo_unmask(unsigned port)
208{
209 event_word_t *word = event_word_from_port(port);
210
211 BUG_ON(!irqs_disabled());
212
213 clear_masked(word);
214 if (sync_test_bit(EVTCHN_FIFO_PENDING, BM(word))) {
215 struct evtchn_unmask unmask = { .port = port };
216 (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
217 }
218}
219
220static uint32_t clear_linked(volatile event_word_t *word)
221{
222 event_word_t new, old, w;
223
224 w = *word;
225
226 do {
227 old = w;
228 new = (w & ~((1 << EVTCHN_FIFO_LINKED)
229 | EVTCHN_FIFO_LINK_MASK));
230 } while ((w = sync_cmpxchg(word, old, new)) != old);
231
232 return w & EVTCHN_FIFO_LINK_MASK;
233}
234
235static void handle_irq_for_port(unsigned port)
236{
237 int irq;
238 struct irq_desc *desc;
239
240 irq = get_evtchn_to_irq(port);
241 if (irq != -1) {
242 desc = irq_to_desc(irq);
243 if (desc)
244 generic_handle_irq_desc(irq, desc);
245 }
246}
247
248static void consume_one_event(unsigned cpu,
249 struct evtchn_fifo_control_block *control_block,
250 unsigned priority, uint32_t *ready)
251{
252 struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
253 uint32_t head;
254 unsigned port;
255 event_word_t *word;
256
257 head = q->head[priority];
258
259 /*
260 * Reached the tail last time? Read the new HEAD from the
261 * control block.
262 */
263 if (head == 0) {
264 rmb(); /* Ensure word is up-to-date before reading head. */
265 head = control_block->head[priority];
266 }
267
268 port = head;
269 word = event_word_from_port(port);
270 head = clear_linked(word);
271
272 /*
273 * If the link is non-zero, there are more events in the
274 * queue, otherwise the queue is empty.
275 *
276 * If the queue is empty, clear this priority from our local
277 * copy of the ready word.
278 */
279 if (head == 0)
280 clear_bit(priority, BM(ready));
281
282 if (sync_test_bit(EVTCHN_FIFO_PENDING, BM(word))
283 && !sync_test_bit(EVTCHN_FIFO_MASKED, BM(word)))
284 handle_irq_for_port(port);
285
286 q->head[priority] = head;
287}
288
289static void evtchn_fifo_handle_events(unsigned cpu)
290{
291 struct evtchn_fifo_control_block *control_block;
292 uint32_t ready;
293 unsigned q;
294
295 control_block = per_cpu(cpu_control_block, cpu);
296
297 ready = xchg(&control_block->ready, 0);
298
299 while (ready) {
300 q = find_first_bit(BM(&ready), EVTCHN_FIFO_MAX_QUEUES);
301 consume_one_event(cpu, control_block, q, &ready);
302 ready |= xchg(&control_block->ready, 0);
303 }
304}
305
306static void evtchn_fifo_resume(void)
307{
308 unsigned cpu;
309
310 for_each_possible_cpu(cpu) {
311 void *control_block = per_cpu(cpu_control_block, cpu);
312 struct evtchn_init_control init_control;
313 int ret;
314
315 if (!control_block)
316 continue;
317
318 /*
319 * If this CPU is offline, take the opportunity to
320 * free the control block while it is not being
321 * used.
322 */
323 if (!cpu_online(cpu)) {
324 free_page((unsigned long)control_block);
325 per_cpu(cpu_control_block, cpu) = NULL;
326 continue;
327 }
328
329 init_control.control_gfn = virt_to_mfn(control_block);
330 init_control.offset = 0;
331 init_control.vcpu = cpu;
332
333 ret = HYPERVISOR_event_channel_op(EVTCHNOP_init_control,
334 &init_control);
335 if (ret < 0)
336 BUG();
337 }
338
339 /*
340 * The event array starts out as empty again and is extended
341 * as normal when events are bound. The existing pages will
342 * be reused.
343 */
344 event_array_pages = 0;
345}
346
347static const struct evtchn_ops evtchn_ops_fifo = {
348 .max_channels = evtchn_fifo_max_channels,
349 .nr_channels = evtchn_fifo_nr_channels,
350 .setup = evtchn_fifo_setup,
351 .bind_to_cpu = evtchn_fifo_bind_to_cpu,
352 .clear_pending = evtchn_fifo_clear_pending,
353 .set_pending = evtchn_fifo_set_pending,
354 .is_pending = evtchn_fifo_is_pending,
355 .test_and_set_mask = evtchn_fifo_test_and_set_mask,
356 .mask = evtchn_fifo_mask,
357 .unmask = evtchn_fifo_unmask,
358 .handle_events = evtchn_fifo_handle_events,
359 .resume = evtchn_fifo_resume,
360};
361
362static int evtchn_fifo_init_control_block(unsigned cpu)
363{
364 struct page *control_block = NULL;
365 struct evtchn_init_control init_control;
366 int ret = -ENOMEM;
367
368 control_block = alloc_page(GFP_KERNEL|__GFP_ZERO);
369 if (control_block == NULL)
370 goto error;
371
372 init_control.control_gfn = virt_to_mfn(page_address(control_block));
373 init_control.offset = 0;
374 init_control.vcpu = cpu;
375
376 ret = HYPERVISOR_event_channel_op(EVTCHNOP_init_control, &init_control);
377 if (ret < 0)
378 goto error;
379
380 per_cpu(cpu_control_block, cpu) = page_address(control_block);
381
382 return 0;
383
384 error:
385 __free_page(control_block);
386 return ret;
387}
388
389static int evtchn_fifo_cpu_notification(struct notifier_block *self,
390 unsigned long action,
391 void *hcpu)
392{
393 int cpu = (long)hcpu;
394 int ret = 0;
395
396 switch (action) {
397 case CPU_UP_PREPARE:
398 if (!per_cpu(cpu_control_block, cpu))
399 ret = evtchn_fifo_init_control_block(cpu);
400 break;
401 default:
402 break;
403 }
404 return ret < 0 ? NOTIFY_BAD : NOTIFY_OK;
405}
406
407static struct notifier_block evtchn_fifo_cpu_notifier = {
408 .notifier_call = evtchn_fifo_cpu_notification,
409};
410
411int __init xen_evtchn_fifo_init(void)
412{
413 int cpu = get_cpu();
414 int ret;
415
416 ret = evtchn_fifo_init_control_block(cpu);
417 if (ret < 0)
418 goto out;
419
420 pr_info("Using FIFO-based ABI\n");
421
422 evtchn_ops = &evtchn_ops_fifo;
423
424 register_cpu_notifier(&evtchn_fifo_cpu_notifier);
425out:
426 put_cpu();
427 return ret;
428}
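
A note on the cmpxchg loops above: the hypervisor may update LINKED/BUSY on the same 32-bit event word concurrently, so clear_linked() must strip the LINKED flag and the link field in one atomic step and hand back the old link as the next port in the queue. Below is a standalone userspace model of that update, not kernel code: C11 atomics stand in for sync_cmpxchg(), and the bit layout is copied from the FIFO ABI constants added to event_channel.h later in this diff.

/* Standalone model of the FIFO event-word update (assumption: C11 <stdatomic.h>
 * in place of the kernel's sync_cmpxchg()). */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define EVTCHN_FIFO_PENDING    31
#define EVTCHN_FIFO_LINKED     29
#define EVTCHN_FIFO_LINK_BITS  17
#define EVTCHN_FIFO_LINK_MASK  ((1u << EVTCHN_FIFO_LINK_BITS) - 1)

typedef _Atomic uint32_t event_word_t;

/* Clear LINKED and the link field in one atomic step; return the old link. */
static uint32_t clear_linked(event_word_t *word)
{
	uint32_t old = atomic_load(word);
	uint32_t new;

	do {
		new = old & ~((1u << EVTCHN_FIFO_LINKED) | EVTCHN_FIFO_LINK_MASK);
	} while (!atomic_compare_exchange_weak(word, &old, new));

	return old & EVTCHN_FIFO_LINK_MASK;
}

int main(void)
{
	/* Port is linked to port 7 and currently pending. */
	event_word_t w = (1u << EVTCHN_FIFO_PENDING) | (1u << EVTCHN_FIFO_LINKED) | 7;

	printf("next in queue: %u\n", (unsigned)clear_linked(&w));          /* 7 */
	printf("still pending: %d\n",
	       !!(atomic_load(&w) & (1u << EVTCHN_FIFO_PENDING)));          /* 1 */
	return 0;
}
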
diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h
new file mode 100644
index 000000000000..677f41a0fff9
--- /dev/null
+++ b/drivers/xen/events/events_internal.h
@@ -0,0 +1,150 @@
1/*
2 * Xen Event Channels (internal header)
3 *
4 * Copyright (C) 2013 Citrix Systems R&D Ltd.
5 *
6 * This source code is licensed under the GNU General Public License,
7 * Version 2 or later. See the file COPYING for more details.
8 */
9#ifndef __EVENTS_INTERNAL_H__
10#define __EVENTS_INTERNAL_H__
11
12/* Interrupt types. */
13enum xen_irq_type {
14 IRQT_UNBOUND = 0,
15 IRQT_PIRQ,
16 IRQT_VIRQ,
17 IRQT_IPI,
18 IRQT_EVTCHN
19};
20
21/*
22 * Packed IRQ information:
23 * type - enum xen_irq_type
24 * event channel - irq->event channel mapping
25 * cpu - cpu this event channel is bound to
26 * index - type-specific information:
 27 *    PIRQ - vector, with MSB being "needs EOI", or physical IRQ of the HVM
28 * guest, or GSI (real passthrough IRQ) of the device.
29 * VIRQ - virq number
30 * IPI - IPI vector
31 * EVTCHN -
32 */
33struct irq_info {
34 struct list_head list;
35 int refcnt;
36 enum xen_irq_type type; /* type */
37 unsigned irq;
38 unsigned int evtchn; /* event channel */
39 unsigned short cpu; /* cpu bound */
40
41 union {
42 unsigned short virq;
43 enum ipi_vector ipi;
44 struct {
45 unsigned short pirq;
46 unsigned short gsi;
47 unsigned char vector;
48 unsigned char flags;
49 uint16_t domid;
50 } pirq;
51 } u;
52};
53
54#define PIRQ_NEEDS_EOI (1 << 0)
55#define PIRQ_SHAREABLE (1 << 1)
56
57struct evtchn_ops {
58 unsigned (*max_channels)(void);
59 unsigned (*nr_channels)(void);
60
61 int (*setup)(struct irq_info *info);
62 void (*bind_to_cpu)(struct irq_info *info, unsigned cpu);
63
64 void (*clear_pending)(unsigned port);
65 void (*set_pending)(unsigned port);
66 bool (*is_pending)(unsigned port);
67 bool (*test_and_set_mask)(unsigned port);
68 void (*mask)(unsigned port);
69 void (*unmask)(unsigned port);
70
71 void (*handle_events)(unsigned cpu);
72 void (*resume)(void);
73};
74
75extern const struct evtchn_ops *evtchn_ops;
76
77extern int **evtchn_to_irq;
78int get_evtchn_to_irq(unsigned int evtchn);
79
80struct irq_info *info_for_irq(unsigned irq);
81unsigned cpu_from_irq(unsigned irq);
82unsigned cpu_from_evtchn(unsigned int evtchn);
83
84static inline unsigned xen_evtchn_max_channels(void)
85{
86 return evtchn_ops->max_channels();
87}
88
89/*
90 * Do any ABI specific setup for a bound event channel before it can
91 * be unmasked and used.
92 */
93static inline int xen_evtchn_port_setup(struct irq_info *info)
94{
95 if (evtchn_ops->setup)
96 return evtchn_ops->setup(info);
97 return 0;
98}
99
100static inline void xen_evtchn_port_bind_to_cpu(struct irq_info *info,
101 unsigned cpu)
102{
103 evtchn_ops->bind_to_cpu(info, cpu);
104}
105
106static inline void clear_evtchn(unsigned port)
107{
108 evtchn_ops->clear_pending(port);
109}
110
111static inline void set_evtchn(unsigned port)
112{
113 evtchn_ops->set_pending(port);
114}
115
116static inline bool test_evtchn(unsigned port)
117{
118 return evtchn_ops->is_pending(port);
119}
120
121static inline bool test_and_set_mask(unsigned port)
122{
123 return evtchn_ops->test_and_set_mask(port);
124}
125
126static inline void mask_evtchn(unsigned port)
127{
128 return evtchn_ops->mask(port);
129}
130
131static inline void unmask_evtchn(unsigned port)
132{
133 return evtchn_ops->unmask(port);
134}
135
136static inline void xen_evtchn_handle_events(unsigned cpu)
137{
138 return evtchn_ops->handle_events(cpu);
139}
140
141static inline void xen_evtchn_resume(void)
142{
143 if (evtchn_ops->resume)
144 evtchn_ops->resume();
145}
146
147void xen_evtchn_2l_init(void);
148int xen_evtchn_fifo_init(void);
149
150#endif /* #ifndef __EVENTS_INTERNAL_H__ */
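
The header above turns every port operation into an indirect call through evtchn_ops, so events_base.c stays ABI-agnostic and the 2-level or FIFO implementation is picked once at boot. A minimal standalone illustration of that pattern follows; the names are invented for the example, only the shape mirrors struct evtchn_ops.

#include <stdio.h>

/* Per-ABI operations, analogous to struct evtchn_ops. */
struct ops {
	unsigned (*max_channels)(void);
	void (*mask)(unsigned port);
};

static unsigned fifo_max(void) { return 1u << 17; }
static void fifo_mask(unsigned port) { printf("fifo: mask port %u\n", port); }

static unsigned twolevel_max(void) { return 4096; }
static void twolevel_mask(unsigned port) { printf("2l: mask port %u\n", port); }

static const struct ops ops_fifo = { fifo_max, fifo_mask };
static const struct ops ops_2l   = { twolevel_max, twolevel_mask };

/* Chosen once during init, like xen_evtchn_fifo_init() falling back to 2l. */
static const struct ops *ops;

int main(void)
{
	int fifo_supported = 1;	/* pretend EVTCHNOP_init_control succeeded */

	ops = fifo_supported ? &ops_fifo : &ops_2l;
	printf("max channels: %u\n", ops->max_channels());
	ops->mask(3);
	return 0;
}
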
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index 5de2063e16d3..00f40f051d95 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -417,7 +417,7 @@ static long evtchn_ioctl(struct file *file,
417 break; 417 break;
418 418
419 rc = -EINVAL; 419 rc = -EINVAL;
420 if (unbind.port >= NR_EVENT_CHANNELS) 420 if (unbind.port >= xen_evtchn_nr_channels())
421 break; 421 break;
422 422
423 rc = -ENOTCONN; 423 rc = -ENOTCONN;
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index e41c79c986ea..073b4a19a8b0 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -846,7 +846,7 @@ static int __init gntdev_init(void)
846 if (!xen_domain()) 846 if (!xen_domain())
847 return -ENODEV; 847 return -ENODEV;
848 848
849 use_ptemod = xen_pv_domain(); 849 use_ptemod = !xen_feature(XENFEAT_auto_translated_physmap);
850 850
851 err = misc_register(&gntdev_miscdev); 851 err = misc_register(&gntdev_miscdev);
852 if (err != 0) { 852 if (err != 0) {
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index aa846a48f400..1ce1c40331f3 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -62,12 +62,10 @@
62 62
63static grant_ref_t **gnttab_list; 63static grant_ref_t **gnttab_list;
64static unsigned int nr_grant_frames; 64static unsigned int nr_grant_frames;
65static unsigned int boot_max_nr_grant_frames;
66static int gnttab_free_count; 65static int gnttab_free_count;
67static grant_ref_t gnttab_free_head; 66static grant_ref_t gnttab_free_head;
68static DEFINE_SPINLOCK(gnttab_list_lock); 67static DEFINE_SPINLOCK(gnttab_list_lock);
69unsigned long xen_hvm_resume_frames; 68struct grant_frames xen_auto_xlat_grant_frames;
70EXPORT_SYMBOL_GPL(xen_hvm_resume_frames);
71 69
72static union { 70static union {
73 struct grant_entry_v1 *v1; 71 struct grant_entry_v1 *v1;
@@ -827,6 +825,11 @@ static unsigned int __max_nr_grant_frames(void)
827unsigned int gnttab_max_grant_frames(void) 825unsigned int gnttab_max_grant_frames(void)
828{ 826{
829 unsigned int xen_max = __max_nr_grant_frames(); 827 unsigned int xen_max = __max_nr_grant_frames();
828 static unsigned int boot_max_nr_grant_frames;
829
830 /* First time, initialize it properly. */
831 if (!boot_max_nr_grant_frames)
832 boot_max_nr_grant_frames = __max_nr_grant_frames();
830 833
831 if (xen_max > boot_max_nr_grant_frames) 834 if (xen_max > boot_max_nr_grant_frames)
832 return boot_max_nr_grant_frames; 835 return boot_max_nr_grant_frames;
@@ -834,6 +837,51 @@ unsigned int gnttab_max_grant_frames(void)
834} 837}
835EXPORT_SYMBOL_GPL(gnttab_max_grant_frames); 838EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
836 839
840int gnttab_setup_auto_xlat_frames(unsigned long addr)
841{
842 xen_pfn_t *pfn;
843 unsigned int max_nr_gframes = __max_nr_grant_frames();
844 unsigned int i;
845 void *vaddr;
846
847 if (xen_auto_xlat_grant_frames.count)
848 return -EINVAL;
849
850 vaddr = xen_remap(addr, PAGE_SIZE * max_nr_gframes);
851 if (vaddr == NULL) {
852 pr_warn("Failed to ioremap gnttab share frames (addr=0x%08lx)!\n",
853 addr);
854 return -ENOMEM;
855 }
856 pfn = kcalloc(max_nr_gframes, sizeof(pfn[0]), GFP_KERNEL);
857 if (!pfn) {
858 xen_unmap(vaddr);
859 return -ENOMEM;
860 }
861 for (i = 0; i < max_nr_gframes; i++)
862 pfn[i] = PFN_DOWN(addr) + i;
863
864 xen_auto_xlat_grant_frames.vaddr = vaddr;
865 xen_auto_xlat_grant_frames.pfn = pfn;
866 xen_auto_xlat_grant_frames.count = max_nr_gframes;
867
868 return 0;
869}
870EXPORT_SYMBOL_GPL(gnttab_setup_auto_xlat_frames);
871
872void gnttab_free_auto_xlat_frames(void)
873{
874 if (!xen_auto_xlat_grant_frames.count)
875 return;
876 kfree(xen_auto_xlat_grant_frames.pfn);
877 xen_unmap(xen_auto_xlat_grant_frames.vaddr);
878
879 xen_auto_xlat_grant_frames.pfn = NULL;
880 xen_auto_xlat_grant_frames.count = 0;
881 xen_auto_xlat_grant_frames.vaddr = NULL;
882}
883EXPORT_SYMBOL_GPL(gnttab_free_auto_xlat_frames);
884
837/* Handling of paged out grant targets (GNTST_eagain) */ 885/* Handling of paged out grant targets (GNTST_eagain) */
838#define MAX_DELAY 256 886#define MAX_DELAY 256
839static inline void 887static inline void
@@ -1060,10 +1108,11 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
1060 unsigned int nr_gframes = end_idx + 1; 1108 unsigned int nr_gframes = end_idx + 1;
1061 int rc; 1109 int rc;
1062 1110
1063 if (xen_hvm_domain()) { 1111 if (xen_feature(XENFEAT_auto_translated_physmap)) {
1064 struct xen_add_to_physmap xatp; 1112 struct xen_add_to_physmap xatp;
1065 unsigned int i = end_idx; 1113 unsigned int i = end_idx;
1066 rc = 0; 1114 rc = 0;
1115 BUG_ON(xen_auto_xlat_grant_frames.count < nr_gframes);
1067 /* 1116 /*
1068 * Loop backwards, so that the first hypercall has the largest 1117 * Loop backwards, so that the first hypercall has the largest
1069 * index, ensuring that the table will grow only once. 1118 * index, ensuring that the table will grow only once.
@@ -1072,7 +1121,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
1072 xatp.domid = DOMID_SELF; 1121 xatp.domid = DOMID_SELF;
1073 xatp.idx = i; 1122 xatp.idx = i;
1074 xatp.space = XENMAPSPACE_grant_table; 1123 xatp.space = XENMAPSPACE_grant_table;
1075 xatp.gpfn = (xen_hvm_resume_frames >> PAGE_SHIFT) + i; 1124 xatp.gpfn = xen_auto_xlat_grant_frames.pfn[i];
1076 rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp); 1125 rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
1077 if (rc != 0) { 1126 if (rc != 0) {
1078 pr_warn("grant table add_to_physmap failed, err=%d\n", 1127 pr_warn("grant table add_to_physmap failed, err=%d\n",
@@ -1135,10 +1184,8 @@ static void gnttab_request_version(void)
1135 int rc; 1184 int rc;
1136 struct gnttab_set_version gsv; 1185 struct gnttab_set_version gsv;
1137 1186
1138 if (xen_hvm_domain()) 1187 gsv.version = 1;
1139 gsv.version = 1; 1188
1140 else
1141 gsv.version = 2;
1142 rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1); 1189 rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
1143 if (rc == 0 && gsv.version == 2) { 1190 if (rc == 0 && gsv.version == 2) {
1144 grant_table_version = 2; 1191 grant_table_version = 2;
@@ -1169,22 +1216,15 @@ static int gnttab_setup(void)
1169 if (max_nr_gframes < nr_grant_frames) 1216 if (max_nr_gframes < nr_grant_frames)
1170 return -ENOSYS; 1217 return -ENOSYS;
1171 1218
1172 if (xen_pv_domain()) 1219 if (xen_feature(XENFEAT_auto_translated_physmap) && gnttab_shared.addr == NULL) {
1173 return gnttab_map(0, nr_grant_frames - 1); 1220 gnttab_shared.addr = xen_auto_xlat_grant_frames.vaddr;
1174
1175 if (gnttab_shared.addr == NULL) {
1176 gnttab_shared.addr = xen_remap(xen_hvm_resume_frames,
1177 PAGE_SIZE * max_nr_gframes);
1178 if (gnttab_shared.addr == NULL) { 1221 if (gnttab_shared.addr == NULL) {
1179 pr_warn("Failed to ioremap gnttab share frames (addr=0x%08lx)!\n", 1222 pr_warn("gnttab share frames (addr=0x%08lx) is not mapped!\n",
1180 xen_hvm_resume_frames); 1223 (unsigned long)xen_auto_xlat_grant_frames.vaddr);
1181 return -ENOMEM; 1224 return -ENOMEM;
1182 } 1225 }
1183 } 1226 }
1184 1227 return gnttab_map(0, nr_grant_frames - 1);
1185 gnttab_map(0, nr_grant_frames - 1);
1186
1187 return 0;
1188} 1228}
1189 1229
1190int gnttab_resume(void) 1230int gnttab_resume(void)
@@ -1227,13 +1267,12 @@ int gnttab_init(void)
1227 1267
1228 gnttab_request_version(); 1268 gnttab_request_version();
1229 nr_grant_frames = 1; 1269 nr_grant_frames = 1;
1230 boot_max_nr_grant_frames = __max_nr_grant_frames();
1231 1270
1232 /* Determine the maximum number of frames required for the 1271 /* Determine the maximum number of frames required for the
1233 * grant reference free list on the current hypervisor. 1272 * grant reference free list on the current hypervisor.
1234 */ 1273 */
1235 BUG_ON(grefs_per_grant_frame == 0); 1274 BUG_ON(grefs_per_grant_frame == 0);
1236 max_nr_glist_frames = (boot_max_nr_grant_frames * 1275 max_nr_glist_frames = (gnttab_max_grant_frames() *
1237 grefs_per_grant_frame / RPP); 1276 grefs_per_grant_frame / RPP);
1238 1277
1239 gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *), 1278 gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
@@ -1286,5 +1325,6 @@ static int __gnttab_init(void)
1286 1325
1287 return gnttab_init(); 1326 return gnttab_init();
1288} 1327}
1289 1328/* Starts after core_initcall so that xen_pvh_gnttab_setup can be called
1290core_initcall(__gnttab_init); 1329 * beforehand to initialize xen_auto_xlat_grant_frames. */
1330core_initcall_sync(__gnttab_init);
diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
index 188825122aae..dd9c249ea311 100644
--- a/drivers/xen/pci.c
+++ b/drivers/xen/pci.c
@@ -26,7 +26,9 @@
26#include <asm/xen/hypervisor.h> 26#include <asm/xen/hypervisor.h>
27#include <asm/xen/hypercall.h> 27#include <asm/xen/hypercall.h>
28#include "../pci/pci.h" 28#include "../pci/pci.h"
29#ifdef CONFIG_PCI_MMCONFIG
29#include <asm/pci_x86.h> 30#include <asm/pci_x86.h>
31#endif
30 32
31static bool __read_mostly pci_seg_supported = true; 33static bool __read_mostly pci_seg_supported = true;
32 34
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
index 2f3528e93cb9..a1361c312c06 100644
--- a/drivers/xen/platform-pci.c
+++ b/drivers/xen/platform-pci.c
@@ -108,6 +108,7 @@ static int platform_pci_init(struct pci_dev *pdev,
108 long ioaddr; 108 long ioaddr;
109 long mmio_addr, mmio_len; 109 long mmio_addr, mmio_len;
110 unsigned int max_nr_gframes; 110 unsigned int max_nr_gframes;
111 unsigned long grant_frames;
111 112
112 if (!xen_domain()) 113 if (!xen_domain())
113 return -ENODEV; 114 return -ENODEV;
@@ -154,13 +155,17 @@ static int platform_pci_init(struct pci_dev *pdev,
154 } 155 }
155 156
156 max_nr_gframes = gnttab_max_grant_frames(); 157 max_nr_gframes = gnttab_max_grant_frames();
157 xen_hvm_resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes); 158 grant_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
158 ret = gnttab_init(); 159 ret = gnttab_setup_auto_xlat_frames(grant_frames);
159 if (ret) 160 if (ret)
160 goto out; 161 goto out;
162 ret = gnttab_init();
163 if (ret)
164 goto grant_out;
161 xenbus_probe(NULL); 165 xenbus_probe(NULL);
162 return 0; 166 return 0;
163 167grant_out:
168 gnttab_free_auto_xlat_frames();
164out: 169out:
165 pci_release_region(pdev, 0); 170 pci_release_region(pdev, 0);
166mem_out: 171mem_out:
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index ec097d6f964d..01d59e66565d 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -45,6 +45,7 @@
45#include <xen/grant_table.h> 45#include <xen/grant_table.h>
46#include <xen/xenbus.h> 46#include <xen/xenbus.h>
47#include <xen/xen.h> 47#include <xen/xen.h>
48#include <xen/features.h>
48 49
49#include "xenbus_probe.h" 50#include "xenbus_probe.h"
50 51
@@ -743,7 +744,7 @@ static const struct xenbus_ring_ops ring_ops_hvm = {
743 744
744void __init xenbus_ring_ops_init(void) 745void __init xenbus_ring_ops_init(void)
745{ 746{
746 if (xen_pv_domain()) 747 if (!xen_feature(XENFEAT_auto_translated_physmap))
747 ring_ops = &ring_ops_pv; 748 ring_ops = &ring_ops_pv;
748 else 749 else
749 ring_ops = &ring_ops_hvm; 750 ring_ops = &ring_ops_hvm;
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index 129bf84c19ec..cb385c10d2b1 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -496,7 +496,7 @@ subsys_initcall(xenbus_probe_frontend_init);
496#ifndef MODULE 496#ifndef MODULE
497static int __init boot_wait_for_devices(void) 497static int __init boot_wait_for_devices(void)
498{ 498{
499 if (xen_hvm_domain() && !xen_platform_pci_unplug) 499 if (!xen_has_pv_devices())
500 return -ENODEV; 500 return -ENODEV;
501 501
502 ready_to_wait_for_devices = 1; 502 ready_to_wait_for_devices = 1;
diff --git a/include/xen/events.h b/include/xen/events.h
index c9ea10ee2273..c9c85cf84895 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -7,6 +7,8 @@
7#include <asm/xen/hypercall.h> 7#include <asm/xen/hypercall.h>
8#include <asm/xen/events.h> 8#include <asm/xen/events.h>
9 9
10unsigned xen_evtchn_nr_channels(void);
11
10int bind_evtchn_to_irq(unsigned int evtchn); 12int bind_evtchn_to_irq(unsigned int evtchn);
11int bind_evtchn_to_irqhandler(unsigned int evtchn, 13int bind_evtchn_to_irqhandler(unsigned int evtchn,
12 irq_handler_t handler, 14 irq_handler_t handler,
@@ -37,6 +39,11 @@ int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
37 */ 39 */
38void unbind_from_irqhandler(unsigned int irq, void *dev_id); 40void unbind_from_irqhandler(unsigned int irq, void *dev_id);
39 41
42#define XEN_IRQ_PRIORITY_MAX EVTCHN_FIFO_PRIORITY_MAX
43#define XEN_IRQ_PRIORITY_DEFAULT EVTCHN_FIFO_PRIORITY_DEFAULT
44#define XEN_IRQ_PRIORITY_MIN EVTCHN_FIFO_PRIORITY_MIN
45int xen_set_irq_priority(unsigned irq, unsigned priority);
46
40/* 47/*
41 * Allow extra references to event channels exposed to userspace by evtchn 48 * Allow extra references to event channels exposed to userspace by evtchn
42 */ 49 */
@@ -73,6 +80,8 @@ void xen_poll_irq_timeout(int irq, u64 timeout);
73 80
74/* Determine the IRQ which is bound to an event channel */ 81/* Determine the IRQ which is bound to an event channel */
75unsigned irq_from_evtchn(unsigned int evtchn); 82unsigned irq_from_evtchn(unsigned int evtchn);
83int irq_from_virq(unsigned int cpu, unsigned int virq);
84unsigned int evtchn_from_irq(unsigned irq);
76 85
77/* Xen HVM evtchn vector callback */ 86/* Xen HVM evtchn vector callback */
78void xen_hvm_callback_vector(void); 87void xen_hvm_callback_vector(void);
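
The new xen_set_irq_priority() and the XEN_IRQ_PRIORITY_* constants only have an effect under the FIFO ABI; on the 2-level ABI the underlying EVTCHNOP_set_priority call simply fails. A hedged sketch of how a driver might use it (the event channel, handler and device name are placeholders, not taken from this series):

/* Sketch only: "evtchn" and "my_handler" are assumed to come from the
 * frontend's usual xenbus negotiation. */
static int example_bind(unsigned int evtchn)
{
	int irq;

	irq = bind_evtchn_to_irqhandler(evtchn, my_handler, 0, "example", NULL);
	if (irq < 0)
		return irq;

	/* Ask for the highest FIFO priority; on the 2-level ABI (or an old
	 * hypervisor) this fails and can be ignored. */
	if (xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX))
		pr_info("event channel priorities not available\n");

	return irq;
}
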
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index 694dcaf266e6..5acb1e4ac0d3 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -178,8 +178,15 @@ int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
178 grant_status_t **__shared); 178 grant_status_t **__shared);
179void arch_gnttab_unmap(void *shared, unsigned long nr_gframes); 179void arch_gnttab_unmap(void *shared, unsigned long nr_gframes);
180 180
181extern unsigned long xen_hvm_resume_frames; 181struct grant_frames {
182 xen_pfn_t *pfn;
183 unsigned int count;
184 void *vaddr;
185};
186extern struct grant_frames xen_auto_xlat_grant_frames;
182unsigned int gnttab_max_grant_frames(void); 187unsigned int gnttab_max_grant_frames(void);
188int gnttab_setup_auto_xlat_frames(unsigned long addr);
189void gnttab_free_auto_xlat_frames(void);
183 190
184#define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) 191#define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr))
185 192
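
With xen_hvm_resume_frames gone, an auto-translated guest is expected to publish its grant-frame area through gnttab_setup_auto_xlat_frames() before gnttab_init() runs. A sketch of the intended call order, mirroring the platform-pci.c hunk earlier in this diff (alloc_xen_mmio() is the existing platform-pci helper; error handling is abbreviated):

/* Sketch of the setup order for an auto-translated guest. */
static int example_grant_init(void)
{
	unsigned long region;
	int ret;

	region = alloc_xen_mmio(PAGE_SIZE * gnttab_max_grant_frames());

	ret = gnttab_setup_auto_xlat_frames(region);
	if (ret)
		return ret;

	ret = gnttab_init();
	if (ret)
		gnttab_free_auto_xlat_frames();
	return ret;
}
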
diff --git a/include/xen/interface/elfnote.h b/include/xen/interface/elfnote.h
index 0360b15f4883..6f4eae328ca7 100644
--- a/include/xen/interface/elfnote.h
+++ b/include/xen/interface/elfnote.h
@@ -140,6 +140,19 @@
140 */ 140 */
141#define XEN_ELFNOTE_SUSPEND_CANCEL 14 141#define XEN_ELFNOTE_SUSPEND_CANCEL 14
142 142
143/*
144 * The features supported by this kernel (numeric).
145 *
146 * Other than XEN_ELFNOTE_FEATURES on pre-4.2 Xen, this note allows a
147 * kernel to specify support for features that older hypervisors don't
148 * know about. The set of features 4.2 and newer hypervisors will
149 * consider supported by the kernel is the combination of the sets
150 * specified through this and the string note.
151 *
152 * LEGACY: FEATURES
153 */
154#define XEN_ELFNOTE_SUPPORTED_FEATURES 17
155
143#endif /* __XEN_PUBLIC_ELFNOTE_H__ */ 156#endif /* __XEN_PUBLIC_ELFNOTE_H__ */
144 157
145/* 158/*
diff --git a/include/xen/interface/event_channel.h b/include/xen/interface/event_channel.h
index f4942921e202..7e6acef5415b 100644
--- a/include/xen/interface/event_channel.h
+++ b/include/xen/interface/event_channel.h
@@ -190,6 +190,39 @@ struct evtchn_reset {
190}; 190};
191typedef struct evtchn_reset evtchn_reset_t; 191typedef struct evtchn_reset evtchn_reset_t;
192 192
193/*
194 * EVTCHNOP_init_control: initialize the control block for the FIFO ABI.
195 */
196#define EVTCHNOP_init_control 11
197struct evtchn_init_control {
198 /* IN parameters. */
199 uint64_t control_gfn;
200 uint32_t offset;
201 uint32_t vcpu;
202 /* OUT parameters. */
203 uint8_t link_bits;
204 uint8_t _pad[7];
205};
206
207/*
208 * EVTCHNOP_expand_array: add an additional page to the event array.
209 */
210#define EVTCHNOP_expand_array 12
211struct evtchn_expand_array {
212 /* IN parameters. */
213 uint64_t array_gfn;
214};
215
216/*
217 * EVTCHNOP_set_priority: set the priority for an event channel.
218 */
219#define EVTCHNOP_set_priority 13
220struct evtchn_set_priority {
221 /* IN parameters. */
222 uint32_t port;
223 uint32_t priority;
224};
225
193struct evtchn_op { 226struct evtchn_op {
194 uint32_t cmd; /* EVTCHNOP_* */ 227 uint32_t cmd; /* EVTCHNOP_* */
195 union { 228 union {
@@ -207,4 +240,39 @@ struct evtchn_op {
207}; 240};
208DEFINE_GUEST_HANDLE_STRUCT(evtchn_op); 241DEFINE_GUEST_HANDLE_STRUCT(evtchn_op);
209 242
243/*
244 * 2-level ABI
245 */
246
247#define EVTCHN_2L_NR_CHANNELS (sizeof(xen_ulong_t) * sizeof(xen_ulong_t) * 64)
248
249/*
250 * FIFO ABI
251 */
252
253/* Events may have priorities from 0 (highest) to 15 (lowest). */
254#define EVTCHN_FIFO_PRIORITY_MAX 0
255#define EVTCHN_FIFO_PRIORITY_DEFAULT 7
256#define EVTCHN_FIFO_PRIORITY_MIN 15
257
258#define EVTCHN_FIFO_MAX_QUEUES (EVTCHN_FIFO_PRIORITY_MIN + 1)
259
260typedef uint32_t event_word_t;
261
262#define EVTCHN_FIFO_PENDING 31
263#define EVTCHN_FIFO_MASKED 30
264#define EVTCHN_FIFO_LINKED 29
265#define EVTCHN_FIFO_BUSY 28
266
267#define EVTCHN_FIFO_LINK_BITS 17
268#define EVTCHN_FIFO_LINK_MASK ((1 << EVTCHN_FIFO_LINK_BITS) - 1)
269
270#define EVTCHN_FIFO_NR_CHANNELS (1 << EVTCHN_FIFO_LINK_BITS)
271
272struct evtchn_fifo_control_block {
273 uint32_t ready;
274 uint32_t _rsvd;
275 event_word_t head[EVTCHN_FIFO_MAX_QUEUES];
276};
277
210#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */ 278#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */
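
In the control block defined above, the ready word has one bit per queue and the queue index doubles as the priority, so the lowest set bit always names the most urgent non-empty queue (EVTCHN_FIFO_PRIORITY_MAX is 0). A standalone model of the scan performed by evtchn_fifo_handle_events(), with the hypervisor's re-arming of bits left out and __builtin_ctz standing in for find_first_bit():

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Pretend queues 7 (default priority) and 2 are ready. */
	uint32_t ready = (1u << 7) | (1u << 2);

	while (ready) {
		/* Lowest set bit == highest-priority non-empty queue. */
		unsigned q = __builtin_ctz(ready);

		printf("draining queue %u\n", q);
		/* consume_one_event() clears the bit once the queue hits
		 * its tail; simulate that here. */
		ready &= ~(1u << q);
	}
	return 0;	/* prints queue 2, then queue 7 */
}
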
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 53ec4167bd0b..0cd5ca333fac 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -281,12 +281,6 @@ struct multicall_entry {
281}; 281};
282DEFINE_GUEST_HANDLE_STRUCT(multicall_entry); 282DEFINE_GUEST_HANDLE_STRUCT(multicall_entry);
283 283
284/*
285 * Event channel endpoints per domain:
286 * 1024 if a long is 32 bits; 4096 if a long is 64 bits.
287 */
288#define NR_EVENT_CHANNELS (sizeof(xen_ulong_t) * sizeof(xen_ulong_t) * 64)
289
290struct vcpu_time_info { 284struct vcpu_time_info {
291 /* 285 /*
292 * Updates to the following values are preceded and followed 286 * Updates to the following values are preceded and followed
diff --git a/include/xen/platform_pci.h b/include/xen/platform_pci.h
index 438c256c274b..5c52b5583917 100644
--- a/include/xen/platform_pci.h
+++ b/include/xen/platform_pci.h
@@ -46,6 +46,27 @@ static inline int xen_must_unplug_disks(void) {
46#endif 46#endif
47} 47}
48 48
49extern int xen_platform_pci_unplug; 49#if defined(CONFIG_XEN_PVHVM)
50 50extern bool xen_has_pv_devices(void);
51extern bool xen_has_pv_disk_devices(void);
52extern bool xen_has_pv_nic_devices(void);
53extern bool xen_has_pv_and_legacy_disk_devices(void);
54#else
55static inline bool xen_has_pv_devices(void)
56{
57 return IS_ENABLED(CONFIG_XEN);
58}
59static inline bool xen_has_pv_disk_devices(void)
60{
61 return IS_ENABLED(CONFIG_XEN);
62}
63static inline bool xen_has_pv_nic_devices(void)
64{
65 return IS_ENABLED(CONFIG_XEN);
66}
67static inline bool xen_has_pv_and_legacy_disk_devices(void)
68{
69 return false;
70}
71#endif
51#endif /* _XEN_PLATFORM_PCI_H */ 72#endif /* _XEN_PLATFORM_PCI_H */
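
The xen_platform_pci_unplug integer is replaced by the xen_has_pv_*_devices() predicates, which the frontends touched by this series use to decide whether PV devices should be driven at all. A hedged sketch of the resulting probe guard in a hypothetical frontend (the driver and its xenbus_driver are placeholders, not code from this diff):

/* Hypothetical network frontend refusing to load when emulated devices
 * were kept instead of PV ones. */
static int __init myfront_init(void)
{
	if (!xen_domain())
		return -ENODEV;

	if (!xen_has_pv_nic_devices())
		return -ENODEV;

	return xenbus_register_frontend(&myfront_driver);
}
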
diff --git a/include/xen/xen.h b/include/xen/xen.h
index a74d4362c4f8..0c0e3ef4c45d 100644
--- a/include/xen/xen.h
+++ b/include/xen/xen.h
@@ -29,4 +29,18 @@ extern enum xen_domain_type xen_domain_type;
29#define xen_initial_domain() (0) 29#define xen_initial_domain() (0)
30#endif /* CONFIG_XEN_DOM0 */ 30#endif /* CONFIG_XEN_DOM0 */
31 31
32#ifdef CONFIG_XEN_PVH
33/* This functionality exists only for x86. The XEN_PVHVM support exists
34 * only in x86 world - hence on ARM it will be always disabled.
35 * N.B. ARM guests are neither PV nor HVM nor PVHVM.
36 * It's a bit like PVH but is different also (it's further towards the H
37 * end of the spectrum than even PVH).
38 */
39#include <xen/features.h>
40#define xen_pvh_domain() (xen_pv_domain() && \
41 xen_feature(XENFEAT_auto_translated_physmap) && \
42 xen_have_vector_callback)
43#else
44#define xen_pvh_domain() (0)
45#endif
32#endif /* _XEN_XEN_H */ 46#endif /* _XEN_XEN_H */