aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-xen_cpu20
-rw-r--r--arch/x86/include/asm/xen/hypercall.h8
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c22
-rw-r--r--arch/x86/xen/enlighten.c224
-rw-r--r--arch/x86/xen/mmu.c39
-rw-r--r--arch/x86/xen/setup.c23
-rw-r--r--arch/x86/xen/suspend.c2
-rw-r--r--arch/x86/xen/xen-ops.h2
-rw-r--r--drivers/tty/hvc/hvc_xen.c15
-rw-r--r--drivers/xen/Kconfig8
-rw-r--r--drivers/xen/Makefile2
-rw-r--r--drivers/xen/mcelog.c414
-rw-r--r--drivers/xen/pcpu.c371
-rw-r--r--drivers/xen/platform-pci.c18
-rw-r--r--drivers/xen/xen-acpi-processor.c9
-rw-r--r--drivers/xen/xenbus/xenbus_xs.c20
-rw-r--r--include/linux/miscdevice.h1
-rw-r--r--include/xen/events.h2
-rw-r--r--include/xen/interface/io/xs_wire.h3
-rw-r--r--include/xen/interface/platform.h8
-rw-r--r--include/xen/interface/xen-mca.h385
-rw-r--r--include/xen/interface/xen.h1
23 files changed, 1511 insertions, 90 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-system-xen_cpu b/Documentation/ABI/testing/sysfs-devices-system-xen_cpu
new file mode 100644
index 000000000000..9ca02fb2d498
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-system-xen_cpu
@@ -0,0 +1,20 @@
1What: /sys/devices/system/xen_cpu/
2Date: May 2012
3Contact: Liu, Jinsong <jinsong.liu@intel.com>
4Description:
5 A collection of global/individual Xen physical cpu attributes
6
7 Individual physical cpu attributes are contained in
8 subdirectories named by the Xen's logical cpu number, e.g.:
9 /sys/devices/system/xen_cpu/xen_cpu#/
10
11
12What: /sys/devices/system/xen_cpu/xen_cpu#/online
13Date: May 2012
14Contact: Liu, Jinsong <jinsong.liu@intel.com>
15Description:
16 Interface to online/offline Xen physical cpus
17
18 When running under Xen platform, it provides a user interface
19 to online/offline physical cpus, except cpu0 due to several
20 logic restrictions and assumptions.
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 5728852fb90f..59c226d120cd 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -48,6 +48,7 @@
48#include <xen/interface/sched.h> 48#include <xen/interface/sched.h>
49#include <xen/interface/physdev.h> 49#include <xen/interface/physdev.h>
50#include <xen/interface/platform.h> 50#include <xen/interface/platform.h>
51#include <xen/interface/xen-mca.h>
51 52
52/* 53/*
53 * The hypercall asms have to meet several constraints: 54 * The hypercall asms have to meet several constraints:
@@ -302,6 +303,13 @@ HYPERVISOR_set_timer_op(u64 timeout)
302} 303}
303 304
304static inline int 305static inline int
306HYPERVISOR_mca(struct xen_mc *mc_op)
307{
308 mc_op->interface_version = XEN_MCA_INTERFACE_VERSION;
309 return _hypercall1(int, mca, mc_op);
310}
311
312static inline int
305HYPERVISOR_dom0_op(struct xen_platform_op *platform_op) 313HYPERVISOR_dom0_op(struct xen_platform_op *platform_op)
306{ 314{
307 platform_op->interface_version = XENPF_INTERFACE_VERSION; 315 platform_op->interface_version = XENPF_INTERFACE_VERSION;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 9473e8772fd1..5e095f873e3e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -60,8 +60,6 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
60 60
61int mce_disabled __read_mostly; 61int mce_disabled __read_mostly;
62 62
63#define MISC_MCELOG_MINOR 227
64
65#define SPINUNIT 100 /* 100ns */ 63#define SPINUNIT 100 /* 100ns */
66 64
67atomic_t mce_entry; 65atomic_t mce_entry;
@@ -2346,7 +2344,7 @@ static __init int mcheck_init_device(void)
2346 2344
2347 return err; 2345 return err;
2348} 2346}
2349device_initcall(mcheck_init_device); 2347device_initcall_sync(mcheck_init_device);
2350 2348
2351/* 2349/*
2352 * Old style boot options parsing. Only for compatibility. 2350 * Old style boot options parsing. Only for compatibility.
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 671b95a2ffb5..c4e916d77378 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -759,4 +759,24 @@ static __init int threshold_init_device(void)
759 759
760 return 0; 760 return 0;
761} 761}
762device_initcall(threshold_init_device); 762/*
763 * there are 3 funcs which need to be _initcalled in a logical sequence:
764 * 1. xen_late_init_mcelog
765 * 2. mcheck_init_device
766 * 3. threshold_init_device
767 *
768 * xen_late_init_mcelog must register xen_mce_chrdev_device before
769 * native mce_chrdev_device registration if running under xen platform;
770 *
771 * mcheck_init_device should be inited before threshold_init_device to
772 * initialize mce_device, otherwise a NULL ptr dereference will cause panic.
773 *
774 * so we use following _initcalls
775 * 1. device_initcall(xen_late_init_mcelog);
776 * 2. device_initcall_sync(mcheck_init_device);
777 * 3. late_initcall(threshold_init_device);
778 *
779 * when running under xen, the initcall order is 1,2,3;
780 * on baremetal, we skip 1 and we do only 2 and 3.
781 */
782late_initcall(threshold_init_device);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index ed7d54985d0c..bf4bda6d3e9a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -31,6 +31,7 @@
31#include <linux/pci.h> 31#include <linux/pci.h>
32#include <linux/gfp.h> 32#include <linux/gfp.h>
33#include <linux/memblock.h> 33#include <linux/memblock.h>
34#include <linux/syscore_ops.h>
34 35
35#include <xen/xen.h> 36#include <xen/xen.h>
36#include <xen/interface/xen.h> 37#include <xen/interface/xen.h>
@@ -38,6 +39,7 @@
38#include <xen/interface/physdev.h> 39#include <xen/interface/physdev.h>
39#include <xen/interface/vcpu.h> 40#include <xen/interface/vcpu.h>
40#include <xen/interface/memory.h> 41#include <xen/interface/memory.h>
42#include <xen/interface/xen-mca.h>
41#include <xen/features.h> 43#include <xen/features.h>
42#include <xen/page.h> 44#include <xen/page.h>
43#include <xen/hvm.h> 45#include <xen/hvm.h>
@@ -107,7 +109,7 @@ EXPORT_SYMBOL_GPL(xen_have_vector_callback);
107 * Point at some empty memory to start with. We map the real shared_info 109 * Point at some empty memory to start with. We map the real shared_info
108 * page as soon as fixmap is up and running. 110 * page as soon as fixmap is up and running.
109 */ 111 */
110struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; 112struct shared_info *HYPERVISOR_shared_info = &xen_dummy_shared_info;
111 113
112/* 114/*
113 * Flag to determine whether vcpu info placement is available on all 115 * Flag to determine whether vcpu info placement is available on all
@@ -124,6 +126,19 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
124 */ 126 */
125static int have_vcpu_info_placement = 1; 127static int have_vcpu_info_placement = 1;
126 128
129struct tls_descs {
130 struct desc_struct desc[3];
131};
132
133/*
134 * Updating the 3 TLS descriptors in the GDT on every task switch is
135 * surprisingly expensive so we avoid updating them if they haven't
136 * changed. Since Xen writes different descriptors than the one
137 * passed in the update_descriptor hypercall we keep shadow copies to
138 * compare against.
139 */
140static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);
141
127static void clamp_max_cpus(void) 142static void clamp_max_cpus(void)
128{ 143{
129#ifdef CONFIG_SMP 144#ifdef CONFIG_SMP
@@ -341,9 +356,7 @@ static void __init xen_init_cpuid_mask(void)
341 unsigned int xsave_mask; 356 unsigned int xsave_mask;
342 357
343 cpuid_leaf1_edx_mask = 358 cpuid_leaf1_edx_mask =
344 ~((1 << X86_FEATURE_MCE) | /* disable MCE */ 359 ~((1 << X86_FEATURE_MTRR) | /* disable MTRR */
345 (1 << X86_FEATURE_MCA) | /* disable MCA */
346 (1 << X86_FEATURE_MTRR) | /* disable MTRR */
347 (1 << X86_FEATURE_ACC)); /* thermal monitoring */ 360 (1 << X86_FEATURE_ACC)); /* thermal monitoring */
348 361
349 if (!xen_initial_domain()) 362 if (!xen_initial_domain())
@@ -540,12 +553,28 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr)
540 BUG(); 553 BUG();
541} 554}
542 555
556static inline bool desc_equal(const struct desc_struct *d1,
557 const struct desc_struct *d2)
558{
559 return d1->a == d2->a && d1->b == d2->b;
560}
561
543static void load_TLS_descriptor(struct thread_struct *t, 562static void load_TLS_descriptor(struct thread_struct *t,
544 unsigned int cpu, unsigned int i) 563 unsigned int cpu, unsigned int i)
545{ 564{
546 struct desc_struct *gdt = get_cpu_gdt_table(cpu); 565 struct desc_struct *shadow = &per_cpu(shadow_tls_desc, cpu).desc[i];
547 xmaddr_t maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); 566 struct desc_struct *gdt;
548 struct multicall_space mc = __xen_mc_entry(0); 567 xmaddr_t maddr;
568 struct multicall_space mc;
569
570 if (desc_equal(shadow, &t->tls_array[i]))
571 return;
572
573 *shadow = t->tls_array[i];
574
575 gdt = get_cpu_gdt_table(cpu);
576 maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
577 mc = __xen_mc_entry(0);
549 578
550 MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]); 579 MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]);
551} 580}
@@ -627,8 +656,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
627 /* 656 /*
628 * Look for known traps using IST, and substitute them 657 * Look for known traps using IST, and substitute them
629 * appropriately. The debugger ones are the only ones we care 658 * appropriately. The debugger ones are the only ones we care
630 * about. Xen will handle faults like double_fault and 659 * about. Xen will handle faults like double_fault,
631 * machine_check, so we should never see them. Warn if 660 * so we should never see them. Warn if
632 * there's an unexpected IST-using fault handler. 661 * there's an unexpected IST-using fault handler.
633 */ 662 */
634 if (addr == (unsigned long)debug) 663 if (addr == (unsigned long)debug)
@@ -643,7 +672,11 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
643 return 0; 672 return 0;
644#ifdef CONFIG_X86_MCE 673#ifdef CONFIG_X86_MCE
645 } else if (addr == (unsigned long)machine_check) { 674 } else if (addr == (unsigned long)machine_check) {
646 return 0; 675 /*
676 * when xen hypervisor inject vMCE to guest,
677 * use native mce handler to handle it
678 */
679 ;
647#endif 680#endif
648 } else { 681 } else {
649 /* Some other trap using IST? */ 682 /* Some other trap using IST? */
@@ -1437,64 +1470,155 @@ asmlinkage void __init xen_start_kernel(void)
1437#endif 1470#endif
1438} 1471}
1439 1472
1440static int init_hvm_pv_info(int *major, int *minor) 1473#ifdef CONFIG_XEN_PVHVM
1441{ 1474/*
1442 uint32_t eax, ebx, ecx, edx, pages, msr, base; 1475 * The pfn containing the shared_info is located somewhere in RAM. This
1443 u64 pfn; 1476 * will cause trouble if the current kernel is doing a kexec boot into a
1444 1477 * new kernel. The new kernel (and its startup code) can not know where
1445 base = xen_cpuid_base(); 1478 * the pfn is, so it can not reserve the page. The hypervisor will
1446 cpuid(base + 1, &eax, &ebx, &ecx, &edx); 1479 * continue to update the pfn, and as a result memory corruption occurs
1447 1480 * in the new kernel.
1448 *major = eax >> 16; 1481 *
1449 *minor = eax & 0xffff; 1482 * One way to work around this issue is to allocate a page in the
1450 printk(KERN_INFO "Xen version %d.%d.\n", *major, *minor); 1483 * xen-platform pci device's BAR memory range. But pci init is done very
1451 1484 * late and the shared_info page is already in use very early to read
1452 cpuid(base + 2, &pages, &msr, &ecx, &edx); 1485 * the pvclock. So moving the pfn from RAM to MMIO is racy because some
1453 1486 * code paths on other vcpus could access the pfn during the small
1454 pfn = __pa(hypercall_page); 1487 * window when the old pfn is moved to the new pfn. There is even a
1455 wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); 1488 * small window where the old pfn is not backed by a mfn, and during that
1456 1489 * time all reads return -1.
1457 xen_setup_features(); 1490 *
1458 1491 * Because it is not known upfront where the MMIO region is located it
1459 pv_info.name = "Xen HVM"; 1492 * can not be used right from the start in xen_hvm_init_shared_info.
1460 1493 *
1461 xen_domain_type = XEN_HVM_DOMAIN; 1494 * To minimise trouble the move of the pfn is done shortly before kexec.
1495 * This does not eliminate the race because all vcpus are still online
1496 * when the syscore_ops will be called. But hopefully there is no work
1497 * pending at this point in time. Also the syscore_op is run last which
1498 * reduces the risk further.
1499 */
1462 1500
1463 return 0; 1501static struct shared_info *xen_hvm_shared_info;
1464}
1465 1502
1466void __ref xen_hvm_init_shared_info(void) 1503static void xen_hvm_connect_shared_info(unsigned long pfn)
1467{ 1504{
1468 int cpu;
1469 struct xen_add_to_physmap xatp; 1505 struct xen_add_to_physmap xatp;
1470 static struct shared_info *shared_info_page = 0;
1471 1506
1472 if (!shared_info_page)
1473 shared_info_page = (struct shared_info *)
1474 extend_brk(PAGE_SIZE, PAGE_SIZE);
1475 xatp.domid = DOMID_SELF; 1507 xatp.domid = DOMID_SELF;
1476 xatp.idx = 0; 1508 xatp.idx = 0;
1477 xatp.space = XENMAPSPACE_shared_info; 1509 xatp.space = XENMAPSPACE_shared_info;
1478 xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; 1510 xatp.gpfn = pfn;
1479 if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) 1511 if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
1480 BUG(); 1512 BUG();
1481 1513
1482 HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; 1514}
1515static void xen_hvm_set_shared_info(struct shared_info *sip)
1516{
1517 int cpu;
1518
1519 HYPERVISOR_shared_info = sip;
1483 1520
1484 /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info 1521 /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
1485 * page, we use it in the event channel upcall and in some pvclock 1522 * page, we use it in the event channel upcall and in some pvclock
1486 * related functions. We don't need the vcpu_info placement 1523 * related functions. We don't need the vcpu_info placement
1487 * optimizations because we don't use any pv_mmu or pv_irq op on 1524 * optimizations because we don't use any pv_mmu or pv_irq op on
1488 * HVM. 1525 * HVM.
1489 * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is 1526 * When xen_hvm_set_shared_info is run at boot time only vcpu 0 is
1490 * online but xen_hvm_init_shared_info is run at resume time too and 1527 * online but xen_hvm_set_shared_info is run at resume time too and
1491 * in that case multiple vcpus might be online. */ 1528 * in that case multiple vcpus might be online. */
1492 for_each_online_cpu(cpu) { 1529 for_each_online_cpu(cpu) {
1493 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; 1530 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
1494 } 1531 }
1495} 1532}
1496 1533
1497#ifdef CONFIG_XEN_PVHVM 1534/* Reconnect the shared_info pfn to a mfn */
1535void xen_hvm_resume_shared_info(void)
1536{
1537 xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT);
1538}
1539
1540#ifdef CONFIG_KEXEC
1541static struct shared_info *xen_hvm_shared_info_kexec;
1542static unsigned long xen_hvm_shared_info_pfn_kexec;
1543
1544/* Remember a pfn in MMIO space for kexec reboot */
1545void __devinit xen_hvm_prepare_kexec(struct shared_info *sip, unsigned long pfn)
1546{
1547 xen_hvm_shared_info_kexec = sip;
1548 xen_hvm_shared_info_pfn_kexec = pfn;
1549}
1550
1551static void xen_hvm_syscore_shutdown(void)
1552{
1553 struct xen_memory_reservation reservation = {
1554 .domid = DOMID_SELF,
1555 .nr_extents = 1,
1556 };
1557 unsigned long prev_pfn;
1558 int rc;
1559
1560 if (!xen_hvm_shared_info_kexec)
1561 return;
1562
1563 prev_pfn = __pa(xen_hvm_shared_info) >> PAGE_SHIFT;
1564 set_xen_guest_handle(reservation.extent_start, &prev_pfn);
1565
1566 /* Move pfn to MMIO, disconnects previous pfn from mfn */
1567 xen_hvm_connect_shared_info(xen_hvm_shared_info_pfn_kexec);
1568
1569 /* Update pointers, following hypercall is also a memory barrier */
1570 xen_hvm_set_shared_info(xen_hvm_shared_info_kexec);
1571
1572 /* Allocate new mfn for previous pfn */
1573 do {
1574 rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
1575 if (rc == 0)
1576 msleep(123);
1577 } while (rc == 0);
1578
1579 /* Make sure the previous pfn is really connected to a (new) mfn */
1580 BUG_ON(rc != 1);
1581}
1582
1583static struct syscore_ops xen_hvm_syscore_ops = {
1584 .shutdown = xen_hvm_syscore_shutdown,
1585};
1586#endif
1587
1588/* Use a pfn in RAM, may move to MMIO before kexec. */
1589static void __init xen_hvm_init_shared_info(void)
1590{
1591 /* Remember pointer for resume */
1592 xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE);
1593 xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT);
1594 xen_hvm_set_shared_info(xen_hvm_shared_info);
1595}
1596
1597static void __init init_hvm_pv_info(void)
1598{
1599 int major, minor;
1600 uint32_t eax, ebx, ecx, edx, pages, msr, base;
1601 u64 pfn;
1602
1603 base = xen_cpuid_base();
1604 cpuid(base + 1, &eax, &ebx, &ecx, &edx);
1605
1606 major = eax >> 16;
1607 minor = eax & 0xffff;
1608 printk(KERN_INFO "Xen version %d.%d.\n", major, minor);
1609
1610 cpuid(base + 2, &pages, &msr, &ecx, &edx);
1611
1612 pfn = __pa(hypercall_page);
1613 wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
1614
1615 xen_setup_features();
1616
1617 pv_info.name = "Xen HVM";
1618
1619 xen_domain_type = XEN_HVM_DOMAIN;
1620}
1621
1498static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self, 1622static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
1499 unsigned long action, void *hcpu) 1623 unsigned long action, void *hcpu)
1500{ 1624{
@@ -1517,14 +1641,12 @@ static struct notifier_block xen_hvm_cpu_notifier __cpuinitdata = {
1517 1641
1518static void __init xen_hvm_guest_init(void) 1642static void __init xen_hvm_guest_init(void)
1519{ 1643{
1520 int r; 1644 init_hvm_pv_info();
1521 int major, minor;
1522
1523 r = init_hvm_pv_info(&major, &minor);
1524 if (r < 0)
1525 return;
1526 1645
1527 xen_hvm_init_shared_info(); 1646 xen_hvm_init_shared_info();
1647#ifdef CONFIG_KEXEC
1648 register_syscore_ops(&xen_hvm_syscore_ops);
1649#endif
1528 1650
1529 if (xen_feature(XENFEAT_hvm_callback_vector)) 1651 if (xen_feature(XENFEAT_hvm_callback_vector))
1530 xen_have_vector_callback = 1; 1652 xen_have_vector_callback = 1;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3a73785631ce..27336dfcda8e 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -308,8 +308,20 @@ static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval)
308 308
309static inline void __xen_set_pte(pte_t *ptep, pte_t pteval) 309static inline void __xen_set_pte(pte_t *ptep, pte_t pteval)
310{ 310{
311 if (!xen_batched_set_pte(ptep, pteval)) 311 if (!xen_batched_set_pte(ptep, pteval)) {
312 native_set_pte(ptep, pteval); 312 /*
313 * Could call native_set_pte() here and trap and
314 * emulate the PTE write but with 32-bit guests this
315 * needs two traps (one for each of the two 32-bit
316 * words in the PTE) so do one hypercall directly
317 * instead.
318 */
319 struct mmu_update u;
320
321 u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE;
322 u.val = pte_val_ma(pteval);
323 HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF);
324 }
313} 325}
314 326
315static void xen_set_pte(pte_t *ptep, pte_t pteval) 327static void xen_set_pte(pte_t *ptep, pte_t pteval)
@@ -1416,13 +1428,28 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
1416} 1428}
1417#endif /* CONFIG_X86_64 */ 1429#endif /* CONFIG_X86_64 */
1418 1430
1419/* Init-time set_pte while constructing initial pagetables, which 1431/*
1420 doesn't allow RO pagetable pages to be remapped RW */ 1432 * Init-time set_pte while constructing initial pagetables, which
1433 * doesn't allow RO page table pages to be remapped RW.
1434 *
1435 * If there is no MFN for this PFN then this page is initially
1436 * ballooned out so clear the PTE (as in decrease_reservation() in
1437 * drivers/xen/balloon.c).
1438 *
1439 * Many of these PTE updates are done on unpinned and writable pages
1440 * and doing a hypercall for these is unnecessary and expensive. At
1441 * this point it is not possible to tell if a page is pinned or not,
1442 * so always write the PTE directly and rely on Xen trapping and
1443 * emulating any updates as necessary.
1444 */
1421static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) 1445static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
1422{ 1446{
1423 pte = mask_rw_pte(ptep, pte); 1447 if (pte_mfn(pte) != INVALID_P2M_ENTRY)
1448 pte = mask_rw_pte(ptep, pte);
1449 else
1450 pte = __pte_ma(0);
1424 1451
1425 xen_set_pte(ptep, pte); 1452 native_set_pte(ptep, pte);
1426} 1453}
1427 1454
1428static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) 1455static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index a4790bf22c59..ead85576d54a 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -157,25 +157,24 @@ static unsigned long __init xen_populate_chunk(
157 unsigned long dest_pfn; 157 unsigned long dest_pfn;
158 158
159 for (i = 0, entry = list; i < map_size; i++, entry++) { 159 for (i = 0, entry = list; i < map_size; i++, entry++) {
160 unsigned long credits = credits_left;
161 unsigned long s_pfn; 160 unsigned long s_pfn;
162 unsigned long e_pfn; 161 unsigned long e_pfn;
163 unsigned long pfns; 162 unsigned long pfns;
164 long capacity; 163 long capacity;
165 164
166 if (credits <= 0) 165 if (credits_left <= 0)
167 break; 166 break;
168 167
169 if (entry->type != E820_RAM) 168 if (entry->type != E820_RAM)
170 continue; 169 continue;
171 170
172 e_pfn = PFN_UP(entry->addr + entry->size); 171 e_pfn = PFN_DOWN(entry->addr + entry->size);
173 172
174 /* We only care about E820 after the xen_start_info->nr_pages */ 173 /* We only care about E820 after the xen_start_info->nr_pages */
175 if (e_pfn <= max_pfn) 174 if (e_pfn <= max_pfn)
176 continue; 175 continue;
177 176
178 s_pfn = PFN_DOWN(entry->addr); 177 s_pfn = PFN_UP(entry->addr);
179 /* If the E820 falls within the nr_pages, we want to start 178 /* If the E820 falls within the nr_pages, we want to start
180 * at the nr_pages PFN. 179 * at the nr_pages PFN.
181 * If that would mean going past the E820 entry, skip it 180 * If that would mean going past the E820 entry, skip it
@@ -184,23 +183,19 @@ static unsigned long __init xen_populate_chunk(
184 capacity = e_pfn - max_pfn; 183 capacity = e_pfn - max_pfn;
185 dest_pfn = max_pfn; 184 dest_pfn = max_pfn;
186 } else { 185 } else {
187 /* last_pfn MUST be within E820_RAM regions */
188 if (*last_pfn && e_pfn >= *last_pfn)
189 s_pfn = *last_pfn;
190 capacity = e_pfn - s_pfn; 186 capacity = e_pfn - s_pfn;
191 dest_pfn = s_pfn; 187 dest_pfn = s_pfn;
192 } 188 }
193 /* If we had filled this E820_RAM entry, go to the next one. */
194 if (capacity <= 0)
195 continue;
196 189
197 if (credits > capacity) 190 if (credits_left < capacity)
198 credits = capacity; 191 capacity = credits_left;
199 192
200 pfns = xen_do_chunk(dest_pfn, dest_pfn + credits, false); 193 pfns = xen_do_chunk(dest_pfn, dest_pfn + capacity, false);
201 done += pfns; 194 done += pfns;
202 credits_left -= pfns;
203 *last_pfn = (dest_pfn + pfns); 195 *last_pfn = (dest_pfn + pfns);
196 if (pfns < capacity)
197 break;
198 credits_left -= pfns;
204 } 199 }
205 return done; 200 return done;
206} 201}
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 45329c8c226e..ae8a00c39de4 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -30,7 +30,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled)
30{ 30{
31#ifdef CONFIG_XEN_PVHVM 31#ifdef CONFIG_XEN_PVHVM
32 int cpu; 32 int cpu;
33 xen_hvm_init_shared_info(); 33 xen_hvm_resume_shared_info();
34 xen_callback_vector(); 34 xen_callback_vector();
35 xen_unplug_emulated_devices(); 35 xen_unplug_emulated_devices();
36 if (xen_feature(XENFEAT_hvm_safe_pvclock)) { 36 if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 202d4c150154..1e4329e04e0f 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -41,7 +41,7 @@ void xen_enable_syscall(void);
41void xen_vcpu_restore(void); 41void xen_vcpu_restore(void);
42 42
43void xen_callback_vector(void); 43void xen_callback_vector(void);
44void xen_hvm_init_shared_info(void); 44void xen_hvm_resume_shared_info(void);
45void xen_unplug_emulated_devices(void); 45void xen_unplug_emulated_devices(void);
46 46
47void __init xen_build_dynamic_phys_to_machine(void); 47void __init xen_build_dynamic_phys_to_machine(void);
diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c
index 944eaeb8e0cf..1e456dca4f60 100644
--- a/drivers/tty/hvc/hvc_xen.c
+++ b/drivers/tty/hvc/hvc_xen.c
@@ -209,11 +209,10 @@ static int xen_hvm_console_init(void)
209 info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO); 209 info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO);
210 if (!info) 210 if (!info)
211 return -ENOMEM; 211 return -ENOMEM;
212 } 212 } else if (info->intf != NULL) {
213 213 /* already configured */
214 /* already configured */
215 if (info->intf != NULL)
216 return 0; 214 return 0;
215 }
217 /* 216 /*
218 * If the toolstack (or the hypervisor) hasn't set these values, the 217 * If the toolstack (or the hypervisor) hasn't set these values, the
219 * default value is 0. Even though mfn = 0 and evtchn = 0 are 218 * default value is 0. Even though mfn = 0 and evtchn = 0 are
@@ -259,12 +258,10 @@ static int xen_pv_console_init(void)
259 info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO); 258 info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO);
260 if (!info) 259 if (!info)
261 return -ENOMEM; 260 return -ENOMEM;
262 } 261 } else if (info->intf != NULL) {
263 262 /* already configured */
264 /* already configured */
265 if (info->intf != NULL)
266 return 0; 263 return 0;
267 264 }
268 info->evtchn = xen_start_info->console.domU.evtchn; 265 info->evtchn = xen_start_info->console.domU.evtchn;
269 info->intf = mfn_to_virt(xen_start_info->console.domU.mfn); 266 info->intf = mfn_to_virt(xen_start_info->console.domU.mfn);
270 info->vtermno = HVC_COOKIE; 267 info->vtermno = HVC_COOKIE;
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 8d2501e604dd..d4dffcd52873 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -196,4 +196,12 @@ config XEN_ACPI_PROCESSOR
196 called xen_acpi_processor If you do not know what to choose, select 196 called xen_acpi_processor If you do not know what to choose, select
197 M here. If the CPUFREQ drivers are built in, select Y here. 197 M here. If the CPUFREQ drivers are built in, select Y here.
198 198
199config XEN_MCE_LOG
200 bool "Xen platform mcelog"
201 depends on XEN_DOM0 && X86_64 && X86_MCE
202 default n
203 help
204 Allow the kernel to fetch MCE errors from the Xen platform and
205 convert them into Linux mcelog format for mcelog tools
206
199endmenu 207endmenu
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index fc3488631136..d80bea5535a2 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -17,7 +17,9 @@ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
17obj-$(CONFIG_XEN_PVHVM) += platform-pci.o 17obj-$(CONFIG_XEN_PVHVM) += platform-pci.o
18obj-$(CONFIG_XEN_TMEM) += tmem.o 18obj-$(CONFIG_XEN_TMEM) += tmem.o
19obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o 19obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
20obj-$(CONFIG_XEN_DOM0) += pcpu.o
20obj-$(CONFIG_XEN_DOM0) += pci.o acpi.o 21obj-$(CONFIG_XEN_DOM0) += pci.o acpi.o
22obj-$(CONFIG_XEN_MCE_LOG) += mcelog.o
21obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/ 23obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/
22obj-$(CONFIG_XEN_PRIVCMD) += xen-privcmd.o 24obj-$(CONFIG_XEN_PRIVCMD) += xen-privcmd.o
23obj-$(CONFIG_XEN_ACPI_PROCESSOR) += xen-acpi-processor.o 25obj-$(CONFIG_XEN_ACPI_PROCESSOR) += xen-acpi-processor.o
diff --git a/drivers/xen/mcelog.c b/drivers/xen/mcelog.c
new file mode 100644
index 000000000000..8feee08bcb43
--- /dev/null
+++ b/drivers/xen/mcelog.c
@@ -0,0 +1,414 @@
1/******************************************************************************
2 * mcelog.c
 3 * Driver for receiving and transferring machine check error information
4 *
5 * Copyright (c) 2012 Intel Corporation
6 * Author: Liu, Jinsong <jinsong.liu@intel.com>
7 * Author: Jiang, Yunhong <yunhong.jiang@intel.com>
8 * Author: Ke, Liping <liping.ke@intel.com>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License version 2
12 * as published by the Free Software Foundation; or, when distributed
13 * separately from the Linux kernel or incorporated into other
14 * software packages, subject to the following license:
15 *
16 * Permission is hereby granted, free of charge, to any person obtaining a copy
17 * of this source file (the "Software"), to deal in the Software without
18 * restriction, including without limitation the rights to use, copy, modify,
19 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
20 * and to permit persons to whom the Software is furnished to do so, subject to
21 * the following conditions:
22 *
23 * The above copyright notice and this permission notice shall be included in
24 * all copies or substantial portions of the Software.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
32 * IN THE SOFTWARE.
33 */
34
35#include <linux/init.h>
36#include <linux/types.h>
37#include <linux/kernel.h>
38#include <linux/slab.h>
39#include <linux/fs.h>
40#include <linux/device.h>
41#include <linux/miscdevice.h>
42#include <linux/uaccess.h>
43#include <linux/capability.h>
44#include <linux/poll.h>
45#include <linux/sched.h>
46
47#include <xen/interface/xen.h>
48#include <xen/events.h>
49#include <xen/interface/vcpu.h>
50#include <xen/xen.h>
51#include <asm/xen/hypercall.h>
52#include <asm/xen/hypervisor.h>
53
54#define XEN_MCELOG "xen_mcelog: "
55
56static struct mc_info g_mi;
57static struct mcinfo_logical_cpu *g_physinfo;
58static uint32_t ncpus;
59
60static DEFINE_MUTEX(mcelog_lock);
61
62static struct xen_mce_log xen_mcelog = {
63 .signature = XEN_MCE_LOG_SIGNATURE,
64 .len = XEN_MCE_LOG_LEN,
65 .recordlen = sizeof(struct xen_mce),
66};
67
68static DEFINE_SPINLOCK(xen_mce_chrdev_state_lock);
69static int xen_mce_chrdev_open_count; /* #times opened */
70static int xen_mce_chrdev_open_exclu; /* already open exclusive? */
71
72static DECLARE_WAIT_QUEUE_HEAD(xen_mce_chrdev_wait);
73
74static int xen_mce_chrdev_open(struct inode *inode, struct file *file)
75{
76 spin_lock(&xen_mce_chrdev_state_lock);
77
78 if (xen_mce_chrdev_open_exclu ||
79 (xen_mce_chrdev_open_count && (file->f_flags & O_EXCL))) {
80 spin_unlock(&xen_mce_chrdev_state_lock);
81
82 return -EBUSY;
83 }
84
85 if (file->f_flags & O_EXCL)
86 xen_mce_chrdev_open_exclu = 1;
87 xen_mce_chrdev_open_count++;
88
89 spin_unlock(&xen_mce_chrdev_state_lock);
90
91 return nonseekable_open(inode, file);
92}
93
94static int xen_mce_chrdev_release(struct inode *inode, struct file *file)
95{
96 spin_lock(&xen_mce_chrdev_state_lock);
97
98 xen_mce_chrdev_open_count--;
99 xen_mce_chrdev_open_exclu = 0;
100
101 spin_unlock(&xen_mce_chrdev_state_lock);
102
103 return 0;
104}
105
106static ssize_t xen_mce_chrdev_read(struct file *filp, char __user *ubuf,
107 size_t usize, loff_t *off)
108{
109 char __user *buf = ubuf;
110 unsigned num;
111 int i, err;
112
113 mutex_lock(&mcelog_lock);
114
115 num = xen_mcelog.next;
116
117 /* Only supports full reads right now */
118 err = -EINVAL;
119 if (*off != 0 || usize < XEN_MCE_LOG_LEN*sizeof(struct xen_mce))
120 goto out;
121
122 err = 0;
123 for (i = 0; i < num; i++) {
124 struct xen_mce *m = &xen_mcelog.entry[i];
125
126 err |= copy_to_user(buf, m, sizeof(*m));
127 buf += sizeof(*m);
128 }
129
130 memset(xen_mcelog.entry, 0, num * sizeof(struct xen_mce));
131 xen_mcelog.next = 0;
132
133 if (err)
134 err = -EFAULT;
135
136out:
137 mutex_unlock(&mcelog_lock);
138
139 return err ? err : buf - ubuf;
140}
141
142static unsigned int xen_mce_chrdev_poll(struct file *file, poll_table *wait)
143{
144 poll_wait(file, &xen_mce_chrdev_wait, wait);
145
146 if (xen_mcelog.next)
147 return POLLIN | POLLRDNORM;
148
149 return 0;
150}
151
152static long xen_mce_chrdev_ioctl(struct file *f, unsigned int cmd,
153 unsigned long arg)
154{
155 int __user *p = (int __user *)arg;
156
157 if (!capable(CAP_SYS_ADMIN))
158 return -EPERM;
159
160 switch (cmd) {
161 case MCE_GET_RECORD_LEN:
162 return put_user(sizeof(struct xen_mce), p);
163 case MCE_GET_LOG_LEN:
164 return put_user(XEN_MCE_LOG_LEN, p);
165 case MCE_GETCLEAR_FLAGS: {
166 unsigned flags;
167
168 do {
169 flags = xen_mcelog.flags;
170 } while (cmpxchg(&xen_mcelog.flags, flags, 0) != flags);
171
172 return put_user(flags, p);
173 }
174 default:
175 return -ENOTTY;
176 }
177}
178
179static const struct file_operations xen_mce_chrdev_ops = {
180 .open = xen_mce_chrdev_open,
181 .release = xen_mce_chrdev_release,
182 .read = xen_mce_chrdev_read,
183 .poll = xen_mce_chrdev_poll,
184 .unlocked_ioctl = xen_mce_chrdev_ioctl,
185 .llseek = no_llseek,
186};
187
188static struct miscdevice xen_mce_chrdev_device = {
189 MISC_MCELOG_MINOR,
190 "mcelog",
191 &xen_mce_chrdev_ops,
192};
193
194/*
195 * Caller should hold the mcelog_lock
196 */
197static void xen_mce_log(struct xen_mce *mce)
198{
199 unsigned entry;
200
201 entry = xen_mcelog.next;
202
203 /*
204 * When the buffer fills up discard new entries.
205 * Assume that the earlier errors are the more
206 * interesting ones:
207 */
208 if (entry >= XEN_MCE_LOG_LEN) {
209 set_bit(XEN_MCE_OVERFLOW,
210 (unsigned long *)&xen_mcelog.flags);
211 return;
212 }
213
214 memcpy(xen_mcelog.entry + entry, mce, sizeof(struct xen_mce));
215
216 xen_mcelog.next++;
217}
218
219static int convert_log(struct mc_info *mi)
220{
221 struct mcinfo_common *mic;
222 struct mcinfo_global *mc_global;
223 struct mcinfo_bank *mc_bank;
224 struct xen_mce m;
225 uint32_t i;
226
227 mic = NULL;
228 x86_mcinfo_lookup(&mic, mi, MC_TYPE_GLOBAL);
229 if (unlikely(!mic)) {
230 pr_warning(XEN_MCELOG "Failed to find global error info\n");
231 return -ENODEV;
232 }
233
234 memset(&m, 0, sizeof(struct xen_mce));
235
236 mc_global = (struct mcinfo_global *)mic;
237 m.mcgstatus = mc_global->mc_gstatus;
238 m.apicid = mc_global->mc_apicid;
239
240 for (i = 0; i < ncpus; i++)
241 if (g_physinfo[i].mc_apicid == m.apicid)
242 break;
243 if (unlikely(i == ncpus)) {
244 pr_warning(XEN_MCELOG "Failed to match cpu with apicid %d\n",
245 m.apicid);
246 return -ENODEV;
247 }
248
249 m.socketid = g_physinfo[i].mc_chipid;
250 m.cpu = m.extcpu = g_physinfo[i].mc_cpunr;
251 m.cpuvendor = (__u8)g_physinfo[i].mc_vendor;
252 m.mcgcap = g_physinfo[i].mc_msrvalues[__MC_MSR_MCGCAP].value;
253
254 mic = NULL;
255 x86_mcinfo_lookup(&mic, mi, MC_TYPE_BANK);
256 if (unlikely(!mic)) {
 257		pr_warning(XEN_MCELOG "Failed to find bank error info\n");
258 return -ENODEV;
259 }
260
261 do {
262 if ((!mic) || (mic->size == 0) ||
263 (mic->type != MC_TYPE_GLOBAL &&
264 mic->type != MC_TYPE_BANK &&
265 mic->type != MC_TYPE_EXTENDED &&
266 mic->type != MC_TYPE_RECOVERY))
267 break;
268
269 if (mic->type == MC_TYPE_BANK) {
270 mc_bank = (struct mcinfo_bank *)mic;
271 m.misc = mc_bank->mc_misc;
272 m.status = mc_bank->mc_status;
273 m.addr = mc_bank->mc_addr;
274 m.tsc = mc_bank->mc_tsc;
275 m.bank = mc_bank->mc_bank;
276 m.finished = 1;
277 /*log this record*/
278 xen_mce_log(&m);
279 }
280 mic = x86_mcinfo_next(mic);
281 } while (1);
282
283 return 0;
284}
285
286static int mc_queue_handle(uint32_t flags)
287{
288 struct xen_mc mc_op;
289 int ret = 0;
290
291 mc_op.cmd = XEN_MC_fetch;
292 mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
293 set_xen_guest_handle(mc_op.u.mc_fetch.data, &g_mi);
294 do {
295 mc_op.u.mc_fetch.flags = flags;
296 ret = HYPERVISOR_mca(&mc_op);
297 if (ret) {
298 pr_err(XEN_MCELOG "Failed to fetch %s error log\n",
299 (flags == XEN_MC_URGENT) ?
 300			       "urgent" : "nonurgent");
301 break;
302 }
303
304 if (mc_op.u.mc_fetch.flags & XEN_MC_NODATA ||
305 mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED)
306 break;
307 else {
308 ret = convert_log(&g_mi);
309 if (ret)
310 pr_warning(XEN_MCELOG
311 "Failed to convert this error log, "
312 "continue acking it anyway\n");
313
314 mc_op.u.mc_fetch.flags = flags | XEN_MC_ACK;
315 ret = HYPERVISOR_mca(&mc_op);
316 if (ret) {
317 pr_err(XEN_MCELOG
318 "Failed to ack previous error log\n");
319 break;
320 }
321 }
322 } while (1);
323
324 return ret;
325}
326
327/* virq handler for machine check error info*/
328static void xen_mce_work_fn(struct work_struct *work)
329{
330 int err;
331
332 mutex_lock(&mcelog_lock);
333
334 /* urgent mc_info */
335 err = mc_queue_handle(XEN_MC_URGENT);
336 if (err)
337 pr_err(XEN_MCELOG
338 "Failed to handle urgent mc_info queue, "
339 "continue handling nonurgent mc_info queue anyway.\n");
340
341 /* nonurgent mc_info */
342 err = mc_queue_handle(XEN_MC_NONURGENT);
343 if (err)
344 pr_err(XEN_MCELOG
345 "Failed to handle nonurgent mc_info queue.\n");
346
347 /* wake processes polling /dev/mcelog */
348 wake_up_interruptible(&xen_mce_chrdev_wait);
349
350 mutex_unlock(&mcelog_lock);
351}
352static DECLARE_WORK(xen_mce_work, xen_mce_work_fn);
353
354static irqreturn_t xen_mce_interrupt(int irq, void *dev_id)
355{
356 schedule_work(&xen_mce_work);
357 return IRQ_HANDLED;
358}
359
360static int bind_virq_for_mce(void)
361{
362 int ret;
363 struct xen_mc mc_op;
364
365 memset(&mc_op, 0, sizeof(struct xen_mc));
366
367 /* Fetch physical CPU Numbers */
368 mc_op.cmd = XEN_MC_physcpuinfo;
369 mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
370 set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
371 ret = HYPERVISOR_mca(&mc_op);
372 if (ret) {
373 pr_err(XEN_MCELOG "Failed to get CPU numbers\n");
374 return ret;
375 }
376
377 /* Fetch each CPU Physical Info for later reference*/
378 ncpus = mc_op.u.mc_physcpuinfo.ncpus;
379 g_physinfo = kcalloc(ncpus, sizeof(struct mcinfo_logical_cpu),
380 GFP_KERNEL);
381 if (!g_physinfo)
382 return -ENOMEM;
383 set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
384 ret = HYPERVISOR_mca(&mc_op);
385 if (ret) {
386 pr_err(XEN_MCELOG "Failed to get CPU info\n");
387 kfree(g_physinfo);
388 return ret;
389 }
390
391 ret = bind_virq_to_irqhandler(VIRQ_MCA, 0,
392 xen_mce_interrupt, 0, "mce", NULL);
393 if (ret < 0) {
394 pr_err(XEN_MCELOG "Failed to bind virq\n");
395 kfree(g_physinfo);
396 return ret;
397 }
398
399 return 0;
400}
401
402static int __init xen_late_init_mcelog(void)
403{
404 /* Only DOM0 is responsible for MCE logging */
405 if (xen_initial_domain()) {
406 /* register character device /dev/mcelog for xen mcelog */
407 if (misc_register(&xen_mce_chrdev_device))
408 return -ENODEV;
409 return bind_virq_for_mce();
410 }
411
412 return -ENODEV;
413}
414device_initcall(xen_late_init_mcelog);
diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c
new file mode 100644
index 000000000000..067fcfa1723e
--- /dev/null
+++ b/drivers/xen/pcpu.c
@@ -0,0 +1,371 @@
1/******************************************************************************
2 * pcpu.c
 3 * Manage physical cpus in dom0: get pcpu info and provide a sysfs interface
4 *
5 * Copyright (c) 2012 Intel Corporation
6 * Author: Liu, Jinsong <jinsong.liu@intel.com>
7 * Author: Jiang, Yunhong <yunhong.jiang@intel.com>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License version 2
11 * as published by the Free Software Foundation; or, when distributed
12 * separately from the Linux kernel or incorporated into other
13 * software packages, subject to the following license:
14 *
15 * Permission is hereby granted, free of charge, to any person obtaining a copy
16 * of this source file (the "Software"), to deal in the Software without
17 * restriction, including without limitation the rights to use, copy, modify,
18 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
19 * and to permit persons to whom the Software is furnished to do so, subject to
20 * the following conditions:
21 *
22 * The above copyright notice and this permission notice shall be included in
23 * all copies or substantial portions of the Software.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
31 * IN THE SOFTWARE.
32 */
33
34#include <linux/interrupt.h>
35#include <linux/spinlock.h>
36#include <linux/cpu.h>
37#include <linux/stat.h>
38#include <linux/capability.h>
39
40#include <xen/xen.h>
41#include <xen/xenbus.h>
42#include <xen/events.h>
43#include <xen/interface/platform.h>
44#include <asm/xen/hypervisor.h>
45#include <asm/xen/hypercall.h>
46
47#define XEN_PCPU "xen_cpu: "
48
49/*
 50 * @cpu_id: Xen physical cpu logical number
51 * @flags: Xen physical cpu status flag
52 * - XEN_PCPU_FLAGS_ONLINE: cpu is online
53 * - XEN_PCPU_FLAGS_INVALID: cpu is not present
54 */
55struct pcpu {
56 struct list_head list;
57 struct device dev;
58 uint32_t cpu_id;
59 uint32_t flags;
60};
61
62static struct bus_type xen_pcpu_subsys = {
63 .name = "xen_cpu",
64 .dev_name = "xen_cpu",
65};
66
67static DEFINE_MUTEX(xen_pcpu_lock);
68
69static LIST_HEAD(xen_pcpus);
70
71static int xen_pcpu_down(uint32_t cpu_id)
72{
73 struct xen_platform_op op = {
74 .cmd = XENPF_cpu_offline,
75 .interface_version = XENPF_INTERFACE_VERSION,
76 .u.cpu_ol.cpuid = cpu_id,
77 };
78
79 return HYPERVISOR_dom0_op(&op);
80}
81
82static int xen_pcpu_up(uint32_t cpu_id)
83{
84 struct xen_platform_op op = {
85 .cmd = XENPF_cpu_online,
86 .interface_version = XENPF_INTERFACE_VERSION,
87 .u.cpu_ol.cpuid = cpu_id,
88 };
89
90 return HYPERVISOR_dom0_op(&op);
91}
92
93static ssize_t show_online(struct device *dev,
94 struct device_attribute *attr,
95 char *buf)
96{
97 struct pcpu *cpu = container_of(dev, struct pcpu, dev);
98
99 return sprintf(buf, "%u\n", !!(cpu->flags & XEN_PCPU_FLAGS_ONLINE));
100}
101
102static ssize_t __ref store_online(struct device *dev,
103 struct device_attribute *attr,
104 const char *buf, size_t count)
105{
106 struct pcpu *pcpu = container_of(dev, struct pcpu, dev);
107 unsigned long long val;
108 ssize_t ret;
109
110 if (!capable(CAP_SYS_ADMIN))
111 return -EPERM;
112
113 if (kstrtoull(buf, 0, &val) < 0)
114 return -EINVAL;
115
116 switch (val) {
117 case 0:
118 ret = xen_pcpu_down(pcpu->cpu_id);
119 break;
120 case 1:
121 ret = xen_pcpu_up(pcpu->cpu_id);
122 break;
123 default:
124 ret = -EINVAL;
125 }
126
127 if (ret >= 0)
128 ret = count;
129 return ret;
130}
131static DEVICE_ATTR(online, S_IRUGO | S_IWUSR, show_online, store_online);
132
133static bool xen_pcpu_online(uint32_t flags)
134{
135 return !!(flags & XEN_PCPU_FLAGS_ONLINE);
136}
137
138static void pcpu_online_status(struct xenpf_pcpuinfo *info,
139 struct pcpu *pcpu)
140{
141 if (xen_pcpu_online(info->flags) &&
142 !xen_pcpu_online(pcpu->flags)) {
143 /* the pcpu is onlined */
144 pcpu->flags |= XEN_PCPU_FLAGS_ONLINE;
145 kobject_uevent(&pcpu->dev.kobj, KOBJ_ONLINE);
146 } else if (!xen_pcpu_online(info->flags) &&
147 xen_pcpu_online(pcpu->flags)) {
148 /* The pcpu is offlined */
149 pcpu->flags &= ~XEN_PCPU_FLAGS_ONLINE;
150 kobject_uevent(&pcpu->dev.kobj, KOBJ_OFFLINE);
151 }
152}
153
154static struct pcpu *get_pcpu(uint32_t cpu_id)
155{
156 struct pcpu *pcpu;
157
158 list_for_each_entry(pcpu, &xen_pcpus, list) {
159 if (pcpu->cpu_id == cpu_id)
160 return pcpu;
161 }
162
163 return NULL;
164}
165
166static void pcpu_release(struct device *dev)
167{
168 struct pcpu *pcpu = container_of(dev, struct pcpu, dev);
169
170 list_del(&pcpu->list);
171 kfree(pcpu);
172}
173
174static void unregister_and_remove_pcpu(struct pcpu *pcpu)
175{
176 struct device *dev;
177
178 if (!pcpu)
179 return;
180
181 dev = &pcpu->dev;
182 if (dev->id)
183 device_remove_file(dev, &dev_attr_online);
184
185 /* pcpu remove would be implicitly done */
186 device_unregister(dev);
187}
188
189static int register_pcpu(struct pcpu *pcpu)
190{
191 struct device *dev;
192 int err = -EINVAL;
193
194 if (!pcpu)
195 return err;
196
197 dev = &pcpu->dev;
198 dev->bus = &xen_pcpu_subsys;
199 dev->id = pcpu->cpu_id;
200 dev->release = pcpu_release;
201
202 err = device_register(dev);
203 if (err) {
204 pcpu_release(dev);
205 return err;
206 }
207
208 /*
 209	 * Xen never offlines cpu0 due to several restrictions
 210	 * and assumptions, so no sysfs control is exposed for it;
 211	 * users cannot attempt to offline the BSP.
212 */
213 if (dev->id) {
214 err = device_create_file(dev, &dev_attr_online);
215 if (err) {
216 device_unregister(dev);
217 return err;
218 }
219 }
220
221 return 0;
222}
223
224static struct pcpu *create_and_register_pcpu(struct xenpf_pcpuinfo *info)
225{
226 struct pcpu *pcpu;
227 int err;
228
229 if (info->flags & XEN_PCPU_FLAGS_INVALID)
230 return ERR_PTR(-ENODEV);
231
232 pcpu = kzalloc(sizeof(struct pcpu), GFP_KERNEL);
233 if (!pcpu)
234 return ERR_PTR(-ENOMEM);
235
236 INIT_LIST_HEAD(&pcpu->list);
237 pcpu->cpu_id = info->xen_cpuid;
238 pcpu->flags = info->flags;
239
240 /* Need hold on xen_pcpu_lock before pcpu list manipulations */
241 list_add_tail(&pcpu->list, &xen_pcpus);
242
243 err = register_pcpu(pcpu);
244 if (err) {
245 pr_warning(XEN_PCPU "Failed to register pcpu%u\n",
246 info->xen_cpuid);
247 return ERR_PTR(-ENOENT);
248 }
249
250 return pcpu;
251}
252
253/*
254 * Caller should hold the xen_pcpu_lock
255 */
256static int sync_pcpu(uint32_t cpu, uint32_t *max_cpu)
257{
258 int ret;
259 struct pcpu *pcpu = NULL;
260 struct xenpf_pcpuinfo *info;
261 struct xen_platform_op op = {
262 .cmd = XENPF_get_cpuinfo,
263 .interface_version = XENPF_INTERFACE_VERSION,
264 .u.pcpu_info.xen_cpuid = cpu,
265 };
266
267 ret = HYPERVISOR_dom0_op(&op);
268 if (ret)
269 return ret;
270
271 info = &op.u.pcpu_info;
272 if (max_cpu)
273 *max_cpu = info->max_present;
274
275 pcpu = get_pcpu(cpu);
276
277 /*
 278	 * Only cpus in the present map have a sysfs interface.
279 */
280 if (info->flags & XEN_PCPU_FLAGS_INVALID) {
281 if (pcpu)
282 unregister_and_remove_pcpu(pcpu);
283 return 0;
284 }
285
286 if (!pcpu) {
287 pcpu = create_and_register_pcpu(info);
288 if (IS_ERR_OR_NULL(pcpu))
289 return -ENODEV;
290 } else
291 pcpu_online_status(info, pcpu);
292
293 return 0;
294}
295
296/*
297 * Sync dom0's pcpu information with xen hypervisor's
298 */
299static int xen_sync_pcpus(void)
300{
301 /*
 302	 * Boot cpu always has cpu_id 0 in xen
303 */
304 uint32_t cpu = 0, max_cpu = 0;
305 int err = 0;
306 struct pcpu *pcpu, *tmp;
307
308 mutex_lock(&xen_pcpu_lock);
309
310 while (!err && (cpu <= max_cpu)) {
311 err = sync_pcpu(cpu, &max_cpu);
312 cpu++;
313 }
314
315 if (err)
316 list_for_each_entry_safe(pcpu, tmp, &xen_pcpus, list)
317 unregister_and_remove_pcpu(pcpu);
318
319 mutex_unlock(&xen_pcpu_lock);
320
321 return err;
322}
323
324static void xen_pcpu_work_fn(struct work_struct *work)
325{
326 xen_sync_pcpus();
327}
328static DECLARE_WORK(xen_pcpu_work, xen_pcpu_work_fn);
329
330static irqreturn_t xen_pcpu_interrupt(int irq, void *dev_id)
331{
332 schedule_work(&xen_pcpu_work);
333 return IRQ_HANDLED;
334}
335
336static int __init xen_pcpu_init(void)
337{
338 int irq, ret;
339
340 if (!xen_initial_domain())
341 return -ENODEV;
342
343 irq = bind_virq_to_irqhandler(VIRQ_PCPU_STATE, 0,
344 xen_pcpu_interrupt, 0,
345 "xen-pcpu", NULL);
346 if (irq < 0) {
347 pr_warning(XEN_PCPU "Failed to bind pcpu virq\n");
348 return irq;
349 }
350
351 ret = subsys_system_register(&xen_pcpu_subsys, NULL);
352 if (ret) {
353 pr_warning(XEN_PCPU "Failed to register pcpu subsys\n");
354 goto err1;
355 }
356
357 ret = xen_sync_pcpus();
358 if (ret) {
359 pr_warning(XEN_PCPU "Failed to sync pcpu info\n");
360 goto err2;
361 }
362
363 return 0;
364
365err2:
366 bus_unregister(&xen_pcpu_subsys);
367err1:
368 unbind_from_irqhandler(irq, NULL);
369 return ret;
370}
371arch_initcall(xen_pcpu_init);
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
index 2389e581e23c..d4c50d63acbc 100644
--- a/drivers/xen/platform-pci.c
+++ b/drivers/xen/platform-pci.c
@@ -101,6 +101,19 @@ static int platform_pci_resume(struct pci_dev *pdev)
101 return 0; 101 return 0;
102} 102}
103 103
104static void __devinit prepare_shared_info(void)
105{
106#ifdef CONFIG_KEXEC
107 unsigned long addr;
108 struct shared_info *hvm_shared_info;
109
110 addr = alloc_xen_mmio(PAGE_SIZE);
111 hvm_shared_info = ioremap(addr, PAGE_SIZE);
112 memset(hvm_shared_info, 0, PAGE_SIZE);
113 xen_hvm_prepare_kexec(hvm_shared_info, addr >> PAGE_SHIFT);
114#endif
115}
116
104static int __devinit platform_pci_init(struct pci_dev *pdev, 117static int __devinit platform_pci_init(struct pci_dev *pdev,
105 const struct pci_device_id *ent) 118 const struct pci_device_id *ent)
106{ 119{
@@ -109,6 +122,9 @@ static int __devinit platform_pci_init(struct pci_dev *pdev,
109 long mmio_addr, mmio_len; 122 long mmio_addr, mmio_len;
110 unsigned int max_nr_gframes; 123 unsigned int max_nr_gframes;
111 124
125 if (!xen_domain())
126 return -ENODEV;
127
112 i = pci_enable_device(pdev); 128 i = pci_enable_device(pdev);
113 if (i) 129 if (i)
114 return i; 130 return i;
@@ -135,6 +151,8 @@ static int __devinit platform_pci_init(struct pci_dev *pdev,
135 platform_mmio = mmio_addr; 151 platform_mmio = mmio_addr;
136 platform_mmiolen = mmio_len; 152 platform_mmiolen = mmio_len;
137 153
154 prepare_shared_info();
155
138 if (!xen_have_vector_callback) { 156 if (!xen_have_vector_callback) {
139 ret = xen_allocate_irq(pdev); 157 ret = xen_allocate_irq(pdev);
140 if (ret) { 158 if (ret) {
diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c
index 7ff2569e17ae..b590ee067fcd 100644
--- a/drivers/xen/xen-acpi-processor.c
+++ b/drivers/xen/xen-acpi-processor.c
@@ -520,15 +520,18 @@ static int __init xen_acpi_processor_init(void)
520 520
521 if (!pr_backup) { 521 if (!pr_backup) {
522 pr_backup = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL); 522 pr_backup = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL);
523 memcpy(pr_backup, _pr, sizeof(struct acpi_processor)); 523 if (pr_backup)
524 memcpy(pr_backup, _pr, sizeof(struct acpi_processor));
524 } 525 }
525 (void)upload_pm_data(_pr); 526 (void)upload_pm_data(_pr);
526 } 527 }
527 rc = check_acpi_ids(pr_backup); 528 rc = check_acpi_ids(pr_backup);
528 if (rc)
529 goto err_unregister;
530 529
531 kfree(pr_backup); 530 kfree(pr_backup);
531 pr_backup = NULL;
532
533 if (rc)
534 goto err_unregister;
532 535
533 return 0; 536 return 0;
534err_unregister: 537err_unregister:
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index d1c217b23a42..bce15cf4a8df 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -618,6 +618,23 @@ static struct xenbus_watch *find_watch(const char *token)
618 return NULL; 618 return NULL;
619} 619}
620 620
621static void xs_reset_watches(void)
622{
623 int err, supported = 0;
624
625 if (!xen_hvm_domain())
626 return;
627
628 err = xenbus_scanf(XBT_NIL, "control",
629 "platform-feature-xs_reset_watches", "%d", &supported);
630 if (err != 1 || !supported)
631 return;
632
633 err = xs_error(xs_single(XBT_NIL, XS_RESET_WATCHES, "", NULL));
634 if (err && err != -EEXIST)
635 printk(KERN_WARNING "xs_reset_watches failed: %d\n", err);
636}
637
621/* Register callback to watch this node. */ 638/* Register callback to watch this node. */
622int register_xenbus_watch(struct xenbus_watch *watch) 639int register_xenbus_watch(struct xenbus_watch *watch)
623{ 640{
@@ -900,5 +917,8 @@ int xs_init(void)
900 if (IS_ERR(task)) 917 if (IS_ERR(task))
901 return PTR_ERR(task); 918 return PTR_ERR(task);
902 919
920 /* shutdown watches for kexec boot */
921 xs_reset_watches();
922
903 return 0; 923 return 0;
904} 924}
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index 0549d2115507..e0deeb2cc939 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -35,6 +35,7 @@
35#define MPT_MINOR 220 35#define MPT_MINOR 220
36#define MPT2SAS_MINOR 221 36#define MPT2SAS_MINOR 221
37#define UINPUT_MINOR 223 37#define UINPUT_MINOR 223
38#define MISC_MCELOG_MINOR 227
38#define HPET_MINOR 228 39#define HPET_MINOR 228
39#define FUSE_MINOR 229 40#define FUSE_MINOR 229
40#define KVM_MINOR 232 41#define KVM_MINOR 232
diff --git a/include/xen/events.h b/include/xen/events.h
index 04399b28e821..9c641deb65d2 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -58,6 +58,8 @@ void notify_remote_via_irq(int irq);
58 58
59void xen_irq_resume(void); 59void xen_irq_resume(void);
60 60
61void xen_hvm_prepare_kexec(struct shared_info *sip, unsigned long pfn);
62
61/* Clear an irq's pending state, in preparation for polling on it */ 63/* Clear an irq's pending state, in preparation for polling on it */
62void xen_clear_irq_pending(int irq); 64void xen_clear_irq_pending(int irq);
63void xen_set_irq_pending(int irq); 65void xen_set_irq_pending(int irq);
diff --git a/include/xen/interface/io/xs_wire.h b/include/xen/interface/io/xs_wire.h
index 7cdfca24eafb..794deb07eb53 100644
--- a/include/xen/interface/io/xs_wire.h
+++ b/include/xen/interface/io/xs_wire.h
@@ -29,7 +29,8 @@ enum xsd_sockmsg_type
29 XS_IS_DOMAIN_INTRODUCED, 29 XS_IS_DOMAIN_INTRODUCED,
30 XS_RESUME, 30 XS_RESUME,
31 XS_SET_TARGET, 31 XS_SET_TARGET,
32 XS_RESTRICT 32 XS_RESTRICT,
33 XS_RESET_WATCHES,
33}; 34};
34 35
35#define XS_WRITE_NONE "NONE" 36#define XS_WRITE_NONE "NONE"
diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
index 486653f0dd8f..61fa66160983 100644
--- a/include/xen/interface/platform.h
+++ b/include/xen/interface/platform.h
@@ -314,6 +314,13 @@ struct xenpf_pcpuinfo {
314}; 314};
315DEFINE_GUEST_HANDLE_STRUCT(xenpf_pcpuinfo); 315DEFINE_GUEST_HANDLE_STRUCT(xenpf_pcpuinfo);
316 316
317#define XENPF_cpu_online 56
318#define XENPF_cpu_offline 57
319struct xenpf_cpu_ol {
320 uint32_t cpuid;
321};
322DEFINE_GUEST_HANDLE_STRUCT(xenpf_cpu_ol);
323
317struct xen_platform_op { 324struct xen_platform_op {
318 uint32_t cmd; 325 uint32_t cmd;
319 uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ 326 uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
@@ -330,6 +337,7 @@ struct xen_platform_op {
330 struct xenpf_getidletime getidletime; 337 struct xenpf_getidletime getidletime;
331 struct xenpf_set_processor_pminfo set_pminfo; 338 struct xenpf_set_processor_pminfo set_pminfo;
332 struct xenpf_pcpuinfo pcpu_info; 339 struct xenpf_pcpuinfo pcpu_info;
340 struct xenpf_cpu_ol cpu_ol;
333 uint8_t pad[128]; 341 uint8_t pad[128];
334 } u; 342 } u;
335}; 343};
diff --git a/include/xen/interface/xen-mca.h b/include/xen/interface/xen-mca.h
new file mode 100644
index 000000000000..73a4ea714d93
--- /dev/null
+++ b/include/xen/interface/xen-mca.h
@@ -0,0 +1,385 @@
1/******************************************************************************
2 * arch-x86/mca.h
3 * Guest OS machine check interface to x86 Xen.
4 *
5 * Contributed by Advanced Micro Devices, Inc.
6 * Author: Christoph Egger <Christoph.Egger@amd.com>
7 *
8 * Updated by Intel Corporation
9 * Author: Liu, Jinsong <jinsong.liu@intel.com>
10 *
11 * Permission is hereby granted, free of charge, to any person obtaining a copy
12 * of this software and associated documentation files (the "Software"), to
13 * deal in the Software without restriction, including without limitation the
14 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
15 * sell copies of the Software, and to permit persons to whom the Software is
16 * furnished to do so, subject to the following conditions:
17 *
18 * The above copyright notice and this permission notice shall be included in
19 * all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 * DEALINGS IN THE SOFTWARE.
28 */
29
30#ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__
31#define __XEN_PUBLIC_ARCH_X86_MCA_H__
32
33/* Hypercall */
34#define __HYPERVISOR_mca __HYPERVISOR_arch_0
35
36#define XEN_MCA_INTERFACE_VERSION 0x01ecc003
37
38/* IN: Dom0 calls hypercall to retrieve nonurgent error log entry */
39#define XEN_MC_NONURGENT 0x1
40/* IN: Dom0 calls hypercall to retrieve urgent error log entry */
41#define XEN_MC_URGENT 0x2
42/* IN: Dom0 acknowledges previosly-fetched error log entry */
43#define XEN_MC_ACK 0x4
44
45/* OUT: All is ok */
46#define XEN_MC_OK 0x0
47/* OUT: Domain could not fetch data. */
48#define XEN_MC_FETCHFAILED 0x1
49/* OUT: There was no machine check data to fetch. */
50#define XEN_MC_NODATA 0x2
51
52#ifndef __ASSEMBLY__
53/* vIRQ injected to Dom0 */
54#define VIRQ_MCA VIRQ_ARCH_0
55
56/*
57 * mc_info entry types
58 * mca machine check info are recorded in mc_info entries.
59 * when fetch mca info, it can use MC_TYPE_... to distinguish
60 * different mca info.
61 */
62#define MC_TYPE_GLOBAL 0
63#define MC_TYPE_BANK 1
64#define MC_TYPE_EXTENDED 2
65#define MC_TYPE_RECOVERY 3
66
67struct mcinfo_common {
68 uint16_t type; /* structure type */
69 uint16_t size; /* size of this struct in bytes */
70};
71
72#define MC_FLAG_CORRECTABLE (1 << 0)
73#define MC_FLAG_UNCORRECTABLE (1 << 1)
74#define MC_FLAG_RECOVERABLE (1 << 2)
75#define MC_FLAG_POLLED (1 << 3)
76#define MC_FLAG_RESET (1 << 4)
77#define MC_FLAG_CMCI (1 << 5)
78#define MC_FLAG_MCE (1 << 6)
79
80/* contains x86 global mc information */
81struct mcinfo_global {
82 struct mcinfo_common common;
83
84 uint16_t mc_domid; /* running domain at the time in error */
85 uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */
86 uint32_t mc_socketid; /* physical socket of the physical core */
87 uint16_t mc_coreid; /* physical impacted core */
88 uint16_t mc_core_threadid; /* core thread of physical core */
89 uint32_t mc_apicid;
90 uint32_t mc_flags;
91 uint64_t mc_gstatus; /* global status */
92};
93
94/* contains x86 bank mc information */
95struct mcinfo_bank {
96 struct mcinfo_common common;
97
98 uint16_t mc_bank; /* bank nr */
99 uint16_t mc_domid; /* domain referenced by mc_addr if valid */
100 uint64_t mc_status; /* bank status */
101 uint64_t mc_addr; /* bank address */
102 uint64_t mc_misc;
103 uint64_t mc_ctrl2;
104 uint64_t mc_tsc;
105};
106
107struct mcinfo_msr {
108 uint64_t reg; /* MSR */
109 uint64_t value; /* MSR value */
110};
111
112/* contains mc information from other or additional mc MSRs */
113struct mcinfo_extended {
114 struct mcinfo_common common;
115 uint32_t mc_msrs; /* Number of msr with valid values. */
116 /*
117 * Currently Intel extended MSR (32/64) include all gp registers
118 * and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be
119 * useful at present. So expand this array to 16/32 to leave room.
120 */
121 struct mcinfo_msr mc_msr[sizeof(void *) * 4];
122};
123
124/* Recovery Action flags. Giving recovery result information to DOM0 */
125
126/* Xen takes successful recovery action, the error is recovered */
127#define REC_ACTION_RECOVERED (0x1 << 0)
128/* No action is performed by XEN */
129#define REC_ACTION_NONE (0x1 << 1)
130/* It's possible DOM0 might take action ownership in some case */
131#define REC_ACTION_NEED_RESET (0x1 << 2)
132
133/*
134 * Different Recovery Action types, if the action is performed successfully,
135 * REC_ACTION_RECOVERED flag will be returned.
136 */
137
138/* Page Offline Action */
139#define MC_ACTION_PAGE_OFFLINE (0x1 << 0)
140/* CPU offline Action */
141#define MC_ACTION_CPU_OFFLINE (0x1 << 1)
142/* L3 cache disable Action */
143#define MC_ACTION_CACHE_SHRINK (0x1 << 2)
144
145/*
146 * Below interface used between XEN/DOM0 for passing XEN's recovery action
147 * information to DOM0.
148 */
149struct page_offline_action {
150 /* Params for passing the offlined page number to DOM0 */
151 uint64_t mfn;
152 uint64_t status;
153};
154
155struct cpu_offline_action {
156 /* Params for passing the identity of the offlined CPU to DOM0 */
157 uint32_t mc_socketid;
158 uint16_t mc_coreid;
159 uint16_t mc_core_threadid;
160};
161
162#define MAX_UNION_SIZE 16
163struct mcinfo_recovery {
164 struct mcinfo_common common;
165 uint16_t mc_bank; /* bank nr */
166 uint8_t action_flags;
167 uint8_t action_types;
168 union {
169 struct page_offline_action page_retire;
170 struct cpu_offline_action cpu_offline;
171 uint8_t pad[MAX_UNION_SIZE];
172 } action_info;
173};
174
175
176#define MCINFO_MAXSIZE 768
177struct mc_info {
178 /* Number of mcinfo_* entries in mi_data */
179 uint32_t mi_nentries;
180 uint32_t flags;
181 uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8];
182};
183DEFINE_GUEST_HANDLE_STRUCT(mc_info);
184
185#define __MC_MSR_ARRAYSIZE 8
186#define __MC_MSR_MCGCAP 0
187#define __MC_NMSRS 1
188#define MC_NCAPS 7
189struct mcinfo_logical_cpu {
190 uint32_t mc_cpunr;
191 uint32_t mc_chipid;
192 uint16_t mc_coreid;
193 uint16_t mc_threadid;
194 uint32_t mc_apicid;
195 uint32_t mc_clusterid;
196 uint32_t mc_ncores;
197 uint32_t mc_ncores_active;
198 uint32_t mc_nthreads;
199 uint32_t mc_cpuid_level;
200 uint32_t mc_family;
201 uint32_t mc_vendor;
202 uint32_t mc_model;
203 uint32_t mc_step;
204 char mc_vendorid[16];
205 char mc_brandid[64];
206 uint32_t mc_cpu_caps[MC_NCAPS];
207 uint32_t mc_cache_size;
208 uint32_t mc_cache_alignment;
209 uint32_t mc_nmsrvals;
210 struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE];
211};
212DEFINE_GUEST_HANDLE_STRUCT(mcinfo_logical_cpu);
213
214/*
215 * Prototype:
216 * uint32_t x86_mcinfo_nentries(struct mc_info *mi);
217 */
218#define x86_mcinfo_nentries(_mi) \
219 ((_mi)->mi_nentries)
220/*
221 * Prototype:
222 * struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi);
223 */
224#define x86_mcinfo_first(_mi) \
225 ((struct mcinfo_common *)(_mi)->mi_data)
226/*
227 * Prototype:
228 * struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic);
229 */
230#define x86_mcinfo_next(_mic) \
231 ((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size))
232
233/*
234 * Prototype:
235 * void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type);
236 */
237static inline void x86_mcinfo_lookup(struct mcinfo_common **ret,
238 struct mc_info *mi, uint16_t type)
239{
240 uint32_t i;
241 struct mcinfo_common *mic;
242 bool found = 0;
243
244 if (!ret || !mi)
245 return;
246
247 mic = x86_mcinfo_first(mi);
248 for (i = 0; i < x86_mcinfo_nentries(mi); i++) {
249 if (mic->type == type) {
250 found = 1;
251 break;
252 }
253 mic = x86_mcinfo_next(mic);
254 }
255
256 *ret = found ? mic : NULL;
257}
258
259/*
260 * Fetch machine check data from hypervisor.
261 */
262#define XEN_MC_fetch 1
263struct xen_mc_fetch {
264 /*
265 * IN: XEN_MC_NONURGENT, XEN_MC_URGENT,
266 * XEN_MC_ACK if ack'king an earlier fetch
267 * OUT: XEN_MC_OK, XEN_MC_FETCHAILED, XEN_MC_NODATA
268 */
269 uint32_t flags;
270 uint32_t _pad0;
271 /* OUT: id for ack, IN: id we are ack'ing */
272 uint64_t fetch_id;
273
274 /* OUT variables. */
275 GUEST_HANDLE(mc_info) data;
276};
277DEFINE_GUEST_HANDLE_STRUCT(xen_mc_fetch);
278
279
280/*
281 * This tells the hypervisor to notify a DomU about the machine check error
282 */
283#define XEN_MC_notifydomain 2
284struct xen_mc_notifydomain {
285 /* IN variables */
286 uint16_t mc_domid; /* The unprivileged domain to notify */
287 uint16_t mc_vcpuid; /* The vcpu in mc_domid to notify */
288
289 /* IN/OUT variables */
290 uint32_t flags;
291};
292DEFINE_GUEST_HANDLE_STRUCT(xen_mc_notifydomain);
293
294#define XEN_MC_physcpuinfo 3
295struct xen_mc_physcpuinfo {
296 /* IN/OUT */
297 uint32_t ncpus;
298 uint32_t _pad0;
299 /* OUT */
300 GUEST_HANDLE(mcinfo_logical_cpu) info;
301};
302
303#define XEN_MC_msrinject 4
304#define MC_MSRINJ_MAXMSRS 8
305struct xen_mc_msrinject {
306 /* IN */
307 uint32_t mcinj_cpunr; /* target processor id */
308 uint32_t mcinj_flags; /* see MC_MSRINJ_F_* below */
309 uint32_t mcinj_count; /* 0 .. count-1 in array are valid */
310 uint32_t _pad0;
311 struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS];
312};
313
314/* Flags for mcinj_flags above; bits 16-31 are reserved */
315#define MC_MSRINJ_F_INTERPOSE 0x1
316
317#define XEN_MC_mceinject 5
318struct xen_mc_mceinject {
319 unsigned int mceinj_cpunr; /* target processor id */
320};
321
322struct xen_mc {
323 uint32_t cmd;
324 uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */
325 union {
326 struct xen_mc_fetch mc_fetch;
327 struct xen_mc_notifydomain mc_notifydomain;
328 struct xen_mc_physcpuinfo mc_physcpuinfo;
329 struct xen_mc_msrinject mc_msrinject;
330 struct xen_mc_mceinject mc_mceinject;
331 } u;
332};
333DEFINE_GUEST_HANDLE_STRUCT(xen_mc);
334
335/* Fields are zero when not available */
336struct xen_mce {
337 __u64 status;
338 __u64 misc;
339 __u64 addr;
340 __u64 mcgstatus;
341 __u64 ip;
342 __u64 tsc; /* cpu time stamp counter */
343 __u64 time; /* wall time_t when error was detected */
344 __u8 cpuvendor; /* cpu vendor as encoded in system.h */
345 __u8 inject_flags; /* software inject flags */
346 __u16 pad;
347 __u32 cpuid; /* CPUID 1 EAX */
348 __u8 cs; /* code segment */
349 __u8 bank; /* machine check bank */
350 __u8 cpu; /* cpu number; obsolete; use extcpu now */
351 __u8 finished; /* entry is valid */
352 __u32 extcpu; /* linux cpu number that detected the error */
353 __u32 socketid; /* CPU socket ID */
354 __u32 apicid; /* CPU initial apic ID */
355 __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
356};
357
358/*
359 * This structure contains all data related to the MCE log. Also
360 * carries a signature to make it easier to find from external
361 * debugging tools. Each entry is only valid when its finished flag
362 * is set.
363 */
364
365#define XEN_MCE_LOG_LEN 32
366
367struct xen_mce_log {
368 char signature[12]; /* "MACHINECHECK" */
369 unsigned len; /* = XEN_MCE_LOG_LEN */
370 unsigned next;
371 unsigned flags;
372 unsigned recordlen; /* length of struct xen_mce */
373 struct xen_mce entry[XEN_MCE_LOG_LEN];
374};
375
376#define XEN_MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
377
378#define XEN_MCE_LOG_SIGNATURE "MACHINECHECK"
379
380#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
381#define MCE_GET_LOG_LEN _IOR('M', 2, int)
382#define MCE_GETCLEAR_FLAGS _IOR('M', 3, int)
383
384#endif /* __ASSEMBLY__ */
385#endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index a890804945e3..0801468f9abe 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -80,6 +80,7 @@
80#define VIRQ_CONSOLE 2 /* (DOM0) Bytes received on emergency console. */ 80#define VIRQ_CONSOLE 2 /* (DOM0) Bytes received on emergency console. */
81#define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event for some domain. */ 81#define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event for some domain. */
82#define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */ 82#define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */
83#define VIRQ_PCPU_STATE 9 /* (DOM0) PCPU state changed */
83 84
84/* Architecture-specific VIRQ definitions. */ 85/* Architecture-specific VIRQ definitions. */
85#define VIRQ_ARCH_0 16 86#define VIRQ_ARCH_0 16