diff options
-rw-r--r-- | Documentation/ABI/testing/sysfs-devices-system-xen_cpu | 20 | ||||
-rw-r--r-- | arch/x86/include/asm/xen/hypercall.h | 8 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_amd.c | 22 | ||||
-rw-r--r-- | arch/x86/xen/enlighten.c | 224 | ||||
-rw-r--r-- | arch/x86/xen/mmu.c | 39 | ||||
-rw-r--r-- | arch/x86/xen/setup.c | 23 | ||||
-rw-r--r-- | arch/x86/xen/suspend.c | 2 | ||||
-rw-r--r-- | arch/x86/xen/xen-ops.h | 2 | ||||
-rw-r--r-- | drivers/tty/hvc/hvc_xen.c | 15 | ||||
-rw-r--r-- | drivers/xen/Kconfig | 8 | ||||
-rw-r--r-- | drivers/xen/Makefile | 2 | ||||
-rw-r--r-- | drivers/xen/mcelog.c | 414 | ||||
-rw-r--r-- | drivers/xen/pcpu.c | 371 | ||||
-rw-r--r-- | drivers/xen/platform-pci.c | 18 | ||||
-rw-r--r-- | drivers/xen/xen-acpi-processor.c | 9 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_xs.c | 20 | ||||
-rw-r--r-- | include/linux/miscdevice.h | 1 | ||||
-rw-r--r-- | include/xen/events.h | 2 | ||||
-rw-r--r-- | include/xen/interface/io/xs_wire.h | 3 | ||||
-rw-r--r-- | include/xen/interface/platform.h | 8 | ||||
-rw-r--r-- | include/xen/interface/xen-mca.h | 385 | ||||
-rw-r--r-- | include/xen/interface/xen.h | 1 |
23 files changed, 1511 insertions, 90 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-system-xen_cpu b/Documentation/ABI/testing/sysfs-devices-system-xen_cpu new file mode 100644 index 000000000000..9ca02fb2d498 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-devices-system-xen_cpu | |||
@@ -0,0 +1,20 @@ | |||
1 | What: /sys/devices/system/xen_cpu/ | ||
2 | Date: May 2012 | ||
3 | Contact: Liu, Jinsong <jinsong.liu@intel.com> | ||
4 | Description: | ||
5 | A collection of global/individual Xen physical cpu attributes | ||
6 | |||
7 | Individual physical cpu attributes are contained in | ||
8 | subdirectories named by the Xen's logical cpu number, e.g.: | ||
9 | /sys/devices/system/xen_cpu/xen_cpu#/ | ||
10 | |||
11 | |||
12 | What: /sys/devices/system/xen_cpu/xen_cpu#/online | ||
13 | Date: May 2012 | ||
14 | Contact: Liu, Jinsong <jinsong.liu@intel.com> | ||
15 | Description: | ||
16 | Interface to online/offline Xen physical cpus | ||
17 | |||
18 | When running under Xen platform, it provides a user interface | ||
19 | to online/offline physical cpus, except cpu0 due to several | ||
20 | logic restrictions and assumptions. | ||
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 5728852fb90f..59c226d120cd 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h | |||
@@ -48,6 +48,7 @@ | |||
48 | #include <xen/interface/sched.h> | 48 | #include <xen/interface/sched.h> |
49 | #include <xen/interface/physdev.h> | 49 | #include <xen/interface/physdev.h> |
50 | #include <xen/interface/platform.h> | 50 | #include <xen/interface/platform.h> |
51 | #include <xen/interface/xen-mca.h> | ||
51 | 52 | ||
52 | /* | 53 | /* |
53 | * The hypercall asms have to meet several constraints: | 54 | * The hypercall asms have to meet several constraints: |
@@ -302,6 +303,13 @@ HYPERVISOR_set_timer_op(u64 timeout) | |||
302 | } | 303 | } |
303 | 304 | ||
304 | static inline int | 305 | static inline int |
306 | HYPERVISOR_mca(struct xen_mc *mc_op) | ||
307 | { | ||
308 | mc_op->interface_version = XEN_MCA_INTERFACE_VERSION; | ||
309 | return _hypercall1(int, mca, mc_op); | ||
310 | } | ||
311 | |||
312 | static inline int | ||
305 | HYPERVISOR_dom0_op(struct xen_platform_op *platform_op) | 313 | HYPERVISOR_dom0_op(struct xen_platform_op *platform_op) |
306 | { | 314 | { |
307 | platform_op->interface_version = XENPF_INTERFACE_VERSION; | 315 | platform_op->interface_version = XENPF_INTERFACE_VERSION; |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 9473e8772fd1..5e095f873e3e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -60,8 +60,6 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex); | |||
60 | 60 | ||
61 | int mce_disabled __read_mostly; | 61 | int mce_disabled __read_mostly; |
62 | 62 | ||
63 | #define MISC_MCELOG_MINOR 227 | ||
64 | |||
65 | #define SPINUNIT 100 /* 100ns */ | 63 | #define SPINUNIT 100 /* 100ns */ |
66 | 64 | ||
67 | atomic_t mce_entry; | 65 | atomic_t mce_entry; |
@@ -2346,7 +2344,7 @@ static __init int mcheck_init_device(void) | |||
2346 | 2344 | ||
2347 | return err; | 2345 | return err; |
2348 | } | 2346 | } |
2349 | device_initcall(mcheck_init_device); | 2347 | device_initcall_sync(mcheck_init_device); |
2350 | 2348 | ||
2351 | /* | 2349 | /* |
2352 | * Old style boot options parsing. Only for compatibility. | 2350 | * Old style boot options parsing. Only for compatibility. |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 671b95a2ffb5..c4e916d77378 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -759,4 +759,24 @@ static __init int threshold_init_device(void) | |||
759 | 759 | ||
760 | return 0; | 760 | return 0; |
761 | } | 761 | } |
762 | device_initcall(threshold_init_device); | 762 | /* |
763 | * there are 3 funcs which need to be _initcalled in a logical sequence: | ||
764 | * 1. xen_late_init_mcelog | ||
765 | * 2. mcheck_init_device | ||
766 | * 3. threshold_init_device | ||
767 | * | ||
768 | * xen_late_init_mcelog must register xen_mce_chrdev_device before | ||
769 | * native mce_chrdev_device registration if running under xen platform; | ||
770 | * | ||
771 | * mcheck_init_device should be inited before threshold_init_device to | ||
772 | * initialize mce_device, otherwise a NULL ptr dereference will cause panic. | ||
773 | * | ||
774 | * so we use following _initcalls | ||
775 | * 1. device_initcall(xen_late_init_mcelog); | ||
776 | * 2. device_initcall_sync(mcheck_init_device); | ||
777 | * 3. late_initcall(threshold_init_device); | ||
778 | * | ||
779 | * when running under xen, the initcall order is 1,2,3; | ||
780 | * on baremetal, we skip 1 and we do only 2 and 3. | ||
781 | */ | ||
782 | late_initcall(threshold_init_device); | ||
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index ed7d54985d0c..bf4bda6d3e9a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/pci.h> | 31 | #include <linux/pci.h> |
32 | #include <linux/gfp.h> | 32 | #include <linux/gfp.h> |
33 | #include <linux/memblock.h> | 33 | #include <linux/memblock.h> |
34 | #include <linux/syscore_ops.h> | ||
34 | 35 | ||
35 | #include <xen/xen.h> | 36 | #include <xen/xen.h> |
36 | #include <xen/interface/xen.h> | 37 | #include <xen/interface/xen.h> |
@@ -38,6 +39,7 @@ | |||
38 | #include <xen/interface/physdev.h> | 39 | #include <xen/interface/physdev.h> |
39 | #include <xen/interface/vcpu.h> | 40 | #include <xen/interface/vcpu.h> |
40 | #include <xen/interface/memory.h> | 41 | #include <xen/interface/memory.h> |
42 | #include <xen/interface/xen-mca.h> | ||
41 | #include <xen/features.h> | 43 | #include <xen/features.h> |
42 | #include <xen/page.h> | 44 | #include <xen/page.h> |
43 | #include <xen/hvm.h> | 45 | #include <xen/hvm.h> |
@@ -107,7 +109,7 @@ EXPORT_SYMBOL_GPL(xen_have_vector_callback); | |||
107 | * Point at some empty memory to start with. We map the real shared_info | 109 | * Point at some empty memory to start with. We map the real shared_info |
108 | * page as soon as fixmap is up and running. | 110 | * page as soon as fixmap is up and running. |
109 | */ | 111 | */ |
110 | struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; | 112 | struct shared_info *HYPERVISOR_shared_info = &xen_dummy_shared_info; |
111 | 113 | ||
112 | /* | 114 | /* |
113 | * Flag to determine whether vcpu info placement is available on all | 115 | * Flag to determine whether vcpu info placement is available on all |
@@ -124,6 +126,19 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; | |||
124 | */ | 126 | */ |
125 | static int have_vcpu_info_placement = 1; | 127 | static int have_vcpu_info_placement = 1; |
126 | 128 | ||
129 | struct tls_descs { | ||
130 | struct desc_struct desc[3]; | ||
131 | }; | ||
132 | |||
133 | /* | ||
134 | * Updating the 3 TLS descriptors in the GDT on every task switch is | ||
135 | * surprisingly expensive so we avoid updating them if they haven't | ||
136 | * changed. Since Xen writes different descriptors than the one | ||
137 | * passed in the update_descriptor hypercall we keep shadow copies to | ||
138 | * compare against. | ||
139 | */ | ||
140 | static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc); | ||
141 | |||
127 | static void clamp_max_cpus(void) | 142 | static void clamp_max_cpus(void) |
128 | { | 143 | { |
129 | #ifdef CONFIG_SMP | 144 | #ifdef CONFIG_SMP |
@@ -341,9 +356,7 @@ static void __init xen_init_cpuid_mask(void) | |||
341 | unsigned int xsave_mask; | 356 | unsigned int xsave_mask; |
342 | 357 | ||
343 | cpuid_leaf1_edx_mask = | 358 | cpuid_leaf1_edx_mask = |
344 | ~((1 << X86_FEATURE_MCE) | /* disable MCE */ | 359 | ~((1 << X86_FEATURE_MTRR) | /* disable MTRR */ |
345 | (1 << X86_FEATURE_MCA) | /* disable MCA */ | ||
346 | (1 << X86_FEATURE_MTRR) | /* disable MTRR */ | ||
347 | (1 << X86_FEATURE_ACC)); /* thermal monitoring */ | 360 | (1 << X86_FEATURE_ACC)); /* thermal monitoring */ |
348 | 361 | ||
349 | if (!xen_initial_domain()) | 362 | if (!xen_initial_domain()) |
@@ -540,12 +553,28 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr) | |||
540 | BUG(); | 553 | BUG(); |
541 | } | 554 | } |
542 | 555 | ||
556 | static inline bool desc_equal(const struct desc_struct *d1, | ||
557 | const struct desc_struct *d2) | ||
558 | { | ||
559 | return d1->a == d2->a && d1->b == d2->b; | ||
560 | } | ||
561 | |||
543 | static void load_TLS_descriptor(struct thread_struct *t, | 562 | static void load_TLS_descriptor(struct thread_struct *t, |
544 | unsigned int cpu, unsigned int i) | 563 | unsigned int cpu, unsigned int i) |
545 | { | 564 | { |
546 | struct desc_struct *gdt = get_cpu_gdt_table(cpu); | 565 | struct desc_struct *shadow = &per_cpu(shadow_tls_desc, cpu).desc[i]; |
547 | xmaddr_t maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); | 566 | struct desc_struct *gdt; |
548 | struct multicall_space mc = __xen_mc_entry(0); | 567 | xmaddr_t maddr; |
568 | struct multicall_space mc; | ||
569 | |||
570 | if (desc_equal(shadow, &t->tls_array[i])) | ||
571 | return; | ||
572 | |||
573 | *shadow = t->tls_array[i]; | ||
574 | |||
575 | gdt = get_cpu_gdt_table(cpu); | ||
576 | maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); | ||
577 | mc = __xen_mc_entry(0); | ||
549 | 578 | ||
550 | MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]); | 579 | MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]); |
551 | } | 580 | } |
@@ -627,8 +656,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, | |||
627 | /* | 656 | /* |
628 | * Look for known traps using IST, and substitute them | 657 | * Look for known traps using IST, and substitute them |
629 | * appropriately. The debugger ones are the only ones we care | 658 | * appropriately. The debugger ones are the only ones we care |
630 | * about. Xen will handle faults like double_fault and | 659 | * about. Xen will handle faults like double_fault, |
631 | * machine_check, so we should never see them. Warn if | 660 | * so we should never see them. Warn if |
632 | * there's an unexpected IST-using fault handler. | 661 | * there's an unexpected IST-using fault handler. |
633 | */ | 662 | */ |
634 | if (addr == (unsigned long)debug) | 663 | if (addr == (unsigned long)debug) |
@@ -643,7 +672,11 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, | |||
643 | return 0; | 672 | return 0; |
644 | #ifdef CONFIG_X86_MCE | 673 | #ifdef CONFIG_X86_MCE |
645 | } else if (addr == (unsigned long)machine_check) { | 674 | } else if (addr == (unsigned long)machine_check) { |
646 | return 0; | 675 | /* |
676 | * when xen hypervisor inject vMCE to guest, | ||
677 | * use native mce handler to handle it | ||
678 | */ | ||
679 | ; | ||
647 | #endif | 680 | #endif |
648 | } else { | 681 | } else { |
649 | /* Some other trap using IST? */ | 682 | /* Some other trap using IST? */ |
@@ -1437,64 +1470,155 @@ asmlinkage void __init xen_start_kernel(void) | |||
1437 | #endif | 1470 | #endif |
1438 | } | 1471 | } |
1439 | 1472 | ||
1440 | static int init_hvm_pv_info(int *major, int *minor) | 1473 | #ifdef CONFIG_XEN_PVHVM |
1441 | { | 1474 | /* |
1442 | uint32_t eax, ebx, ecx, edx, pages, msr, base; | 1475 | * The pfn containing the shared_info is located somewhere in RAM. This |
1443 | u64 pfn; | 1476 | * will cause trouble if the current kernel is doing a kexec boot into a |
1444 | 1477 | * new kernel. The new kernel (and its startup code) can not know where | |
1445 | base = xen_cpuid_base(); | 1478 | * the pfn is, so it can not reserve the page. The hypervisor will |
1446 | cpuid(base + 1, &eax, &ebx, &ecx, &edx); | 1479 | * continue to update the pfn, and as a result memory corruption occurs |
1447 | 1480 | * in the new kernel. | |
1448 | *major = eax >> 16; | 1481 | * |
1449 | *minor = eax & 0xffff; | 1482 | * One way to work around this issue is to allocate a page in the |
1450 | printk(KERN_INFO "Xen version %d.%d.\n", *major, *minor); | 1483 | * xen-platform pci device's BAR memory range. But pci init is done very |
1451 | 1484 | * late and the shared_info page is already in use very early to read | |
1452 | cpuid(base + 2, &pages, &msr, &ecx, &edx); | 1485 | * the pvclock. So moving the pfn from RAM to MMIO is racy because some |
1453 | 1486 | * code paths on other vcpus could access the pfn during the small | |
1454 | pfn = __pa(hypercall_page); | 1487 | * window when the old pfn is moved to the new pfn. There is even a |
1455 | wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); | 1488 | * small window where the old pfn is not backed by a mfn, and during that |
1456 | 1489 | * time all reads return -1. | |
1457 | xen_setup_features(); | 1490 | * |
1458 | 1491 | * Because it is not known upfront where the MMIO region is located it | |
1459 | pv_info.name = "Xen HVM"; | 1492 | * can not be used right from the start in xen_hvm_init_shared_info. |
1460 | 1493 | * | |
1461 | xen_domain_type = XEN_HVM_DOMAIN; | 1494 | * To minimise trouble the move of the pfn is done shortly before kexec. |
1495 | * This does not eliminate the race because all vcpus are still online | ||
1496 | * when the syscore_ops will be called. But hopefully there is no work | ||
1497 | * pending at this point in time. Also the syscore_op is run last which | ||
1498 | * reduces the risk further. | ||
1499 | */ | ||
1462 | 1500 | ||
1463 | return 0; | 1501 | static struct shared_info *xen_hvm_shared_info; |
1464 | } | ||
1465 | 1502 | ||
1466 | void __ref xen_hvm_init_shared_info(void) | 1503 | static void xen_hvm_connect_shared_info(unsigned long pfn) |
1467 | { | 1504 | { |
1468 | int cpu; | ||
1469 | struct xen_add_to_physmap xatp; | 1505 | struct xen_add_to_physmap xatp; |
1470 | static struct shared_info *shared_info_page = 0; | ||
1471 | 1506 | ||
1472 | if (!shared_info_page) | ||
1473 | shared_info_page = (struct shared_info *) | ||
1474 | extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
1475 | xatp.domid = DOMID_SELF; | 1507 | xatp.domid = DOMID_SELF; |
1476 | xatp.idx = 0; | 1508 | xatp.idx = 0; |
1477 | xatp.space = XENMAPSPACE_shared_info; | 1509 | xatp.space = XENMAPSPACE_shared_info; |
1478 | xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; | 1510 | xatp.gpfn = pfn; |
1479 | if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) | 1511 | if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) |
1480 | BUG(); | 1512 | BUG(); |
1481 | 1513 | ||
1482 | HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; | 1514 | } |
1515 | static void xen_hvm_set_shared_info(struct shared_info *sip) | ||
1516 | { | ||
1517 | int cpu; | ||
1518 | |||
1519 | HYPERVISOR_shared_info = sip; | ||
1483 | 1520 | ||
1484 | /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info | 1521 | /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info |
1485 | * page, we use it in the event channel upcall and in some pvclock | 1522 | * page, we use it in the event channel upcall and in some pvclock |
1486 | * related functions. We don't need the vcpu_info placement | 1523 | * related functions. We don't need the vcpu_info placement |
1487 | * optimizations because we don't use any pv_mmu or pv_irq op on | 1524 | * optimizations because we don't use any pv_mmu or pv_irq op on |
1488 | * HVM. | 1525 | * HVM. |
1489 | * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is | 1526 | * When xen_hvm_set_shared_info is run at boot time only vcpu 0 is |
1490 | * online but xen_hvm_init_shared_info is run at resume time too and | 1527 | * online but xen_hvm_set_shared_info is run at resume time too and |
1491 | * in that case multiple vcpus might be online. */ | 1528 | * in that case multiple vcpus might be online. */ |
1492 | for_each_online_cpu(cpu) { | 1529 | for_each_online_cpu(cpu) { |
1493 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; | 1530 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; |
1494 | } | 1531 | } |
1495 | } | 1532 | } |
1496 | 1533 | ||
1497 | #ifdef CONFIG_XEN_PVHVM | 1534 | /* Reconnect the shared_info pfn to a mfn */ |
1535 | void xen_hvm_resume_shared_info(void) | ||
1536 | { | ||
1537 | xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT); | ||
1538 | } | ||
1539 | |||
1540 | #ifdef CONFIG_KEXEC | ||
1541 | static struct shared_info *xen_hvm_shared_info_kexec; | ||
1542 | static unsigned long xen_hvm_shared_info_pfn_kexec; | ||
1543 | |||
1544 | /* Remember a pfn in MMIO space for kexec reboot */ | ||
1545 | void __devinit xen_hvm_prepare_kexec(struct shared_info *sip, unsigned long pfn) | ||
1546 | { | ||
1547 | xen_hvm_shared_info_kexec = sip; | ||
1548 | xen_hvm_shared_info_pfn_kexec = pfn; | ||
1549 | } | ||
1550 | |||
1551 | static void xen_hvm_syscore_shutdown(void) | ||
1552 | { | ||
1553 | struct xen_memory_reservation reservation = { | ||
1554 | .domid = DOMID_SELF, | ||
1555 | .nr_extents = 1, | ||
1556 | }; | ||
1557 | unsigned long prev_pfn; | ||
1558 | int rc; | ||
1559 | |||
1560 | if (!xen_hvm_shared_info_kexec) | ||
1561 | return; | ||
1562 | |||
1563 | prev_pfn = __pa(xen_hvm_shared_info) >> PAGE_SHIFT; | ||
1564 | set_xen_guest_handle(reservation.extent_start, &prev_pfn); | ||
1565 | |||
1566 | /* Move pfn to MMIO, disconnects previous pfn from mfn */ | ||
1567 | xen_hvm_connect_shared_info(xen_hvm_shared_info_pfn_kexec); | ||
1568 | |||
1569 | /* Update pointers, following hypercall is also a memory barrier */ | ||
1570 | xen_hvm_set_shared_info(xen_hvm_shared_info_kexec); | ||
1571 | |||
1572 | /* Allocate new mfn for previous pfn */ | ||
1573 | do { | ||
1574 | rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); | ||
1575 | if (rc == 0) | ||
1576 | msleep(123); | ||
1577 | } while (rc == 0); | ||
1578 | |||
1579 | /* Make sure the previous pfn is really connected to a (new) mfn */ | ||
1580 | BUG_ON(rc != 1); | ||
1581 | } | ||
1582 | |||
1583 | static struct syscore_ops xen_hvm_syscore_ops = { | ||
1584 | .shutdown = xen_hvm_syscore_shutdown, | ||
1585 | }; | ||
1586 | #endif | ||
1587 | |||
1588 | /* Use a pfn in RAM, may move to MMIO before kexec. */ | ||
1589 | static void __init xen_hvm_init_shared_info(void) | ||
1590 | { | ||
1591 | /* Remember pointer for resume */ | ||
1592 | xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
1593 | xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT); | ||
1594 | xen_hvm_set_shared_info(xen_hvm_shared_info); | ||
1595 | } | ||
1596 | |||
1597 | static void __init init_hvm_pv_info(void) | ||
1598 | { | ||
1599 | int major, minor; | ||
1600 | uint32_t eax, ebx, ecx, edx, pages, msr, base; | ||
1601 | u64 pfn; | ||
1602 | |||
1603 | base = xen_cpuid_base(); | ||
1604 | cpuid(base + 1, &eax, &ebx, &ecx, &edx); | ||
1605 | |||
1606 | major = eax >> 16; | ||
1607 | minor = eax & 0xffff; | ||
1608 | printk(KERN_INFO "Xen version %d.%d.\n", major, minor); | ||
1609 | |||
1610 | cpuid(base + 2, &pages, &msr, &ecx, &edx); | ||
1611 | |||
1612 | pfn = __pa(hypercall_page); | ||
1613 | wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); | ||
1614 | |||
1615 | xen_setup_features(); | ||
1616 | |||
1617 | pv_info.name = "Xen HVM"; | ||
1618 | |||
1619 | xen_domain_type = XEN_HVM_DOMAIN; | ||
1620 | } | ||
1621 | |||
1498 | static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self, | 1622 | static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self, |
1499 | unsigned long action, void *hcpu) | 1623 | unsigned long action, void *hcpu) |
1500 | { | 1624 | { |
@@ -1517,14 +1641,12 @@ static struct notifier_block xen_hvm_cpu_notifier __cpuinitdata = { | |||
1517 | 1641 | ||
1518 | static void __init xen_hvm_guest_init(void) | 1642 | static void __init xen_hvm_guest_init(void) |
1519 | { | 1643 | { |
1520 | int r; | 1644 | init_hvm_pv_info(); |
1521 | int major, minor; | ||
1522 | |||
1523 | r = init_hvm_pv_info(&major, &minor); | ||
1524 | if (r < 0) | ||
1525 | return; | ||
1526 | 1645 | ||
1527 | xen_hvm_init_shared_info(); | 1646 | xen_hvm_init_shared_info(); |
1647 | #ifdef CONFIG_KEXEC | ||
1648 | register_syscore_ops(&xen_hvm_syscore_ops); | ||
1649 | #endif | ||
1528 | 1650 | ||
1529 | if (xen_feature(XENFEAT_hvm_callback_vector)) | 1651 | if (xen_feature(XENFEAT_hvm_callback_vector)) |
1530 | xen_have_vector_callback = 1; | 1652 | xen_have_vector_callback = 1; |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 3a73785631ce..27336dfcda8e 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -308,8 +308,20 @@ static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval) | |||
308 | 308 | ||
309 | static inline void __xen_set_pte(pte_t *ptep, pte_t pteval) | 309 | static inline void __xen_set_pte(pte_t *ptep, pte_t pteval) |
310 | { | 310 | { |
311 | if (!xen_batched_set_pte(ptep, pteval)) | 311 | if (!xen_batched_set_pte(ptep, pteval)) { |
312 | native_set_pte(ptep, pteval); | 312 | /* |
313 | * Could call native_set_pte() here and trap and | ||
314 | * emulate the PTE write but with 32-bit guests this | ||
315 | * needs two traps (one for each of the two 32-bit | ||
316 | * words in the PTE) so do one hypercall directly | ||
317 | * instead. | ||
318 | */ | ||
319 | struct mmu_update u; | ||
320 | |||
321 | u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE; | ||
322 | u.val = pte_val_ma(pteval); | ||
323 | HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF); | ||
324 | } | ||
313 | } | 325 | } |
314 | 326 | ||
315 | static void xen_set_pte(pte_t *ptep, pte_t pteval) | 327 | static void xen_set_pte(pte_t *ptep, pte_t pteval) |
@@ -1416,13 +1428,28 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) | |||
1416 | } | 1428 | } |
1417 | #endif /* CONFIG_X86_64 */ | 1429 | #endif /* CONFIG_X86_64 */ |
1418 | 1430 | ||
1419 | /* Init-time set_pte while constructing initial pagetables, which | 1431 | /* |
1420 | doesn't allow RO pagetable pages to be remapped RW */ | 1432 | * Init-time set_pte while constructing initial pagetables, which |
1433 | * doesn't allow RO page table pages to be remapped RW. | ||
1434 | * | ||
1435 | * If there is no MFN for this PFN then this page is initially | ||
1436 | * ballooned out so clear the PTE (as in decrease_reservation() in | ||
1437 | * drivers/xen/balloon.c). | ||
1438 | * | ||
1439 | * Many of these PTE updates are done on unpinned and writable pages | ||
1440 | * and doing a hypercall for these is unnecessary and expensive. At | ||
1441 | * this point it is not possible to tell if a page is pinned or not, | ||
1442 | * so always write the PTE directly and rely on Xen trapping and | ||
1443 | * emulating any updates as necessary. | ||
1444 | */ | ||
1421 | static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) | 1445 | static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) |
1422 | { | 1446 | { |
1423 | pte = mask_rw_pte(ptep, pte); | 1447 | if (pte_mfn(pte) != INVALID_P2M_ENTRY) |
1448 | pte = mask_rw_pte(ptep, pte); | ||
1449 | else | ||
1450 | pte = __pte_ma(0); | ||
1424 | 1451 | ||
1425 | xen_set_pte(ptep, pte); | 1452 | native_set_pte(ptep, pte); |
1426 | } | 1453 | } |
1427 | 1454 | ||
1428 | static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) | 1455 | static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) |
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index a4790bf22c59..ead85576d54a 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -157,25 +157,24 @@ static unsigned long __init xen_populate_chunk( | |||
157 | unsigned long dest_pfn; | 157 | unsigned long dest_pfn; |
158 | 158 | ||
159 | for (i = 0, entry = list; i < map_size; i++, entry++) { | 159 | for (i = 0, entry = list; i < map_size; i++, entry++) { |
160 | unsigned long credits = credits_left; | ||
161 | unsigned long s_pfn; | 160 | unsigned long s_pfn; |
162 | unsigned long e_pfn; | 161 | unsigned long e_pfn; |
163 | unsigned long pfns; | 162 | unsigned long pfns; |
164 | long capacity; | 163 | long capacity; |
165 | 164 | ||
166 | if (credits <= 0) | 165 | if (credits_left <= 0) |
167 | break; | 166 | break; |
168 | 167 | ||
169 | if (entry->type != E820_RAM) | 168 | if (entry->type != E820_RAM) |
170 | continue; | 169 | continue; |
171 | 170 | ||
172 | e_pfn = PFN_UP(entry->addr + entry->size); | 171 | e_pfn = PFN_DOWN(entry->addr + entry->size); |
173 | 172 | ||
174 | /* We only care about E820 after the xen_start_info->nr_pages */ | 173 | /* We only care about E820 after the xen_start_info->nr_pages */ |
175 | if (e_pfn <= max_pfn) | 174 | if (e_pfn <= max_pfn) |
176 | continue; | 175 | continue; |
177 | 176 | ||
178 | s_pfn = PFN_DOWN(entry->addr); | 177 | s_pfn = PFN_UP(entry->addr); |
179 | /* If the E820 falls within the nr_pages, we want to start | 178 | /* If the E820 falls within the nr_pages, we want to start |
180 | * at the nr_pages PFN. | 179 | * at the nr_pages PFN. |
181 | * If that would mean going past the E820 entry, skip it | 180 | * If that would mean going past the E820 entry, skip it |
@@ -184,23 +183,19 @@ static unsigned long __init xen_populate_chunk( | |||
184 | capacity = e_pfn - max_pfn; | 183 | capacity = e_pfn - max_pfn; |
185 | dest_pfn = max_pfn; | 184 | dest_pfn = max_pfn; |
186 | } else { | 185 | } else { |
187 | /* last_pfn MUST be within E820_RAM regions */ | ||
188 | if (*last_pfn && e_pfn >= *last_pfn) | ||
189 | s_pfn = *last_pfn; | ||
190 | capacity = e_pfn - s_pfn; | 186 | capacity = e_pfn - s_pfn; |
191 | dest_pfn = s_pfn; | 187 | dest_pfn = s_pfn; |
192 | } | 188 | } |
193 | /* If we had filled this E820_RAM entry, go to the next one. */ | ||
194 | if (capacity <= 0) | ||
195 | continue; | ||
196 | 189 | ||
197 | if (credits > capacity) | 190 | if (credits_left < capacity) |
198 | credits = capacity; | 191 | capacity = credits_left; |
199 | 192 | ||
200 | pfns = xen_do_chunk(dest_pfn, dest_pfn + credits, false); | 193 | pfns = xen_do_chunk(dest_pfn, dest_pfn + capacity, false); |
201 | done += pfns; | 194 | done += pfns; |
202 | credits_left -= pfns; | ||
203 | *last_pfn = (dest_pfn + pfns); | 195 | *last_pfn = (dest_pfn + pfns); |
196 | if (pfns < capacity) | ||
197 | break; | ||
198 | credits_left -= pfns; | ||
204 | } | 199 | } |
205 | return done; | 200 | return done; |
206 | } | 201 | } |
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 45329c8c226e..ae8a00c39de4 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c | |||
@@ -30,7 +30,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled) | |||
30 | { | 30 | { |
31 | #ifdef CONFIG_XEN_PVHVM | 31 | #ifdef CONFIG_XEN_PVHVM |
32 | int cpu; | 32 | int cpu; |
33 | xen_hvm_init_shared_info(); | 33 | xen_hvm_resume_shared_info(); |
34 | xen_callback_vector(); | 34 | xen_callback_vector(); |
35 | xen_unplug_emulated_devices(); | 35 | xen_unplug_emulated_devices(); |
36 | if (xen_feature(XENFEAT_hvm_safe_pvclock)) { | 36 | if (xen_feature(XENFEAT_hvm_safe_pvclock)) { |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 202d4c150154..1e4329e04e0f 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -41,7 +41,7 @@ void xen_enable_syscall(void); | |||
41 | void xen_vcpu_restore(void); | 41 | void xen_vcpu_restore(void); |
42 | 42 | ||
43 | void xen_callback_vector(void); | 43 | void xen_callback_vector(void); |
44 | void xen_hvm_init_shared_info(void); | 44 | void xen_hvm_resume_shared_info(void); |
45 | void xen_unplug_emulated_devices(void); | 45 | void xen_unplug_emulated_devices(void); |
46 | 46 | ||
47 | void __init xen_build_dynamic_phys_to_machine(void); | 47 | void __init xen_build_dynamic_phys_to_machine(void); |
diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index 944eaeb8e0cf..1e456dca4f60 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c | |||
@@ -209,11 +209,10 @@ static int xen_hvm_console_init(void) | |||
209 | info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO); | 209 | info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO); |
210 | if (!info) | 210 | if (!info) |
211 | return -ENOMEM; | 211 | return -ENOMEM; |
212 | } | 212 | } else if (info->intf != NULL) { |
213 | 213 | /* already configured */ | |
214 | /* already configured */ | ||
215 | if (info->intf != NULL) | ||
216 | return 0; | 214 | return 0; |
215 | } | ||
217 | /* | 216 | /* |
218 | * If the toolstack (or the hypervisor) hasn't set these values, the | 217 | * If the toolstack (or the hypervisor) hasn't set these values, the |
219 | * default value is 0. Even though mfn = 0 and evtchn = 0 are | 218 | * default value is 0. Even though mfn = 0 and evtchn = 0 are |
@@ -259,12 +258,10 @@ static int xen_pv_console_init(void) | |||
259 | info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO); | 258 | info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO); |
260 | if (!info) | 259 | if (!info) |
261 | return -ENOMEM; | 260 | return -ENOMEM; |
262 | } | 261 | } else if (info->intf != NULL) { |
263 | 262 | /* already configured */ | |
264 | /* already configured */ | ||
265 | if (info->intf != NULL) | ||
266 | return 0; | 263 | return 0; |
267 | 264 | } | |
268 | info->evtchn = xen_start_info->console.domU.evtchn; | 265 | info->evtchn = xen_start_info->console.domU.evtchn; |
269 | info->intf = mfn_to_virt(xen_start_info->console.domU.mfn); | 266 | info->intf = mfn_to_virt(xen_start_info->console.domU.mfn); |
270 | info->vtermno = HVC_COOKIE; | 267 | info->vtermno = HVC_COOKIE; |
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 8d2501e604dd..d4dffcd52873 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig | |||
@@ -196,4 +196,12 @@ config XEN_ACPI_PROCESSOR | |||
196 | called xen_acpi_processor If you do not know what to choose, select | 196 | called xen_acpi_processor If you do not know what to choose, select |
197 | M here. If the CPUFREQ drivers are built in, select Y here. | 197 | M here. If the CPUFREQ drivers are built in, select Y here. |
198 | 198 | ||
199 | config XEN_MCE_LOG | ||
200 | bool "Xen platform mcelog" | ||
201 | depends on XEN_DOM0 && X86_64 && X86_MCE | ||
202 | default n | ||
203 | help | ||
204 | Allow kernel fetching MCE error from Xen platform and | ||
205 | converting it into Linux mcelog format for mcelog tools | ||
206 | |||
199 | endmenu | 207 | endmenu |
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index fc3488631136..d80bea5535a2 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile | |||
@@ -17,7 +17,9 @@ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o | |||
17 | obj-$(CONFIG_XEN_PVHVM) += platform-pci.o | 17 | obj-$(CONFIG_XEN_PVHVM) += platform-pci.o |
18 | obj-$(CONFIG_XEN_TMEM) += tmem.o | 18 | obj-$(CONFIG_XEN_TMEM) += tmem.o |
19 | obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o | 19 | obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o |
20 | obj-$(CONFIG_XEN_DOM0) += pcpu.o | ||
20 | obj-$(CONFIG_XEN_DOM0) += pci.o acpi.o | 21 | obj-$(CONFIG_XEN_DOM0) += pci.o acpi.o |
22 | obj-$(CONFIG_XEN_MCE_LOG) += mcelog.o | ||
21 | obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/ | 23 | obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/ |
22 | obj-$(CONFIG_XEN_PRIVCMD) += xen-privcmd.o | 24 | obj-$(CONFIG_XEN_PRIVCMD) += xen-privcmd.o |
23 | obj-$(CONFIG_XEN_ACPI_PROCESSOR) += xen-acpi-processor.o | 25 | obj-$(CONFIG_XEN_ACPI_PROCESSOR) += xen-acpi-processor.o |
diff --git a/drivers/xen/mcelog.c b/drivers/xen/mcelog.c new file mode 100644 index 000000000000..8feee08bcb43 --- /dev/null +++ b/drivers/xen/mcelog.c | |||
@@ -0,0 +1,414 @@ | |||
1 | /****************************************************************************** | ||
2 | * mcelog.c | ||
3 | * Driver for receiving and transferring machine check error infomation | ||
4 | * | ||
5 | * Copyright (c) 2012 Intel Corporation | ||
6 | * Author: Liu, Jinsong <jinsong.liu@intel.com> | ||
7 | * Author: Jiang, Yunhong <yunhong.jiang@intel.com> | ||
8 | * Author: Ke, Liping <liping.ke@intel.com> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License version 2 | ||
12 | * as published by the Free Software Foundation; or, when distributed | ||
13 | * separately from the Linux kernel or incorporated into other | ||
14 | * software packages, subject to the following license: | ||
15 | * | ||
16 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
17 | * of this source file (the "Software"), to deal in the Software without | ||
18 | * restriction, including without limitation the rights to use, copy, modify, | ||
19 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
20 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
21 | * the following conditions: | ||
22 | * | ||
23 | * The above copyright notice and this permission notice shall be included in | ||
24 | * all copies or substantial portions of the Software. | ||
25 | * | ||
26 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
27 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
28 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
29 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
30 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
31 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
32 | * IN THE SOFTWARE. | ||
33 | */ | ||
34 | |||
35 | #include <linux/init.h> | ||
36 | #include <linux/types.h> | ||
37 | #include <linux/kernel.h> | ||
38 | #include <linux/slab.h> | ||
39 | #include <linux/fs.h> | ||
40 | #include <linux/device.h> | ||
41 | #include <linux/miscdevice.h> | ||
42 | #include <linux/uaccess.h> | ||
43 | #include <linux/capability.h> | ||
44 | #include <linux/poll.h> | ||
45 | #include <linux/sched.h> | ||
46 | |||
47 | #include <xen/interface/xen.h> | ||
48 | #include <xen/events.h> | ||
49 | #include <xen/interface/vcpu.h> | ||
50 | #include <xen/xen.h> | ||
51 | #include <asm/xen/hypercall.h> | ||
52 | #include <asm/xen/hypervisor.h> | ||
53 | |||
54 | #define XEN_MCELOG "xen_mcelog: " | ||
55 | |||
56 | static struct mc_info g_mi; | ||
57 | static struct mcinfo_logical_cpu *g_physinfo; | ||
58 | static uint32_t ncpus; | ||
59 | |||
60 | static DEFINE_MUTEX(mcelog_lock); | ||
61 | |||
62 | static struct xen_mce_log xen_mcelog = { | ||
63 | .signature = XEN_MCE_LOG_SIGNATURE, | ||
64 | .len = XEN_MCE_LOG_LEN, | ||
65 | .recordlen = sizeof(struct xen_mce), | ||
66 | }; | ||
67 | |||
68 | static DEFINE_SPINLOCK(xen_mce_chrdev_state_lock); | ||
69 | static int xen_mce_chrdev_open_count; /* #times opened */ | ||
70 | static int xen_mce_chrdev_open_exclu; /* already open exclusive? */ | ||
71 | |||
72 | static DECLARE_WAIT_QUEUE_HEAD(xen_mce_chrdev_wait); | ||
73 | |||
74 | static int xen_mce_chrdev_open(struct inode *inode, struct file *file) | ||
75 | { | ||
76 | spin_lock(&xen_mce_chrdev_state_lock); | ||
77 | |||
78 | if (xen_mce_chrdev_open_exclu || | ||
79 | (xen_mce_chrdev_open_count && (file->f_flags & O_EXCL))) { | ||
80 | spin_unlock(&xen_mce_chrdev_state_lock); | ||
81 | |||
82 | return -EBUSY; | ||
83 | } | ||
84 | |||
85 | if (file->f_flags & O_EXCL) | ||
86 | xen_mce_chrdev_open_exclu = 1; | ||
87 | xen_mce_chrdev_open_count++; | ||
88 | |||
89 | spin_unlock(&xen_mce_chrdev_state_lock); | ||
90 | |||
91 | return nonseekable_open(inode, file); | ||
92 | } | ||
93 | |||
94 | static int xen_mce_chrdev_release(struct inode *inode, struct file *file) | ||
95 | { | ||
96 | spin_lock(&xen_mce_chrdev_state_lock); | ||
97 | |||
98 | xen_mce_chrdev_open_count--; | ||
99 | xen_mce_chrdev_open_exclu = 0; | ||
100 | |||
101 | spin_unlock(&xen_mce_chrdev_state_lock); | ||
102 | |||
103 | return 0; | ||
104 | } | ||
105 | |||
106 | static ssize_t xen_mce_chrdev_read(struct file *filp, char __user *ubuf, | ||
107 | size_t usize, loff_t *off) | ||
108 | { | ||
109 | char __user *buf = ubuf; | ||
110 | unsigned num; | ||
111 | int i, err; | ||
112 | |||
113 | mutex_lock(&mcelog_lock); | ||
114 | |||
115 | num = xen_mcelog.next; | ||
116 | |||
117 | /* Only supports full reads right now */ | ||
118 | err = -EINVAL; | ||
119 | if (*off != 0 || usize < XEN_MCE_LOG_LEN*sizeof(struct xen_mce)) | ||
120 | goto out; | ||
121 | |||
122 | err = 0; | ||
123 | for (i = 0; i < num; i++) { | ||
124 | struct xen_mce *m = &xen_mcelog.entry[i]; | ||
125 | |||
126 | err |= copy_to_user(buf, m, sizeof(*m)); | ||
127 | buf += sizeof(*m); | ||
128 | } | ||
129 | |||
130 | memset(xen_mcelog.entry, 0, num * sizeof(struct xen_mce)); | ||
131 | xen_mcelog.next = 0; | ||
132 | |||
133 | if (err) | ||
134 | err = -EFAULT; | ||
135 | |||
136 | out: | ||
137 | mutex_unlock(&mcelog_lock); | ||
138 | |||
139 | return err ? err : buf - ubuf; | ||
140 | } | ||
141 | |||
142 | static unsigned int xen_mce_chrdev_poll(struct file *file, poll_table *wait) | ||
143 | { | ||
144 | poll_wait(file, &xen_mce_chrdev_wait, wait); | ||
145 | |||
146 | if (xen_mcelog.next) | ||
147 | return POLLIN | POLLRDNORM; | ||
148 | |||
149 | return 0; | ||
150 | } | ||
151 | |||
152 | static long xen_mce_chrdev_ioctl(struct file *f, unsigned int cmd, | ||
153 | unsigned long arg) | ||
154 | { | ||
155 | int __user *p = (int __user *)arg; | ||
156 | |||
157 | if (!capable(CAP_SYS_ADMIN)) | ||
158 | return -EPERM; | ||
159 | |||
160 | switch (cmd) { | ||
161 | case MCE_GET_RECORD_LEN: | ||
162 | return put_user(sizeof(struct xen_mce), p); | ||
163 | case MCE_GET_LOG_LEN: | ||
164 | return put_user(XEN_MCE_LOG_LEN, p); | ||
165 | case MCE_GETCLEAR_FLAGS: { | ||
166 | unsigned flags; | ||
167 | |||
168 | do { | ||
169 | flags = xen_mcelog.flags; | ||
170 | } while (cmpxchg(&xen_mcelog.flags, flags, 0) != flags); | ||
171 | |||
172 | return put_user(flags, p); | ||
173 | } | ||
174 | default: | ||
175 | return -ENOTTY; | ||
176 | } | ||
177 | } | ||
178 | |||
179 | static const struct file_operations xen_mce_chrdev_ops = { | ||
180 | .open = xen_mce_chrdev_open, | ||
181 | .release = xen_mce_chrdev_release, | ||
182 | .read = xen_mce_chrdev_read, | ||
183 | .poll = xen_mce_chrdev_poll, | ||
184 | .unlocked_ioctl = xen_mce_chrdev_ioctl, | ||
185 | .llseek = no_llseek, | ||
186 | }; | ||
187 | |||
188 | static struct miscdevice xen_mce_chrdev_device = { | ||
189 | MISC_MCELOG_MINOR, | ||
190 | "mcelog", | ||
191 | &xen_mce_chrdev_ops, | ||
192 | }; | ||
193 | |||
194 | /* | ||
195 | * Caller should hold the mcelog_lock | ||
196 | */ | ||
197 | static void xen_mce_log(struct xen_mce *mce) | ||
198 | { | ||
199 | unsigned entry; | ||
200 | |||
201 | entry = xen_mcelog.next; | ||
202 | |||
203 | /* | ||
204 | * When the buffer fills up discard new entries. | ||
205 | * Assume that the earlier errors are the more | ||
206 | * interesting ones: | ||
207 | */ | ||
208 | if (entry >= XEN_MCE_LOG_LEN) { | ||
209 | set_bit(XEN_MCE_OVERFLOW, | ||
210 | (unsigned long *)&xen_mcelog.flags); | ||
211 | return; | ||
212 | } | ||
213 | |||
214 | memcpy(xen_mcelog.entry + entry, mce, sizeof(struct xen_mce)); | ||
215 | |||
216 | xen_mcelog.next++; | ||
217 | } | ||
218 | |||
219 | static int convert_log(struct mc_info *mi) | ||
220 | { | ||
221 | struct mcinfo_common *mic; | ||
222 | struct mcinfo_global *mc_global; | ||
223 | struct mcinfo_bank *mc_bank; | ||
224 | struct xen_mce m; | ||
225 | uint32_t i; | ||
226 | |||
227 | mic = NULL; | ||
228 | x86_mcinfo_lookup(&mic, mi, MC_TYPE_GLOBAL); | ||
229 | if (unlikely(!mic)) { | ||
230 | pr_warning(XEN_MCELOG "Failed to find global error info\n"); | ||
231 | return -ENODEV; | ||
232 | } | ||
233 | |||
234 | memset(&m, 0, sizeof(struct xen_mce)); | ||
235 | |||
236 | mc_global = (struct mcinfo_global *)mic; | ||
237 | m.mcgstatus = mc_global->mc_gstatus; | ||
238 | m.apicid = mc_global->mc_apicid; | ||
239 | |||
240 | for (i = 0; i < ncpus; i++) | ||
241 | if (g_physinfo[i].mc_apicid == m.apicid) | ||
242 | break; | ||
243 | if (unlikely(i == ncpus)) { | ||
244 | pr_warning(XEN_MCELOG "Failed to match cpu with apicid %d\n", | ||
245 | m.apicid); | ||
246 | return -ENODEV; | ||
247 | } | ||
248 | |||
249 | m.socketid = g_physinfo[i].mc_chipid; | ||
250 | m.cpu = m.extcpu = g_physinfo[i].mc_cpunr; | ||
251 | m.cpuvendor = (__u8)g_physinfo[i].mc_vendor; | ||
252 | m.mcgcap = g_physinfo[i].mc_msrvalues[__MC_MSR_MCGCAP].value; | ||
253 | |||
254 | mic = NULL; | ||
255 | x86_mcinfo_lookup(&mic, mi, MC_TYPE_BANK); | ||
256 | if (unlikely(!mic)) { | ||
257 | pr_warning(XEN_MCELOG "Fail to find bank error info\n"); | ||
258 | return -ENODEV; | ||
259 | } | ||
260 | |||
261 | do { | ||
262 | if ((!mic) || (mic->size == 0) || | ||
263 | (mic->type != MC_TYPE_GLOBAL && | ||
264 | mic->type != MC_TYPE_BANK && | ||
265 | mic->type != MC_TYPE_EXTENDED && | ||
266 | mic->type != MC_TYPE_RECOVERY)) | ||
267 | break; | ||
268 | |||
269 | if (mic->type == MC_TYPE_BANK) { | ||
270 | mc_bank = (struct mcinfo_bank *)mic; | ||
271 | m.misc = mc_bank->mc_misc; | ||
272 | m.status = mc_bank->mc_status; | ||
273 | m.addr = mc_bank->mc_addr; | ||
274 | m.tsc = mc_bank->mc_tsc; | ||
275 | m.bank = mc_bank->mc_bank; | ||
276 | m.finished = 1; | ||
277 | /*log this record*/ | ||
278 | xen_mce_log(&m); | ||
279 | } | ||
280 | mic = x86_mcinfo_next(mic); | ||
281 | } while (1); | ||
282 | |||
283 | return 0; | ||
284 | } | ||
285 | |||
286 | static int mc_queue_handle(uint32_t flags) | ||
287 | { | ||
288 | struct xen_mc mc_op; | ||
289 | int ret = 0; | ||
290 | |||
291 | mc_op.cmd = XEN_MC_fetch; | ||
292 | mc_op.interface_version = XEN_MCA_INTERFACE_VERSION; | ||
293 | set_xen_guest_handle(mc_op.u.mc_fetch.data, &g_mi); | ||
294 | do { | ||
295 | mc_op.u.mc_fetch.flags = flags; | ||
296 | ret = HYPERVISOR_mca(&mc_op); | ||
297 | if (ret) { | ||
298 | pr_err(XEN_MCELOG "Failed to fetch %s error log\n", | ||
299 | (flags == XEN_MC_URGENT) ? | ||
300 | "urgnet" : "nonurgent"); | ||
301 | break; | ||
302 | } | ||
303 | |||
304 | if (mc_op.u.mc_fetch.flags & XEN_MC_NODATA || | ||
305 | mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED) | ||
306 | break; | ||
307 | else { | ||
308 | ret = convert_log(&g_mi); | ||
309 | if (ret) | ||
310 | pr_warning(XEN_MCELOG | ||
311 | "Failed to convert this error log, " | ||
312 | "continue acking it anyway\n"); | ||
313 | |||
314 | mc_op.u.mc_fetch.flags = flags | XEN_MC_ACK; | ||
315 | ret = HYPERVISOR_mca(&mc_op); | ||
316 | if (ret) { | ||
317 | pr_err(XEN_MCELOG | ||
318 | "Failed to ack previous error log\n"); | ||
319 | break; | ||
320 | } | ||
321 | } | ||
322 | } while (1); | ||
323 | |||
324 | return ret; | ||
325 | } | ||
326 | |||
327 | /* virq handler for machine check error info*/ | ||
328 | static void xen_mce_work_fn(struct work_struct *work) | ||
329 | { | ||
330 | int err; | ||
331 | |||
332 | mutex_lock(&mcelog_lock); | ||
333 | |||
334 | /* urgent mc_info */ | ||
335 | err = mc_queue_handle(XEN_MC_URGENT); | ||
336 | if (err) | ||
337 | pr_err(XEN_MCELOG | ||
338 | "Failed to handle urgent mc_info queue, " | ||
339 | "continue handling nonurgent mc_info queue anyway.\n"); | ||
340 | |||
341 | /* nonurgent mc_info */ | ||
342 | err = mc_queue_handle(XEN_MC_NONURGENT); | ||
343 | if (err) | ||
344 | pr_err(XEN_MCELOG | ||
345 | "Failed to handle nonurgent mc_info queue.\n"); | ||
346 | |||
347 | /* wake processes polling /dev/mcelog */ | ||
348 | wake_up_interruptible(&xen_mce_chrdev_wait); | ||
349 | |||
350 | mutex_unlock(&mcelog_lock); | ||
351 | } | ||
352 | static DECLARE_WORK(xen_mce_work, xen_mce_work_fn); | ||
353 | |||
354 | static irqreturn_t xen_mce_interrupt(int irq, void *dev_id) | ||
355 | { | ||
356 | schedule_work(&xen_mce_work); | ||
357 | return IRQ_HANDLED; | ||
358 | } | ||
359 | |||
360 | static int bind_virq_for_mce(void) | ||
361 | { | ||
362 | int ret; | ||
363 | struct xen_mc mc_op; | ||
364 | |||
365 | memset(&mc_op, 0, sizeof(struct xen_mc)); | ||
366 | |||
367 | /* Fetch physical CPU Numbers */ | ||
368 | mc_op.cmd = XEN_MC_physcpuinfo; | ||
369 | mc_op.interface_version = XEN_MCA_INTERFACE_VERSION; | ||
370 | set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo); | ||
371 | ret = HYPERVISOR_mca(&mc_op); | ||
372 | if (ret) { | ||
373 | pr_err(XEN_MCELOG "Failed to get CPU numbers\n"); | ||
374 | return ret; | ||
375 | } | ||
376 | |||
377 | /* Fetch each CPU Physical Info for later reference*/ | ||
378 | ncpus = mc_op.u.mc_physcpuinfo.ncpus; | ||
379 | g_physinfo = kcalloc(ncpus, sizeof(struct mcinfo_logical_cpu), | ||
380 | GFP_KERNEL); | ||
381 | if (!g_physinfo) | ||
382 | return -ENOMEM; | ||
383 | set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo); | ||
384 | ret = HYPERVISOR_mca(&mc_op); | ||
385 | if (ret) { | ||
386 | pr_err(XEN_MCELOG "Failed to get CPU info\n"); | ||
387 | kfree(g_physinfo); | ||
388 | return ret; | ||
389 | } | ||
390 | |||
391 | ret = bind_virq_to_irqhandler(VIRQ_MCA, 0, | ||
392 | xen_mce_interrupt, 0, "mce", NULL); | ||
393 | if (ret < 0) { | ||
394 | pr_err(XEN_MCELOG "Failed to bind virq\n"); | ||
395 | kfree(g_physinfo); | ||
396 | return ret; | ||
397 | } | ||
398 | |||
399 | return 0; | ||
400 | } | ||
401 | |||
402 | static int __init xen_late_init_mcelog(void) | ||
403 | { | ||
404 | /* Only DOM0 is responsible for MCE logging */ | ||
405 | if (xen_initial_domain()) { | ||
406 | /* register character device /dev/mcelog for xen mcelog */ | ||
407 | if (misc_register(&xen_mce_chrdev_device)) | ||
408 | return -ENODEV; | ||
409 | return bind_virq_for_mce(); | ||
410 | } | ||
411 | |||
412 | return -ENODEV; | ||
413 | } | ||
414 | device_initcall(xen_late_init_mcelog); | ||
diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c new file mode 100644 index 000000000000..067fcfa1723e --- /dev/null +++ b/drivers/xen/pcpu.c | |||
@@ -0,0 +1,371 @@ | |||
1 | /****************************************************************************** | ||
2 | * pcpu.c | ||
3 | * Management physical cpu in dom0, get pcpu info and provide sys interface | ||
4 | * | ||
5 | * Copyright (c) 2012 Intel Corporation | ||
6 | * Author: Liu, Jinsong <jinsong.liu@intel.com> | ||
7 | * Author: Jiang, Yunhong <yunhong.jiang@intel.com> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License version 2 | ||
11 | * as published by the Free Software Foundation; or, when distributed | ||
12 | * separately from the Linux kernel or incorporated into other | ||
13 | * software packages, subject to the following license: | ||
14 | * | ||
15 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
16 | * of this source file (the "Software"), to deal in the Software without | ||
17 | * restriction, including without limitation the rights to use, copy, modify, | ||
18 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
19 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
20 | * the following conditions: | ||
21 | * | ||
22 | * The above copyright notice and this permission notice shall be included in | ||
23 | * all copies or substantial portions of the Software. | ||
24 | * | ||
25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
26 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
27 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
28 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
29 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
31 | * IN THE SOFTWARE. | ||
32 | */ | ||
33 | |||
34 | #include <linux/interrupt.h> | ||
35 | #include <linux/spinlock.h> | ||
36 | #include <linux/cpu.h> | ||
37 | #include <linux/stat.h> | ||
38 | #include <linux/capability.h> | ||
39 | |||
40 | #include <xen/xen.h> | ||
41 | #include <xen/xenbus.h> | ||
42 | #include <xen/events.h> | ||
43 | #include <xen/interface/platform.h> | ||
44 | #include <asm/xen/hypervisor.h> | ||
45 | #include <asm/xen/hypercall.h> | ||
46 | |||
47 | #define XEN_PCPU "xen_cpu: " | ||
48 | |||
49 | /* | ||
50 | * @cpu_id: Xen physical cpu logic number | ||
51 | * @flags: Xen physical cpu status flag | ||
52 | * - XEN_PCPU_FLAGS_ONLINE: cpu is online | ||
53 | * - XEN_PCPU_FLAGS_INVALID: cpu is not present | ||
54 | */ | ||
55 | struct pcpu { | ||
56 | struct list_head list; | ||
57 | struct device dev; | ||
58 | uint32_t cpu_id; | ||
59 | uint32_t flags; | ||
60 | }; | ||
61 | |||
62 | static struct bus_type xen_pcpu_subsys = { | ||
63 | .name = "xen_cpu", | ||
64 | .dev_name = "xen_cpu", | ||
65 | }; | ||
66 | |||
67 | static DEFINE_MUTEX(xen_pcpu_lock); | ||
68 | |||
69 | static LIST_HEAD(xen_pcpus); | ||
70 | |||
71 | static int xen_pcpu_down(uint32_t cpu_id) | ||
72 | { | ||
73 | struct xen_platform_op op = { | ||
74 | .cmd = XENPF_cpu_offline, | ||
75 | .interface_version = XENPF_INTERFACE_VERSION, | ||
76 | .u.cpu_ol.cpuid = cpu_id, | ||
77 | }; | ||
78 | |||
79 | return HYPERVISOR_dom0_op(&op); | ||
80 | } | ||
81 | |||
82 | static int xen_pcpu_up(uint32_t cpu_id) | ||
83 | { | ||
84 | struct xen_platform_op op = { | ||
85 | .cmd = XENPF_cpu_online, | ||
86 | .interface_version = XENPF_INTERFACE_VERSION, | ||
87 | .u.cpu_ol.cpuid = cpu_id, | ||
88 | }; | ||
89 | |||
90 | return HYPERVISOR_dom0_op(&op); | ||
91 | } | ||
92 | |||
93 | static ssize_t show_online(struct device *dev, | ||
94 | struct device_attribute *attr, | ||
95 | char *buf) | ||
96 | { | ||
97 | struct pcpu *cpu = container_of(dev, struct pcpu, dev); | ||
98 | |||
99 | return sprintf(buf, "%u\n", !!(cpu->flags & XEN_PCPU_FLAGS_ONLINE)); | ||
100 | } | ||
101 | |||
102 | static ssize_t __ref store_online(struct device *dev, | ||
103 | struct device_attribute *attr, | ||
104 | const char *buf, size_t count) | ||
105 | { | ||
106 | struct pcpu *pcpu = container_of(dev, struct pcpu, dev); | ||
107 | unsigned long long val; | ||
108 | ssize_t ret; | ||
109 | |||
110 | if (!capable(CAP_SYS_ADMIN)) | ||
111 | return -EPERM; | ||
112 | |||
113 | if (kstrtoull(buf, 0, &val) < 0) | ||
114 | return -EINVAL; | ||
115 | |||
116 | switch (val) { | ||
117 | case 0: | ||
118 | ret = xen_pcpu_down(pcpu->cpu_id); | ||
119 | break; | ||
120 | case 1: | ||
121 | ret = xen_pcpu_up(pcpu->cpu_id); | ||
122 | break; | ||
123 | default: | ||
124 | ret = -EINVAL; | ||
125 | } | ||
126 | |||
127 | if (ret >= 0) | ||
128 | ret = count; | ||
129 | return ret; | ||
130 | } | ||
131 | static DEVICE_ATTR(online, S_IRUGO | S_IWUSR, show_online, store_online); | ||
132 | |||
133 | static bool xen_pcpu_online(uint32_t flags) | ||
134 | { | ||
135 | return !!(flags & XEN_PCPU_FLAGS_ONLINE); | ||
136 | } | ||
137 | |||
138 | static void pcpu_online_status(struct xenpf_pcpuinfo *info, | ||
139 | struct pcpu *pcpu) | ||
140 | { | ||
141 | if (xen_pcpu_online(info->flags) && | ||
142 | !xen_pcpu_online(pcpu->flags)) { | ||
143 | /* the pcpu is onlined */ | ||
144 | pcpu->flags |= XEN_PCPU_FLAGS_ONLINE; | ||
145 | kobject_uevent(&pcpu->dev.kobj, KOBJ_ONLINE); | ||
146 | } else if (!xen_pcpu_online(info->flags) && | ||
147 | xen_pcpu_online(pcpu->flags)) { | ||
148 | /* The pcpu is offlined */ | ||
149 | pcpu->flags &= ~XEN_PCPU_FLAGS_ONLINE; | ||
150 | kobject_uevent(&pcpu->dev.kobj, KOBJ_OFFLINE); | ||
151 | } | ||
152 | } | ||
153 | |||
154 | static struct pcpu *get_pcpu(uint32_t cpu_id) | ||
155 | { | ||
156 | struct pcpu *pcpu; | ||
157 | |||
158 | list_for_each_entry(pcpu, &xen_pcpus, list) { | ||
159 | if (pcpu->cpu_id == cpu_id) | ||
160 | return pcpu; | ||
161 | } | ||
162 | |||
163 | return NULL; | ||
164 | } | ||
165 | |||
166 | static void pcpu_release(struct device *dev) | ||
167 | { | ||
168 | struct pcpu *pcpu = container_of(dev, struct pcpu, dev); | ||
169 | |||
170 | list_del(&pcpu->list); | ||
171 | kfree(pcpu); | ||
172 | } | ||
173 | |||
174 | static void unregister_and_remove_pcpu(struct pcpu *pcpu) | ||
175 | { | ||
176 | struct device *dev; | ||
177 | |||
178 | if (!pcpu) | ||
179 | return; | ||
180 | |||
181 | dev = &pcpu->dev; | ||
182 | if (dev->id) | ||
183 | device_remove_file(dev, &dev_attr_online); | ||
184 | |||
185 | /* pcpu remove would be implicitly done */ | ||
186 | device_unregister(dev); | ||
187 | } | ||
188 | |||
189 | static int register_pcpu(struct pcpu *pcpu) | ||
190 | { | ||
191 | struct device *dev; | ||
192 | int err = -EINVAL; | ||
193 | |||
194 | if (!pcpu) | ||
195 | return err; | ||
196 | |||
197 | dev = &pcpu->dev; | ||
198 | dev->bus = &xen_pcpu_subsys; | ||
199 | dev->id = pcpu->cpu_id; | ||
200 | dev->release = pcpu_release; | ||
201 | |||
202 | err = device_register(dev); | ||
203 | if (err) { | ||
204 | pcpu_release(dev); | ||
205 | return err; | ||
206 | } | ||
207 | |||
208 | /* | ||
209 | * Xen never offline cpu0 due to several restrictions | ||
210 | * and assumptions. This basically doesn't add a sys control | ||
211 | * to user, one cannot attempt to offline BSP. | ||
212 | */ | ||
213 | if (dev->id) { | ||
214 | err = device_create_file(dev, &dev_attr_online); | ||
215 | if (err) { | ||
216 | device_unregister(dev); | ||
217 | return err; | ||
218 | } | ||
219 | } | ||
220 | |||
221 | return 0; | ||
222 | } | ||
223 | |||
224 | static struct pcpu *create_and_register_pcpu(struct xenpf_pcpuinfo *info) | ||
225 | { | ||
226 | struct pcpu *pcpu; | ||
227 | int err; | ||
228 | |||
229 | if (info->flags & XEN_PCPU_FLAGS_INVALID) | ||
230 | return ERR_PTR(-ENODEV); | ||
231 | |||
232 | pcpu = kzalloc(sizeof(struct pcpu), GFP_KERNEL); | ||
233 | if (!pcpu) | ||
234 | return ERR_PTR(-ENOMEM); | ||
235 | |||
236 | INIT_LIST_HEAD(&pcpu->list); | ||
237 | pcpu->cpu_id = info->xen_cpuid; | ||
238 | pcpu->flags = info->flags; | ||
239 | |||
240 | /* Need hold on xen_pcpu_lock before pcpu list manipulations */ | ||
241 | list_add_tail(&pcpu->list, &xen_pcpus); | ||
242 | |||
243 | err = register_pcpu(pcpu); | ||
244 | if (err) { | ||
245 | pr_warning(XEN_PCPU "Failed to register pcpu%u\n", | ||
246 | info->xen_cpuid); | ||
247 | return ERR_PTR(-ENOENT); | ||
248 | } | ||
249 | |||
250 | return pcpu; | ||
251 | } | ||
252 | |||
253 | /* | ||
254 | * Caller should hold the xen_pcpu_lock | ||
255 | */ | ||
256 | static int sync_pcpu(uint32_t cpu, uint32_t *max_cpu) | ||
257 | { | ||
258 | int ret; | ||
259 | struct pcpu *pcpu = NULL; | ||
260 | struct xenpf_pcpuinfo *info; | ||
261 | struct xen_platform_op op = { | ||
262 | .cmd = XENPF_get_cpuinfo, | ||
263 | .interface_version = XENPF_INTERFACE_VERSION, | ||
264 | .u.pcpu_info.xen_cpuid = cpu, | ||
265 | }; | ||
266 | |||
267 | ret = HYPERVISOR_dom0_op(&op); | ||
268 | if (ret) | ||
269 | return ret; | ||
270 | |||
271 | info = &op.u.pcpu_info; | ||
272 | if (max_cpu) | ||
273 | *max_cpu = info->max_present; | ||
274 | |||
275 | pcpu = get_pcpu(cpu); | ||
276 | |||
277 | /* | ||
278 | * Only those at cpu present map has its sys interface. | ||
279 | */ | ||
280 | if (info->flags & XEN_PCPU_FLAGS_INVALID) { | ||
281 | if (pcpu) | ||
282 | unregister_and_remove_pcpu(pcpu); | ||
283 | return 0; | ||
284 | } | ||
285 | |||
286 | if (!pcpu) { | ||
287 | pcpu = create_and_register_pcpu(info); | ||
288 | if (IS_ERR_OR_NULL(pcpu)) | ||
289 | return -ENODEV; | ||
290 | } else | ||
291 | pcpu_online_status(info, pcpu); | ||
292 | |||
293 | return 0; | ||
294 | } | ||
295 | |||
296 | /* | ||
297 | * Sync dom0's pcpu information with xen hypervisor's | ||
298 | */ | ||
299 | static int xen_sync_pcpus(void) | ||
300 | { | ||
301 | /* | ||
302 | * Boot cpu always have cpu_id 0 in xen | ||
303 | */ | ||
304 | uint32_t cpu = 0, max_cpu = 0; | ||
305 | int err = 0; | ||
306 | struct pcpu *pcpu, *tmp; | ||
307 | |||
308 | mutex_lock(&xen_pcpu_lock); | ||
309 | |||
310 | while (!err && (cpu <= max_cpu)) { | ||
311 | err = sync_pcpu(cpu, &max_cpu); | ||
312 | cpu++; | ||
313 | } | ||
314 | |||
315 | if (err) | ||
316 | list_for_each_entry_safe(pcpu, tmp, &xen_pcpus, list) | ||
317 | unregister_and_remove_pcpu(pcpu); | ||
318 | |||
319 | mutex_unlock(&xen_pcpu_lock); | ||
320 | |||
321 | return err; | ||
322 | } | ||
323 | |||
324 | static void xen_pcpu_work_fn(struct work_struct *work) | ||
325 | { | ||
326 | xen_sync_pcpus(); | ||
327 | } | ||
328 | static DECLARE_WORK(xen_pcpu_work, xen_pcpu_work_fn); | ||
329 | |||
330 | static irqreturn_t xen_pcpu_interrupt(int irq, void *dev_id) | ||
331 | { | ||
332 | schedule_work(&xen_pcpu_work); | ||
333 | return IRQ_HANDLED; | ||
334 | } | ||
335 | |||
336 | static int __init xen_pcpu_init(void) | ||
337 | { | ||
338 | int irq, ret; | ||
339 | |||
340 | if (!xen_initial_domain()) | ||
341 | return -ENODEV; | ||
342 | |||
343 | irq = bind_virq_to_irqhandler(VIRQ_PCPU_STATE, 0, | ||
344 | xen_pcpu_interrupt, 0, | ||
345 | "xen-pcpu", NULL); | ||
346 | if (irq < 0) { | ||
347 | pr_warning(XEN_PCPU "Failed to bind pcpu virq\n"); | ||
348 | return irq; | ||
349 | } | ||
350 | |||
351 | ret = subsys_system_register(&xen_pcpu_subsys, NULL); | ||
352 | if (ret) { | ||
353 | pr_warning(XEN_PCPU "Failed to register pcpu subsys\n"); | ||
354 | goto err1; | ||
355 | } | ||
356 | |||
357 | ret = xen_sync_pcpus(); | ||
358 | if (ret) { | ||
359 | pr_warning(XEN_PCPU "Failed to sync pcpu info\n"); | ||
360 | goto err2; | ||
361 | } | ||
362 | |||
363 | return 0; | ||
364 | |||
365 | err2: | ||
366 | bus_unregister(&xen_pcpu_subsys); | ||
367 | err1: | ||
368 | unbind_from_irqhandler(irq, NULL); | ||
369 | return ret; | ||
370 | } | ||
371 | arch_initcall(xen_pcpu_init); | ||
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c index 2389e581e23c..d4c50d63acbc 100644 --- a/drivers/xen/platform-pci.c +++ b/drivers/xen/platform-pci.c | |||
@@ -101,6 +101,19 @@ static int platform_pci_resume(struct pci_dev *pdev) | |||
101 | return 0; | 101 | return 0; |
102 | } | 102 | } |
103 | 103 | ||
104 | static void __devinit prepare_shared_info(void) | ||
105 | { | ||
106 | #ifdef CONFIG_KEXEC | ||
107 | unsigned long addr; | ||
108 | struct shared_info *hvm_shared_info; | ||
109 | |||
110 | addr = alloc_xen_mmio(PAGE_SIZE); | ||
111 | hvm_shared_info = ioremap(addr, PAGE_SIZE); | ||
112 | memset(hvm_shared_info, 0, PAGE_SIZE); | ||
113 | xen_hvm_prepare_kexec(hvm_shared_info, addr >> PAGE_SHIFT); | ||
114 | #endif | ||
115 | } | ||
116 | |||
104 | static int __devinit platform_pci_init(struct pci_dev *pdev, | 117 | static int __devinit platform_pci_init(struct pci_dev *pdev, |
105 | const struct pci_device_id *ent) | 118 | const struct pci_device_id *ent) |
106 | { | 119 | { |
@@ -109,6 +122,9 @@ static int __devinit platform_pci_init(struct pci_dev *pdev, | |||
109 | long mmio_addr, mmio_len; | 122 | long mmio_addr, mmio_len; |
110 | unsigned int max_nr_gframes; | 123 | unsigned int max_nr_gframes; |
111 | 124 | ||
125 | if (!xen_domain()) | ||
126 | return -ENODEV; | ||
127 | |||
112 | i = pci_enable_device(pdev); | 128 | i = pci_enable_device(pdev); |
113 | if (i) | 129 | if (i) |
114 | return i; | 130 | return i; |
@@ -135,6 +151,8 @@ static int __devinit platform_pci_init(struct pci_dev *pdev, | |||
135 | platform_mmio = mmio_addr; | 151 | platform_mmio = mmio_addr; |
136 | platform_mmiolen = mmio_len; | 152 | platform_mmiolen = mmio_len; |
137 | 153 | ||
154 | prepare_shared_info(); | ||
155 | |||
138 | if (!xen_have_vector_callback) { | 156 | if (!xen_have_vector_callback) { |
139 | ret = xen_allocate_irq(pdev); | 157 | ret = xen_allocate_irq(pdev); |
140 | if (ret) { | 158 | if (ret) { |
diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c index 7ff2569e17ae..b590ee067fcd 100644 --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c | |||
@@ -520,15 +520,18 @@ static int __init xen_acpi_processor_init(void) | |||
520 | 520 | ||
521 | if (!pr_backup) { | 521 | if (!pr_backup) { |
522 | pr_backup = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL); | 522 | pr_backup = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL); |
523 | memcpy(pr_backup, _pr, sizeof(struct acpi_processor)); | 523 | if (pr_backup) |
524 | memcpy(pr_backup, _pr, sizeof(struct acpi_processor)); | ||
524 | } | 525 | } |
525 | (void)upload_pm_data(_pr); | 526 | (void)upload_pm_data(_pr); |
526 | } | 527 | } |
527 | rc = check_acpi_ids(pr_backup); | 528 | rc = check_acpi_ids(pr_backup); |
528 | if (rc) | ||
529 | goto err_unregister; | ||
530 | 529 | ||
531 | kfree(pr_backup); | 530 | kfree(pr_backup); |
531 | pr_backup = NULL; | ||
532 | |||
533 | if (rc) | ||
534 | goto err_unregister; | ||
532 | 535 | ||
533 | return 0; | 536 | return 0; |
534 | err_unregister: | 537 | err_unregister: |
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index d1c217b23a42..bce15cf4a8df 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c | |||
@@ -618,6 +618,23 @@ static struct xenbus_watch *find_watch(const char *token) | |||
618 | return NULL; | 618 | return NULL; |
619 | } | 619 | } |
620 | 620 | ||
621 | static void xs_reset_watches(void) | ||
622 | { | ||
623 | int err, supported = 0; | ||
624 | |||
625 | if (!xen_hvm_domain()) | ||
626 | return; | ||
627 | |||
628 | err = xenbus_scanf(XBT_NIL, "control", | ||
629 | "platform-feature-xs_reset_watches", "%d", &supported); | ||
630 | if (err != 1 || !supported) | ||
631 | return; | ||
632 | |||
633 | err = xs_error(xs_single(XBT_NIL, XS_RESET_WATCHES, "", NULL)); | ||
634 | if (err && err != -EEXIST) | ||
635 | printk(KERN_WARNING "xs_reset_watches failed: %d\n", err); | ||
636 | } | ||
637 | |||
621 | /* Register callback to watch this node. */ | 638 | /* Register callback to watch this node. */ |
622 | int register_xenbus_watch(struct xenbus_watch *watch) | 639 | int register_xenbus_watch(struct xenbus_watch *watch) |
623 | { | 640 | { |
@@ -900,5 +917,8 @@ int xs_init(void) | |||
900 | if (IS_ERR(task)) | 917 | if (IS_ERR(task)) |
901 | return PTR_ERR(task); | 918 | return PTR_ERR(task); |
902 | 919 | ||
920 | /* shutdown watches for kexec boot */ | ||
921 | xs_reset_watches(); | ||
922 | |||
903 | return 0; | 923 | return 0; |
904 | } | 924 | } |
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 0549d2115507..e0deeb2cc939 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h | |||
@@ -35,6 +35,7 @@ | |||
35 | #define MPT_MINOR 220 | 35 | #define MPT_MINOR 220 |
36 | #define MPT2SAS_MINOR 221 | 36 | #define MPT2SAS_MINOR 221 |
37 | #define UINPUT_MINOR 223 | 37 | #define UINPUT_MINOR 223 |
38 | #define MISC_MCELOG_MINOR 227 | ||
38 | #define HPET_MINOR 228 | 39 | #define HPET_MINOR 228 |
39 | #define FUSE_MINOR 229 | 40 | #define FUSE_MINOR 229 |
40 | #define KVM_MINOR 232 | 41 | #define KVM_MINOR 232 |
diff --git a/include/xen/events.h b/include/xen/events.h index 04399b28e821..9c641deb65d2 100644 --- a/include/xen/events.h +++ b/include/xen/events.h | |||
@@ -58,6 +58,8 @@ void notify_remote_via_irq(int irq); | |||
58 | 58 | ||
59 | void xen_irq_resume(void); | 59 | void xen_irq_resume(void); |
60 | 60 | ||
61 | void xen_hvm_prepare_kexec(struct shared_info *sip, unsigned long pfn); | ||
62 | |||
61 | /* Clear an irq's pending state, in preparation for polling on it */ | 63 | /* Clear an irq's pending state, in preparation for polling on it */ |
62 | void xen_clear_irq_pending(int irq); | 64 | void xen_clear_irq_pending(int irq); |
63 | void xen_set_irq_pending(int irq); | 65 | void xen_set_irq_pending(int irq); |
diff --git a/include/xen/interface/io/xs_wire.h b/include/xen/interface/io/xs_wire.h index 7cdfca24eafb..794deb07eb53 100644 --- a/include/xen/interface/io/xs_wire.h +++ b/include/xen/interface/io/xs_wire.h | |||
@@ -29,7 +29,8 @@ enum xsd_sockmsg_type | |||
29 | XS_IS_DOMAIN_INTRODUCED, | 29 | XS_IS_DOMAIN_INTRODUCED, |
30 | XS_RESUME, | 30 | XS_RESUME, |
31 | XS_SET_TARGET, | 31 | XS_SET_TARGET, |
32 | XS_RESTRICT | 32 | XS_RESTRICT, |
33 | XS_RESET_WATCHES, | ||
33 | }; | 34 | }; |
34 | 35 | ||
35 | #define XS_WRITE_NONE "NONE" | 36 | #define XS_WRITE_NONE "NONE" |
diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h index 486653f0dd8f..61fa66160983 100644 --- a/include/xen/interface/platform.h +++ b/include/xen/interface/platform.h | |||
@@ -314,6 +314,13 @@ struct xenpf_pcpuinfo { | |||
314 | }; | 314 | }; |
315 | DEFINE_GUEST_HANDLE_STRUCT(xenpf_pcpuinfo); | 315 | DEFINE_GUEST_HANDLE_STRUCT(xenpf_pcpuinfo); |
316 | 316 | ||
317 | #define XENPF_cpu_online 56 | ||
318 | #define XENPF_cpu_offline 57 | ||
319 | struct xenpf_cpu_ol { | ||
320 | uint32_t cpuid; | ||
321 | }; | ||
322 | DEFINE_GUEST_HANDLE_STRUCT(xenpf_cpu_ol); | ||
323 | |||
317 | struct xen_platform_op { | 324 | struct xen_platform_op { |
318 | uint32_t cmd; | 325 | uint32_t cmd; |
319 | uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ | 326 | uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ |
@@ -330,6 +337,7 @@ struct xen_platform_op { | |||
330 | struct xenpf_getidletime getidletime; | 337 | struct xenpf_getidletime getidletime; |
331 | struct xenpf_set_processor_pminfo set_pminfo; | 338 | struct xenpf_set_processor_pminfo set_pminfo; |
332 | struct xenpf_pcpuinfo pcpu_info; | 339 | struct xenpf_pcpuinfo pcpu_info; |
340 | struct xenpf_cpu_ol cpu_ol; | ||
333 | uint8_t pad[128]; | 341 | uint8_t pad[128]; |
334 | } u; | 342 | } u; |
335 | }; | 343 | }; |
diff --git a/include/xen/interface/xen-mca.h b/include/xen/interface/xen-mca.h new file mode 100644 index 000000000000..73a4ea714d93 --- /dev/null +++ b/include/xen/interface/xen-mca.h | |||
@@ -0,0 +1,385 @@ | |||
1 | /****************************************************************************** | ||
2 | * arch-x86/mca.h | ||
3 | * Guest OS machine check interface to x86 Xen. | ||
4 | * | ||
5 | * Contributed by Advanced Micro Devices, Inc. | ||
6 | * Author: Christoph Egger <Christoph.Egger@amd.com> | ||
7 | * | ||
8 | * Updated by Intel Corporation | ||
9 | * Author: Liu, Jinsong <jinsong.liu@intel.com> | ||
10 | * | ||
11 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
12 | * of this software and associated documentation files (the "Software"), to | ||
13 | * deal in the Software without restriction, including without limitation the | ||
14 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | ||
15 | * sell copies of the Software, and to permit persons to whom the Software is | ||
16 | * furnished to do so, subject to the following conditions: | ||
17 | * | ||
18 | * The above copyright notice and this permission notice shall be included in | ||
19 | * all copies or substantial portions of the Software. | ||
20 | * | ||
21 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
22 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
23 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
24 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
25 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
26 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
27 | * DEALINGS IN THE SOFTWARE. | ||
28 | */ | ||
29 | |||
30 | #ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__ | ||
31 | #define __XEN_PUBLIC_ARCH_X86_MCA_H__ | ||
32 | |||
33 | /* Hypercall */ | ||
34 | #define __HYPERVISOR_mca __HYPERVISOR_arch_0 | ||
35 | |||
36 | #define XEN_MCA_INTERFACE_VERSION 0x01ecc003 | ||
37 | |||
38 | /* IN: Dom0 calls hypercall to retrieve nonurgent error log entry */ | ||
39 | #define XEN_MC_NONURGENT 0x1 | ||
40 | /* IN: Dom0 calls hypercall to retrieve urgent error log entry */ | ||
41 | #define XEN_MC_URGENT 0x2 | ||
42 | /* IN: Dom0 acknowledges previosly-fetched error log entry */ | ||
43 | #define XEN_MC_ACK 0x4 | ||
44 | |||
45 | /* OUT: All is ok */ | ||
46 | #define XEN_MC_OK 0x0 | ||
47 | /* OUT: Domain could not fetch data. */ | ||
48 | #define XEN_MC_FETCHFAILED 0x1 | ||
49 | /* OUT: There was no machine check data to fetch. */ | ||
50 | #define XEN_MC_NODATA 0x2 | ||
51 | |||
52 | #ifndef __ASSEMBLY__ | ||
53 | /* vIRQ injected to Dom0 */ | ||
54 | #define VIRQ_MCA VIRQ_ARCH_0 | ||
55 | |||
56 | /* | ||
57 | * mc_info entry types | ||
58 | * mca machine check info are recorded in mc_info entries. | ||
59 | * when fetch mca info, it can use MC_TYPE_... to distinguish | ||
60 | * different mca info. | ||
61 | */ | ||
62 | #define MC_TYPE_GLOBAL 0 | ||
63 | #define MC_TYPE_BANK 1 | ||
64 | #define MC_TYPE_EXTENDED 2 | ||
65 | #define MC_TYPE_RECOVERY 3 | ||
66 | |||
67 | struct mcinfo_common { | ||
68 | uint16_t type; /* structure type */ | ||
69 | uint16_t size; /* size of this struct in bytes */ | ||
70 | }; | ||
71 | |||
72 | #define MC_FLAG_CORRECTABLE (1 << 0) | ||
73 | #define MC_FLAG_UNCORRECTABLE (1 << 1) | ||
74 | #define MC_FLAG_RECOVERABLE (1 << 2) | ||
75 | #define MC_FLAG_POLLED (1 << 3) | ||
76 | #define MC_FLAG_RESET (1 << 4) | ||
77 | #define MC_FLAG_CMCI (1 << 5) | ||
78 | #define MC_FLAG_MCE (1 << 6) | ||
79 | |||
80 | /* contains x86 global mc information */ | ||
81 | struct mcinfo_global { | ||
82 | struct mcinfo_common common; | ||
83 | |||
84 | uint16_t mc_domid; /* running domain at the time in error */ | ||
85 | uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */ | ||
86 | uint32_t mc_socketid; /* physical socket of the physical core */ | ||
87 | uint16_t mc_coreid; /* physical impacted core */ | ||
88 | uint16_t mc_core_threadid; /* core thread of physical core */ | ||
89 | uint32_t mc_apicid; | ||
90 | uint32_t mc_flags; | ||
91 | uint64_t mc_gstatus; /* global status */ | ||
92 | }; | ||
93 | |||
94 | /* contains x86 bank mc information */ | ||
95 | struct mcinfo_bank { | ||
96 | struct mcinfo_common common; | ||
97 | |||
98 | uint16_t mc_bank; /* bank nr */ | ||
99 | uint16_t mc_domid; /* domain referenced by mc_addr if valid */ | ||
100 | uint64_t mc_status; /* bank status */ | ||
101 | uint64_t mc_addr; /* bank address */ | ||
102 | uint64_t mc_misc; | ||
103 | uint64_t mc_ctrl2; | ||
104 | uint64_t mc_tsc; | ||
105 | }; | ||
106 | |||
107 | struct mcinfo_msr { | ||
108 | uint64_t reg; /* MSR */ | ||
109 | uint64_t value; /* MSR value */ | ||
110 | }; | ||
111 | |||
112 | /* contains mc information from other or additional mc MSRs */ | ||
113 | struct mcinfo_extended { | ||
114 | struct mcinfo_common common; | ||
115 | uint32_t mc_msrs; /* Number of msr with valid values. */ | ||
116 | /* | ||
117 | * Currently Intel extended MSR (32/64) include all gp registers | ||
118 | * and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be | ||
119 | * useful at present. So expand this array to 16/32 to leave room. | ||
120 | */ | ||
121 | struct mcinfo_msr mc_msr[sizeof(void *) * 4]; | ||
122 | }; | ||
123 | |||
124 | /* Recovery Action flags. Giving recovery result information to DOM0 */ | ||
125 | |||
126 | /* Xen takes successful recovery action, the error is recovered */ | ||
127 | #define REC_ACTION_RECOVERED (0x1 << 0) | ||
128 | /* No action is performed by XEN */ | ||
129 | #define REC_ACTION_NONE (0x1 << 1) | ||
130 | /* It's possible DOM0 might take action ownership in some case */ | ||
131 | #define REC_ACTION_NEED_RESET (0x1 << 2) | ||
132 | |||
133 | /* | ||
134 | * Different Recovery Action types, if the action is performed successfully, | ||
135 | * REC_ACTION_RECOVERED flag will be returned. | ||
136 | */ | ||
137 | |||
138 | /* Page Offline Action */ | ||
139 | #define MC_ACTION_PAGE_OFFLINE (0x1 << 0) | ||
140 | /* CPU offline Action */ | ||
141 | #define MC_ACTION_CPU_OFFLINE (0x1 << 1) | ||
142 | /* L3 cache disable Action */ | ||
143 | #define MC_ACTION_CACHE_SHRINK (0x1 << 2) | ||
144 | |||
145 | /* | ||
146 | * Below interface used between XEN/DOM0 for passing XEN's recovery action | ||
147 | * information to DOM0. | ||
148 | */ | ||
149 | struct page_offline_action { | ||
150 | /* Params for passing the offlined page number to DOM0 */ | ||
151 | uint64_t mfn; | ||
152 | uint64_t status; | ||
153 | }; | ||
154 | |||
155 | struct cpu_offline_action { | ||
156 | /* Params for passing the identity of the offlined CPU to DOM0 */ | ||
157 | uint32_t mc_socketid; | ||
158 | uint16_t mc_coreid; | ||
159 | uint16_t mc_core_threadid; | ||
160 | }; | ||
161 | |||
162 | #define MAX_UNION_SIZE 16 | ||
163 | struct mcinfo_recovery { | ||
164 | struct mcinfo_common common; | ||
165 | uint16_t mc_bank; /* bank nr */ | ||
166 | uint8_t action_flags; | ||
167 | uint8_t action_types; | ||
168 | union { | ||
169 | struct page_offline_action page_retire; | ||
170 | struct cpu_offline_action cpu_offline; | ||
171 | uint8_t pad[MAX_UNION_SIZE]; | ||
172 | } action_info; | ||
173 | }; | ||
174 | |||
175 | |||
176 | #define MCINFO_MAXSIZE 768 | ||
177 | struct mc_info { | ||
178 | /* Number of mcinfo_* entries in mi_data */ | ||
179 | uint32_t mi_nentries; | ||
180 | uint32_t flags; | ||
181 | uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8]; | ||
182 | }; | ||
183 | DEFINE_GUEST_HANDLE_STRUCT(mc_info); | ||
184 | |||
185 | #define __MC_MSR_ARRAYSIZE 8 | ||
186 | #define __MC_MSR_MCGCAP 0 | ||
187 | #define __MC_NMSRS 1 | ||
188 | #define MC_NCAPS 7 | ||
189 | struct mcinfo_logical_cpu { | ||
190 | uint32_t mc_cpunr; | ||
191 | uint32_t mc_chipid; | ||
192 | uint16_t mc_coreid; | ||
193 | uint16_t mc_threadid; | ||
194 | uint32_t mc_apicid; | ||
195 | uint32_t mc_clusterid; | ||
196 | uint32_t mc_ncores; | ||
197 | uint32_t mc_ncores_active; | ||
198 | uint32_t mc_nthreads; | ||
199 | uint32_t mc_cpuid_level; | ||
200 | uint32_t mc_family; | ||
201 | uint32_t mc_vendor; | ||
202 | uint32_t mc_model; | ||
203 | uint32_t mc_step; | ||
204 | char mc_vendorid[16]; | ||
205 | char mc_brandid[64]; | ||
206 | uint32_t mc_cpu_caps[MC_NCAPS]; | ||
207 | uint32_t mc_cache_size; | ||
208 | uint32_t mc_cache_alignment; | ||
209 | uint32_t mc_nmsrvals; | ||
210 | struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE]; | ||
211 | }; | ||
212 | DEFINE_GUEST_HANDLE_STRUCT(mcinfo_logical_cpu); | ||
213 | |||
214 | /* | ||
215 | * Prototype: | ||
216 | * uint32_t x86_mcinfo_nentries(struct mc_info *mi); | ||
217 | */ | ||
218 | #define x86_mcinfo_nentries(_mi) \ | ||
219 | ((_mi)->mi_nentries) | ||
220 | /* | ||
221 | * Prototype: | ||
222 | * struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi); | ||
223 | */ | ||
224 | #define x86_mcinfo_first(_mi) \ | ||
225 | ((struct mcinfo_common *)(_mi)->mi_data) | ||
226 | /* | ||
227 | * Prototype: | ||
228 | * struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic); | ||
229 | */ | ||
230 | #define x86_mcinfo_next(_mic) \ | ||
231 | ((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size)) | ||
232 | |||
233 | /* | ||
234 | * Prototype: | ||
235 | * void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type); | ||
236 | */ | ||
237 | static inline void x86_mcinfo_lookup(struct mcinfo_common **ret, | ||
238 | struct mc_info *mi, uint16_t type) | ||
239 | { | ||
240 | uint32_t i; | ||
241 | struct mcinfo_common *mic; | ||
242 | bool found = 0; | ||
243 | |||
244 | if (!ret || !mi) | ||
245 | return; | ||
246 | |||
247 | mic = x86_mcinfo_first(mi); | ||
248 | for (i = 0; i < x86_mcinfo_nentries(mi); i++) { | ||
249 | if (mic->type == type) { | ||
250 | found = 1; | ||
251 | break; | ||
252 | } | ||
253 | mic = x86_mcinfo_next(mic); | ||
254 | } | ||
255 | |||
256 | *ret = found ? mic : NULL; | ||
257 | } | ||
258 | |||
259 | /* | ||
260 | * Fetch machine check data from hypervisor. | ||
261 | */ | ||
262 | #define XEN_MC_fetch 1 | ||
263 | struct xen_mc_fetch { | ||
264 | /* | ||
265 | * IN: XEN_MC_NONURGENT, XEN_MC_URGENT, | ||
266 | * XEN_MC_ACK if ack'king an earlier fetch | ||
267 | * OUT: XEN_MC_OK, XEN_MC_FETCHAILED, XEN_MC_NODATA | ||
268 | */ | ||
269 | uint32_t flags; | ||
270 | uint32_t _pad0; | ||
271 | /* OUT: id for ack, IN: id we are ack'ing */ | ||
272 | uint64_t fetch_id; | ||
273 | |||
274 | /* OUT variables. */ | ||
275 | GUEST_HANDLE(mc_info) data; | ||
276 | }; | ||
277 | DEFINE_GUEST_HANDLE_STRUCT(xen_mc_fetch); | ||
278 | |||
279 | |||
280 | /* | ||
281 | * This tells the hypervisor to notify a DomU about the machine check error | ||
282 | */ | ||
283 | #define XEN_MC_notifydomain 2 | ||
284 | struct xen_mc_notifydomain { | ||
285 | /* IN variables */ | ||
286 | uint16_t mc_domid; /* The unprivileged domain to notify */ | ||
287 | uint16_t mc_vcpuid; /* The vcpu in mc_domid to notify */ | ||
288 | |||
289 | /* IN/OUT variables */ | ||
290 | uint32_t flags; | ||
291 | }; | ||
292 | DEFINE_GUEST_HANDLE_STRUCT(xen_mc_notifydomain); | ||
293 | |||
294 | #define XEN_MC_physcpuinfo 3 | ||
295 | struct xen_mc_physcpuinfo { | ||
296 | /* IN/OUT */ | ||
297 | uint32_t ncpus; | ||
298 | uint32_t _pad0; | ||
299 | /* OUT */ | ||
300 | GUEST_HANDLE(mcinfo_logical_cpu) info; | ||
301 | }; | ||
302 | |||
303 | #define XEN_MC_msrinject 4 | ||
304 | #define MC_MSRINJ_MAXMSRS 8 | ||
305 | struct xen_mc_msrinject { | ||
306 | /* IN */ | ||
307 | uint32_t mcinj_cpunr; /* target processor id */ | ||
308 | uint32_t mcinj_flags; /* see MC_MSRINJ_F_* below */ | ||
309 | uint32_t mcinj_count; /* 0 .. count-1 in array are valid */ | ||
310 | uint32_t _pad0; | ||
311 | struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS]; | ||
312 | }; | ||
313 | |||
314 | /* Flags for mcinj_flags above; bits 16-31 are reserved */ | ||
315 | #define MC_MSRINJ_F_INTERPOSE 0x1 | ||
316 | |||
317 | #define XEN_MC_mceinject 5 | ||
318 | struct xen_mc_mceinject { | ||
319 | unsigned int mceinj_cpunr; /* target processor id */ | ||
320 | }; | ||
321 | |||
322 | struct xen_mc { | ||
323 | uint32_t cmd; | ||
324 | uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */ | ||
325 | union { | ||
326 | struct xen_mc_fetch mc_fetch; | ||
327 | struct xen_mc_notifydomain mc_notifydomain; | ||
328 | struct xen_mc_physcpuinfo mc_physcpuinfo; | ||
329 | struct xen_mc_msrinject mc_msrinject; | ||
330 | struct xen_mc_mceinject mc_mceinject; | ||
331 | } u; | ||
332 | }; | ||
333 | DEFINE_GUEST_HANDLE_STRUCT(xen_mc); | ||
334 | |||
335 | /* Fields are zero when not available */ | ||
336 | struct xen_mce { | ||
337 | __u64 status; | ||
338 | __u64 misc; | ||
339 | __u64 addr; | ||
340 | __u64 mcgstatus; | ||
341 | __u64 ip; | ||
342 | __u64 tsc; /* cpu time stamp counter */ | ||
343 | __u64 time; /* wall time_t when error was detected */ | ||
344 | __u8 cpuvendor; /* cpu vendor as encoded in system.h */ | ||
345 | __u8 inject_flags; /* software inject flags */ | ||
346 | __u16 pad; | ||
347 | __u32 cpuid; /* CPUID 1 EAX */ | ||
348 | __u8 cs; /* code segment */ | ||
349 | __u8 bank; /* machine check bank */ | ||
350 | __u8 cpu; /* cpu number; obsolete; use extcpu now */ | ||
351 | __u8 finished; /* entry is valid */ | ||
352 | __u32 extcpu; /* linux cpu number that detected the error */ | ||
353 | __u32 socketid; /* CPU socket ID */ | ||
354 | __u32 apicid; /* CPU initial apic ID */ | ||
355 | __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ | ||
356 | }; | ||
357 | |||
358 | /* | ||
359 | * This structure contains all data related to the MCE log. Also | ||
360 | * carries a signature to make it easier to find from external | ||
361 | * debugging tools. Each entry is only valid when its finished flag | ||
362 | * is set. | ||
363 | */ | ||
364 | |||
365 | #define XEN_MCE_LOG_LEN 32 | ||
366 | |||
367 | struct xen_mce_log { | ||
368 | char signature[12]; /* "MACHINECHECK" */ | ||
369 | unsigned len; /* = XEN_MCE_LOG_LEN */ | ||
370 | unsigned next; | ||
371 | unsigned flags; | ||
372 | unsigned recordlen; /* length of struct xen_mce */ | ||
373 | struct xen_mce entry[XEN_MCE_LOG_LEN]; | ||
374 | }; | ||
375 | |||
376 | #define XEN_MCE_OVERFLOW 0 /* bit 0 in flags means overflow */ | ||
377 | |||
378 | #define XEN_MCE_LOG_SIGNATURE "MACHINECHECK" | ||
379 | |||
380 | #define MCE_GET_RECORD_LEN _IOR('M', 1, int) | ||
381 | #define MCE_GET_LOG_LEN _IOR('M', 2, int) | ||
382 | #define MCE_GETCLEAR_FLAGS _IOR('M', 3, int) | ||
383 | |||
384 | #endif /* __ASSEMBLY__ */ | ||
385 | #endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */ | ||
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index a890804945e3..0801468f9abe 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h | |||
@@ -80,6 +80,7 @@ | |||
80 | #define VIRQ_CONSOLE 2 /* (DOM0) Bytes received on emergency console. */ | 80 | #define VIRQ_CONSOLE 2 /* (DOM0) Bytes received on emergency console. */ |
81 | #define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event for some domain. */ | 81 | #define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event for some domain. */ |
82 | #define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */ | 82 | #define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */ |
83 | #define VIRQ_PCPU_STATE 9 /* (DOM0) PCPU state changed */ | ||
83 | 84 | ||
84 | /* Architecture-specific VIRQ definitions. */ | 85 | /* Architecture-specific VIRQ definitions. */ |
85 | #define VIRQ_ARCH_0 16 | 86 | #define VIRQ_ARCH_0 16 |