 CREDITS                            |   1
 Documentation/tpm/xen-tpmfront.txt | 113
 MAINTAINERS                        |  16
 arch/x86/include/asm/xen/events.h  |   1
 arch/x86/xen/enlighten.c           |  15
 arch/x86/xen/irq.c                 |  25
 arch/x86/xen/p2m.c                 |  22
 arch/x86/xen/setup.c               |  29
 arch/x86/xen/smp.c                 |   6
 drivers/char/tpm/Kconfig           |  12
 drivers/char/tpm/Makefile          |   1
 drivers/char/tpm/xen-tpmfront.c    | 473
 drivers/xen/balloon.c              |  74
 drivers/xen/events.c               |  30
 drivers/xen/evtchn.c               | 191
 drivers/xen/gntdev.c               |  11
 drivers/xen/grant-table.c          |  13
 drivers/xen/privcmd.c              |  83
 drivers/xen/swiotlb-xen.c          |   8
 drivers/xen/xen-selfballoon.c      |  54
 include/xen/balloon.h              |   3
 include/xen/interface/io/tpmif.h   |  52
 include/xen/interface/vcpu.h       |   2
 lib/swiotlb.c                      |   8
 24 files changed, 1057 insertions(+), 186 deletions(-)
diff --git a/CREDITS b/CREDITS
index 206d0fcf07a5..646a0a9ad6d1 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1120,6 +1120,7 @@ D: author of userfs filesystem
 D: Improved mmap and munmap handling
 D: General mm minor tidyups
 D: autofs v4 maintainer
+D: Xen subsystem
 S: 987 Alabama St
 S: San Francisco
 S: CA, 94110
diff --git a/Documentation/tpm/xen-tpmfront.txt b/Documentation/tpm/xen-tpmfront.txt
new file mode 100644
index 000000000000..69346de87ff3
--- /dev/null
+++ b/Documentation/tpm/xen-tpmfront.txt
@@ -0,0 +1,113 @@
+Virtual TPM interface for Xen
+
+Authors: Matthew Fioravante (JHUAPL), Daniel De Graaf (NSA)
+
+This document describes the virtual Trusted Platform Module (vTPM) subsystem
+for Xen. The reader is assumed to be familiar with building and installing Xen
+and Linux, and to have a basic understanding of the TPM and vTPM concepts.
+
+INTRODUCTION
+
+The goal of this work is to provide TPM functionality to a virtual guest
+operating system (in Xen terms, a DomU). This allows programs to interact with
+a TPM in a virtual system the same way they interact with a TPM on the physical
+system. Each guest gets its own unique, emulated, software TPM. However, each
+of the vTPM's secrets (keys, NVRAM, etc.) are managed by a vTPM Manager domain,
+which seals the secrets to the physical TPM. If the process of creating each of
+these domains (manager, vTPM, and guest) is trusted, the vTPM subsystem extends
+the chain of trust rooted in the hardware TPM to virtual machines in Xen. Each
+major component of vTPM is implemented as a separate domain, providing secure
+separation guaranteed by the hypervisor. The vTPM domains are implemented in
+mini-os to reduce memory and processor overhead.
+
+This mini-os vTPM subsystem was built on top of the previous vTPM work done by
+IBM and Intel Corporation.
+
+
+DESIGN OVERVIEW
+---------------
+
+The architecture of vTPM is described below:
+
++------------------+
+|    Linux DomU    | ...
+|       |  ^       |
+|       v  |       |
+|   xen-tpmfront   |
++------------------+
+        |  ^
+        v  |
++------------------+
+| mini-os/tpmback  |
+|       |  ^       |
+|       v  |       |
+|  vtpm-stubdom    | ...
+|       |  ^       |
+|       v  |       |
+| mini-os/tpmfront |
++------------------+
+        |  ^
+        v  |
++------------------+
+| mini-os/tpmback  |
+|       |  ^       |
+|       v  |       |
+| vtpmmgr-stubdom  |
+|       |  ^       |
+|       v  |       |
+| mini-os/tpm_tis  |
++------------------+
+        |  ^
+        v  |
++------------------+
+|  Hardware TPM    |
++------------------+
+
+ * Linux DomU: The Linux based guest that wants to use a vTPM. There may be
+               more than one of these.
+
+ * xen-tpmfront.ko: Linux kernel virtual TPM frontend driver. This driver
+                    provides vTPM access to a Linux-based DomU.
+
+ * mini-os/tpmback: Mini-os TPM backend driver. The Linux frontend driver
+                    connects to this backend driver to facilitate
+                    communications between the Linux DomU and its vTPM. This
+                    driver is also used by vtpmmgr-stubdom to communicate with
+                    vtpm-stubdom.
+
+ * vtpm-stubdom: A mini-os stub domain that implements a vTPM. There is a
+                 one-to-one mapping between running vtpm-stubdom instances and
+                 logical vTPMs on the system. The vTPM Platform Configuration
+                 Registers (PCRs) are normally all initialized to zero.
+
+ * mini-os/tpmfront: Mini-os TPM frontend driver. The vTPM mini-os domain
+                     vtpm-stubdom uses this driver to communicate with
+                     vtpmmgr-stubdom. This driver is also used in mini-os
+                     domains such as pv-grub that talk to the vTPM domain.
+
+ * vtpmmgr-stubdom: A mini-os domain that implements the vTPM manager. There
+                    is only one vTPM manager and it should be running during
+                    the entire lifetime of the machine. This domain regulates
+                    access to the physical TPM on the system and secures the
+                    persistent state of each vTPM.
+
+ * mini-os/tpm_tis: Mini-os TPM version 1.2 TPM Interface Specification (TIS)
+                    driver. This driver is used by vtpmmgr-stubdom to talk
+                    directly to the hardware TPM. Communication is facilitated
+                    by mapping hardware memory pages into vtpmmgr-stubdom.
+
+ * Hardware TPM: The physical TPM that is soldered onto the motherboard.
+
+
+INTEGRATION WITH XEN
+--------------------
+
+Support for the vTPM driver was added to the libxl toolstack in Xen 4.3. See
+the Xen documentation (docs/misc/vtpm.txt) for details on setting up the vTPM
+and vTPM Manager stub domains. Once the stub domains are running, a vTPM
+device is set up in the same manner as a disk or network device in the
+domain's configuration file.
+
+In order to use features such as IMA that require a TPM to be loaded prior to
+the initrd, the xen-tpmfront driver must be compiled into the kernel. If not
+using such features, the driver can be compiled as a module and it will be
+loaded as usual.
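
As an illustration of the last point in the documentation above, a guest
configuration stanza of the kind it refers to might look like the following
(a minimal sketch; the backend domain name "domu-vtpm" and the kernel path are
hypothetical placeholders, and docs/misc/vtpm.txt in the Xen tree remains the
authoritative reference for the syntax):

    # xl domain configuration fragment (illustrative only)
    name   = "guest-with-vtpm"
    kernel = "/boot/vmlinuz-guest"       # hypothetical guest kernel
    vtpm   = [ "backend=domu-vtpm" ]     # attach the vTPM served by the
                                         # vtpm-stubdom named "domu-vtpm"
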
diff --git a/MAINTAINERS b/MAINTAINERS
index 49a692e2aa57..4bb5689000f8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9278,9 +9278,9 @@ F: drivers/media/tuners/tuner-xc2028.*
 
 XEN HYPERVISOR INTERFACE
 M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-M:	Jeremy Fitzhardinge <jeremy@goop.org>
-L:	xen-devel@lists.xensource.com (moderated for non-subscribers)
-L:	virtualization@lists.linux-foundation.org
+M:	Boris Ostrovsky <boris.ostrovsky@oracle.com>
+M:	David Vrabel <david.vrabel@citrix.com>
+L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
 S:	Supported
 F:	arch/x86/xen/
 F:	drivers/*/xen-*front.c
@@ -9291,35 +9291,35 @@ F:	include/uapi/xen/
 
 XEN HYPERVISOR ARM
 M:	Stefano Stabellini <stefano.stabellini@eu.citrix.com>
-L:	xen-devel@lists.xensource.com (moderated for non-subscribers)
+L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
 S:	Supported
 F:	arch/arm/xen/
 F:	arch/arm/include/asm/xen/
 
 XEN HYPERVISOR ARM64
 M:	Stefano Stabellini <stefano.stabellini@eu.citrix.com>
-L:	xen-devel@lists.xensource.com (moderated for non-subscribers)
+L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
 S:	Supported
 F:	arch/arm64/xen/
 F:	arch/arm64/include/asm/xen/
 
 XEN NETWORK BACKEND DRIVER
 M:	Ian Campbell <ian.campbell@citrix.com>
-L:	xen-devel@lists.xensource.com (moderated for non-subscribers)
+L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/xen-netback/*
 
 XEN PCI SUBSYSTEM
 M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-L:	xen-devel@lists.xensource.com (moderated for non-subscribers)
+L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
 S:	Supported
 F:	arch/x86/pci/*xen*
 F:	drivers/pci/*xen*
 
 XEN SWIOTLB SUBSYSTEM
 M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-L:	xen-devel@lists.xensource.com (moderated for non-subscribers)
+L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
 S:	Supported
 F:	arch/x86/xen/*swiotlb*
 F:	drivers/xen/*swiotlb*
diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h
index ca842f2769ef..608a79d5a466 100644
--- a/arch/x86/include/asm/xen/events.h
+++ b/arch/x86/include/asm/xen/events.h
@@ -7,6 +7,7 @@ enum ipi_vector {
 	XEN_CALL_FUNCTION_SINGLE_VECTOR,
 	XEN_SPIN_UNLOCK_VECTOR,
 	XEN_IRQ_WORK_VECTOR,
+	XEN_NMI_VECTOR,
 
 	XEN_NR_IPIS,
 };
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 2fcaedc0b739..2fc216dfbd9c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -427,8 +427,7 @@ static void __init xen_init_cpuid_mask(void)
 
 	if (!xen_initial_domain())
 		cpuid_leaf1_edx_mask &=
-			~((1 << X86_FEATURE_APIC) |  /* disable local APIC */
-			  (1 << X86_FEATURE_ACPI));  /* disable ACPI */
+			~((1 << X86_FEATURE_ACPI));  /* disable ACPI */
 
 	cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_X2APIC % 32));
 
@@ -735,8 +734,7 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
 		addr = (unsigned long)xen_int3;
 	else if (addr == (unsigned long)stack_segment)
 		addr = (unsigned long)xen_stack_segment;
-	else if (addr == (unsigned long)double_fault ||
-		 addr == (unsigned long)nmi) {
+	else if (addr == (unsigned long)double_fault) {
 		/* Don't need to handle these */
 		return 0;
 #ifdef CONFIG_X86_MCE
@@ -747,7 +745,12 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
 		 */
 		;
 #endif
-	} else {
+	} else if (addr == (unsigned long)nmi)
+		/*
+		 * Use the native version as well.
+		 */
+		;
+	else {
 		/* Some other trap using IST? */
 		if (WARN_ON(val->ist != 0))
 			return 0;
@@ -1710,6 +1713,8 @@ static void __init xen_hvm_guest_init(void)
 
 	xen_hvm_init_shared_info();
 
+	xen_panic_handler_init();
+
 	if (xen_feature(XENFEAT_hvm_callback_vector))
 		xen_have_vector_callback = 1;
 	xen_hvm_smp_init();
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 01a4dc015ae1..0da7f863056f 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -47,23 +47,18 @@ static void xen_restore_fl(unsigned long flags)
 	/* convert from IF type flag */
 	flags = !(flags & X86_EFLAGS_IF);
 
-	/* There's a one instruction preempt window here.  We need to
-	   make sure we're don't switch CPUs between getting the vcpu
-	   pointer and updating the mask. */
+	/* See xen_irq_enable() for why preemption must be disabled. */
 	preempt_disable();
 	vcpu = this_cpu_read(xen_vcpu);
 	vcpu->evtchn_upcall_mask = flags;
-	preempt_enable_no_resched();
-
-	/* Doesn't matter if we get preempted here, because any
-	   pending event will get dealt with anyway. */
 
 	if (flags == 0) {
-		preempt_check_resched();
 		barrier(); /* unmask then check (avoid races) */
 		if (unlikely(vcpu->evtchn_upcall_pending))
 			xen_force_evtchn_callback();
-	}
+		preempt_enable();
+	} else
+		preempt_enable_no_resched();
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl);
 
@@ -82,10 +77,12 @@ static void xen_irq_enable(void)
 {
 	struct vcpu_info *vcpu;
 
-	/* We don't need to worry about being preempted here, since
-	   either a) interrupts are disabled, so no preemption, or b)
-	   the caller is confused and is trying to re-enable interrupts
-	   on an indeterminate processor. */
+	/*
+	 * We may be preempted as soon as vcpu->evtchn_upcall_mask is
+	 * cleared, so disable preemption to ensure we check for
+	 * events on the VCPU we are still running on.
+	 */
+	preempt_disable();
 
 	vcpu = this_cpu_read(xen_vcpu);
 	vcpu->evtchn_upcall_mask = 0;
@@ -96,6 +93,8 @@ static void xen_irq_enable(void)
 	barrier(); /* unmask then check (avoid races) */
 	if (unlikely(vcpu->evtchn_upcall_pending))
 		xen_force_evtchn_callback();
+
+	preempt_enable();
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable);
 
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 95fb2aa5927e..0d4ec35895d4 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -161,6 +161,7 @@
 #include <asm/xen/page.h>
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
+#include <xen/balloon.h>
 #include <xen/grant_table.h>
 
 #include "multicalls.h"
@@ -967,7 +968,10 @@ int m2p_remove_override(struct page *page,
 	if (kmap_op != NULL) {
 		if (!PageHighMem(page)) {
 			struct multicall_space mcs;
-			struct gnttab_unmap_grant_ref *unmap_op;
+			struct gnttab_unmap_and_replace *unmap_op;
+			struct page *scratch_page = get_balloon_scratch_page();
+			unsigned long scratch_page_address = (unsigned long)
+				__va(page_to_pfn(scratch_page) << PAGE_SHIFT);
 
 			/*
 			 * It might be that we queued all the m2p grant table
@@ -990,21 +994,25 @@ int m2p_remove_override(struct page *page,
 			}
 
 			mcs = xen_mc_entry(
-				sizeof(struct gnttab_unmap_grant_ref));
+				sizeof(struct gnttab_unmap_and_replace));
 			unmap_op = mcs.args;
 			unmap_op->host_addr = kmap_op->host_addr;
+			unmap_op->new_addr = scratch_page_address;
 			unmap_op->handle = kmap_op->handle;
-			unmap_op->dev_bus_addr = 0;
 
 			MULTI_grant_table_op(mcs.mc,
-				GNTTABOP_unmap_grant_ref, unmap_op, 1);
+				GNTTABOP_unmap_and_replace, unmap_op, 1);
 
 			xen_mc_issue(PARAVIRT_LAZY_MMU);
 
-			set_pte_at(&init_mm, address, ptep,
-				pfn_pte(pfn, PAGE_KERNEL));
-			__flush_tlb_single(address);
+			mcs = __xen_mc_entry(0);
+			MULTI_update_va_mapping(mcs.mc, scratch_page_address,
+				pfn_pte(page_to_pfn(get_balloon_scratch_page()),
+					PAGE_KERNEL_RO), 0);
+			xen_mc_issue(PARAVIRT_LAZY_MMU);
+
 			kmap_op->host_addr = 0;
+			put_balloon_scratch_page();
 		}
 	}
 
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 8f3eea6b80c5..09f3059cb00b 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -33,6 +33,9 @@
 /* These are code, but not functions.  Defined in entry.S */
 extern const char xen_hypervisor_callback[];
 extern const char xen_failsafe_callback[];
+#ifdef CONFIG_X86_64
+extern const char nmi[];
+#endif
 extern void xen_sysenter_target(void);
 extern void xen_syscall_target(void);
 extern void xen_syscall32_target(void);
@@ -215,13 +218,19 @@ static void __init xen_set_identity_and_release_chunk(
 	unsigned long pfn;
 
 	/*
-	 * If the PFNs are currently mapped, the VA mapping also needs
-	 * to be updated to be 1:1.
+	 * If the PFNs are currently mapped, clear the mappings
+	 * (except for the ISA region which must be 1:1 mapped) to
+	 * release the refcounts (in Xen) on the original frames.
 	 */
-	for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
+	for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) {
+		pte_t pte = __pte_ma(0);
+
+		if (pfn < PFN_UP(ISA_END_ADDRESS))
+			pte = mfn_pte(pfn, PAGE_KERNEL_IO);
+
 		(void)HYPERVISOR_update_va_mapping(
-			(unsigned long)__va(pfn << PAGE_SHIFT),
-			mfn_pte(pfn, PAGE_KERNEL_IO), 0);
+			(unsigned long)__va(pfn << PAGE_SHIFT), pte, 0);
+	}
 
 	if (start_pfn < nr_pages)
 		*released += xen_release_chunk(
@@ -547,7 +556,13 @@ void xen_enable_syscall(void)
 	}
 #endif /* CONFIG_X86_64 */
 }
-
+void __cpuinit xen_enable_nmi(void)
+{
+#ifdef CONFIG_X86_64
+	if (register_callback(CALLBACKTYPE_nmi, nmi))
+		BUG();
+#endif
+}
 void __init xen_arch_setup(void)
 {
 	xen_panic_handler_init();
@@ -565,7 +580,7 @@ void __init xen_arch_setup(void)
 
 	xen_enable_sysenter();
 	xen_enable_syscall();
-
+	xen_enable_nmi();
 #ifdef CONFIG_ACPI
 	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
 		printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 597655bd72b0..9235842cd76a 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -573,6 +573,12 @@ static inline int xen_map_vector(int vector)
 	case IRQ_WORK_VECTOR:
 		xen_vector = XEN_IRQ_WORK_VECTOR;
 		break;
+#ifdef CONFIG_X86_64
+	case NMI_VECTOR:
+	case APIC_DM_NMI: /* Some use that instead of NMI_VECTOR */
+		xen_vector = XEN_NMI_VECTOR;
+		break;
+#endif
 	default:
 		xen_vector = -1;
 		printk(KERN_ERR "xen: vector 0x%x is not implemented\n",
diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig
index dbfd56446c31..94c0c74434ea 100644
--- a/drivers/char/tpm/Kconfig
+++ b/drivers/char/tpm/Kconfig
@@ -91,4 +91,16 @@ config TCG_ST33_I2C
 	  To compile this driver as a module, choose M here; the module will be
 	  called tpm_stm_st33_i2c.
 
+config TCG_XEN
+	tristate "XEN TPM Interface"
+	depends on TCG_TPM && XEN
+	select XEN_XENBUS_FRONTEND
+	---help---
+	  If you want to make TPM support available to a Xen user domain,
+	  say Yes and it will be accessible from within Linux. See
+	  the manpages for xl, xl.conf, and docs/misc/vtpm.txt in
+	  the Xen source repository for more details.
+	  To compile this driver as a module, choose M here; the module
+	  will be called xen-tpmfront.
+
 endif # TCG_TPM
diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile
index a3736c97c65a..eb41ff97d0ad 100644
--- a/drivers/char/tpm/Makefile
+++ b/drivers/char/tpm/Makefile
@@ -18,3 +18,4 @@ obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o
 obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o
 obj-$(CONFIG_TCG_IBMVTPM) += tpm_ibmvtpm.o
 obj-$(CONFIG_TCG_ST33_I2C) += tpm_i2c_stm_st33.o
+obj-$(CONFIG_TCG_XEN) += xen-tpmfront.o
diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c
new file mode 100644
index 000000000000..7a7929ba2658
--- /dev/null
+++ b/drivers/char/tpm/xen-tpmfront.c
@@ -0,0 +1,473 @@
+/*
+ * Implementation of the Xen vTPM device frontend
+ *
+ * Author:  Daniel De Graaf <dgdegra@tycho.nsa.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ */
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <xen/events.h>
+#include <xen/interface/io/tpmif.h>
+#include <xen/grant_table.h>
+#include <xen/xenbus.h>
+#include <xen/page.h>
+#include "tpm.h"
+
+struct tpm_private {
+	struct tpm_chip *chip;
+	struct xenbus_device *dev;
+
+	struct vtpm_shared_page *shr;
+
+	unsigned int evtchn;
+	int ring_ref;
+	domid_t backend_id;
+};
+
+enum status_bits {
+	VTPM_STATUS_RUNNING  = 0x1,
+	VTPM_STATUS_IDLE     = 0x2,
+	VTPM_STATUS_RESULT   = 0x4,
+	VTPM_STATUS_CANCELED = 0x8,
+};
+
+static u8 vtpm_status(struct tpm_chip *chip)
+{
+	struct tpm_private *priv = TPM_VPRIV(chip);
+	switch (priv->shr->state) {
+	case VTPM_STATE_IDLE:
+		return VTPM_STATUS_IDLE | VTPM_STATUS_CANCELED;
+	case VTPM_STATE_FINISH:
+		return VTPM_STATUS_IDLE | VTPM_STATUS_RESULT;
+	case VTPM_STATE_SUBMIT:
+	case VTPM_STATE_CANCEL: /* cancel requested, not yet canceled */
+		return VTPM_STATUS_RUNNING;
+	default:
+		return 0;
+	}
+}
+
+static bool vtpm_req_canceled(struct tpm_chip *chip, u8 status)
+{
+	return status & VTPM_STATUS_CANCELED;
+}
+
+static void vtpm_cancel(struct tpm_chip *chip)
+{
+	struct tpm_private *priv = TPM_VPRIV(chip);
+	priv->shr->state = VTPM_STATE_CANCEL;
+	wmb();
+	notify_remote_via_evtchn(priv->evtchn);
+}
+
+static unsigned int shr_data_offset(struct vtpm_shared_page *shr)
+{
+	return sizeof(*shr) + sizeof(u32) * shr->nr_extra_pages;
+}
+
+static int vtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
+{
+	struct tpm_private *priv = TPM_VPRIV(chip);
+	struct vtpm_shared_page *shr = priv->shr;
+	unsigned int offset = shr_data_offset(shr);
+
+	u32 ordinal;
+	unsigned long duration;
+
+	if (offset > PAGE_SIZE)
+		return -EINVAL;
+
+	if (offset + count > PAGE_SIZE)
+		return -EINVAL;
+
+	/* Wait for completion of any existing command or cancellation */
+	if (wait_for_tpm_stat(chip, VTPM_STATUS_IDLE, chip->vendor.timeout_c,
+			&chip->vendor.read_queue, true) < 0) {
+		vtpm_cancel(chip);
+		return -ETIME;
+	}
+
+	memcpy(offset + (u8 *)shr, buf, count);
+	shr->length = count;
+	barrier();
+	shr->state = VTPM_STATE_SUBMIT;
+	wmb();
+	notify_remote_via_evtchn(priv->evtchn);
+
+	ordinal = be32_to_cpu(((struct tpm_input_header*)buf)->ordinal);
+	duration = tpm_calc_ordinal_duration(chip, ordinal);
+
+	if (wait_for_tpm_stat(chip, VTPM_STATUS_IDLE, duration,
+			&chip->vendor.read_queue, true) < 0) {
+		/* got a signal or timeout, try to cancel */
+		vtpm_cancel(chip);
+		return -ETIME;
+	}
+
+	return count;
+}
+
+static int vtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count)
+{
+	struct tpm_private *priv = TPM_VPRIV(chip);
+	struct vtpm_shared_page *shr = priv->shr;
+	unsigned int offset = shr_data_offset(shr);
+	size_t length = shr->length;
+
+	if (shr->state == VTPM_STATE_IDLE)
+		return -ECANCELED;
+
+	/* In theory the wait at the end of _send makes this one unnecessary */
+	if (wait_for_tpm_stat(chip, VTPM_STATUS_RESULT, chip->vendor.timeout_c,
+			&chip->vendor.read_queue, true) < 0) {
+		vtpm_cancel(chip);
+		return -ETIME;
+	}
+
+	if (offset > PAGE_SIZE)
+		return -EIO;
+
+	if (offset + length > PAGE_SIZE)
+		length = PAGE_SIZE - offset;
+
+	if (length > count)
+		length = count;
+
+	memcpy(buf, offset + (u8 *)shr, length);
+
+	return length;
+}
+
+ssize_t tpm_show_locality(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	struct tpm_chip *chip = dev_get_drvdata(dev);
+	struct tpm_private *priv = TPM_VPRIV(chip);
+	u8 locality = priv->shr->locality;
+
+	return sprintf(buf, "%d\n", locality);
+}
+
+ssize_t tpm_store_locality(struct device *dev, struct device_attribute *attr,
+			const char *buf, size_t len)
+{
+	struct tpm_chip *chip = dev_get_drvdata(dev);
+	struct tpm_private *priv = TPM_VPRIV(chip);
+	u8 val;
+
+	int rv = kstrtou8(buf, 0, &val);
+	if (rv)
+		return rv;
+
+	priv->shr->locality = val;
+
+	return len;
+}
+
+static const struct file_operations vtpm_ops = {
+	.owner = THIS_MODULE,
+	.llseek = no_llseek,
+	.open = tpm_open,
+	.read = tpm_read,
+	.write = tpm_write,
+	.release = tpm_release,
+};
+
+static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL);
+static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL);
+static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
+static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
+static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
+static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated,
+		NULL);
+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
+static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
+static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL);
+static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
+static DEVICE_ATTR(locality, S_IRUGO | S_IWUSR, tpm_show_locality,
+		tpm_store_locality);
+
+static struct attribute *vtpm_attrs[] = {
+	&dev_attr_pubek.attr,
+	&dev_attr_pcrs.attr,
+	&dev_attr_enabled.attr,
+	&dev_attr_active.attr,
+	&dev_attr_owned.attr,
+	&dev_attr_temp_deactivated.attr,
+	&dev_attr_caps.attr,
+	&dev_attr_cancel.attr,
+	&dev_attr_durations.attr,
+	&dev_attr_timeouts.attr,
+	&dev_attr_locality.attr,
+	NULL,
+};
+
+static struct attribute_group vtpm_attr_grp = {
+	.attrs = vtpm_attrs,
+};
+
+#define TPM_LONG_TIMEOUT   (10 * 60 * HZ)
+
+static const struct tpm_vendor_specific tpm_vtpm = {
+	.status = vtpm_status,
+	.recv = vtpm_recv,
+	.send = vtpm_send,
+	.cancel = vtpm_cancel,
+	.req_complete_mask = VTPM_STATUS_IDLE | VTPM_STATUS_RESULT,
+	.req_complete_val  = VTPM_STATUS_IDLE | VTPM_STATUS_RESULT,
+	.req_canceled      = vtpm_req_canceled,
+	.attr_group = &vtpm_attr_grp,
+	.miscdev = {
+		.fops = &vtpm_ops,
+	},
+	.duration = {
+		TPM_LONG_TIMEOUT,
+		TPM_LONG_TIMEOUT,
+		TPM_LONG_TIMEOUT,
+	},
+};
+
+static irqreturn_t tpmif_interrupt(int dummy, void *dev_id)
+{
+	struct tpm_private *priv = dev_id;
+
+	switch (priv->shr->state) {
+	case VTPM_STATE_IDLE:
+	case VTPM_STATE_FINISH:
+		wake_up_interruptible(&priv->chip->vendor.read_queue);
+		break;
+	case VTPM_STATE_SUBMIT:
+	case VTPM_STATE_CANCEL:
+	default:
+		break;
+	}
+	return IRQ_HANDLED;
+}
+
+static int setup_chip(struct device *dev, struct tpm_private *priv)
+{
+	struct tpm_chip *chip;
+
+	chip = tpm_register_hardware(dev, &tpm_vtpm);
+	if (!chip)
+		return -ENODEV;
+
+	init_waitqueue_head(&chip->vendor.read_queue);
+
+	priv->chip = chip;
+	TPM_VPRIV(chip) = priv;
+
+	return 0;
+}
+
+/* caller must clean up in case of errors */
+static int setup_ring(struct xenbus_device *dev, struct tpm_private *priv)
+{
+	struct xenbus_transaction xbt;
+	const char *message = NULL;
+	int rv;
+
+	priv->shr = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
+	if (!priv->shr) {
+		xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
+		return -ENOMEM;
+	}
+
+	rv = xenbus_grant_ring(dev, virt_to_mfn(priv->shr));
+	if (rv < 0)
+		return rv;
+
+	priv->ring_ref = rv;
+
+	rv = xenbus_alloc_evtchn(dev, &priv->evtchn);
+	if (rv)
+		return rv;
+
+	rv = bind_evtchn_to_irqhandler(priv->evtchn, tpmif_interrupt, 0,
+				       "tpmif", priv);
+	if (rv <= 0) {
+		xenbus_dev_fatal(dev, rv, "allocating TPM irq");
+		return rv;
+	}
+	priv->chip->vendor.irq = rv;
+
+ again:
+	rv = xenbus_transaction_start(&xbt);
+	if (rv) {
+		xenbus_dev_fatal(dev, rv, "starting transaction");
+		return rv;
+	}
+
+	rv = xenbus_printf(xbt, dev->nodename,
+			"ring-ref", "%u", priv->ring_ref);
+	if (rv) {
+		message = "writing ring-ref";
+		goto abort_transaction;
+	}
+
+	rv = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
+			priv->evtchn);
+	if (rv) {
+		message = "writing event-channel";
+		goto abort_transaction;
+	}
+
+	rv = xenbus_printf(xbt, dev->nodename, "feature-protocol-v2", "1");
+	if (rv) {
+		message = "writing feature-protocol-v2";
+		goto abort_transaction;
+	}
+
+	rv = xenbus_transaction_end(xbt, 0);
+	if (rv == -EAGAIN)
+		goto again;
+	if (rv) {
+		xenbus_dev_fatal(dev, rv, "completing transaction");
+		return rv;
+	}
+
+	xenbus_switch_state(dev, XenbusStateInitialised);
+
+	return 0;
+
+ abort_transaction:
+	xenbus_transaction_end(xbt, 1);
+	if (message)
+		xenbus_dev_error(dev, rv, "%s", message);
+
+	return rv;
+}
+
+static void ring_free(struct tpm_private *priv)
+{
+	if (!priv)
+		return;
+
+	if (priv->ring_ref)
+		gnttab_end_foreign_access(priv->ring_ref, 0,
+				(unsigned long)priv->shr);
+	else
+		free_page((unsigned long)priv->shr);
+
+	if (priv->chip && priv->chip->vendor.irq)
+		unbind_from_irqhandler(priv->chip->vendor.irq, priv);
+
+	kfree(priv);
+}
+
+static int tpmfront_probe(struct xenbus_device *dev,
+		const struct xenbus_device_id *id)
+{
+	struct tpm_private *priv;
+	int rv;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		xenbus_dev_fatal(dev, -ENOMEM, "allocating priv structure");
+		return -ENOMEM;
+	}
+
+	rv = setup_chip(&dev->dev, priv);
+	if (rv) {
+		kfree(priv);
+		return rv;
+	}
+
+	rv = setup_ring(dev, priv);
+	if (rv) {
+		tpm_remove_hardware(&dev->dev);
+		ring_free(priv);
+		return rv;
+	}
+
+	tpm_get_timeouts(priv->chip);
+
+	dev_set_drvdata(&dev->dev, priv->chip);
+
+	return rv;
+}
+
+static int tpmfront_remove(struct xenbus_device *dev)
+{
+	struct tpm_chip *chip = dev_get_drvdata(&dev->dev);
+	struct tpm_private *priv = TPM_VPRIV(chip);
+	tpm_remove_hardware(&dev->dev);
+	ring_free(priv);
+	TPM_VPRIV(chip) = NULL;
+	return 0;
+}
+
+static int tpmfront_resume(struct xenbus_device *dev)
+{
+	/* A suspend/resume/migrate will interrupt a vTPM anyway */
+	tpmfront_remove(dev);
+	return tpmfront_probe(dev, NULL);
+}
+
+static void backend_changed(struct xenbus_device *dev,
+		enum xenbus_state backend_state)
+{
+	int val;
+
+	switch (backend_state) {
+	case XenbusStateInitialised:
+	case XenbusStateConnected:
+		if (dev->state == XenbusStateConnected)
+			break;
+
+		if (xenbus_scanf(XBT_NIL, dev->otherend,
+				"feature-protocol-v2", "%d", &val) < 0)
+			val = 0;
+		if (!val) {
+			xenbus_dev_fatal(dev, -EINVAL,
+					"vTPM protocol 2 required");
+			return;
+		}
+		xenbus_switch_state(dev, XenbusStateConnected);
+		break;
+
+	case XenbusStateClosing:
+	case XenbusStateClosed:
+		device_unregister(&dev->dev);
+		xenbus_frontend_closed(dev);
+		break;
+	default:
+		break;
+	}
+}
+
+static const struct xenbus_device_id tpmfront_ids[] = {
+	{ "vtpm" },
+	{ "" }
+};
+MODULE_ALIAS("xen:vtpm");
+
+static DEFINE_XENBUS_DRIVER(tpmfront, ,
+		.probe = tpmfront_probe,
+		.remove = tpmfront_remove,
+		.resume = tpmfront_resume,
+		.otherend_changed = backend_changed,
+	);
+
+static int __init xen_tpmfront_init(void)
+{
+	if (!xen_domain())
+		return -ENODEV;
+
+	return xenbus_register_frontend(&tpmfront_driver);
+}
+module_init(xen_tpmfront_init);
+
+static void __exit xen_tpmfront_exit(void)
+{
+	xenbus_unregister_driver(&tpmfront_driver);
+}
+module_exit(xen_tpmfront_exit);
+
+MODULE_AUTHOR("Daniel De Graaf <dgdegra@tycho.nsa.gov>");
+MODULE_DESCRIPTION("Xen vTPM Driver");
+MODULE_LICENSE("GPL");
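
Once this frontend is connected, the vTPM behaves like any other TPM exposed
through the kernel's TPM character device. The following user-space sketch
sends a bare TPM 1.2 TPM_GetCapability(TPM_CAP_VERSION) command and hex-dumps
the reply; it assumes the guest kernel exposes the device as /dev/tpm0 (node
naming can vary) and is illustrative only, not part of this patch:

/* Illustrative sketch: talk to the (v)TPM via the TPM character device.
 * Assumes the node is /dev/tpm0; one write() submits a command and one
 * read() returns the corresponding response. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* TPM 1.2 TPM_GetCapability(TPM_CAP_VERSION), an 18-byte request */
	unsigned char cmd[] = {
		0x00, 0xc1,             /* TPM_TAG_RQU_COMMAND */
		0x00, 0x00, 0x00, 0x12, /* paramSize = 18 */
		0x00, 0x00, 0x00, 0x65, /* TPM_ORD_GetCapability */
		0x00, 0x00, 0x00, 0x06, /* capArea = TPM_CAP_VERSION */
		0x00, 0x00, 0x00, 0x00  /* subCapSize = 0 */
	};
	unsigned char resp[4096];
	ssize_t n, i;
	int fd = open("/dev/tpm0", O_RDWR);

	if (fd < 0) {
		perror("open /dev/tpm0");
		return 1;
	}
	if (write(fd, cmd, sizeof(cmd)) != (ssize_t)sizeof(cmd)) {
		perror("write");
		return 1;
	}
	n = read(fd, resp, sizeof(resp));
	if (n < 0) {
		perror("read");
		return 1;
	}
	for (i = 0; i < n; i++)
		printf("%02x%s", resp[i], (i % 16 == 15) ? "\n" : " ");
	printf("\n");
	close(fd);
	return 0;
}
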
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 2a2ef97697b2..3101cf6daf56 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -38,6 +38,7 @@
 
 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
 
+#include <linux/cpu.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/errno.h>
@@ -52,6 +53,7 @@
 #include <linux/notifier.h>
 #include <linux/memory.h>
 #include <linux/memory_hotplug.h>
+#include <linux/percpu-defs.h>
 
 #include <asm/page.h>
 #include <asm/pgalloc.h>
@@ -90,6 +92,8 @@ EXPORT_SYMBOL_GPL(balloon_stats);
 
 /* We increase/decrease in batches which fit in a page */
 static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)];
+static DEFINE_PER_CPU(struct page *, balloon_scratch_page);
+
 
 /* List of ballooned pages, threaded through the mem_map array. */
 static LIST_HEAD(ballooned_pages);
@@ -412,7 +416,8 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 		if (xen_pv_domain() && !PageHighMem(page)) {
 			ret = HYPERVISOR_update_va_mapping(
 				(unsigned long)__va(pfn << PAGE_SHIFT),
-				__pte_ma(0), 0);
+				pfn_pte(page_to_pfn(__get_cpu_var(balloon_scratch_page)),
+					PAGE_KERNEL_RO), 0);
 			BUG_ON(ret);
 		}
 #endif
@@ -425,7 +430,13 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 	/* No more mappings: invalidate P2M and add to balloon. */
 	for (i = 0; i < nr_pages; i++) {
 		pfn = mfn_to_pfn(frame_list[i]);
-		__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+			unsigned long p;
+			struct page *pg;
+			pg = __get_cpu_var(balloon_scratch_page);
+			p = page_to_pfn(pg);
+			__set_phys_to_machine(pfn, pfn_to_mfn(p));
+		}
 		balloon_append(pfn_to_page(pfn));
 	}
 
@@ -480,6 +491,18 @@ static void balloon_process(struct work_struct *work)
 	mutex_unlock(&balloon_mutex);
 }
 
+struct page *get_balloon_scratch_page(void)
+{
+	struct page *ret = get_cpu_var(balloon_scratch_page);
+	BUG_ON(ret == NULL);
+	return ret;
+}
+
+void put_balloon_scratch_page(void)
+{
+	put_cpu_var(balloon_scratch_page);
+}
+
 /* Resets the Xen limit, sets new target, and kicks off processing. */
 void balloon_set_new_target(unsigned long target)
 {
@@ -573,13 +596,47 @@ static void __init balloon_add_region(unsigned long start_pfn,
 	}
 }
 
+static int __cpuinit balloon_cpu_notify(struct notifier_block *self,
+				    unsigned long action, void *hcpu)
+{
+	int cpu = (long)hcpu;
+	switch (action) {
+	case CPU_UP_PREPARE:
+		if (per_cpu(balloon_scratch_page, cpu) != NULL)
+			break;
+		per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL);
+		if (per_cpu(balloon_scratch_page, cpu) == NULL) {
+			pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu);
+			return NOTIFY_BAD;
+		}
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block balloon_cpu_notifier __cpuinitdata = {
+	.notifier_call	= balloon_cpu_notify,
+};
+
 static int __init balloon_init(void)
 {
-	int i;
+	int i, cpu;
 
 	if (!xen_domain())
 		return -ENODEV;
 
+	for_each_online_cpu(cpu)
+	{
+		per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL);
+		if (per_cpu(balloon_scratch_page, cpu) == NULL) {
+			pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu);
+			return -ENOMEM;
+		}
+	}
+	register_cpu_notifier(&balloon_cpu_notifier);
+
 	pr_info("Initialising balloon driver\n");
 
 	balloon_stats.current_pages = xen_pv_domain()
@@ -616,4 +673,15 @@
 
 subsys_initcall(balloon_init);
 
+static int __init balloon_clear(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		per_cpu(balloon_scratch_page, cpu) = NULL;
+
+	return 0;
+}
+early_initcall(balloon_clear);
+
 MODULE_LICENSE("GPL");
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 5e8be462aed5..4035e833ea26 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -56,6 +56,7 @@
 #include <xen/interface/hvm/params.h>
 #include <xen/interface/physdev.h>
 #include <xen/interface/sched.h>
+#include <xen/interface/vcpu.h>
 #include <asm/hw_irq.h>
 
 /*
@@ -1212,7 +1213,17 @@ EXPORT_SYMBOL_GPL(evtchn_put);
 
 void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
 {
-	int irq = per_cpu(ipi_to_irq, cpu)[vector];
+	int irq;
+
+#ifdef CONFIG_X86
+	if (unlikely(vector == XEN_NMI_VECTOR)) {
+		int rc = HYPERVISOR_vcpu_op(VCPUOP_send_nmi, cpu, NULL);
+		if (rc < 0)
+			printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n", cpu, rc);
+		return;
+	}
+#endif
+	irq = per_cpu(ipi_to_irq, cpu)[vector];
 	BUG_ON(irq < 0);
 	notify_remote_via_irq(irq);
 }
@@ -1379,14 +1390,21 @@ static void __xen_evtchn_do_upcall(void)
 
 		pending_bits = active_evtchns(cpu, s, word_idx);
 		bit_idx = 0; /* usually scan entire word from start */
+		/*
+		 * We scan the starting word in two parts.
+		 *
+		 * 1st time: start in the middle, scanning the
+		 * upper bits.
+		 *
+		 * 2nd time: scan the whole word (not just the
+		 * parts skipped in the first pass) -- if an
+		 * event in the previously scanned bits is
+		 * pending again it would just be scanned on
+		 * the next loop anyway.
+		 */
 		if (word_idx == start_word_idx) {
-			/* We scan the starting word in two parts */
 			if (i == 0)
-				/* 1st time: start in the middle */
 				bit_idx = start_bit_idx;
-			else
-				/* 2nd time: mask bits done already */
-				bit_idx &= (1UL << start_bit_idx) - 1;
 		}
 
 		do {
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index b6165e047f48..8b3a69a06c39 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -57,6 +57,7 @@
 
 struct per_user_data {
 	struct mutex bind_mutex; /* serialize bind/unbind operations */
+	struct rb_root evtchns;
 
 	/* Notification ring, accessed via /dev/xen/evtchn. */
 #define EVTCHN_RING_SIZE     (PAGE_SIZE / sizeof(evtchn_port_t))
@@ -64,6 +65,7 @@ struct per_user_data {
 	evtchn_port_t *ring;
 	unsigned int ring_cons, ring_prod, ring_overflow;
 	struct mutex ring_cons_mutex; /* protect against concurrent readers */
+	spinlock_t ring_prod_lock; /* protect against concurrent interrupts */
 
 	/* Processes wait on this queue when ring is empty. */
 	wait_queue_head_t evtchn_wait;
@@ -71,54 +73,79 @@ struct per_user_data {
 	const char *name;
 };
 
-/*
- * Who's bound to each port?  This is logically an array of struct
- * per_user_data *, but we encode the current enabled-state in bit 0.
- */
-static unsigned long *port_user;
-static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */
+struct user_evtchn {
+	struct rb_node node;
+	struct per_user_data *user;
+	unsigned port;
+	bool enabled;
+};
 
-static inline struct per_user_data *get_port_user(unsigned port)
+static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
 {
-	return (struct per_user_data *)(port_user[port] & ~1);
-}
+	struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL;
 
-static inline void set_port_user(unsigned port, struct per_user_data *u)
-{
-	port_user[port] = (unsigned long)u;
+	while (*new) {
+		struct user_evtchn *this;
+
+		this = container_of(*new, struct user_evtchn, node);
+
+		parent = *new;
+		if (this->port < evtchn->port)
+			new = &((*new)->rb_left);
+		else if (this->port > evtchn->port)
+			new = &((*new)->rb_right);
+		else
+			return -EEXIST;
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&evtchn->node, parent, new);
+	rb_insert_color(&evtchn->node, &u->evtchns);
+
+	return 0;
 }
 
-static inline bool get_port_enabled(unsigned port)
+static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
 {
-	return port_user[port] & 1;
+	rb_erase(&evtchn->node, &u->evtchns);
+	kfree(evtchn);
 }
 
-static inline void set_port_enabled(unsigned port, bool enabled)
+static struct user_evtchn *find_evtchn(struct per_user_data *u, unsigned port)
 {
-	if (enabled)
-		port_user[port] |= 1;
-	else
-		port_user[port] &= ~1;
+	struct rb_node *node = u->evtchns.rb_node;
+
+	while (node) {
+		struct user_evtchn *evtchn;
+
+		evtchn = container_of(node, struct user_evtchn, node);
+
+		if (evtchn->port < port)
+			node = node->rb_left;
+		else if (evtchn->port > port)
+			node = node->rb_right;
+		else
+			return evtchn;
+	}
+	return NULL;
 }
 
 static irqreturn_t evtchn_interrupt(int irq, void *data)
 {
-	unsigned int port = (unsigned long)data;
-	struct per_user_data *u;
-
-	spin_lock(&port_user_lock);
-
-	u = get_port_user(port);
+	struct user_evtchn *evtchn = data;
+	struct per_user_data *u = evtchn->user;
 
-	WARN(!get_port_enabled(port),
+	WARN(!evtchn->enabled,
 	     "Interrupt for port %d, but apparently not enabled; per-user %p\n",
-	     port, u);
+	     evtchn->port, u);
 
 	disable_irq_nosync(irq);
-	set_port_enabled(port, false);
+	evtchn->enabled = false;
+
+	spin_lock(&u->ring_prod_lock);
 
 	if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
-		u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port;
+		u->ring[EVTCHN_RING_MASK(u->ring_prod)] = evtchn->port;
 		wmb(); /* Ensure ring contents visible */
 		if (u->ring_cons == u->ring_prod++) {
 			wake_up_interruptible(&u->evtchn_wait);
@@ -128,7 +155,7 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
 	} else
 		u->ring_overflow = 1;
 
-	spin_unlock(&port_user_lock);
+	spin_unlock(&u->ring_prod_lock);
 
 	return IRQ_HANDLED;
 }
@@ -229,20 +256,20 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
 	if (copy_from_user(kbuf, buf, count) != 0)
 		goto out;
 
-	spin_lock_irq(&port_user_lock);
+	mutex_lock(&u->bind_mutex);
 
 	for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) {
 		unsigned port = kbuf[i];
+		struct user_evtchn *evtchn;
 
-		if (port < NR_EVENT_CHANNELS &&
-		    get_port_user(port) == u &&
-		    !get_port_enabled(port)) {
-			set_port_enabled(port, true);
+		evtchn = find_evtchn(u, port);
+		if (evtchn && !evtchn->enabled) {
+			evtchn->enabled = true;
 			enable_irq(irq_from_evtchn(port));
 		}
 	}
 
-	spin_unlock_irq(&port_user_lock);
+	mutex_unlock(&u->bind_mutex);
 
 	rc = count;
 
@@ -253,6 +280,8 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
 
 static int evtchn_bind_to_user(struct per_user_data *u, int port)
 {
+	struct user_evtchn *evtchn;
+	struct evtchn_close close;
 	int rc = 0;
 
 	/*
@@ -263,35 +292,46 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port)
 	 * interrupt handler yet, and our caller has already
 	 * serialized bind operations.)
 	 */
-	BUG_ON(get_port_user(port) != NULL);
-	set_port_user(port, u);
-	set_port_enabled(port, true); /* start enabled */
+
+	evtchn = kzalloc(sizeof(*evtchn), GFP_KERNEL);
+	if (!evtchn)
+		return -ENOMEM;
+
+	evtchn->user = u;
+	evtchn->port = port;
+	evtchn->enabled = true; /* start enabled */
+
+	rc = add_evtchn(u, evtchn);
+	if (rc < 0)
+		goto err;
 
 	rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
-				       u->name, (void *)(unsigned long)port);
-	if (rc >= 0)
-		rc = evtchn_make_refcounted(port);
-	else {
-		/* bind failed, should close the port now */
-		struct evtchn_close close;
-		close.port = port;
-		if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
-			BUG();
-		set_port_user(port, NULL);
-	}
+				       u->name, evtchn);
+	if (rc < 0)
+		goto err;
 
+	rc = evtchn_make_refcounted(port);
+	return rc;
+
+err:
+	/* bind failed, should close the port now */
+	close.port = port;
+	if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
+		BUG();
+	del_evtchn(u, evtchn);
 	return rc;
 }
 
-static void evtchn_unbind_from_user(struct per_user_data *u, int port)
+static void evtchn_unbind_from_user(struct per_user_data *u,
+				    struct user_evtchn *evtchn)
 {
-	int irq = irq_from_evtchn(port);
+	int irq = irq_from_evtchn(evtchn->port);
 
 	BUG_ON(irq < 0);
 
-	unbind_from_irqhandler(irq, (void *)(unsigned long)port);
+	unbind_from_irqhandler(irq, evtchn);
 
-	set_port_user(port, NULL);
+	del_evtchn(u, evtchn);
 }
 
 static long evtchn_ioctl(struct file *file,
@@ -370,6 +410,7 @@ static long evtchn_ioctl(struct file *file,
 
 	case IOCTL_EVTCHN_UNBIND: {
 		struct ioctl_evtchn_unbind unbind;
+		struct user_evtchn *evtchn;
 
 		rc = -EFAULT;
 		if (copy_from_user(&unbind, uarg, sizeof(unbind)))
@@ -380,29 +421,27 @@ static long evtchn_ioctl(struct file *file,
 			break;
 
 		rc = -ENOTCONN;
-		if (get_port_user(unbind.port) != u)
+		evtchn = find_evtchn(u, unbind.port);
+		if (!evtchn)
 			break;
 
 		disable_irq(irq_from_evtchn(unbind.port));
-
-		evtchn_unbind_from_user(u, unbind.port);
-
+		evtchn_unbind_from_user(u, evtchn);
 		rc = 0;
 		break;
 	}
 
 	case IOCTL_EVTCHN_NOTIFY: {
 		struct ioctl_evtchn_notify notify;
+		struct user_evtchn *evtchn;
 
 		rc = -EFAULT;
 		if (copy_from_user(&notify, uarg, sizeof(notify)))
 			break;
 
-		if (notify.port >= NR_EVENT_CHANNELS) {
-			rc = -EINVAL;
-		} else if (get_port_user(notify.port) != u) {
-			rc = -ENOTCONN;
-		} else {
+		rc = -ENOTCONN;
+		evtchn = find_evtchn(u, notify.port);
+		if (evtchn) {
 			notify_remote_via_evtchn(notify.port);
 			rc = 0;
 		}
@@ -412,9 +451,9 @@ static long evtchn_ioctl(struct file *file,
 	case IOCTL_EVTCHN_RESET: {
 		/* Initialise the ring to empty. Clear errors. */
 		mutex_lock(&u->ring_cons_mutex);
-		spin_lock_irq(&port_user_lock);
+		spin_lock_irq(&u->ring_prod_lock);
 		u->ring_cons = u->ring_prod = u->ring_overflow = 0;
-		spin_unlock_irq(&port_user_lock);
+		spin_unlock_irq(&u->ring_prod_lock);
 		mutex_unlock(&u->ring_cons_mutex);
 		rc = 0;
 		break;
@@ -473,6 +512,7 @@ static int evtchn_open(struct inode *inode, struct file *filp)
 
 	mutex_init(&u->bind_mutex);
 	mutex_init(&u->ring_cons_mutex);
+	spin_lock_init(&u->ring_prod_lock);
 
 	filp->private_data = u;
 
@@ -481,15 +521,15 @@ static int evtchn_open(struct inode *inode, struct file *filp)
 
 static int evtchn_release(struct inode *inode, struct file *filp)
 {
-	int i;
 	struct per_user_data *u = filp->private_data;
+	struct rb_node *node;
 
-	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
-		if (get_port_user(i) != u)
-			continue;
+	while ((node = u->evtchns.rb_node)) {
+		struct user_evtchn *evtchn;
 
-		disable_irq(irq_from_evtchn(i));
-		evtchn_unbind_from_user(get_port_user(i), i);
+		evtchn = rb_entry(node, struct user_evtchn, node);
+		disable_irq(irq_from_evtchn(evtchn->port));
+		evtchn_unbind_from_user(u, evtchn);
 	}
 
 	free_page((unsigned long)u->ring);
@@ -523,12 +563,6 @@
 	if (!xen_domain())
 		return -ENODEV;
 
-	port_user = kcalloc(NR_EVENT_CHANNELS, sizeof(*port_user), GFP_KERNEL);
-	if (port_user == NULL)
-		return -ENOMEM;
-
-	spin_lock_init(&port_user_lock);
-
 	/* Create '/dev/xen/evtchn'. */
 	err = misc_register(&evtchn_miscdev);
 	if (err != 0) {
@@ -543,9 +577,6 @@
 
 static void __exit evtchn_cleanup(void)
 {
-	kfree(port_user);
-	port_user = NULL;
-
 	misc_deregister(&evtchn_miscdev);
 }
 
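
For context, the user-space ABI this rework preserves is the /dev/xen/evtchn
device: an ioctl binds a port, read() returns the ports that have fired
(leaving them masked), and write()-ing the ports back unmasks them, which is
exactly the path shown in evtchn_write() above. A minimal consumer might look
like the sketch below; it assumes the uapi header from
include/uapi/xen/evtchn.h is installed as <xen/evtchn.h> (paths vary by
distribution) and trims error handling:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <xen/evtchn.h>	/* ioctl_evtchn_* structs, IOCTL_EVTCHN_* numbers */

int main(void)
{
	struct ioctl_evtchn_bind_unbound_port bind = { .remote_domain = 0 };
	unsigned int port;	/* evtchn ports are unsigned ints */
	int fd = open("/dev/xen/evtchn", O_RDWR);
	int local_port;

	if (fd < 0)
		return 1;

	/* Allocate a fresh local port that the remote domain may bind. */
	local_port = ioctl(fd, IOCTL_EVTCHN_BIND_UNBOUND_PORT, &bind);
	if (local_port < 0)
		return 1;

	for (;;) {
		/* Blocks until an event fires; the port comes back masked. */
		if (read(fd, &port, sizeof(port)) != sizeof(port))
			break;
		printf("event on port %u\n", port);
		/* Writing the port back re-enables (unmasks) it. */
		if (write(fd, &port, sizeof(port)) != sizeof(port))
			break;
	}
	close(fd);
	return 0;
}
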
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index eab5427c75f5..e41c79c986ea 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -272,19 +272,12 @@ static int map_grant_pages(struct grant_map *map)
 	 * with find_grant_ptes.
 	 */
 	for (i = 0; i < map->count; i++) {
-		unsigned level;
 		unsigned long address = (unsigned long)
 			pfn_to_kaddr(page_to_pfn(map->pages[i]));
-		pte_t *ptep;
-		u64 pte_maddr = 0;
 		BUG_ON(PageHighMem(map->pages[i]));
 
-		ptep = lookup_address(address, &level);
-		pte_maddr = arbitrary_virt_to_machine(ptep).maddr;
-		gnttab_set_map_op(&map->kmap_ops[i], pte_maddr,
-			map->flags |
-			GNTMAP_host_map |
-			GNTMAP_contains_pte,
+		gnttab_set_map_op(&map->kmap_ops[i], address,
+			map->flags | GNTMAP_host_map,
 			map->grants[i].ref,
 			map->grants[i].domid);
 	}
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 04cdeb8e3719..c4d2298893b1 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -730,9 +730,18 @@ void gnttab_request_free_callback(struct gnttab_free_callback *callback,
 			  void (*fn)(void *), void *arg, u16 count)
 {
 	unsigned long flags;
+	struct gnttab_free_callback *cb;
+
 	spin_lock_irqsave(&gnttab_list_lock, flags);
-	if (callback->next)
-		goto out;
+
+	/* Check if the callback is already on the list */
+	cb = gnttab_free_callback_list;
+	while (cb) {
+		if (cb == callback)
+			goto out;
+		cb = cb->next;
+	}
+
 	callback->fn = fn;
 	callback->arg = arg;
 	callback->count = count;
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index f8e5dd701ecb..8e74590fa1bb 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -43,9 +43,10 @@ MODULE_LICENSE("GPL");
 
 #define PRIV_VMA_LOCKED ((void *)1)
 
-#ifndef HAVE_ARCH_PRIVCMD_MMAP
-static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
-#endif
+static int privcmd_vma_range_is_mapped(
+	       struct vm_area_struct *vma,
+	       unsigned long addr,
+	       unsigned long nr_pages);
 
 static long privcmd_ioctl_hypercall(void __user *udata)
 {
@@ -225,9 +226,9 @@ static long privcmd_ioctl_mmap(void __user *udata)
 		vma = find_vma(mm, msg->va);
 		rc = -EINVAL;
 
-		if (!vma || (msg->va != vma->vm_start) ||
-		    !privcmd_enforce_singleshot_mapping(vma))
+		if (!vma || (msg->va != vma->vm_start) || vma->vm_private_data)
 			goto out_up;
+		vma->vm_private_data = PRIV_VMA_LOCKED;
 	}
 
 	state.va = vma->vm_start;
@@ -358,7 +359,7 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
 		kfree(pages);
 		return -ENOMEM;
 	}
-	BUG_ON(vma->vm_private_data != PRIV_VMA_LOCKED);
+	BUG_ON(vma->vm_private_data != NULL);
 	vma->vm_private_data = pages;
 
 	return 0;
@@ -421,19 +422,43 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
 
 	vma = find_vma(mm, m.addr);
 	if (!vma ||
-	    vma->vm_ops != &privcmd_vm_ops ||
-	    (m.addr != vma->vm_start) ||
-	    ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
-	    !privcmd_enforce_singleshot_mapping(vma)) {
-		up_write(&mm->mmap_sem);
+	    vma->vm_ops != &privcmd_vm_ops) {
 		ret = -EINVAL;
-		goto out;
+		goto out_unlock;
 	}
-	if (xen_feature(XENFEAT_auto_translated_physmap)) {
-		ret = alloc_empty_pages(vma, m.num);
-		if (ret < 0) {
-			up_write(&mm->mmap_sem);
-			goto out;
+
+	/*
+	 * Caller must either:
+	 *
+	 * Map the whole VMA range, which will also allocate all the
+	 * pages required for the auto_translated_physmap case.
+	 *
+	 * Or
+	 *
+	 * Map unmapped holes left from a previous map attempt (e.g.,
+	 * because those foreign frames were previously paged out).
+	 */
+	if (vma->vm_private_data == NULL) {
+		if (m.addr != vma->vm_start ||
+		    m.addr + (nr_pages << PAGE_SHIFT) != vma->vm_end) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+		if (xen_feature(XENFEAT_auto_translated_physmap)) {
+			ret = alloc_empty_pages(vma, m.num);
+			if (ret < 0)
+				goto out_unlock;
+		} else
+			vma->vm_private_data = PRIV_VMA_LOCKED;
+	} else {
+		if (m.addr < vma->vm_start ||
+		    m.addr + (nr_pages << PAGE_SHIFT) > vma->vm_end) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+		if (privcmd_vma_range_is_mapped(vma, m.addr, nr_pages)) {
+			ret = -EINVAL;
+			goto out_unlock;
 		}
 	}
 
@@ -466,8 +491,11 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
 
 out:
 	free_page_list(&pagelist);
-
 	return ret;
+
+out_unlock:
+	up_write(&mm->mmap_sem);
+	goto out;
 }
 
 static long privcmd_ioctl(struct file *file,
@@ -540,9 +568,24 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
 	return 0;
 }
 
-static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
+/*
+ * For MMAPBATCH*. This allows asserting the singleshot mapping
+ * on a per pfn/pte basis. Mapping calls that fail with ENOENT
+ * can be then retried until success.
+ */
+static int is_mapped_fn(pte_t *pte, struct page *pmd_page,
+			unsigned long addr, void *data)
+{
+	return pte_none(*pte) ? 0 : -EBUSY;
+}
+
+static int privcmd_vma_range_is_mapped(
+	       struct vm_area_struct *vma,
+	       unsigned long addr,
+	       unsigned long nr_pages)
 {
-	return !cmpxchg(&vma->vm_private_data, NULL, PRIV_VMA_LOCKED);
+	return apply_to_page_range(vma->vm_mm, addr, nr_pages << PAGE_SHIFT,
+				   is_mapped_fn, NULL) != 0;
 }
 
 const struct file_operations xen_privcmd_fops = {
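
(Illustration, not part of the patch: with singleshot enforcement now per pfn/pte, userspace no longer has to tear down the whole VMA when some foreign frames come back -ENOENT because they were paged out; it can re-issue MMAPBATCH for just the holes. A hedged userspace sketch under stated assumptions: remap_enoent_holes(), the header path, and the 4 KiB page-size shift are illustrative; IOCTL_PRIVCMD_MMAPBATCH_V2 and struct privcmd_mmapbatch_v2 are the existing uapi.)

#include <errno.h>
#include <sys/ioctl.h>
#include <xen/privcmd.h>	/* header path assumed */

static int remap_enoent_holes(int fd, const struct privcmd_mmapbatch_v2 *m,
			      const xen_pfn_t *pfns, int *errs)
{
	unsigned int i;
	int rc = 0;

	for (i = 0; i < m->num; i++) {
		struct privcmd_mmapbatch_v2 one;

		if (errs[i] != -ENOENT)		/* per-frame status array */
			continue;
		one.num  = 1;
		one.dom  = m->dom;
		one.addr = m->addr + ((__u64)i << 12);	/* assumes 4 KiB pages */
		one.arr  = &pfns[i];
		one.err  = &errs[i];
		/* Only the unmapped hole is resubmitted; a fully-mapped
		 * subrange would now fail privcmd_vma_range_is_mapped(). */
		if (ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &one) < 0 &&
		    errno != ENOENT)
			rc = -errno;
	}
	return rc;
}
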
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index aadffcf7db9b..1b2277c311d2 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -506,13 +506,13 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
 				   to do proper error handling. */
 				xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
 							   attrs);
-				sgl[0].dma_length = 0;
+				sg_dma_len(sgl) = 0;
 				return DMA_ERROR_CODE;
 			}
 			sg->dma_address = xen_phys_to_bus(map);
 		} else
 			sg->dma_address = dev_addr;
-		sg->dma_length = sg->length;
+		sg_dma_len(sg) = sg->length;
 	}
 	return nelems;
 }
@@ -533,7 +533,7 @@ xen_swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
 	BUG_ON(dir == DMA_NONE);
 
 	for_each_sg(sgl, sg, nelems, i)
-		xen_unmap_single(hwdev, sg->dma_address, sg->dma_length, dir);
+		xen_unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir);
 
 }
 EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg_attrs);
@@ -555,7 +555,7 @@ xen_swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
 
 	for_each_sg(sgl, sg, nelems, i)
 		xen_swiotlb_sync_single(hwdev, sg->dma_address,
-					sg->dma_length, dir, target);
+					sg_dma_len(sg), dir, target);
 }
 
 void
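
(Illustration, not part of the patch: these hunks, and the matching lib/swiotlb.c ones below, stop touching the scatterlist's dma_length member directly and go through the sg_dma_len() accessor, which also compiles on architectures whose struct scatterlist has no separate dma_length field. For reference, the accessor in include/linux/scatterlist.h is essentially:)

#ifdef CONFIG_NEED_SG_DMA_LENGTH
#define sg_dma_len(sg)		((sg)->dma_length)
#else
#define sg_dma_len(sg)		((sg)->length)
#endif
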
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c
index 02817a85f877..21e18c18c7a1 100644
--- a/drivers/xen/xen-selfballoon.c
+++ b/drivers/xen/xen-selfballoon.c
@@ -265,8 +265,10 @@ static ssize_t store_selfballooning(struct device *dev,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	err = strict_strtoul(buf, 10, &tmp);
-	if (err || ((tmp != 0) && (tmp != 1)))
+	err = kstrtoul(buf, 10, &tmp);
+	if (err)
+		return err;
+	if ((tmp != 0) && (tmp != 1))
 		return -EINVAL;
 
 	xen_selfballooning_enabled = !!tmp;
@@ -292,8 +294,10 @@ static ssize_t store_selfballoon_interval(struct device *dev,
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	err = strict_strtoul(buf, 10, &val);
-	if (err || val == 0)
+	err = kstrtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val == 0)
 		return -EINVAL;
 	selfballoon_interval = val;
 	return count;
@@ -314,8 +318,10 @@ static ssize_t store_selfballoon_downhys(struct device *dev,
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	err = strict_strtoul(buf, 10, &val);
-	if (err || val == 0)
+	err = kstrtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val == 0)
 		return -EINVAL;
 	selfballoon_downhysteresis = val;
 	return count;
@@ -337,8 +343,10 @@ static ssize_t store_selfballoon_uphys(struct device *dev,
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	err = strict_strtoul(buf, 10, &val);
-	if (err || val == 0)
+	err = kstrtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val == 0)
 		return -EINVAL;
 	selfballoon_uphysteresis = val;
 	return count;
@@ -360,8 +368,10 @@ static ssize_t store_selfballoon_min_usable_mb(struct device *dev,
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	err = strict_strtoul(buf, 10, &val);
-	if (err || val == 0)
+	err = kstrtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val == 0)
 		return -EINVAL;
 	selfballoon_min_usable_mb = val;
 	return count;
@@ -384,8 +394,10 @@ static ssize_t store_selfballoon_reserved_mb(struct device *dev,
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	err = strict_strtoul(buf, 10, &val);
-	if (err || val == 0)
+	err = kstrtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val == 0)
 		return -EINVAL;
 	selfballoon_reserved_mb = val;
 	return count;
@@ -410,8 +422,10 @@ static ssize_t store_frontswap_selfshrinking(struct device *dev,
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	err = strict_strtoul(buf, 10, &tmp);
-	if (err || ((tmp != 0) && (tmp != 1)))
+	err = kstrtoul(buf, 10, &tmp);
+	if (err)
+		return err;
+	if ((tmp != 0) && (tmp != 1))
 		return -EINVAL;
 	frontswap_selfshrinking = !!tmp;
 	if (!was_enabled && !xen_selfballooning_enabled &&
@@ -437,8 +451,10 @@ static ssize_t store_frontswap_inertia(struct device *dev,
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	err = strict_strtoul(buf, 10, &val);
-	if (err || val == 0)
+	err = kstrtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val == 0)
 		return -EINVAL;
 	frontswap_inertia = val;
 	frontswap_inertia_counter = val;
@@ -460,8 +476,10 @@ static ssize_t store_frontswap_hysteresis(struct device *dev,
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	err = strict_strtoul(buf, 10, &val);
-	if (err || val == 0)
+	err = kstrtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val == 0)
 		return -EINVAL;
 	frontswap_hysteresis = val;
 	return count;
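
(Illustration, not part of the patch: every store handler above gets the same treatment; the deprecated strict_strtoul() becomes kstrtoul(), and the parse error is propagated as-is instead of being folded into -EINVAL. A condensed sketch of the resulting pattern; store_example and example_setting are illustrative names only.)

#include <linux/capability.h>
#include <linux/device.h>
#include <linux/kernel.h>

static ssize_t store_example(struct device *dev,
			     struct device_attribute *attr,
			     const char *buf, size_t count)
{
	unsigned long val;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	err = kstrtoul(buf, 10, &val);	/* returns 0 or -errno */
	if (err)
		return err;		/* propagate -EINVAL/-ERANGE */
	if (val == 0)
		return -EINVAL;		/* semantic check kept separate */
	/* example_setting = val; */
	return count;
}
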
diff --git a/include/xen/balloon.h b/include/xen/balloon.h
index cc2e1a7e44ec..a4c1c6a93691 100644
--- a/include/xen/balloon.h
+++ b/include/xen/balloon.h
@@ -29,6 +29,9 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages,
 			    bool highmem);
 void free_xenballooned_pages(int nr_pages, struct page **pages);
 
+struct page *get_balloon_scratch_page(void);
+void put_balloon_scratch_page(void);
+
 struct device;
 #ifdef CONFIG_XEN_SELFBALLOONING
 extern int register_xen_selfballooning(struct device *dev);
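
(Speculative sketch, not part of the patch: the two new declarations expose the balloon driver's scratch page to other Xen code. Judging only from the get/put naming, usage would bracket a short critical section; the internal semantics, for instance whether the page is per-CPU and whether sleeping is allowed in between, are not visible in this diff.)

#include <xen/balloon.h>

static void point_pte_at_scratch(void)
{
	struct page *scratch = get_balloon_scratch_page();

	/* ... use page_to_pfn(scratch) as a safe mapping target for a
	 * ballooned-out frame ... */

	put_balloon_scratch_page();
}
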
diff --git a/include/xen/interface/io/tpmif.h b/include/xen/interface/io/tpmif.h
new file mode 100644
index 000000000000..28e7dcd75e82
--- /dev/null
+++ b/include/xen/interface/io/tpmif.h
@@ -0,0 +1,52 @@
+/******************************************************************************
+ * tpmif.h
+ *
+ * TPM I/O interface for Xen guest OSes, v2
+ *
+ * This file is in the public domain.
+ *
+ */
+
+#ifndef __XEN_PUBLIC_IO_TPMIF_H__
+#define __XEN_PUBLIC_IO_TPMIF_H__
+
+/*
+ * Xenbus state machine
+ *
+ * Device open:
+ * 1. Both ends start in XenbusStateInitialising
+ * 2. Backend transitions to InitWait (frontend does not wait on this step)
+ * 3. Frontend populates ring-ref, event-channel, feature-protocol-v2
+ * 4. Frontend transitions to Initialised
+ * 5. Backend maps grant and event channel, verifies feature-protocol-v2
+ * 6. Backend transitions to Connected
+ * 7. Frontend verifies feature-protocol-v2, transitions to Connected
+ *
+ * Device close:
+ * 1. State is changed to XenbusStateClosing
+ * 2. Frontend transitions to Closed
+ * 3. Backend unmaps grant and event, changes state to InitWait
+ */
+
+enum vtpm_shared_page_state {
+	VTPM_STATE_IDLE,	/* no contents / vTPM idle / cancel complete */
+	VTPM_STATE_SUBMIT,	/* request ready / vTPM working */
+	VTPM_STATE_FINISH,	/* response ready / vTPM idle */
+	VTPM_STATE_CANCEL,	/* cancel requested / vTPM working */
+};
+/* The backend should only change state to IDLE or FINISH, while the
+ * frontend should only change to SUBMIT or CANCEL. */
+
+
+struct vtpm_shared_page {
+	uint32_t length;	/* request/response length in bytes */
+
+	uint8_t state;		/* enum vtpm_shared_page_state */
+	uint8_t locality;	/* for the current request */
+	uint8_t pad;
+
+	uint8_t nr_extra_pages;	/* extra pages for long packets; may be zero */
+	uint32_t extra_pages[0];	/* grant IDs; length in nr_extra_pages */
+};
+
+#endif
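
(Illustration, not part of the patch: the comments above fully specify the handshake; the frontend drives the state field and the backend answers through it. A hedged sketch of a frontend send path, assuming nr_extra_pages == 0; notify_via_evtchn() and wait_for_backend() are hypothetical helpers, and only the field names and VTPM_STATE_* constants come from tpmif.h.)

#include <string.h>
#include <xen/interface/io/tpmif.h>	/* the header added above */

void notify_via_evtchn(void);		/* hypothetical: kick the event channel */
void wait_for_backend(void);		/* hypothetical: block until notified */

static int vtpm_send(struct vtpm_shared_page *shr,
		     const void *cmd, uint32_t len)
{
	if (shr->state != VTPM_STATE_IDLE)
		return -1;				/* vTPM busy */

	/* with nr_extra_pages == 0, the payload follows the header */
	memcpy((uint8_t *)shr + sizeof(*shr), cmd, len);
	shr->length = len;
	__sync_synchronize();			/* publish payload before state */
	shr->state = VTPM_STATE_SUBMIT;		/* frontend: SUBMIT/CANCEL only */
	notify_via_evtchn();

	while (shr->state != VTPM_STATE_FINISH)	/* backend: IDLE/FINISH only */
		wait_for_backend();

	return (int)shr->length;		/* response size */
}
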
diff --git a/include/xen/interface/vcpu.h b/include/xen/interface/vcpu.h
index 87e6f8a48661..b05288ce3991 100644
--- a/include/xen/interface/vcpu.h
+++ b/include/xen/interface/vcpu.h
@@ -170,4 +170,6 @@ struct vcpu_register_vcpu_info {
 };
 DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_vcpu_info);
 
+/* Send an NMI to the specified VCPU. @extra_arg == NULL. */
+#define VCPUOP_send_nmi		11
 #endif /* __XEN_PUBLIC_VCPU_H__ */
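
(Illustration, not part of the patch: the new sub-op rides the existing VCPUOP hypercall, and per the comment the extra argument must be NULL. A minimal sketch of how a guest might issue it through the standard HYPERVISOR_vcpu_op() wrapper; xen_send_nmi is an illustrative name.)

#include <asm/xen/hypercall.h>
#include <xen/interface/vcpu.h>

/* Ask Xen to inject an NMI into the given virtual CPU. */
static inline int xen_send_nmi(int vcpu)
{
	return HYPERVISOR_vcpu_op(VCPUOP_send_nmi, vcpu, NULL);
}
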
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index d23762e6652c..4e8686c7e5a4 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -870,13 +870,13 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 				swiotlb_full(hwdev, sg->length, dir, 0);
 				swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
 						       attrs);
-				sgl[0].dma_length = 0;
+				sg_dma_len(sgl) = 0;
 				return 0;
 			}
 			sg->dma_address = phys_to_dma(hwdev, map);
 		} else
 			sg->dma_address = dev_addr;
-		sg->dma_length = sg->length;
+		sg_dma_len(sg) = sg->length;
 	}
 	return nelems;
 }
@@ -904,7 +904,7 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
 	BUG_ON(dir == DMA_NONE);
 
 	for_each_sg(sgl, sg, nelems, i)
-		unmap_single(hwdev, sg->dma_address, sg->dma_length, dir);
+		unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir);
 
 }
 EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
@@ -934,7 +934,7 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
 
 	for_each_sg(sgl, sg, nelems, i)
 		swiotlb_sync_single(hwdev, sg->dma_address,
-				    sg->dma_length, dir, target);
+				    sg_dma_len(sg), dir, target);
 }
 
 void