-rw-r--r--  CREDITS                                |   1
-rw-r--r--  Documentation/tpm/xen-tpmfront.txt     | 113
-rw-r--r--  MAINTAINERS                            |  16
-rw-r--r--  arch/arm/xen/enlighten.c               |  14
-rw-r--r--  arch/x86/Kconfig                       |  10
-rw-r--r--  arch/x86/include/asm/kvm_para.h        |  14
-rw-r--r--  arch/x86/include/asm/paravirt.h        |  32
-rw-r--r--  arch/x86/include/asm/paravirt_types.h  |  14
-rw-r--r--  arch/x86/include/asm/spinlock.h        | 128
-rw-r--r--  arch/x86/include/asm/spinlock_types.h  |  16
-rw-r--r--  arch/x86/include/asm/xen/events.h      |   1
-rw-r--r--  arch/x86/include/uapi/asm/kvm_para.h   |   1
-rw-r--r--  arch/x86/kernel/kvm.c                  | 262
-rw-r--r--  arch/x86/kernel/paravirt-spinlocks.c   |  18
-rw-r--r--  arch/x86/xen/enlighten.c               |  15
-rw-r--r--  arch/x86/xen/irq.c                     |  25
-rw-r--r--  arch/x86/xen/p2m.c                     |  26
-rw-r--r--  arch/x86/xen/setup.c                   |  29
-rw-r--r--  arch/x86/xen/smp.c                     |   8
-rw-r--r--  arch/x86/xen/spinlock.c                | 387
-rw-r--r--  drivers/char/tpm/Kconfig               |  12
-rw-r--r--  drivers/char/tpm/Makefile              |   1
-rw-r--r--  drivers/char/tpm/xen-tpmfront.c        | 473
-rw-r--r--  drivers/tty/hvc/hvc_xen.c              |   6
-rw-r--r--  drivers/xen/balloon.c                  |  74
-rw-r--r--  drivers/xen/events.c                   |  30
-rw-r--r--  drivers/xen/evtchn.c                   | 191
-rw-r--r--  drivers/xen/gntdev.c                   |  11
-rw-r--r--  drivers/xen/grant-table.c              |  13
-rw-r--r--  drivers/xen/privcmd.c                  |  83
-rw-r--r--  drivers/xen/swiotlb-xen.c              |   8
-rw-r--r--  drivers/xen/xen-selfballoon.c          |  54
-rw-r--r--  include/linux/jump_label.h             |  28
-rw-r--r--  include/linux/jump_label_ratelimit.h   |  34
-rw-r--r--  include/linux/perf_event.h             |   1
-rw-r--r--  include/uapi/linux/kvm_para.h          |   1
-rw-r--r--  include/xen/balloon.h                  |   3
-rw-r--r--  include/xen/interface/io/tpmif.h       |  52
-rw-r--r--  include/xen/interface/vcpu.h           |   2
-rw-r--r--  kernel/jump_label.c                    |   1
-rw-r--r--  lib/swiotlb.c                          |   8
41 files changed, 1645 insertions, 571 deletions
diff --git a/CREDITS b/CREDITS
index 206d0fcf07a5..646a0a9ad6d1 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1120,6 +1120,7 @@ D: author of userfs filesystem
1120D: Improved mmap and munmap handling 1120D: Improved mmap and munmap handling
1121D: General mm minor tidyups 1121D: General mm minor tidyups
1122D: autofs v4 maintainer 1122D: autofs v4 maintainer
1123D: Xen subsystem
1123S: 987 Alabama St 1124S: 987 Alabama St
1124S: San Francisco 1125S: San Francisco
1125S: CA, 94110 1126S: CA, 94110
diff --git a/Documentation/tpm/xen-tpmfront.txt b/Documentation/tpm/xen-tpmfront.txt
new file mode 100644
index 000000000000..69346de87ff3
--- /dev/null
+++ b/Documentation/tpm/xen-tpmfront.txt
@@ -0,0 +1,113 @@
1Virtual TPM interface for Xen
2
3Authors: Matthew Fioravante (JHUAPL), Daniel De Graaf (NSA)
4
5This document describes the virtual Trusted Platform Module (vTPM) subsystem for
6Xen. The reader is assumed to have familiarity with building and installing Xen,
7Linux, and a basic understanding of the TPM and vTPM concepts.
8
9INTRODUCTION
10
11The goal of this work is to provide TPM functionality to a virtual guest
12operating system (in Xen terms, a DomU). This allows programs to interact with
13a TPM in a virtual system the same way they interact with a TPM on the physical
14system. Each guest gets its own unique, emulated, software TPM. However, all
15of the vTPM's secrets (keys, NVRAM, etc.) are managed by a vTPM Manager domain,
16which seals the secrets to the Physical TPM. If the process of creating each of
17these domains (manager, vTPM, and guest) is trusted, the vTPM subsystem extends
18the chain of trust rooted in the hardware TPM to virtual machines in Xen. Each
19major component of vTPM is implemented as a separate domain, providing secure
20separation guaranteed by the hypervisor. The vTPM domains are implemented in
21mini-os to reduce memory and processor overhead.
22
23This mini-os vTPM subsystem was built on top of the previous vTPM work done by
24IBM and Intel.
25
26
27DESIGN OVERVIEW
28---------------
29
30The architecture of vTPM is described below:
31
32+------------------+
33| Linux DomU | ...
34| | ^ |
35| v | |
36| xen-tpmfront |
37+------------------+
38 | ^
39 v |
40+------------------+
41| mini-os/tpmback |
42| | ^ |
43| v | |
44| vtpm-stubdom | ...
45| | ^ |
46| v | |
47| mini-os/tpmfront |
48+------------------+
49 | ^
50 v |
51+------------------+
52| mini-os/tpmback |
53| | ^ |
54| v | |
55| vtpmmgr-stubdom |
56| | ^ |
57| v | |
58| mini-os/tpm_tis |
59+------------------+
60 | ^
61 v |
62+------------------+
63| Hardware TPM |
64+------------------+
65
66 * Linux DomU: The Linux based guest that wants to use a vTPM. There may be
67 more than one of these.
68
69 * xen-tpmfront.ko: Linux kernel virtual TPM frontend driver. This driver
70 provides vTPM access to a Linux-based DomU.
71
72 * mini-os/tpmback: Mini-os TPM backend driver. The Linux frontend driver
73 connects to this backend driver to facilitate communications
74 between the Linux DomU and its vTPM. This driver is also
75 used by vtpmmgr-stubdom to communicate with vtpm-stubdom.
76
77 * vtpm-stubdom: A mini-os stub domain that implements a vTPM. There is a
78 one to one mapping between running vtpm-stubdom instances and
79 logical vtpms on the system. The vTPM Platform Configuration
80 Registers (PCRs) are normally all initialized to zero.
81
82 * mini-os/tpmfront: Mini-os TPM frontend driver. The vTPM mini-os domain
83 vtpm-stubdom uses this driver to communicate with
84 vtpmmgr-stubdom. This driver is also used in mini-os
85 domains such as pv-grub that talk to the vTPM domain.
86
87 * vtpmmgr-stubdom: A mini-os domain that implements the vTPM manager. There is
88 only one vTPM manager and it should be running during the
89 entire lifetime of the machine. This domain regulates
90 access to the physical TPM on the system and secures the
91 persistent state of each vTPM.
92
93 * mini-os/tpm_tis: Mini-os TPM version 1.2 TPM Interface Specification (TIS)
94                   driver. This driver is used by vtpmmgr-stubdom to talk directly to
95 the hardware TPM. Communication is facilitated by mapping
96 hardware memory pages into vtpmmgr-stubdom.
97
98 * Hardware TPM: The physical TPM that is soldered onto the motherboard.
99
100
101INTEGRATION WITH XEN
102--------------------
103
104Support for the vTPM driver was added in Xen using the libxl toolstack in Xen
1054.3. See the Xen documentation (docs/misc/vtpm.txt) for details on setting up
106the vTPM and vTPM Manager stub domains. Once the stub domains are running, a
107vTPM device is set up in the same manner as a disk or network device in the
108domain's configuration file.
109
110In order to use features such as IMA that require a TPM to be loaded prior to
111the initrd, the xen-tpmfront driver must be compiled into the kernel. If not
112using such features, the driver can be compiled as a module and will be loaded
113as usual.
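
Once xen-tpmfront is bound in the guest, programs talk to the vTPM through the
standard TPM character device. The snippet below is an illustrative smoke test,
not part of the patch: it assumes the frontend exposes the usual /dev/tpm0 node
and sends a TPM 1.2 TPM_GetRandom command to confirm the vTPM answers.

    /* Hypothetical guest-side check: ask the vTPM for 20 random bytes. */
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        /* tag=TPM_TAG_RQU_COMMAND, size=14, ordinal=TPM_ORD_GetRandom, want 20 bytes */
        unsigned char cmd[] = {
            0x00, 0xc1, 0x00, 0x00, 0x00, 0x0e,
            0x00, 0x00, 0x00, 0x46, 0x00, 0x00, 0x00, 0x14
        };
        unsigned char resp[64];
        int fd = open("/dev/tpm0", O_RDWR);

        if (fd < 0) { perror("open /dev/tpm0"); return 1; }
        if (write(fd, cmd, sizeof(cmd)) != (ssize_t)sizeof(cmd)) { perror("write"); return 1; }

        ssize_t n = read(fd, resp, sizeof(resp));
        printf("vTPM answered with %zd bytes\n", n);
        close(fd);
        return 0;
    }

The same round trip exercises the whole chain in the diagram above: xen-tpmfront
in the guest, mini-os/tpmback, and the vtpm-stubdom instance backing this domain.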
diff --git a/MAINTAINERS b/MAINTAINERS
index 8197fbd70a3e..94aa87dc6d2a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9247,9 +9247,9 @@ F: drivers/media/tuners/tuner-xc2028.*
9247 9247
9248XEN HYPERVISOR INTERFACE 9248XEN HYPERVISOR INTERFACE
9249M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> 9249M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
9250M: Jeremy Fitzhardinge <jeremy@goop.org> 9250M: Boris Ostrovsky <boris.ostrovsky@oracle.com>
9251L: xen-devel@lists.xensource.com (moderated for non-subscribers) 9251M: David Vrabel <david.vrabel@citrix.com>
9252L: virtualization@lists.linux-foundation.org 9252L: xen-devel@lists.xenproject.org (moderated for non-subscribers)
9253S: Supported 9253S: Supported
9254F: arch/x86/xen/ 9254F: arch/x86/xen/
9255F: drivers/*/xen-*front.c 9255F: drivers/*/xen-*front.c
@@ -9260,35 +9260,35 @@ F: include/uapi/xen/
9260 9260
9261XEN HYPERVISOR ARM 9261XEN HYPERVISOR ARM
9262M: Stefano Stabellini <stefano.stabellini@eu.citrix.com> 9262M: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
9263L: xen-devel@lists.xensource.com (moderated for non-subscribers) 9263L: xen-devel@lists.xenproject.org (moderated for non-subscribers)
9264S: Supported 9264S: Supported
9265F: arch/arm/xen/ 9265F: arch/arm/xen/
9266F: arch/arm/include/asm/xen/ 9266F: arch/arm/include/asm/xen/
9267 9267
9268XEN HYPERVISOR ARM64 9268XEN HYPERVISOR ARM64
9269M: Stefano Stabellini <stefano.stabellini@eu.citrix.com> 9269M: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
9270L: xen-devel@lists.xensource.com (moderated for non-subscribers) 9270L: xen-devel@lists.xenproject.org (moderated for non-subscribers)
9271S: Supported 9271S: Supported
9272F: arch/arm64/xen/ 9272F: arch/arm64/xen/
9273F: arch/arm64/include/asm/xen/ 9273F: arch/arm64/include/asm/xen/
9274 9274
9275XEN NETWORK BACKEND DRIVER 9275XEN NETWORK BACKEND DRIVER
9276M: Ian Campbell <ian.campbell@citrix.com> 9276M: Ian Campbell <ian.campbell@citrix.com>
9277L: xen-devel@lists.xensource.com (moderated for non-subscribers) 9277L: xen-devel@lists.xenproject.org (moderated for non-subscribers)
9278L: netdev@vger.kernel.org 9278L: netdev@vger.kernel.org
9279S: Supported 9279S: Supported
9280F: drivers/net/xen-netback/* 9280F: drivers/net/xen-netback/*
9281 9281
9282XEN PCI SUBSYSTEM 9282XEN PCI SUBSYSTEM
9283M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> 9283M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
9284L: xen-devel@lists.xensource.com (moderated for non-subscribers) 9284L: xen-devel@lists.xenproject.org (moderated for non-subscribers)
9285S: Supported 9285S: Supported
9286F: arch/x86/pci/*xen* 9286F: arch/x86/pci/*xen*
9287F: drivers/pci/*xen* 9287F: drivers/pci/*xen*
9288 9288
9289XEN SWIOTLB SUBSYSTEM 9289XEN SWIOTLB SUBSYSTEM
9290M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> 9290M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
9291L: xen-devel@lists.xensource.com (moderated for non-subscribers) 9291L: xen-devel@lists.xenproject.org (moderated for non-subscribers)
9292S: Supported 9292S: Supported
9293F: arch/x86/xen/*swiotlb* 9293F: arch/x86/xen/*swiotlb*
9294F: drivers/xen/*swiotlb* 9294F: drivers/xen/*swiotlb*
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 8a6295c86209..83e4f959ee47 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -21,6 +21,8 @@
21#include <linux/of.h> 21#include <linux/of.h>
22#include <linux/of_irq.h> 22#include <linux/of_irq.h>
23#include <linux/of_address.h> 23#include <linux/of_address.h>
24#include <linux/cpuidle.h>
25#include <linux/cpufreq.h>
24 26
25#include <linux/mm.h> 27#include <linux/mm.h>
26 28
@@ -267,18 +269,28 @@ static int __init xen_guest_init(void)
267 if (!xen_initial_domain()) 269 if (!xen_initial_domain())
268 xenbus_probe(NULL); 270 xenbus_probe(NULL);
269 271
272 /*
273 * Making sure board specific code will not set up ops for
274 * cpu idle and cpu freq.
275 */
276 disable_cpuidle();
277 disable_cpufreq();
278
270 return 0; 279 return 0;
271} 280}
272core_initcall(xen_guest_init); 281core_initcall(xen_guest_init);
273 282
274static int __init xen_pm_init(void) 283static int __init xen_pm_init(void)
275{ 284{
285 if (!xen_domain())
286 return -ENODEV;
287
276 pm_power_off = xen_power_off; 288 pm_power_off = xen_power_off;
277 arm_pm_restart = xen_restart; 289 arm_pm_restart = xen_restart;
278 290
279 return 0; 291 return 0;
280} 292}
281subsys_initcall(xen_pm_init); 293late_initcall(xen_pm_init);
282 294
283static irqreturn_t xen_arm_callback(int irq, void *arg) 295static irqreturn_t xen_arm_callback(int irq, void *arg)
284{ 296{
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b32ebf92b0ce..b1fb846e6dac 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -632,6 +632,7 @@ config PARAVIRT_DEBUG
632config PARAVIRT_SPINLOCKS 632config PARAVIRT_SPINLOCKS
633 bool "Paravirtualization layer for spinlocks" 633 bool "Paravirtualization layer for spinlocks"
634 depends on PARAVIRT && SMP 634 depends on PARAVIRT && SMP
635 select UNINLINE_SPIN_UNLOCK
635 ---help--- 636 ---help---
636 Paravirtualized spinlocks allow a pvops backend to replace the 637 Paravirtualized spinlocks allow a pvops backend to replace the
637 spinlock implementation with something virtualization-friendly 638 spinlock implementation with something virtualization-friendly
@@ -656,6 +657,15 @@ config KVM_GUEST
656 underlying device model, the host provides the guest with 657 underlying device model, the host provides the guest with
657 timing infrastructure such as time of day, and system time 658 timing infrastructure such as time of day, and system time
658 659
660config KVM_DEBUG_FS
661 bool "Enable debug information for KVM Guests in debugfs"
662 depends on KVM_GUEST && DEBUG_FS
663 default n
664 ---help---
665 This option enables collection of various statistics for KVM guest.
666 Statistics are displayed in debugfs filesystem. Enabling this option
667 may incur significant overhead.
668
659source "arch/x86/lguest/Kconfig" 669source "arch/x86/lguest/Kconfig"
660 670
661config PARAVIRT_TIME_ACCOUNTING 671config PARAVIRT_TIME_ACCOUNTING
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 695399f2d5eb..427afcbf3d55 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -118,10 +118,20 @@ void kvm_async_pf_task_wait(u32 token);
118void kvm_async_pf_task_wake(u32 token); 118void kvm_async_pf_task_wake(u32 token);
119u32 kvm_read_and_reset_pf_reason(void); 119u32 kvm_read_and_reset_pf_reason(void);
120extern void kvm_disable_steal_time(void); 120extern void kvm_disable_steal_time(void);
121#else 121
122#define kvm_guest_init() do { } while (0) 122#ifdef CONFIG_PARAVIRT_SPINLOCKS
123void __init kvm_spinlock_init(void);
124#else /* !CONFIG_PARAVIRT_SPINLOCKS */
125static inline void kvm_spinlock_init(void)
126{
127}
128#endif /* CONFIG_PARAVIRT_SPINLOCKS */
129
130#else /* CONFIG_KVM_GUEST */
131#define kvm_guest_init() do {} while (0)
123#define kvm_async_pf_task_wait(T) do {} while(0) 132#define kvm_async_pf_task_wait(T) do {} while(0)
124#define kvm_async_pf_task_wake(T) do {} while(0) 133#define kvm_async_pf_task_wake(T) do {} while(0)
134
125static inline u32 kvm_read_and_reset_pf_reason(void) 135static inline u32 kvm_read_and_reset_pf_reason(void)
126{ 136{
127 return 0; 137 return 0;
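
The kvm_para.h change follows the usual compile-out idiom: with
CONFIG_PARAVIRT_SPINLOCKS disabled, kvm_spinlock_init() becomes an empty static
inline, so the unconditional call added to kvm_smp_prepare_boot_cpu() below
needs no #ifdef. A generic sketch of the idiom, with made-up names rather than
the kernel's:

    /* Illustrative stub pattern: the call site stays unconditional and the
     * empty inline disappears when the feature is configured out. */
    #ifdef CONFIG_FEATURE_SKETCH
    void feature_init(void);                    /* real implementation elsewhere */
    #else
    static inline void feature_init(void) { }   /* compiled-out stub */
    #endif

    int main(void)
    {
        feature_init();   /* no #ifdef needed at the call site */
        return 0;
    }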
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index cfdc9ee4c900..401f350ef71b 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -712,36 +712,16 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
712 712
713#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) 713#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
714 714
715static inline int arch_spin_is_locked(struct arch_spinlock *lock) 715static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock,
716 __ticket_t ticket)
716{ 717{
717 return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock); 718 PVOP_VCALLEE2(pv_lock_ops.lock_spinning, lock, ticket);
718} 719}
719 720
720static inline int arch_spin_is_contended(struct arch_spinlock *lock) 721static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock,
722 __ticket_t ticket)
721{ 723{
722 return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock); 724 PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket);
723}
724#define arch_spin_is_contended arch_spin_is_contended
725
726static __always_inline void arch_spin_lock(struct arch_spinlock *lock)
727{
728 PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
729}
730
731static __always_inline void arch_spin_lock_flags(struct arch_spinlock *lock,
732 unsigned long flags)
733{
734 PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags);
735}
736
737static __always_inline int arch_spin_trylock(struct arch_spinlock *lock)
738{
739 return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
740}
741
742static __always_inline void arch_spin_unlock(struct arch_spinlock *lock)
743{
744 PVOP_VCALL1(pv_lock_ops.spin_unlock, lock);
745} 725}
746 726
747#endif 727#endif
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 0db1fcac668c..04ac40e192eb 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -327,13 +327,15 @@ struct pv_mmu_ops {
327}; 327};
328 328
329struct arch_spinlock; 329struct arch_spinlock;
330#ifdef CONFIG_SMP
331#include <asm/spinlock_types.h>
332#else
333typedef u16 __ticket_t;
334#endif
335
330struct pv_lock_ops { 336struct pv_lock_ops {
331 int (*spin_is_locked)(struct arch_spinlock *lock); 337 struct paravirt_callee_save lock_spinning;
332 int (*spin_is_contended)(struct arch_spinlock *lock); 338 void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket);
333 void (*spin_lock)(struct arch_spinlock *lock);
334 void (*spin_lock_flags)(struct arch_spinlock *lock, unsigned long flags);
335 int (*spin_trylock)(struct arch_spinlock *lock);
336 void (*spin_unlock)(struct arch_spinlock *lock);
337}; 339};
338 340
339/* This contains all the paravirt structures: we get a convenient 341/* This contains all the paravirt structures: we get a convenient
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index e3ddd7db723f..8963bfeea82a 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -1,11 +1,14 @@
1#ifndef _ASM_X86_SPINLOCK_H 1#ifndef _ASM_X86_SPINLOCK_H
2#define _ASM_X86_SPINLOCK_H 2#define _ASM_X86_SPINLOCK_H
3 3
4#include <linux/jump_label.h>
4#include <linux/atomic.h> 5#include <linux/atomic.h>
5#include <asm/page.h> 6#include <asm/page.h>
6#include <asm/processor.h> 7#include <asm/processor.h>
7#include <linux/compiler.h> 8#include <linux/compiler.h>
8#include <asm/paravirt.h> 9#include <asm/paravirt.h>
10#include <asm/bitops.h>
11
9/* 12/*
10 * Your basic SMP spinlocks, allowing only a single CPU anywhere 13 * Your basic SMP spinlocks, allowing only a single CPU anywhere
11 * 14 *
@@ -34,6 +37,31 @@
34# define UNLOCK_LOCK_PREFIX 37# define UNLOCK_LOCK_PREFIX
35#endif 38#endif
36 39
40/* How long a lock should spin before we consider blocking */
41#define SPIN_THRESHOLD (1 << 15)
42
43extern struct static_key paravirt_ticketlocks_enabled;
44static __always_inline bool static_key_false(struct static_key *key);
45
46#ifdef CONFIG_PARAVIRT_SPINLOCKS
47
48static inline void __ticket_enter_slowpath(arch_spinlock_t *lock)
49{
50 set_bit(0, (volatile unsigned long *)&lock->tickets.tail);
51}
52
53#else /* !CONFIG_PARAVIRT_SPINLOCKS */
54static __always_inline void __ticket_lock_spinning(arch_spinlock_t *lock,
55 __ticket_t ticket)
56{
57}
58static inline void __ticket_unlock_kick(arch_spinlock_t *lock,
59 __ticket_t ticket)
60{
61}
62
63#endif /* CONFIG_PARAVIRT_SPINLOCKS */
64
37/* 65/*
38 * Ticket locks are conceptually two parts, one indicating the current head of 66 * Ticket locks are conceptually two parts, one indicating the current head of
39 * the queue, and the other indicating the current tail. The lock is acquired 67 * the queue, and the other indicating the current tail. The lock is acquired
@@ -47,81 +75,101 @@
47 * in the high part, because a wide xadd increment of the low part would carry 75 * in the high part, because a wide xadd increment of the low part would carry
48 * up and contaminate the high part. 76 * up and contaminate the high part.
49 */ 77 */
50static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) 78static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
51{ 79{
52 register struct __raw_tickets inc = { .tail = 1 }; 80 register struct __raw_tickets inc = { .tail = TICKET_LOCK_INC };
53 81
54 inc = xadd(&lock->tickets, inc); 82 inc = xadd(&lock->tickets, inc);
83 if (likely(inc.head == inc.tail))
84 goto out;
55 85
86 inc.tail &= ~TICKET_SLOWPATH_FLAG;
56 for (;;) { 87 for (;;) {
57 if (inc.head == inc.tail) 88 unsigned count = SPIN_THRESHOLD;
58 break; 89
59 cpu_relax(); 90 do {
60 inc.head = ACCESS_ONCE(lock->tickets.head); 91 if (ACCESS_ONCE(lock->tickets.head) == inc.tail)
92 goto out;
93 cpu_relax();
94 } while (--count);
95 __ticket_lock_spinning(lock, inc.tail);
61 } 96 }
62 barrier(); /* make sure nothing creeps before the lock is taken */ 97out: barrier(); /* make sure nothing creeps before the lock is taken */
63} 98}
64 99
65static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) 100static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
66{ 101{
67 arch_spinlock_t old, new; 102 arch_spinlock_t old, new;
68 103
69 old.tickets = ACCESS_ONCE(lock->tickets); 104 old.tickets = ACCESS_ONCE(lock->tickets);
70 if (old.tickets.head != old.tickets.tail) 105 if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG))
71 return 0; 106 return 0;
72 107
73 new.head_tail = old.head_tail + (1 << TICKET_SHIFT); 108 new.head_tail = old.head_tail + (TICKET_LOCK_INC << TICKET_SHIFT);
74 109
75 /* cmpxchg is a full barrier, so nothing can move before it */ 110 /* cmpxchg is a full barrier, so nothing can move before it */
76 return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail; 111 return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
77} 112}
78 113
79static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) 114static inline void __ticket_unlock_slowpath(arch_spinlock_t *lock,
115 arch_spinlock_t old)
80{ 116{
81 __add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX); 117 arch_spinlock_t new;
118
119 BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS);
120
121 /* Perform the unlock on the "before" copy */
122 old.tickets.head += TICKET_LOCK_INC;
123
124 /* Clear the slowpath flag */
125 new.head_tail = old.head_tail & ~(TICKET_SLOWPATH_FLAG << TICKET_SHIFT);
126
127 /*
128 * If the lock is uncontended, clear the flag - use cmpxchg in
129 * case it changes behind our back though.
130 */
131 if (new.tickets.head != new.tickets.tail ||
132 cmpxchg(&lock->head_tail, old.head_tail,
133 new.head_tail) != old.head_tail) {
134 /*
135 * Lock still has someone queued for it, so wake up an
136 * appropriate waiter.
137 */
138 __ticket_unlock_kick(lock, old.tickets.head);
139 }
82} 140}
83 141
84static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) 142static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
85{ 143{
86 struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); 144 if (TICKET_SLOWPATH_FLAG &&
145 static_key_false(&paravirt_ticketlocks_enabled)) {
146 arch_spinlock_t prev;
87 147
88 return tmp.tail != tmp.head; 148 prev = *lock;
89} 149 add_smp(&lock->tickets.head, TICKET_LOCK_INC);
90 150
91static inline int __ticket_spin_is_contended(arch_spinlock_t *lock) 151 /* add_smp() is a full mb() */
92{
93 struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
94 152
95 return (__ticket_t)(tmp.tail - tmp.head) > 1; 153 if (unlikely(lock->tickets.tail & TICKET_SLOWPATH_FLAG))
154 __ticket_unlock_slowpath(lock, prev);
155 } else
156 __add(&lock->tickets.head, TICKET_LOCK_INC, UNLOCK_LOCK_PREFIX);
96} 157}
97 158
98#ifndef CONFIG_PARAVIRT_SPINLOCKS
99
100static inline int arch_spin_is_locked(arch_spinlock_t *lock) 159static inline int arch_spin_is_locked(arch_spinlock_t *lock)
101{ 160{
102 return __ticket_spin_is_locked(lock); 161 struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
103}
104
105static inline int arch_spin_is_contended(arch_spinlock_t *lock)
106{
107 return __ticket_spin_is_contended(lock);
108}
109#define arch_spin_is_contended arch_spin_is_contended
110 162
111static __always_inline void arch_spin_lock(arch_spinlock_t *lock) 163 return tmp.tail != tmp.head;
112{
113 __ticket_spin_lock(lock);
114} 164}
115 165
116static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) 166static inline int arch_spin_is_contended(arch_spinlock_t *lock)
117{ 167{
118 return __ticket_spin_trylock(lock); 168 struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
119}
120 169
121static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) 170 return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC;
122{
123 __ticket_spin_unlock(lock);
124} 171}
172#define arch_spin_is_contended arch_spin_is_contended
125 173
126static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock, 174static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
127 unsigned long flags) 175 unsigned long flags)
@@ -129,8 +177,6 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
129 arch_spin_lock(lock); 177 arch_spin_lock(lock);
130} 178}
131 179
132#endif /* CONFIG_PARAVIRT_SPINLOCKS */
133
134static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) 180static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
135{ 181{
136 while (arch_spin_is_locked(lock)) 182 while (arch_spin_is_locked(lock))
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index ad0ad07fc006..4f1bea19945b 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -1,13 +1,17 @@
1#ifndef _ASM_X86_SPINLOCK_TYPES_H 1#ifndef _ASM_X86_SPINLOCK_TYPES_H
2#define _ASM_X86_SPINLOCK_TYPES_H 2#define _ASM_X86_SPINLOCK_TYPES_H
3 3
4#ifndef __LINUX_SPINLOCK_TYPES_H
5# error "please don't include this file directly"
6#endif
7
8#include <linux/types.h> 4#include <linux/types.h>
9 5
10#if (CONFIG_NR_CPUS < 256) 6#ifdef CONFIG_PARAVIRT_SPINLOCKS
7#define __TICKET_LOCK_INC 2
8#define TICKET_SLOWPATH_FLAG ((__ticket_t)1)
9#else
10#define __TICKET_LOCK_INC 1
11#define TICKET_SLOWPATH_FLAG ((__ticket_t)0)
12#endif
13
14#if (CONFIG_NR_CPUS < (256 / __TICKET_LOCK_INC))
11typedef u8 __ticket_t; 15typedef u8 __ticket_t;
12typedef u16 __ticketpair_t; 16typedef u16 __ticketpair_t;
13#else 17#else
@@ -15,6 +19,8 @@ typedef u16 __ticket_t;
15typedef u32 __ticketpair_t; 19typedef u32 __ticketpair_t;
16#endif 20#endif
17 21
22#define TICKET_LOCK_INC ((__ticket_t)__TICKET_LOCK_INC)
23
18#define TICKET_SHIFT (sizeof(__ticket_t) * 8) 24#define TICKET_SHIFT (sizeof(__ticket_t) * 8)
19 25
20typedef struct arch_spinlock { 26typedef struct arch_spinlock {
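
Taken together, the spinlock.h and spinlock_types.h changes make the ticket
increment 2 and reserve bit 0 of the tail as TICKET_SLOWPATH_FLAG: the fast path
is still a single xadd, and only after SPIN_THRESHOLD spins does a waiter set
the flag and call __ticket_lock_spinning(). Below is a freestanding model of
that layout in C11 atomics; it is a sketch for illustration (one 16-bit ticket
pair, no blocking slow path), not the kernel implementation.

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    #define TICKET_LOCK_INC      2   /* bit 0 of the tail stays free ...        */
    #define TICKET_SLOWPATH_FLAG 1   /* ... to mark "a waiter may have blocked" */

    struct ticketlock {
        _Atomic uint16_t head_tail;  /* low byte = head, high byte = tail */
    };

    static void ticket_lock(struct ticketlock *lk)
    {
        /* xadd-style: claim the next tail slot, then wait for head to catch up */
        uint16_t old = atomic_fetch_add(&lk->head_tail, TICKET_LOCK_INC << 8);
        uint8_t  me  = (uint8_t)((old >> 8) & ~TICKET_SLOWPATH_FLAG);

        while ((uint8_t)atomic_load(&lk->head_tail) != me)
            ;  /* the kernel calls __ticket_lock_spinning() after SPIN_THRESHOLD */
    }

    static void ticket_unlock(struct ticketlock *lk)
    {
        uint16_t old, new;

        do {  /* bump head only; the kernel uses a byte-sized add for this */
            old = atomic_load(&lk->head_tail);
            new = (uint16_t)((old & 0xff00) | ((old + TICKET_LOCK_INC) & 0x00ff));
        } while (!atomic_compare_exchange_weak(&lk->head_tail, &old, new));
    }

    int main(void)
    {
        struct ticketlock lk = { 0 };

        ticket_lock(&lk);
        printf("locked:   head_tail=%#06x\n", (unsigned)atomic_load(&lk.head_tail));
        ticket_unlock(&lk);
        printf("unlocked: head_tail=%#06x\n", (unsigned)atomic_load(&lk.head_tail));
        return 0;
    }

Because the unlocker only advances the head byte, bit 0 of the tail survives an
unlock, which is why it can be trusted as the "somebody may be blocked" marker
that arch_spin_unlock() tests under the paravirt_ticketlocks_enabled static key.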
diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h
index ca842f2769ef..608a79d5a466 100644
--- a/arch/x86/include/asm/xen/events.h
+++ b/arch/x86/include/asm/xen/events.h
@@ -7,6 +7,7 @@ enum ipi_vector {
7 XEN_CALL_FUNCTION_SINGLE_VECTOR, 7 XEN_CALL_FUNCTION_SINGLE_VECTOR,
8 XEN_SPIN_UNLOCK_VECTOR, 8 XEN_SPIN_UNLOCK_VECTOR,
9 XEN_IRQ_WORK_VECTOR, 9 XEN_IRQ_WORK_VECTOR,
10 XEN_NMI_VECTOR,
10 11
11 XEN_NR_IPIS, 12 XEN_NR_IPIS,
12}; 13};
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 06fdbd987e97..94dc8ca434e0 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -23,6 +23,7 @@
23#define KVM_FEATURE_ASYNC_PF 4 23#define KVM_FEATURE_ASYNC_PF 4
24#define KVM_FEATURE_STEAL_TIME 5 24#define KVM_FEATURE_STEAL_TIME 5
25#define KVM_FEATURE_PV_EOI 6 25#define KVM_FEATURE_PV_EOI 6
26#define KVM_FEATURE_PV_UNHALT 7
26 27
27/* The last 8 bits are used to indicate how to interpret the flags field 28/* The last 8 bits are used to indicate how to interpret the flags field
28 * in pvclock structure. If no bits are set, all flags are ignored. 29 * in pvclock structure. If no bits are set, all flags are ignored.
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index a96d32cc55b8..56e2fa4a8b13 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -34,6 +34,7 @@
34#include <linux/sched.h> 34#include <linux/sched.h>
35#include <linux/slab.h> 35#include <linux/slab.h>
36#include <linux/kprobes.h> 36#include <linux/kprobes.h>
37#include <linux/debugfs.h>
37#include <asm/timer.h> 38#include <asm/timer.h>
38#include <asm/cpu.h> 39#include <asm/cpu.h>
39#include <asm/traps.h> 40#include <asm/traps.h>
@@ -419,6 +420,7 @@ static void __init kvm_smp_prepare_boot_cpu(void)
419 WARN_ON(kvm_register_clock("primary cpu clock")); 420 WARN_ON(kvm_register_clock("primary cpu clock"));
420 kvm_guest_cpu_init(); 421 kvm_guest_cpu_init();
421 native_smp_prepare_boot_cpu(); 422 native_smp_prepare_boot_cpu();
423 kvm_spinlock_init();
422} 424}
423 425
424static void kvm_guest_cpu_online(void *dummy) 426static void kvm_guest_cpu_online(void *dummy)
@@ -523,3 +525,263 @@ static __init int activate_jump_labels(void)
523 return 0; 525 return 0;
524} 526}
525arch_initcall(activate_jump_labels); 527arch_initcall(activate_jump_labels);
528
529#ifdef CONFIG_PARAVIRT_SPINLOCKS
530
531/* Kick a cpu by its apicid. Used to wake up a halted vcpu */
532static void kvm_kick_cpu(int cpu)
533{
534 int apicid;
535 unsigned long flags = 0;
536
537 apicid = per_cpu(x86_cpu_to_apicid, cpu);
538 kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
539}
540
541enum kvm_contention_stat {
542 TAKEN_SLOW,
543 TAKEN_SLOW_PICKUP,
544 RELEASED_SLOW,
545 RELEASED_SLOW_KICKED,
546 NR_CONTENTION_STATS
547};
548
549#ifdef CONFIG_KVM_DEBUG_FS
550#define HISTO_BUCKETS 30
551
552static struct kvm_spinlock_stats
553{
554 u32 contention_stats[NR_CONTENTION_STATS];
555 u32 histo_spin_blocked[HISTO_BUCKETS+1];
556 u64 time_blocked;
557} spinlock_stats;
558
559static u8 zero_stats;
560
561static inline void check_zero(void)
562{
563 u8 ret;
564 u8 old;
565
566 old = ACCESS_ONCE(zero_stats);
567 if (unlikely(old)) {
568 ret = cmpxchg(&zero_stats, old, 0);
569 /* This ensures only one fellow resets the stat */
570 if (ret == old)
571 memset(&spinlock_stats, 0, sizeof(spinlock_stats));
572 }
573}
574
575static inline void add_stats(enum kvm_contention_stat var, u32 val)
576{
577 check_zero();
578 spinlock_stats.contention_stats[var] += val;
579}
580
581
582static inline u64 spin_time_start(void)
583{
584 return sched_clock();
585}
586
587static void __spin_time_accum(u64 delta, u32 *array)
588{
589 unsigned index;
590
591 index = ilog2(delta);
592 check_zero();
593
594 if (index < HISTO_BUCKETS)
595 array[index]++;
596 else
597 array[HISTO_BUCKETS]++;
598}
599
600static inline void spin_time_accum_blocked(u64 start)
601{
602 u32 delta;
603
604 delta = sched_clock() - start;
605 __spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
606 spinlock_stats.time_blocked += delta;
607}
608
609static struct dentry *d_spin_debug;
610static struct dentry *d_kvm_debug;
611
612struct dentry *kvm_init_debugfs(void)
613{
614 d_kvm_debug = debugfs_create_dir("kvm", NULL);
615 if (!d_kvm_debug)
616 printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n");
617
618 return d_kvm_debug;
619}
620
621static int __init kvm_spinlock_debugfs(void)
622{
623 struct dentry *d_kvm;
624
625 d_kvm = kvm_init_debugfs();
626 if (d_kvm == NULL)
627 return -ENOMEM;
628
629 d_spin_debug = debugfs_create_dir("spinlocks", d_kvm);
630
631 debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
632
633 debugfs_create_u32("taken_slow", 0444, d_spin_debug,
634 &spinlock_stats.contention_stats[TAKEN_SLOW]);
635 debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
636 &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
637
638 debugfs_create_u32("released_slow", 0444, d_spin_debug,
639 &spinlock_stats.contention_stats[RELEASED_SLOW]);
640 debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
641 &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);
642
643 debugfs_create_u64("time_blocked", 0444, d_spin_debug,
644 &spinlock_stats.time_blocked);
645
646 debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
647 spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
648
649 return 0;
650}
651fs_initcall(kvm_spinlock_debugfs);
652#else /* !CONFIG_KVM_DEBUG_FS */
653static inline void add_stats(enum kvm_contention_stat var, u32 val)
654{
655}
656
657static inline u64 spin_time_start(void)
658{
659 return 0;
660}
661
662static inline void spin_time_accum_blocked(u64 start)
663{
664}
665#endif /* CONFIG_KVM_DEBUG_FS */
666
667struct kvm_lock_waiting {
668 struct arch_spinlock *lock;
669 __ticket_t want;
670};
671
672/* cpus 'waiting' on a spinlock to become available */
673static cpumask_t waiting_cpus;
674
675/* Track spinlock on which a cpu is waiting */
676static DEFINE_PER_CPU(struct kvm_lock_waiting, klock_waiting);
677
678static void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
679{
680 struct kvm_lock_waiting *w;
681 int cpu;
682 u64 start;
683 unsigned long flags;
684
685 if (in_nmi())
686 return;
687
688 w = &__get_cpu_var(klock_waiting);
689 cpu = smp_processor_id();
690 start = spin_time_start();
691
692 /*
693 * Make sure an interrupt handler can't upset things in a
694 * partially setup state.
695 */
696 local_irq_save(flags);
697
698 /*
699 * The ordering protocol on this is that the "lock" pointer
700 * may only be set non-NULL if the "want" ticket is correct.
701 * If we're updating "want", we must first clear "lock".
702 */
703 w->lock = NULL;
704 smp_wmb();
705 w->want = want;
706 smp_wmb();
707 w->lock = lock;
708
709 add_stats(TAKEN_SLOW, 1);
710
711 /*
712 * This uses set_bit, which is atomic but we should not rely on its
713 * reordering guarantees, so a barrier is needed after this call.
714 */
715 cpumask_set_cpu(cpu, &waiting_cpus);
716
717 barrier();
718
719 /*
720 * Mark entry to slowpath before doing the pickup test to make
721 * sure we don't deadlock with an unlocker.
722 */
723 __ticket_enter_slowpath(lock);
724
725 /*
726 * check again to make sure it didn't become free while
727 * we weren't looking.
728 */
729 if (ACCESS_ONCE(lock->tickets.head) == want) {
730 add_stats(TAKEN_SLOW_PICKUP, 1);
731 goto out;
732 }
733
734 /*
735 * halt until it's our turn and we are kicked. Note that we do a safe halt
736 * in the irq-enabled case to avoid a hang if the lock info is overwritten
737 * in the irq spinlock slowpath and no spurious interrupt occurs to save us.
738 */
739 if (arch_irqs_disabled_flags(flags))
740 halt();
741 else
742 safe_halt();
743
744out:
745 cpumask_clear_cpu(cpu, &waiting_cpus);
746 w->lock = NULL;
747 local_irq_restore(flags);
748 spin_time_accum_blocked(start);
749}
750PV_CALLEE_SAVE_REGS_THUNK(kvm_lock_spinning);
751
752/* Kick vcpu waiting on @lock->head to reach value @ticket */
753static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
754{
755 int cpu;
756
757 add_stats(RELEASED_SLOW, 1);
758 for_each_cpu(cpu, &waiting_cpus) {
759 const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu);
760 if (ACCESS_ONCE(w->lock) == lock &&
761 ACCESS_ONCE(w->want) == ticket) {
762 add_stats(RELEASED_SLOW_KICKED, 1);
763 kvm_kick_cpu(cpu);
764 break;
765 }
766 }
767}
768
769/*
770 * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
771 */
772void __init kvm_spinlock_init(void)
773{
774 if (!kvm_para_available())
775 return;
776 /* Does host kernel support KVM_FEATURE_PV_UNHALT? */
777 if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
778 return;
779
780 printk(KERN_INFO "KVM setup paravirtual spinlock\n");
781
782 static_key_slow_inc(&paravirt_ticketlocks_enabled);
783
784 pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
785 pv_lock_ops.unlock_kick = kvm_unlock_kick;
786}
787#endif /* CONFIG_PARAVIRT_SPINLOCKS */
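
The ordering rule spelled out in kvm_lock_spinning() above (the "lock" pointer
may only be set non-NULL if the "want" ticket is correct) is the heart of the
waiter/kicker handshake: the kicker reads lock first and want second, so a
half-updated record can at worst cause a spurious wake-up, which the slow path
tolerates because arch_spin_lock() re-checks the ticket after every wake-up. A
minimal model of that publication order in C11 atomics, with illustrative names
rather than the kernel's per-CPU machinery:

    #include <stdatomic.h>
    #include <stddef.h>

    /* Per-CPU waiter record, modelled on struct kvm_lock_waiting (sketch only). */
    struct waiter {
        void * _Atomic lock;     /* non-NULL only while 'want' is valid */
        _Atomic unsigned want;
    };

    /* Waiter side: clear lock, publish want, then publish lock. */
    static void publish_wait(struct waiter *w, void *lock, unsigned want)
    {
        atomic_store(&w->lock, NULL);
        atomic_thread_fence(memory_order_release);   /* smp_wmb() in the patch */
        atomic_store(&w->want, want);
        atomic_thread_fence(memory_order_release);
        atomic_store(&w->lock, lock);
    }

    /* Kicker side: read lock before want, mirroring kvm_unlock_kick(). */
    static int should_kick(struct waiter *w, void *lock, unsigned next)
    {
        return atomic_load(&w->lock) == lock && atomic_load(&w->want) == next;
    }

    int main(void)
    {
        static struct waiter w;
        int a_lock;

        publish_wait(&w, &a_lock, 42);
        return should_kick(&w, &a_lock, 42) ? 0 : 1;
    }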
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 676b8c77a976..bbb6c7316341 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -4,25 +4,17 @@
4 */ 4 */
5#include <linux/spinlock.h> 5#include <linux/spinlock.h>
6#include <linux/module.h> 6#include <linux/module.h>
7#include <linux/jump_label.h>
7 8
8#include <asm/paravirt.h> 9#include <asm/paravirt.h>
9 10
10static inline void
11default_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
12{
13 arch_spin_lock(lock);
14}
15
16struct pv_lock_ops pv_lock_ops = { 11struct pv_lock_ops pv_lock_ops = {
17#ifdef CONFIG_SMP 12#ifdef CONFIG_SMP
18 .spin_is_locked = __ticket_spin_is_locked, 13 .lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop),
19 .spin_is_contended = __ticket_spin_is_contended, 14 .unlock_kick = paravirt_nop,
20
21 .spin_lock = __ticket_spin_lock,
22 .spin_lock_flags = default_spin_lock_flags,
23 .spin_trylock = __ticket_spin_trylock,
24 .spin_unlock = __ticket_spin_unlock,
25#endif 15#endif
26}; 16};
27EXPORT_SYMBOL(pv_lock_ops); 17EXPORT_SYMBOL(pv_lock_ops);
28 18
19struct static_key paravirt_ticketlocks_enabled = STATIC_KEY_INIT_FALSE;
20EXPORT_SYMBOL(paravirt_ticketlocks_enabled);
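
After this cleanup pv_lock_ops carries just two hooks, both defaulting to
no-ops; a backend (KVM above, Xen further down) installs real implementations
only when the host offers the matching facility, and the new
paravirt_ticketlocks_enabled static key gates the unlock slow path. A toy model
of that shape, with illustrative names rather than the real pvops or static-key
machinery:

    #include <stdio.h>

    /* Toy lock-ops table: defaults are no-ops, a hypervisor backend overrides
     * them at init time (kvm_spinlock_init() / xen_init_spinlocks() in the patch). */
    struct lock_ops {
        void (*lock_spinning)(void *lock, unsigned ticket);
        void (*unlock_kick)(void *lock, unsigned ticket);
    };

    static void nop_spinning(void *lock, unsigned ticket) { (void)lock; (void)ticket; }
    static void nop_kick(void *lock, unsigned ticket)     { (void)lock; (void)ticket; }

    static struct lock_ops lock_ops = {
        .lock_spinning = nop_spinning,
        .unlock_kick   = nop_kick,
    };

    static void demo_spinning(void *lock, unsigned ticket)
    {
        printf("blocking on lock %p for ticket %u\n", lock, ticket);
    }

    static void backend_init(void)   /* what a PV backend does when enabled */
    {
        lock_ops.lock_spinning = demo_spinning;
    }

    int main(void)
    {
        int lk;

        lock_ops.lock_spinning(&lk, 2);  /* default: nothing happens         */
        backend_init();
        lock_ops.lock_spinning(&lk, 2);  /* now routed to the backend's hook */
        return 0;
    }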
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 193097ef3d7d..15939e872db2 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -427,8 +427,7 @@ static void __init xen_init_cpuid_mask(void)
427 427
428 if (!xen_initial_domain()) 428 if (!xen_initial_domain())
429 cpuid_leaf1_edx_mask &= 429 cpuid_leaf1_edx_mask &=
430 ~((1 << X86_FEATURE_APIC) | /* disable local APIC */ 430 ~((1 << X86_FEATURE_ACPI)); /* disable ACPI */
431 (1 << X86_FEATURE_ACPI)); /* disable ACPI */
432 431
433 cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_X2APIC % 32)); 432 cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_X2APIC % 32));
434 433
@@ -735,8 +734,7 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
735 addr = (unsigned long)xen_int3; 734 addr = (unsigned long)xen_int3;
736 else if (addr == (unsigned long)stack_segment) 735 else if (addr == (unsigned long)stack_segment)
737 addr = (unsigned long)xen_stack_segment; 736 addr = (unsigned long)xen_stack_segment;
738 else if (addr == (unsigned long)double_fault || 737 else if (addr == (unsigned long)double_fault) {
739 addr == (unsigned long)nmi) {
740 /* Don't need to handle these */ 738 /* Don't need to handle these */
741 return 0; 739 return 0;
742#ifdef CONFIG_X86_MCE 740#ifdef CONFIG_X86_MCE
@@ -747,7 +745,12 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
747 */ 745 */
748 ; 746 ;
749#endif 747#endif
750 } else { 748 } else if (addr == (unsigned long)nmi)
749 /*
750 * Use the native version as well.
751 */
752 ;
753 else {
751 /* Some other trap using IST? */ 754 /* Some other trap using IST? */
752 if (WARN_ON(val->ist != 0)) 755 if (WARN_ON(val->ist != 0))
753 return 0; 756 return 0;
@@ -1710,6 +1713,8 @@ static void __init xen_hvm_guest_init(void)
1710 1713
1711 xen_hvm_init_shared_info(); 1714 xen_hvm_init_shared_info();
1712 1715
1716 xen_panic_handler_init();
1717
1713 if (xen_feature(XENFEAT_hvm_callback_vector)) 1718 if (xen_feature(XENFEAT_hvm_callback_vector))
1714 xen_have_vector_callback = 1; 1719 xen_have_vector_callback = 1;
1715 xen_hvm_smp_init(); 1720 xen_hvm_smp_init();
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 01a4dc015ae1..0da7f863056f 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -47,23 +47,18 @@ static void xen_restore_fl(unsigned long flags)
47 /* convert from IF type flag */ 47 /* convert from IF type flag */
48 flags = !(flags & X86_EFLAGS_IF); 48 flags = !(flags & X86_EFLAGS_IF);
49 49
50 /* There's a one instruction preempt window here. We need to 50 /* See xen_irq_enable() for why preemption must be disabled. */
51 make sure we're don't switch CPUs between getting the vcpu
52 pointer and updating the mask. */
53 preempt_disable(); 51 preempt_disable();
54 vcpu = this_cpu_read(xen_vcpu); 52 vcpu = this_cpu_read(xen_vcpu);
55 vcpu->evtchn_upcall_mask = flags; 53 vcpu->evtchn_upcall_mask = flags;
56 preempt_enable_no_resched();
57
58 /* Doesn't matter if we get preempted here, because any
59 pending event will get dealt with anyway. */
60 54
61 if (flags == 0) { 55 if (flags == 0) {
62 preempt_check_resched();
63 barrier(); /* unmask then check (avoid races) */ 56 barrier(); /* unmask then check (avoid races) */
64 if (unlikely(vcpu->evtchn_upcall_pending)) 57 if (unlikely(vcpu->evtchn_upcall_pending))
65 xen_force_evtchn_callback(); 58 xen_force_evtchn_callback();
66 } 59 preempt_enable();
60 } else
61 preempt_enable_no_resched();
67} 62}
68PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl); 63PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl);
69 64
@@ -82,10 +77,12 @@ static void xen_irq_enable(void)
82{ 77{
83 struct vcpu_info *vcpu; 78 struct vcpu_info *vcpu;
84 79
85 /* We don't need to worry about being preempted here, since 80 /*
86 either a) interrupts are disabled, so no preemption, or b) 81 * We may be preempted as soon as vcpu->evtchn_upcall_mask is
87 the caller is confused and is trying to re-enable interrupts 82 * cleared, so disable preemption to ensure we check for
88 on an indeterminate processor. */ 83 * events on the VCPU we are still running on.
84 */
85 preempt_disable();
89 86
90 vcpu = this_cpu_read(xen_vcpu); 87 vcpu = this_cpu_read(xen_vcpu);
91 vcpu->evtchn_upcall_mask = 0; 88 vcpu->evtchn_upcall_mask = 0;
@@ -96,6 +93,8 @@ static void xen_irq_enable(void)
96 barrier(); /* unmask then check (avoid races) */ 93 barrier(); /* unmask then check (avoid races) */
97 if (unlikely(vcpu->evtchn_upcall_pending)) 94 if (unlikely(vcpu->evtchn_upcall_pending))
98 xen_force_evtchn_callback(); 95 xen_force_evtchn_callback();
96
97 preempt_enable();
99} 98}
100PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable); 99PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable);
101 100
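
The xen/irq.c rework rests on two points: the code must stay on the VCPU whose
evtchn_upcall_mask it is writing (hence the widened preempt_disable() window),
and after unmasking it must re-check evtchn_upcall_pending so an event that
arrived while masked is not left stranded until the next interrupt. A user-space
model of the unmask-then-check step, with illustrative names rather than the
Xen ABI:

    #include <stdatomic.h>
    #include <stdio.h>

    /* Model of the relevant vcpu_info fields (names illustrative). */
    struct vcpu_model {
        _Atomic unsigned char upcall_mask;     /* 1 = event delivery masked */
        _Atomic unsigned char upcall_pending;  /* 1 = an event has arrived  */
    };

    static void force_evtchn_callback(void)
    {
        puts("re-raising event callback");      /* a hypercall in the real code */
    }

    static void irq_enable_model(struct vcpu_model *v)
    {
        atomic_store(&v->upcall_mask, 0);            /* unmask first ...       */
        atomic_thread_fence(memory_order_seq_cst);   /* barrier(), roughly     */
        if (atomic_load(&v->upcall_pending))         /* ... then check pending */
            force_evtchn_callback();
    }

    int main(void)
    {
        struct vcpu_model v = { 1, 1 };   /* masked, with an event already pending */

        irq_enable_model(&v);
        return 0;
    }

Preemption matters because vcpu_info is per-CPU state: if the task migrated
between reading the xen_vcpu pointer and the final pending check, it would
unmask one VCPU and check another, which is exactly the race the patch closes.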
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 95fb2aa5927e..8b901e8d782d 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -161,6 +161,7 @@
161#include <asm/xen/page.h> 161#include <asm/xen/page.h>
162#include <asm/xen/hypercall.h> 162#include <asm/xen/hypercall.h>
163#include <asm/xen/hypervisor.h> 163#include <asm/xen/hypervisor.h>
164#include <xen/balloon.h>
164#include <xen/grant_table.h> 165#include <xen/grant_table.h>
165 166
166#include "multicalls.h" 167#include "multicalls.h"
@@ -967,7 +968,10 @@ int m2p_remove_override(struct page *page,
967 if (kmap_op != NULL) { 968 if (kmap_op != NULL) {
968 if (!PageHighMem(page)) { 969 if (!PageHighMem(page)) {
969 struct multicall_space mcs; 970 struct multicall_space mcs;
970 struct gnttab_unmap_grant_ref *unmap_op; 971 struct gnttab_unmap_and_replace *unmap_op;
972 struct page *scratch_page = get_balloon_scratch_page();
973 unsigned long scratch_page_address = (unsigned long)
974 __va(page_to_pfn(scratch_page) << PAGE_SHIFT);
971 975
972 /* 976 /*
973 * It might be that we queued all the m2p grant table 977 * It might be that we queued all the m2p grant table
@@ -986,25 +990,31 @@ int m2p_remove_override(struct page *page,
986 printk(KERN_WARNING "m2p_remove_override: " 990 printk(KERN_WARNING "m2p_remove_override: "
987 "pfn %lx mfn %lx, failed to modify kernel mappings", 991 "pfn %lx mfn %lx, failed to modify kernel mappings",
988 pfn, mfn); 992 pfn, mfn);
993 put_balloon_scratch_page();
989 return -1; 994 return -1;
990 } 995 }
991 996
992 mcs = xen_mc_entry( 997 xen_mc_batch();
993 sizeof(struct gnttab_unmap_grant_ref)); 998
999 mcs = __xen_mc_entry(
1000 sizeof(struct gnttab_unmap_and_replace));
994 unmap_op = mcs.args; 1001 unmap_op = mcs.args;
995 unmap_op->host_addr = kmap_op->host_addr; 1002 unmap_op->host_addr = kmap_op->host_addr;
1003 unmap_op->new_addr = scratch_page_address;
996 unmap_op->handle = kmap_op->handle; 1004 unmap_op->handle = kmap_op->handle;
997 unmap_op->dev_bus_addr = 0;
998 1005
999 MULTI_grant_table_op(mcs.mc, 1006 MULTI_grant_table_op(mcs.mc,
1000 GNTTABOP_unmap_grant_ref, unmap_op, 1); 1007 GNTTABOP_unmap_and_replace, unmap_op, 1);
1008
1009 mcs = __xen_mc_entry(0);
1010 MULTI_update_va_mapping(mcs.mc, scratch_page_address,
1011 pfn_pte(page_to_pfn(scratch_page),
1012 PAGE_KERNEL_RO), 0);
1001 1013
1002 xen_mc_issue(PARAVIRT_LAZY_MMU); 1014 xen_mc_issue(PARAVIRT_LAZY_MMU);
1003 1015
1004 set_pte_at(&init_mm, address, ptep,
1005 pfn_pte(pfn, PAGE_KERNEL));
1006 __flush_tlb_single(address);
1007 kmap_op->host_addr = 0; 1016 kmap_op->host_addr = 0;
1017 put_balloon_scratch_page();
1008 } 1018 }
1009 } 1019 }
1010 1020
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 8f3eea6b80c5..09f3059cb00b 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -33,6 +33,9 @@
33/* These are code, but not functions. Defined in entry.S */ 33/* These are code, but not functions. Defined in entry.S */
34extern const char xen_hypervisor_callback[]; 34extern const char xen_hypervisor_callback[];
35extern const char xen_failsafe_callback[]; 35extern const char xen_failsafe_callback[];
36#ifdef CONFIG_X86_64
37extern const char nmi[];
38#endif
36extern void xen_sysenter_target(void); 39extern void xen_sysenter_target(void);
37extern void xen_syscall_target(void); 40extern void xen_syscall_target(void);
38extern void xen_syscall32_target(void); 41extern void xen_syscall32_target(void);
@@ -215,13 +218,19 @@ static void __init xen_set_identity_and_release_chunk(
215 unsigned long pfn; 218 unsigned long pfn;
216 219
217 /* 220 /*
218 * If the PFNs are currently mapped, the VA mapping also needs 221 * If the PFNs are currently mapped, clear the mappings
219 * to be updated to be 1:1. 222 * (except for the ISA region which must be 1:1 mapped) to
223 * release the refcounts (in Xen) on the original frames.
220 */ 224 */
221 for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) 225 for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) {
226 pte_t pte = __pte_ma(0);
227
228 if (pfn < PFN_UP(ISA_END_ADDRESS))
229 pte = mfn_pte(pfn, PAGE_KERNEL_IO);
230
222 (void)HYPERVISOR_update_va_mapping( 231 (void)HYPERVISOR_update_va_mapping(
223 (unsigned long)__va(pfn << PAGE_SHIFT), 232 (unsigned long)__va(pfn << PAGE_SHIFT), pte, 0);
224 mfn_pte(pfn, PAGE_KERNEL_IO), 0); 233 }
225 234
226 if (start_pfn < nr_pages) 235 if (start_pfn < nr_pages)
227 *released += xen_release_chunk( 236 *released += xen_release_chunk(
@@ -547,7 +556,13 @@ void xen_enable_syscall(void)
547 } 556 }
548#endif /* CONFIG_X86_64 */ 557#endif /* CONFIG_X86_64 */
549} 558}
550 559void __cpuinit xen_enable_nmi(void)
560{
561#ifdef CONFIG_X86_64
562 if (register_callback(CALLBACKTYPE_nmi, nmi))
563 BUG();
564#endif
565}
551void __init xen_arch_setup(void) 566void __init xen_arch_setup(void)
552{ 567{
553 xen_panic_handler_init(); 568 xen_panic_handler_init();
@@ -565,7 +580,7 @@ void __init xen_arch_setup(void)
565 580
566 xen_enable_sysenter(); 581 xen_enable_sysenter();
567 xen_enable_syscall(); 582 xen_enable_syscall();
568 583 xen_enable_nmi();
569#ifdef CONFIG_ACPI 584#ifdef CONFIG_ACPI
570 if (!(xen_start_info->flags & SIF_INITDOMAIN)) { 585 if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
571 printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); 586 printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
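
The rewritten xen_set_identity_and_release_chunk() clears mappings so Xen drops
its refcounts on the underlying frames, but leaves everything below
PFN_UP(ISA_END_ADDRESS) identity-mapped. A trivial check of where that boundary
falls (constants copied from the x86 headers, shown only for orientation):

    #include <stdio.h>

    #define PAGE_SHIFT      12
    #define ISA_END_ADDRESS 0x100000UL   /* 1 MiB, as in the arch/x86 headers */

    int main(void)
    {
        /* PFN_UP(ISA_END_ADDRESS); the region is page aligned, so this is 256 */
        unsigned long isa_end_pfn =
            (ISA_END_ADDRESS + (1UL << PAGE_SHIFT) - 1) >> PAGE_SHIFT;

        printf("pfns [0, %lu) keep their 1:1 mapping\n", isa_end_pfn);
        return 0;
    }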
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index b81c88e51daa..9235842cd76a 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -279,6 +279,7 @@ static void __init xen_smp_prepare_boot_cpu(void)
279 279
280 xen_filter_cpu_maps(); 280 xen_filter_cpu_maps();
281 xen_setup_vcpu_info_placement(); 281 xen_setup_vcpu_info_placement();
282 xen_init_spinlocks();
282} 283}
283 284
284static void __init xen_smp_prepare_cpus(unsigned int max_cpus) 285static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
@@ -572,6 +573,12 @@ static inline int xen_map_vector(int vector)
572 case IRQ_WORK_VECTOR: 573 case IRQ_WORK_VECTOR:
573 xen_vector = XEN_IRQ_WORK_VECTOR; 574 xen_vector = XEN_IRQ_WORK_VECTOR;
574 break; 575 break;
576#ifdef CONFIG_X86_64
577 case NMI_VECTOR:
578 case APIC_DM_NMI: /* Some use that instead of NMI_VECTOR */
579 xen_vector = XEN_NMI_VECTOR;
580 break;
581#endif
575 default: 582 default:
576 xen_vector = -1; 583 xen_vector = -1;
577 printk(KERN_ERR "xen: vector 0x%x is not implemented\n", 584 printk(KERN_ERR "xen: vector 0x%x is not implemented\n",
@@ -680,7 +687,6 @@ void __init xen_smp_init(void)
680{ 687{
681 smp_ops = xen_smp_ops; 688 smp_ops = xen_smp_ops;
682 xen_fill_possible_map(); 689 xen_fill_possible_map();
683 xen_init_spinlocks();
684} 690}
685 691
686static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) 692static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index cf3caee356b3..0438b9324a72 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -17,45 +17,44 @@
17#include "xen-ops.h" 17#include "xen-ops.h"
18#include "debugfs.h" 18#include "debugfs.h"
19 19
20#ifdef CONFIG_XEN_DEBUG_FS 20enum xen_contention_stat {
21static struct xen_spinlock_stats 21 TAKEN_SLOW,
22{ 22 TAKEN_SLOW_PICKUP,
23 u64 taken; 23 TAKEN_SLOW_SPURIOUS,
24 u32 taken_slow; 24 RELEASED_SLOW,
25 u32 taken_slow_nested; 25 RELEASED_SLOW_KICKED,
26 u32 taken_slow_pickup; 26 NR_CONTENTION_STATS
27 u32 taken_slow_spurious; 27};
28 u32 taken_slow_irqenable;
29 28
30 u64 released;
31 u32 released_slow;
32 u32 released_slow_kicked;
33 29
30#ifdef CONFIG_XEN_DEBUG_FS
34#define HISTO_BUCKETS 30 31#define HISTO_BUCKETS 30
35 u32 histo_spin_total[HISTO_BUCKETS+1]; 32static struct xen_spinlock_stats
36 u32 histo_spin_spinning[HISTO_BUCKETS+1]; 33{
34 u32 contention_stats[NR_CONTENTION_STATS];
37 u32 histo_spin_blocked[HISTO_BUCKETS+1]; 35 u32 histo_spin_blocked[HISTO_BUCKETS+1];
38
39 u64 time_total;
40 u64 time_spinning;
41 u64 time_blocked; 36 u64 time_blocked;
42} spinlock_stats; 37} spinlock_stats;
43 38
44static u8 zero_stats; 39static u8 zero_stats;
45 40
46static unsigned lock_timeout = 1 << 10;
47#define TIMEOUT lock_timeout
48
49static inline void check_zero(void) 41static inline void check_zero(void)
50{ 42{
51 if (unlikely(zero_stats)) { 43 u8 ret;
52 memset(&spinlock_stats, 0, sizeof(spinlock_stats)); 44 u8 old = ACCESS_ONCE(zero_stats);
53 zero_stats = 0; 45 if (unlikely(old)) {
46 ret = cmpxchg(&zero_stats, old, 0);
47 /* This ensures only one fellow resets the stat */
48 if (ret == old)
49 memset(&spinlock_stats, 0, sizeof(spinlock_stats));
54 } 50 }
55} 51}
56 52
57#define ADD_STATS(elem, val) \ 53static inline void add_stats(enum xen_contention_stat var, u32 val)
58 do { check_zero(); spinlock_stats.elem += (val); } while(0) 54{
55 check_zero();
56 spinlock_stats.contention_stats[var] += val;
57}
59 58
60static inline u64 spin_time_start(void) 59static inline u64 spin_time_start(void)
61{ 60{
@@ -74,22 +73,6 @@ static void __spin_time_accum(u64 delta, u32 *array)
74 array[HISTO_BUCKETS]++; 73 array[HISTO_BUCKETS]++;
75} 74}
76 75
77static inline void spin_time_accum_spinning(u64 start)
78{
79 u32 delta = xen_clocksource_read() - start;
80
81 __spin_time_accum(delta, spinlock_stats.histo_spin_spinning);
82 spinlock_stats.time_spinning += delta;
83}
84
85static inline void spin_time_accum_total(u64 start)
86{
87 u32 delta = xen_clocksource_read() - start;
88
89 __spin_time_accum(delta, spinlock_stats.histo_spin_total);
90 spinlock_stats.time_total += delta;
91}
92
93static inline void spin_time_accum_blocked(u64 start) 76static inline void spin_time_accum_blocked(u64 start)
94{ 77{
95 u32 delta = xen_clocksource_read() - start; 78 u32 delta = xen_clocksource_read() - start;
@@ -99,19 +82,15 @@ static inline void spin_time_accum_blocked(u64 start)
99} 82}
100#else /* !CONFIG_XEN_DEBUG_FS */ 83#else /* !CONFIG_XEN_DEBUG_FS */
101#define TIMEOUT (1 << 10) 84#define TIMEOUT (1 << 10)
102#define ADD_STATS(elem, val) do { (void)(val); } while(0) 85static inline void add_stats(enum xen_contention_stat var, u32 val)
86{
87}
103 88
104static inline u64 spin_time_start(void) 89static inline u64 spin_time_start(void)
105{ 90{
106 return 0; 91 return 0;
107} 92}
108 93
109static inline void spin_time_accum_total(u64 start)
110{
111}
112static inline void spin_time_accum_spinning(u64 start)
113{
114}
115static inline void spin_time_accum_blocked(u64 start) 94static inline void spin_time_accum_blocked(u64 start)
116{ 95{
117} 96}
@@ -134,227 +113,123 @@ typedef u16 xen_spinners_t;
134 asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory"); 113 asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory");
135#endif 114#endif
136 115
137struct xen_spinlock { 116struct xen_lock_waiting {
138 unsigned char lock; /* 0 -> free; 1 -> locked */ 117 struct arch_spinlock *lock;
139 xen_spinners_t spinners; /* count of waiting cpus */ 118 __ticket_t want;
140}; 119};
141 120
142static int xen_spin_is_locked(struct arch_spinlock *lock)
143{
144 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
145
146 return xl->lock != 0;
147}
148
149static int xen_spin_is_contended(struct arch_spinlock *lock)
150{
151 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
152
153 /* Not strictly true; this is only the count of contended
154 lock-takers entering the slow path. */
155 return xl->spinners != 0;
156}
157
158static int xen_spin_trylock(struct arch_spinlock *lock)
159{
160 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
161 u8 old = 1;
162
163 asm("xchgb %b0,%1"
164 : "+q" (old), "+m" (xl->lock) : : "memory");
165
166 return old == 0;
167}
168
169static DEFINE_PER_CPU(char *, irq_name);
170static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; 121static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
171static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners); 122static DEFINE_PER_CPU(char *, irq_name);
172 123static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting);
173/* 124static cpumask_t waiting_cpus;
174 * Mark a cpu as interested in a lock. Returns the CPU's previous
175 * lock of interest, in case we got preempted by an interrupt.
176 */
177static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl)
178{
179 struct xen_spinlock *prev;
180
181 prev = __this_cpu_read(lock_spinners);
182 __this_cpu_write(lock_spinners, xl);
183
184 wmb(); /* set lock of interest before count */
185
186 inc_spinners(xl);
187
188 return prev;
189}
190
191/*
192 * Mark a cpu as no longer interested in a lock. Restores previous
193 * lock of interest (NULL for none).
194 */
195static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev)
196{
197 dec_spinners(xl);
198 wmb(); /* decrement count before restoring lock */
199 __this_cpu_write(lock_spinners, prev);
200}
201 125
202static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable) 126static void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
203{ 127{
204 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
205 struct xen_spinlock *prev;
206 int irq = __this_cpu_read(lock_kicker_irq); 128 int irq = __this_cpu_read(lock_kicker_irq);
207 int ret; 129 struct xen_lock_waiting *w = &__get_cpu_var(lock_waiting);
130 int cpu = smp_processor_id();
208 u64 start; 131 u64 start;
132 unsigned long flags;
209 133
210 /* If kicker interrupts not initialized yet, just spin */ 134 /* If kicker interrupts not initialized yet, just spin */
211 if (irq == -1) 135 if (irq == -1)
212 return 0; 136 return;
213 137
214 start = spin_time_start(); 138 start = spin_time_start();
215 139
216 /* announce we're spinning */ 140 /*
217 prev = spinning_lock(xl); 141 * Make sure an interrupt handler can't upset things in a
142 * partially setup state.
143 */
144 local_irq_save(flags);
145 /*
146 * We don't really care if we're overwriting some other
147 * (lock,want) pair, as that would mean that we're currently
148 * in an interrupt context, and the outer context had
149 * interrupts enabled. That has already kicked the VCPU out
150 * of xen_poll_irq(), so it will just return spuriously and
151 * retry with newly setup (lock,want).
152 *
153 * The ordering protocol on this is that the "lock" pointer
154 * may only be set non-NULL if the "want" ticket is correct.
155 * If we're updating "want", we must first clear "lock".
156 */
157 w->lock = NULL;
158 smp_wmb();
159 w->want = want;
160 smp_wmb();
161 w->lock = lock;
218 162
219 ADD_STATS(taken_slow, 1); 163 /* This uses set_bit, which atomic and therefore a barrier */
220 ADD_STATS(taken_slow_nested, prev != NULL); 164 cpumask_set_cpu(cpu, &waiting_cpus);
165 add_stats(TAKEN_SLOW, 1);
221 166
222 do { 167 /* clear pending */
223 unsigned long flags; 168 xen_clear_irq_pending(irq);
224 169
225 /* clear pending */ 170 /* Only check lock once pending cleared */
226 xen_clear_irq_pending(irq); 171 barrier();
227 172
228 /* check again make sure it didn't become free while 173 /*
229 we weren't looking */ 174 * Mark entry to slowpath before doing the pickup test to make
230 ret = xen_spin_trylock(lock); 175 * sure we don't deadlock with an unlocker.
231 if (ret) { 176 */
232 ADD_STATS(taken_slow_pickup, 1); 177 __ticket_enter_slowpath(lock);
233 178
234 /* 179 /*
235 * If we interrupted another spinlock while it 180 * check again make sure it didn't become free while
236 * was blocking, make sure it doesn't block 181 * we weren't looking
237 * without rechecking the lock. 182 */
238 */ 183 if (ACCESS_ONCE(lock->tickets.head) == want) {
239 if (prev != NULL) 184 add_stats(TAKEN_SLOW_PICKUP, 1);
240 xen_set_irq_pending(irq); 185 goto out;
241 goto out; 186 }
242 }
243 187
244 flags = arch_local_save_flags(); 188 /* Allow interrupts while blocked */
245 if (irq_enable) { 189 local_irq_restore(flags);
246 ADD_STATS(taken_slow_irqenable, 1);
247 raw_local_irq_enable();
248 }
249 190
250 /* 191 /*
251 * Block until irq becomes pending. If we're 192 * If an interrupt happens here, it will leave the wakeup irq
252 * interrupted at this point (after the trylock but 193 * pending, which will cause xen_poll_irq() to return
253 * before entering the block), then the nested lock 194 * immediately.
254 * handler guarantees that the irq will be left 195 */
255 * pending if there's any chance the lock became free;
256 * xen_poll_irq() returns immediately if the irq is
257 * pending.
258 */
259 xen_poll_irq(irq);
260 196
261 raw_local_irq_restore(flags); 197 /* Block until irq becomes pending (or perhaps a spurious wakeup) */
198 xen_poll_irq(irq);
199 add_stats(TAKEN_SLOW_SPURIOUS, !xen_test_irq_pending(irq));
262 200
263 ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq)); 201 local_irq_save(flags);
264 } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
265 202
266 kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); 203 kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
267
268out: 204out:
269 unspinning_lock(xl, prev); 205 cpumask_clear_cpu(cpu, &waiting_cpus);
270 spin_time_accum_blocked(start); 206 w->lock = NULL;
271
272 return ret;
273}
274
275static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable)
276{
277 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
278 unsigned timeout;
279 u8 oldval;
280 u64 start_spin;
281
282 ADD_STATS(taken, 1);
283
284 start_spin = spin_time_start();
285
286 do {
287 u64 start_spin_fast = spin_time_start();
288
289 timeout = TIMEOUT;
290
291 asm("1: xchgb %1,%0\n"
292 " testb %1,%1\n"
293 " jz 3f\n"
294 "2: rep;nop\n"
295 " cmpb $0,%0\n"
296 " je 1b\n"
297 " dec %2\n"
298 " jnz 2b\n"
299 "3:\n"
300 : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
301 : "1" (1)
302 : "memory");
303 207
304 spin_time_accum_spinning(start_spin_fast); 208 local_irq_restore(flags);
305 209
306 } while (unlikely(oldval != 0 && 210 spin_time_accum_blocked(start);
307 (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable))));
308
309 spin_time_accum_total(start_spin);
310}
311
312static void xen_spin_lock(struct arch_spinlock *lock)
313{
314 __xen_spin_lock(lock, false);
315}
316
317static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags)
318{
319 __xen_spin_lock(lock, !raw_irqs_disabled_flags(flags));
320} 211}
212PV_CALLEE_SAVE_REGS_THUNK(xen_lock_spinning);
321 213
322static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl) 214static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
323{ 215{
324 int cpu; 216 int cpu;
325 217
326 ADD_STATS(released_slow, 1); 218 add_stats(RELEASED_SLOW, 1);
219
220 for_each_cpu(cpu, &waiting_cpus) {
221 const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu);
327 222
328 for_each_online_cpu(cpu) { 223 /* Make sure we read lock before want */
329 /* XXX should mix up next cpu selection */ 224 if (ACCESS_ONCE(w->lock) == lock &&
330 if (per_cpu(lock_spinners, cpu) == xl) { 225 ACCESS_ONCE(w->want) == next) {
331 ADD_STATS(released_slow_kicked, 1); 226 add_stats(RELEASED_SLOW_KICKED, 1);
332 xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); 227 xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
228 break;
333 } 229 }
334 } 230 }
335} 231}
336 232
337static void xen_spin_unlock(struct arch_spinlock *lock)
338{
339 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
340
341 ADD_STATS(released, 1);
342
343 smp_wmb(); /* make sure no writes get moved after unlock */
344 xl->lock = 0; /* release lock */
345
346 /*
347 * Make sure unlock happens before checking for waiting
348 * spinners. We need a strong barrier to enforce the
349 * write-read ordering to different memory locations, as the
350 * CPU makes no implied guarantees about their ordering.
351 */
352 mb();
353
354 if (unlikely(xl->spinners))
355 xen_spin_unlock_slow(xl);
356}
357
358static irqreturn_t dummy_handler(int irq, void *dev_id) 233static irqreturn_t dummy_handler(int irq, void *dev_id)
359{ 234{
360 BUG(); 235 BUG();
@@ -408,6 +283,8 @@ void xen_uninit_lock_cpu(int cpu)
408 per_cpu(irq_name, cpu) = NULL; 283 per_cpu(irq_name, cpu) = NULL;
409} 284}
410 285
286static bool xen_pvspin __initdata = true;
287
411void __init xen_init_spinlocks(void) 288void __init xen_init_spinlocks(void)
412{ 289{
413 /* 290 /*
@@ -417,15 +294,23 @@ void __init xen_init_spinlocks(void)
417 if (xen_hvm_domain()) 294 if (xen_hvm_domain())
418 return; 295 return;
419 296
420 BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t)); 297 if (!xen_pvspin) {
298 printk(KERN_DEBUG "xen: PV spinlocks disabled\n");
299 return;
300 }
421 301
422 pv_lock_ops.spin_is_locked = xen_spin_is_locked; 302 static_key_slow_inc(&paravirt_ticketlocks_enabled);
423 pv_lock_ops.spin_is_contended = xen_spin_is_contended; 303
424 pv_lock_ops.spin_lock = xen_spin_lock; 304 pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
425 pv_lock_ops.spin_lock_flags = xen_spin_lock_flags; 305 pv_lock_ops.unlock_kick = xen_unlock_kick;
426 pv_lock_ops.spin_trylock = xen_spin_trylock; 306}
427 pv_lock_ops.spin_unlock = xen_spin_unlock; 307
308static __init int xen_parse_nopvspin(char *arg)
309{
310 xen_pvspin = false;
311 return 0;
428} 312}
313early_param("xen_nopvspin", xen_parse_nopvspin);
429 314
430#ifdef CONFIG_XEN_DEBUG_FS 315#ifdef CONFIG_XEN_DEBUG_FS
431 316
@@ -442,37 +327,21 @@ static int __init xen_spinlock_debugfs(void)
442 327
443 debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats); 328 debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
444 329
445 debugfs_create_u32("timeout", 0644, d_spin_debug, &lock_timeout);
446
447 debugfs_create_u64("taken", 0444, d_spin_debug, &spinlock_stats.taken);
448 debugfs_create_u32("taken_slow", 0444, d_spin_debug, 330 debugfs_create_u32("taken_slow", 0444, d_spin_debug,
449 &spinlock_stats.taken_slow); 331 &spinlock_stats.contention_stats[TAKEN_SLOW]);
450 debugfs_create_u32("taken_slow_nested", 0444, d_spin_debug,
451 &spinlock_stats.taken_slow_nested);
452 debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug, 332 debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
453 &spinlock_stats.taken_slow_pickup); 333 &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
454 debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug, 334 debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug,
455 &spinlock_stats.taken_slow_spurious); 335 &spinlock_stats.contention_stats[TAKEN_SLOW_SPURIOUS]);
456 debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug,
457 &spinlock_stats.taken_slow_irqenable);
458 336
459 debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released);
460 debugfs_create_u32("released_slow", 0444, d_spin_debug, 337 debugfs_create_u32("released_slow", 0444, d_spin_debug,
461 &spinlock_stats.released_slow); 338 &spinlock_stats.contention_stats[RELEASED_SLOW]);
462 debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug, 339 debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
463 &spinlock_stats.released_slow_kicked); 340 &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);
464 341
465 debugfs_create_u64("time_spinning", 0444, d_spin_debug,
466 &spinlock_stats.time_spinning);
467 debugfs_create_u64("time_blocked", 0444, d_spin_debug, 342 debugfs_create_u64("time_blocked", 0444, d_spin_debug,
468 &spinlock_stats.time_blocked); 343 &spinlock_stats.time_blocked);
469 debugfs_create_u64("time_total", 0444, d_spin_debug,
470 &spinlock_stats.time_total);
471 344
472 debugfs_create_u32_array("histo_total", 0444, d_spin_debug,
473 spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1);
474 debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug,
475 spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1);
476 debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, 345 debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
477 spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); 346 spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
478 347
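
The spinlock.c rewrite above replaces the old byte-lock slow path with a paravirt ticketlock slow path: a waiting vCPU publishes the (lock, ticket) pair it wants in its per-cpu lock_waiting slot and blocks on its kicker event channel, while the unlocker scans waiting_cpus and kicks only the vCPU whose published ticket matches the new head. A stripped-down C sketch of that hand-off follows; struct waiter, block_on_event(), kick_cpu(), lock_slowpath() and unlock_kick() are illustrative stand-ins, not kernel API, and the smp_wmb() ordering, the slowpath flag and the stats are omitted.

#define NR_CPUS 4

struct waiter {
        void *lock;         /* lock this vCPU is blocked on, NULL if none */
        unsigned int want;  /* ticket it is waiting to own */
};

static struct waiter waiters[NR_CPUS];

/* stand-ins for xen_poll_irq() and xen_send_IPI_one() */
static void block_on_event(int cpu) { (void)cpu; }
static void kick_cpu(int cpu)       { (void)cpu; }

/* waiter side (xen_lock_spinning above); runs on the blocking vCPU */
static void lock_slowpath(int cpu, void *lock, unsigned int want)
{
        waiters[cpu].lock = NULL;   /* clear before changing the ticket */
        waiters[cpu].want = want;
        waiters[cpu].lock = lock;   /* publish last, so a kicker never
                                     * sees a stale (lock, want) pair */
        block_on_event(cpu);        /* wait for the unlocker's kick */
        waiters[cpu].lock = NULL;
}

/* unlocker side (xen_unlock_kick above); runs on the releasing vCPU */
static void unlock_kick(void *lock, unsigned int next)
{
        int cpu;

        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                if (waiters[cpu].lock == lock && waiters[cpu].want == next) {
                        kick_cpu(cpu);   /* wake exactly one matching waiter */
                        break;
                }
        }
}
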
diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig
index dbfd56446c31..94c0c74434ea 100644
--- a/drivers/char/tpm/Kconfig
+++ b/drivers/char/tpm/Kconfig
@@ -91,4 +91,16 @@ config TCG_ST33_I2C
91 To compile this driver as a module, choose M here; the module will be 91 To compile this driver as a module, choose M here; the module will be
92 called tpm_stm_st33_i2c. 92 called tpm_stm_st33_i2c.
93 93
94config TCG_XEN
95 tristate "XEN TPM Interface"
96 depends on TCG_TPM && XEN
97 select XEN_XENBUS_FRONTEND
98 ---help---
99 If you want to make TPM support available to a Xen user domain,
100 say Yes and it will be accessible from within Linux. See
101 the manpages for xl, xl.conf, and docs/misc/vtpm.txt in
102 the Xen source repository for more details.
103 To compile this driver as a module, choose M here; the module
104 will be called xen-tpmfront.
105
94endif # TCG_TPM 106endif # TCG_TPM
diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile
index a3736c97c65a..eb41ff97d0ad 100644
--- a/drivers/char/tpm/Makefile
+++ b/drivers/char/tpm/Makefile
@@ -18,3 +18,4 @@ obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o
18obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o 18obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o
19obj-$(CONFIG_TCG_IBMVTPM) += tpm_ibmvtpm.o 19obj-$(CONFIG_TCG_IBMVTPM) += tpm_ibmvtpm.o
20obj-$(CONFIG_TCG_ST33_I2C) += tpm_i2c_stm_st33.o 20obj-$(CONFIG_TCG_ST33_I2C) += tpm_i2c_stm_st33.o
21obj-$(CONFIG_TCG_XEN) += xen-tpmfront.o
diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c
new file mode 100644
index 000000000000..7a7929ba2658
--- /dev/null
+++ b/drivers/char/tpm/xen-tpmfront.c
@@ -0,0 +1,473 @@
1/*
2 * Implementation of the Xen vTPM device frontend
3 *
4 * Author: Daniel De Graaf <dgdegra@tycho.nsa.gov>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2,
8 * as published by the Free Software Foundation.
9 */
10#include <linux/errno.h>
11#include <linux/err.h>
12#include <linux/interrupt.h>
13#include <xen/events.h>
14#include <xen/interface/io/tpmif.h>
15#include <xen/grant_table.h>
16#include <xen/xenbus.h>
17#include <xen/page.h>
18#include "tpm.h"
19
20struct tpm_private {
21 struct tpm_chip *chip;
22 struct xenbus_device *dev;
23
24 struct vtpm_shared_page *shr;
25
26 unsigned int evtchn;
27 int ring_ref;
28 domid_t backend_id;
29};
30
31enum status_bits {
32 VTPM_STATUS_RUNNING = 0x1,
33 VTPM_STATUS_IDLE = 0x2,
34 VTPM_STATUS_RESULT = 0x4,
35 VTPM_STATUS_CANCELED = 0x8,
36};
37
38static u8 vtpm_status(struct tpm_chip *chip)
39{
40 struct tpm_private *priv = TPM_VPRIV(chip);
41 switch (priv->shr->state) {
42 case VTPM_STATE_IDLE:
43 return VTPM_STATUS_IDLE | VTPM_STATUS_CANCELED;
44 case VTPM_STATE_FINISH:
45 return VTPM_STATUS_IDLE | VTPM_STATUS_RESULT;
46 case VTPM_STATE_SUBMIT:
47 case VTPM_STATE_CANCEL: /* cancel requested, not yet canceled */
48 return VTPM_STATUS_RUNNING;
49 default:
50 return 0;
51 }
52}
53
54static bool vtpm_req_canceled(struct tpm_chip *chip, u8 status)
55{
56 return status & VTPM_STATUS_CANCELED;
57}
58
59static void vtpm_cancel(struct tpm_chip *chip)
60{
61 struct tpm_private *priv = TPM_VPRIV(chip);
62 priv->shr->state = VTPM_STATE_CANCEL;
63 wmb();
64 notify_remote_via_evtchn(priv->evtchn);
65}
66
67static unsigned int shr_data_offset(struct vtpm_shared_page *shr)
68{
69 return sizeof(*shr) + sizeof(u32) * shr->nr_extra_pages;
70}
71
72static int vtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
73{
74 struct tpm_private *priv = TPM_VPRIV(chip);
75 struct vtpm_shared_page *shr = priv->shr;
76 unsigned int offset = shr_data_offset(shr);
77
78 u32 ordinal;
79 unsigned long duration;
80
81 if (offset > PAGE_SIZE)
82 return -EINVAL;
83
84 if (offset + count > PAGE_SIZE)
85 return -EINVAL;
86
87 /* Wait for completion of any existing command or cancellation */
88 if (wait_for_tpm_stat(chip, VTPM_STATUS_IDLE, chip->vendor.timeout_c,
89 &chip->vendor.read_queue, true) < 0) {
90 vtpm_cancel(chip);
91 return -ETIME;
92 }
93
94 memcpy(offset + (u8 *)shr, buf, count);
95 shr->length = count;
96 barrier();
97 shr->state = VTPM_STATE_SUBMIT;
98 wmb();
99 notify_remote_via_evtchn(priv->evtchn);
100
101 ordinal = be32_to_cpu(((struct tpm_input_header*)buf)->ordinal);
102 duration = tpm_calc_ordinal_duration(chip, ordinal);
103
104 if (wait_for_tpm_stat(chip, VTPM_STATUS_IDLE, duration,
105 &chip->vendor.read_queue, true) < 0) {
106 /* got a signal or timeout, try to cancel */
107 vtpm_cancel(chip);
108 return -ETIME;
109 }
110
111 return count;
112}
113
114static int vtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count)
115{
116 struct tpm_private *priv = TPM_VPRIV(chip);
117 struct vtpm_shared_page *shr = priv->shr;
118 unsigned int offset = shr_data_offset(shr);
119 size_t length = shr->length;
120
121 if (shr->state == VTPM_STATE_IDLE)
122 return -ECANCELED;
123
124 /* In theory the wait at the end of _send makes this one unnecessary */
125 if (wait_for_tpm_stat(chip, VTPM_STATUS_RESULT, chip->vendor.timeout_c,
126 &chip->vendor.read_queue, true) < 0) {
127 vtpm_cancel(chip);
128 return -ETIME;
129 }
130
131 if (offset > PAGE_SIZE)
132 return -EIO;
133
134 if (offset + length > PAGE_SIZE)
135 length = PAGE_SIZE - offset;
136
137 if (length > count)
138 length = count;
139
140 memcpy(buf, offset + (u8 *)shr, length);
141
142 return length;
143}
144
145ssize_t tpm_show_locality(struct device *dev, struct device_attribute *attr,
146 char *buf)
147{
148 struct tpm_chip *chip = dev_get_drvdata(dev);
149 struct tpm_private *priv = TPM_VPRIV(chip);
150 u8 locality = priv->shr->locality;
151
152 return sprintf(buf, "%d\n", locality);
153}
154
155ssize_t tpm_store_locality(struct device *dev, struct device_attribute *attr,
156 const char *buf, size_t len)
157{
158 struct tpm_chip *chip = dev_get_drvdata(dev);
159 struct tpm_private *priv = TPM_VPRIV(chip);
160 u8 val;
161
162 int rv = kstrtou8(buf, 0, &val);
163 if (rv)
164 return rv;
165
166 priv->shr->locality = val;
167
168 return len;
169}
170
171static const struct file_operations vtpm_ops = {
172 .owner = THIS_MODULE,
173 .llseek = no_llseek,
174 .open = tpm_open,
175 .read = tpm_read,
176 .write = tpm_write,
177 .release = tpm_release,
178};
179
180static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL);
181static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL);
182static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
183static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
184static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
185static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated,
186 NULL);
187static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
188static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
189static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL);
190static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
191static DEVICE_ATTR(locality, S_IRUGO | S_IWUSR, tpm_show_locality,
192 tpm_store_locality);
193
194static struct attribute *vtpm_attrs[] = {
195 &dev_attr_pubek.attr,
196 &dev_attr_pcrs.attr,
197 &dev_attr_enabled.attr,
198 &dev_attr_active.attr,
199 &dev_attr_owned.attr,
200 &dev_attr_temp_deactivated.attr,
201 &dev_attr_caps.attr,
202 &dev_attr_cancel.attr,
203 &dev_attr_durations.attr,
204 &dev_attr_timeouts.attr,
205 &dev_attr_locality.attr,
206 NULL,
207};
208
209static struct attribute_group vtpm_attr_grp = {
210 .attrs = vtpm_attrs,
211};
212
213#define TPM_LONG_TIMEOUT (10 * 60 * HZ)
214
215static const struct tpm_vendor_specific tpm_vtpm = {
216 .status = vtpm_status,
217 .recv = vtpm_recv,
218 .send = vtpm_send,
219 .cancel = vtpm_cancel,
220 .req_complete_mask = VTPM_STATUS_IDLE | VTPM_STATUS_RESULT,
221 .req_complete_val = VTPM_STATUS_IDLE | VTPM_STATUS_RESULT,
222 .req_canceled = vtpm_req_canceled,
223 .attr_group = &vtpm_attr_grp,
224 .miscdev = {
225 .fops = &vtpm_ops,
226 },
227 .duration = {
228 TPM_LONG_TIMEOUT,
229 TPM_LONG_TIMEOUT,
230 TPM_LONG_TIMEOUT,
231 },
232};
233
234static irqreturn_t tpmif_interrupt(int dummy, void *dev_id)
235{
236 struct tpm_private *priv = dev_id;
237
238 switch (priv->shr->state) {
239 case VTPM_STATE_IDLE:
240 case VTPM_STATE_FINISH:
241 wake_up_interruptible(&priv->chip->vendor.read_queue);
242 break;
243 case VTPM_STATE_SUBMIT:
244 case VTPM_STATE_CANCEL:
245 default:
246 break;
247 }
248 return IRQ_HANDLED;
249}
250
251static int setup_chip(struct device *dev, struct tpm_private *priv)
252{
253 struct tpm_chip *chip;
254
255 chip = tpm_register_hardware(dev, &tpm_vtpm);
256 if (!chip)
257 return -ENODEV;
258
259 init_waitqueue_head(&chip->vendor.read_queue);
260
261 priv->chip = chip;
262 TPM_VPRIV(chip) = priv;
263
264 return 0;
265}
266
267/* caller must clean up in case of errors */
268static int setup_ring(struct xenbus_device *dev, struct tpm_private *priv)
269{
270 struct xenbus_transaction xbt;
271 const char *message = NULL;
272 int rv;
273
274 priv->shr = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
275 if (!priv->shr) {
276 xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
277 return -ENOMEM;
278 }
279
280 rv = xenbus_grant_ring(dev, virt_to_mfn(priv->shr));
281 if (rv < 0)
282 return rv;
283
284 priv->ring_ref = rv;
285
286 rv = xenbus_alloc_evtchn(dev, &priv->evtchn);
287 if (rv)
288 return rv;
289
290 rv = bind_evtchn_to_irqhandler(priv->evtchn, tpmif_interrupt, 0,
291 "tpmif", priv);
292 if (rv <= 0) {
293 xenbus_dev_fatal(dev, rv, "allocating TPM irq");
294 return rv;
295 }
296 priv->chip->vendor.irq = rv;
297
298 again:
299 rv = xenbus_transaction_start(&xbt);
300 if (rv) {
301 xenbus_dev_fatal(dev, rv, "starting transaction");
302 return rv;
303 }
304
305 rv = xenbus_printf(xbt, dev->nodename,
306 "ring-ref", "%u", priv->ring_ref);
307 if (rv) {
308 message = "writing ring-ref";
309 goto abort_transaction;
310 }
311
312 rv = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
313 priv->evtchn);
314 if (rv) {
315 message = "writing event-channel";
316 goto abort_transaction;
317 }
318
319 rv = xenbus_printf(xbt, dev->nodename, "feature-protocol-v2", "1");
320 if (rv) {
321 message = "writing feature-protocol-v2";
322 goto abort_transaction;
323 }
324
325 rv = xenbus_transaction_end(xbt, 0);
326 if (rv == -EAGAIN)
327 goto again;
328 if (rv) {
329 xenbus_dev_fatal(dev, rv, "completing transaction");
330 return rv;
331 }
332
333 xenbus_switch_state(dev, XenbusStateInitialised);
334
335 return 0;
336
337 abort_transaction:
338 xenbus_transaction_end(xbt, 1);
339 if (message)
340 xenbus_dev_error(dev, rv, "%s", message);
341
342 return rv;
343}
344
345static void ring_free(struct tpm_private *priv)
346{
347 if (!priv)
348 return;
349
350 if (priv->ring_ref)
351 gnttab_end_foreign_access(priv->ring_ref, 0,
352 (unsigned long)priv->shr);
353 else
354 free_page((unsigned long)priv->shr);
355
356 if (priv->chip && priv->chip->vendor.irq)
357 unbind_from_irqhandler(priv->chip->vendor.irq, priv);
358
359 kfree(priv);
360}
361
362static int tpmfront_probe(struct xenbus_device *dev,
363 const struct xenbus_device_id *id)
364{
365 struct tpm_private *priv;
366 int rv;
367
368 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
369 if (!priv) {
370 xenbus_dev_fatal(dev, -ENOMEM, "allocating priv structure");
371 return -ENOMEM;
372 }
373
374 rv = setup_chip(&dev->dev, priv);
375 if (rv) {
376 kfree(priv);
377 return rv;
378 }
379
380 rv = setup_ring(dev, priv);
381 if (rv) {
382 tpm_remove_hardware(&dev->dev);
383 ring_free(priv);
384 return rv;
385 }
386
387 tpm_get_timeouts(priv->chip);
388
389 dev_set_drvdata(&dev->dev, priv->chip);
390
391 return rv;
392}
393
394static int tpmfront_remove(struct xenbus_device *dev)
395{
396 struct tpm_chip *chip = dev_get_drvdata(&dev->dev);
397 struct tpm_private *priv = TPM_VPRIV(chip);
398 tpm_remove_hardware(&dev->dev);
399 ring_free(priv);
400 TPM_VPRIV(chip) = NULL;
401 return 0;
402}
403
404static int tpmfront_resume(struct xenbus_device *dev)
405{
406 /* A suspend/resume/migrate will interrupt a vTPM anyway */
407 tpmfront_remove(dev);
408 return tpmfront_probe(dev, NULL);
409}
410
411static void backend_changed(struct xenbus_device *dev,
412 enum xenbus_state backend_state)
413{
414 int val;
415
416 switch (backend_state) {
417 case XenbusStateInitialised:
418 case XenbusStateConnected:
419 if (dev->state == XenbusStateConnected)
420 break;
421
422 if (xenbus_scanf(XBT_NIL, dev->otherend,
423 "feature-protocol-v2", "%d", &val) < 0)
424 val = 0;
425 if (!val) {
426 xenbus_dev_fatal(dev, -EINVAL,
427 "vTPM protocol 2 required");
428 return;
429 }
430 xenbus_switch_state(dev, XenbusStateConnected);
431 break;
432
433 case XenbusStateClosing:
434 case XenbusStateClosed:
435 device_unregister(&dev->dev);
436 xenbus_frontend_closed(dev);
437 break;
438 default:
439 break;
440 }
441}
442
443static const struct xenbus_device_id tpmfront_ids[] = {
444 { "vtpm" },
445 { "" }
446};
447MODULE_ALIAS("xen:vtpm");
448
449static DEFINE_XENBUS_DRIVER(tpmfront, ,
450 .probe = tpmfront_probe,
451 .remove = tpmfront_remove,
452 .resume = tpmfront_resume,
453 .otherend_changed = backend_changed,
454 );
455
456static int __init xen_tpmfront_init(void)
457{
458 if (!xen_domain())
459 return -ENODEV;
460
461 return xenbus_register_frontend(&tpmfront_driver);
462}
463module_init(xen_tpmfront_init);
464
465static void __exit xen_tpmfront_exit(void)
466{
467 xenbus_unregister_driver(&tpmfront_driver);
468}
469module_exit(xen_tpmfront_exit);
470
471MODULE_AUTHOR("Daniel De Graaf <dgdegra@tycho.nsa.gov>");
472MODULE_DESCRIPTION("Xen vTPM Driver");
473MODULE_LICENSE("GPL");
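
setup_ring() above publishes ring-ref, event-channel and feature-protocol-v2 inside a xenbus transaction and restarts the whole transaction when the commit returns -EAGAIN, i.e. when another xenstore writer raced it. Stripped of the xenbus_dev_fatal() reporting, the retry idiom looks roughly like this; publish_ring() itself and its reduced key set are only an illustration.

#include <xen/xenbus.h>

static int publish_ring(struct xenbus_device *dev,
                        unsigned int ring_ref, unsigned int evtchn)
{
        struct xenbus_transaction xbt;
        int err;

again:
        err = xenbus_transaction_start(&xbt);
        if (err)
                return err;

        err = xenbus_printf(xbt, dev->nodename, "ring-ref", "%u", ring_ref);
        if (!err)
                err = xenbus_printf(xbt, dev->nodename,
                                    "event-channel", "%u", evtchn);
        if (err) {
                xenbus_transaction_end(xbt, 1);  /* abort */
                return err;
        }

        err = xenbus_transaction_end(xbt, 0);    /* commit */
        if (err == -EAGAIN)
                goto again;                      /* lost a race, redo it */
        return err;
}
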
diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c
index 682210d778bd..e61c36cbb866 100644
--- a/drivers/tty/hvc/hvc_xen.c
+++ b/drivers/tty/hvc/hvc_xen.c
@@ -208,7 +208,7 @@ static int xen_hvm_console_init(void)
208 208
209 info = vtermno_to_xencons(HVC_COOKIE); 209 info = vtermno_to_xencons(HVC_COOKIE);
210 if (!info) { 210 if (!info) {
211 info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO); 211 info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL);
212 if (!info) 212 if (!info)
213 return -ENOMEM; 213 return -ENOMEM;
214 } else if (info->intf != NULL) { 214 } else if (info->intf != NULL) {
@@ -257,7 +257,7 @@ static int xen_pv_console_init(void)
257 257
258 info = vtermno_to_xencons(HVC_COOKIE); 258 info = vtermno_to_xencons(HVC_COOKIE);
259 if (!info) { 259 if (!info) {
260 info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO); 260 info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL);
261 if (!info) 261 if (!info)
262 return -ENOMEM; 262 return -ENOMEM;
263 } else if (info->intf != NULL) { 263 } else if (info->intf != NULL) {
@@ -284,7 +284,7 @@ static int xen_initial_domain_console_init(void)
284 284
285 info = vtermno_to_xencons(HVC_COOKIE); 285 info = vtermno_to_xencons(HVC_COOKIE);
286 if (!info) { 286 if (!info) {
287 info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO); 287 info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL);
288 if (!info) 288 if (!info)
289 return -ENOMEM; 289 return -ENOMEM;
290 } 290 }
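
The three hvc_xen.c hunks only drop a redundant flag: kzalloc() already implies __GFP_ZERO, so passing it explicitly changes nothing. Roughly, ignoring the slab internals:

#include <linux/slab.h>

static inline void *zalloc_example(size_t size, gfp_t flags)
{
        /* what kzalloc(size, flags) boils down to */
        return kmalloc(size, flags | __GFP_ZERO);
}
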
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 2a2ef97697b2..3101cf6daf56 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -38,6 +38,7 @@
38 38
39#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt 39#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
40 40
41#include <linux/cpu.h>
41#include <linux/kernel.h> 42#include <linux/kernel.h>
42#include <linux/sched.h> 43#include <linux/sched.h>
43#include <linux/errno.h> 44#include <linux/errno.h>
@@ -52,6 +53,7 @@
52#include <linux/notifier.h> 53#include <linux/notifier.h>
53#include <linux/memory.h> 54#include <linux/memory.h>
54#include <linux/memory_hotplug.h> 55#include <linux/memory_hotplug.h>
56#include <linux/percpu-defs.h>
55 57
56#include <asm/page.h> 58#include <asm/page.h>
57#include <asm/pgalloc.h> 59#include <asm/pgalloc.h>
@@ -90,6 +92,8 @@ EXPORT_SYMBOL_GPL(balloon_stats);
90 92
91/* We increase/decrease in batches which fit in a page */ 93/* We increase/decrease in batches which fit in a page */
92static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)]; 94static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)];
95static DEFINE_PER_CPU(struct page *, balloon_scratch_page);
96
93 97
94/* List of ballooned pages, threaded through the mem_map array. */ 98/* List of ballooned pages, threaded through the mem_map array. */
95static LIST_HEAD(ballooned_pages); 99static LIST_HEAD(ballooned_pages);
@@ -412,7 +416,8 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
412 if (xen_pv_domain() && !PageHighMem(page)) { 416 if (xen_pv_domain() && !PageHighMem(page)) {
413 ret = HYPERVISOR_update_va_mapping( 417 ret = HYPERVISOR_update_va_mapping(
414 (unsigned long)__va(pfn << PAGE_SHIFT), 418 (unsigned long)__va(pfn << PAGE_SHIFT),
415 __pte_ma(0), 0); 419 pfn_pte(page_to_pfn(__get_cpu_var(balloon_scratch_page)),
420 PAGE_KERNEL_RO), 0);
416 BUG_ON(ret); 421 BUG_ON(ret);
417 } 422 }
418#endif 423#endif
@@ -425,7 +430,13 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
425 /* No more mappings: invalidate P2M and add to balloon. */ 430 /* No more mappings: invalidate P2M and add to balloon. */
426 for (i = 0; i < nr_pages; i++) { 431 for (i = 0; i < nr_pages; i++) {
427 pfn = mfn_to_pfn(frame_list[i]); 432 pfn = mfn_to_pfn(frame_list[i]);
428 __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); 433 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
434 unsigned long p;
435 struct page *pg;
436 pg = __get_cpu_var(balloon_scratch_page);
437 p = page_to_pfn(pg);
438 __set_phys_to_machine(pfn, pfn_to_mfn(p));
439 }
429 balloon_append(pfn_to_page(pfn)); 440 balloon_append(pfn_to_page(pfn));
430 } 441 }
431 442
@@ -480,6 +491,18 @@ static void balloon_process(struct work_struct *work)
480 mutex_unlock(&balloon_mutex); 491 mutex_unlock(&balloon_mutex);
481} 492}
482 493
494struct page *get_balloon_scratch_page(void)
495{
496 struct page *ret = get_cpu_var(balloon_scratch_page);
497 BUG_ON(ret == NULL);
498 return ret;
499}
500
501void put_balloon_scratch_page(void)
502{
503 put_cpu_var(balloon_scratch_page);
504}
505
483/* Resets the Xen limit, sets new target, and kicks off processing. */ 506/* Resets the Xen limit, sets new target, and kicks off processing. */
484void balloon_set_new_target(unsigned long target) 507void balloon_set_new_target(unsigned long target)
485{ 508{
@@ -573,13 +596,47 @@ static void __init balloon_add_region(unsigned long start_pfn,
573 } 596 }
574} 597}
575 598
599static int __cpuinit balloon_cpu_notify(struct notifier_block *self,
600 unsigned long action, void *hcpu)
601{
602 int cpu = (long)hcpu;
603 switch (action) {
604 case CPU_UP_PREPARE:
605 if (per_cpu(balloon_scratch_page, cpu) != NULL)
606 break;
607 per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL);
608 if (per_cpu(balloon_scratch_page, cpu) == NULL) {
609 pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu);
610 return NOTIFY_BAD;
611 }
612 break;
613 default:
614 break;
615 }
616 return NOTIFY_OK;
617}
618
619static struct notifier_block balloon_cpu_notifier __cpuinitdata = {
620 .notifier_call = balloon_cpu_notify,
621};
622
576static int __init balloon_init(void) 623static int __init balloon_init(void)
577{ 624{
578 int i; 625 int i, cpu;
579 626
580 if (!xen_domain()) 627 if (!xen_domain())
581 return -ENODEV; 628 return -ENODEV;
582 629
630 for_each_online_cpu(cpu)
631 {
632 per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL);
633 if (per_cpu(balloon_scratch_page, cpu) == NULL) {
634 pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu);
635 return -ENOMEM;
636 }
637 }
638 register_cpu_notifier(&balloon_cpu_notifier);
639
583 pr_info("Initialising balloon driver\n"); 640 pr_info("Initialising balloon driver\n");
584 641
585 balloon_stats.current_pages = xen_pv_domain() 642 balloon_stats.current_pages = xen_pv_domain()
@@ -616,4 +673,15 @@ static int __init balloon_init(void)
616 673
617subsys_initcall(balloon_init); 674subsys_initcall(balloon_init);
618 675
676static int __init balloon_clear(void)
677{
678 int cpu;
679
680 for_each_possible_cpu(cpu)
681 per_cpu(balloon_scratch_page, cpu) = NULL;
682
683 return 0;
684}
685early_initcall(balloon_clear);
686
619MODULE_LICENSE("GPL"); 687MODULE_LICENSE("GPL");
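
With the balloon.c change above, a ballooned-out PFN is no longer left pointing at an invalid mapping; it is remapped read-only to a per-cpu scratch page, allocated for every online CPU at init and via the CPU notifier for hotplugged CPUs. The get/put pair wraps get_cpu_var(), so preemption is disabled while the scratch page is held. A hypothetical caller, assuming the two accessors are exported through <xen/balloon.h>:

#include <linux/mm.h>
#include <xen/balloon.h>

static unsigned long scratch_pfn_this_cpu(void)
{
        struct page *page = get_balloon_scratch_page();  /* preemption off */
        unsigned long pfn = page_to_pfn(page);

        put_balloon_scratch_page();                      /* preemption back on */
        return pfn;
}
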
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 5e8be462aed5..4035e833ea26 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -56,6 +56,7 @@
56#include <xen/interface/hvm/params.h> 56#include <xen/interface/hvm/params.h>
57#include <xen/interface/physdev.h> 57#include <xen/interface/physdev.h>
58#include <xen/interface/sched.h> 58#include <xen/interface/sched.h>
59#include <xen/interface/vcpu.h>
59#include <asm/hw_irq.h> 60#include <asm/hw_irq.h>
60 61
61/* 62/*
@@ -1212,7 +1213,17 @@ EXPORT_SYMBOL_GPL(evtchn_put);
1212 1213
1213void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) 1214void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
1214{ 1215{
1215 int irq = per_cpu(ipi_to_irq, cpu)[vector]; 1216 int irq;
1217
1218#ifdef CONFIG_X86
1219 if (unlikely(vector == XEN_NMI_VECTOR)) {
1220 int rc = HYPERVISOR_vcpu_op(VCPUOP_send_nmi, cpu, NULL);
1221 if (rc < 0)
1222 printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n", cpu, rc);
1223 return;
1224 }
1225#endif
1226 irq = per_cpu(ipi_to_irq, cpu)[vector];
1216 BUG_ON(irq < 0); 1227 BUG_ON(irq < 0);
1217 notify_remote_via_irq(irq); 1228 notify_remote_via_irq(irq);
1218} 1229}
@@ -1379,14 +1390,21 @@ static void __xen_evtchn_do_upcall(void)
1379 1390
1380 pending_bits = active_evtchns(cpu, s, word_idx); 1391 pending_bits = active_evtchns(cpu, s, word_idx);
1381 bit_idx = 0; /* usually scan entire word from start */ 1392 bit_idx = 0; /* usually scan entire word from start */
1393 /*
1394 * We scan the starting word in two parts.
1395 *
1396 * 1st time: start in the middle, scanning the
1397 * upper bits.
1398 *
1399 * 2nd time: scan the whole word (not just the
1400 * parts skipped in the first pass) -- if an
1401 * event in the previously scanned bits is
1402 * pending again it would just be scanned on
1403 * the next loop anyway.
1404 */
1382 if (word_idx == start_word_idx) { 1405 if (word_idx == start_word_idx) {
1383 /* We scan the starting word in two parts */
1384 if (i == 0) 1406 if (i == 0)
1385 /* 1st time: start in the middle */
1386 bit_idx = start_bit_idx; 1407 bit_idx = start_bit_idx;
1387 else
1388 /* 2nd time: mask bits done already */
1389 bit_idx &= (1UL << start_bit_idx) - 1;
1390 } 1408 }
1391 1409
1392 do { 1410 do {
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index b6165e047f48..8b3a69a06c39 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -57,6 +57,7 @@
57 57
58struct per_user_data { 58struct per_user_data {
59 struct mutex bind_mutex; /* serialize bind/unbind operations */ 59 struct mutex bind_mutex; /* serialize bind/unbind operations */
60 struct rb_root evtchns;
60 61
61 /* Notification ring, accessed via /dev/xen/evtchn. */ 62 /* Notification ring, accessed via /dev/xen/evtchn. */
62#define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t)) 63#define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t))
@@ -64,6 +65,7 @@ struct per_user_data {
64 evtchn_port_t *ring; 65 evtchn_port_t *ring;
65 unsigned int ring_cons, ring_prod, ring_overflow; 66 unsigned int ring_cons, ring_prod, ring_overflow;
66 struct mutex ring_cons_mutex; /* protect against concurrent readers */ 67 struct mutex ring_cons_mutex; /* protect against concurrent readers */
 68	spinlock_t ring_prod_lock; /* protect against concurrent interrupts */
67 69
68 /* Processes wait on this queue when ring is empty. */ 70 /* Processes wait on this queue when ring is empty. */
69 wait_queue_head_t evtchn_wait; 71 wait_queue_head_t evtchn_wait;
@@ -71,54 +73,79 @@ struct per_user_data {
71 const char *name; 73 const char *name;
72}; 74};
73 75
74/* 76struct user_evtchn {
75 * Who's bound to each port? This is logically an array of struct 77 struct rb_node node;
76 * per_user_data *, but we encode the current enabled-state in bit 0. 78 struct per_user_data *user;
77 */ 79 unsigned port;
78static unsigned long *port_user; 80 bool enabled;
79static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */ 81};
80 82
81static inline struct per_user_data *get_port_user(unsigned port) 83static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
82{ 84{
83 return (struct per_user_data *)(port_user[port] & ~1); 85 struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL;
84}
85 86
86static inline void set_port_user(unsigned port, struct per_user_data *u) 87 while (*new) {
87{ 88 struct user_evtchn *this;
88 port_user[port] = (unsigned long)u; 89
90 this = container_of(*new, struct user_evtchn, node);
91
92 parent = *new;
93 if (this->port < evtchn->port)
94 new = &((*new)->rb_left);
95 else if (this->port > evtchn->port)
96 new = &((*new)->rb_right);
97 else
98 return -EEXIST;
99 }
100
101 /* Add new node and rebalance tree. */
102 rb_link_node(&evtchn->node, parent, new);
103 rb_insert_color(&evtchn->node, &u->evtchns);
104
105 return 0;
89} 106}
90 107
91static inline bool get_port_enabled(unsigned port) 108static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
92{ 109{
93 return port_user[port] & 1; 110 rb_erase(&evtchn->node, &u->evtchns);
111 kfree(evtchn);
94} 112}
95 113
96static inline void set_port_enabled(unsigned port, bool enabled) 114static struct user_evtchn *find_evtchn(struct per_user_data *u, unsigned port)
97{ 115{
98 if (enabled) 116 struct rb_node *node = u->evtchns.rb_node;
99 port_user[port] |= 1; 117
100 else 118 while (node) {
101 port_user[port] &= ~1; 119 struct user_evtchn *evtchn;
120
121 evtchn = container_of(node, struct user_evtchn, node);
122
123 if (evtchn->port < port)
124 node = node->rb_left;
125 else if (evtchn->port > port)
126 node = node->rb_right;
127 else
128 return evtchn;
129 }
130 return NULL;
102} 131}
103 132
104static irqreturn_t evtchn_interrupt(int irq, void *data) 133static irqreturn_t evtchn_interrupt(int irq, void *data)
105{ 134{
106 unsigned int port = (unsigned long)data; 135 struct user_evtchn *evtchn = data;
107 struct per_user_data *u; 136 struct per_user_data *u = evtchn->user;
108
109 spin_lock(&port_user_lock);
110
111 u = get_port_user(port);
112 137
113 WARN(!get_port_enabled(port), 138 WARN(!evtchn->enabled,
114 "Interrupt for port %d, but apparently not enabled; per-user %p\n", 139 "Interrupt for port %d, but apparently not enabled; per-user %p\n",
115 port, u); 140 evtchn->port, u);
116 141
117 disable_irq_nosync(irq); 142 disable_irq_nosync(irq);
118 set_port_enabled(port, false); 143 evtchn->enabled = false;
144
145 spin_lock(&u->ring_prod_lock);
119 146
120 if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) { 147 if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
121 u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port; 148 u->ring[EVTCHN_RING_MASK(u->ring_prod)] = evtchn->port;
122 wmb(); /* Ensure ring contents visible */ 149 wmb(); /* Ensure ring contents visible */
123 if (u->ring_cons == u->ring_prod++) { 150 if (u->ring_cons == u->ring_prod++) {
124 wake_up_interruptible(&u->evtchn_wait); 151 wake_up_interruptible(&u->evtchn_wait);
@@ -128,7 +155,7 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
128 } else 155 } else
129 u->ring_overflow = 1; 156 u->ring_overflow = 1;
130 157
131 spin_unlock(&port_user_lock); 158 spin_unlock(&u->ring_prod_lock);
132 159
133 return IRQ_HANDLED; 160 return IRQ_HANDLED;
134} 161}
@@ -229,20 +256,20 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
229 if (copy_from_user(kbuf, buf, count) != 0) 256 if (copy_from_user(kbuf, buf, count) != 0)
230 goto out; 257 goto out;
231 258
232 spin_lock_irq(&port_user_lock); 259 mutex_lock(&u->bind_mutex);
233 260
234 for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) { 261 for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) {
235 unsigned port = kbuf[i]; 262 unsigned port = kbuf[i];
263 struct user_evtchn *evtchn;
236 264
237 if (port < NR_EVENT_CHANNELS && 265 evtchn = find_evtchn(u, port);
238 get_port_user(port) == u && 266 if (evtchn && !evtchn->enabled) {
239 !get_port_enabled(port)) { 267 evtchn->enabled = true;
240 set_port_enabled(port, true);
241 enable_irq(irq_from_evtchn(port)); 268 enable_irq(irq_from_evtchn(port));
242 } 269 }
243 } 270 }
244 271
245 spin_unlock_irq(&port_user_lock); 272 mutex_unlock(&u->bind_mutex);
246 273
247 rc = count; 274 rc = count;
248 275
@@ -253,6 +280,8 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
253 280
254static int evtchn_bind_to_user(struct per_user_data *u, int port) 281static int evtchn_bind_to_user(struct per_user_data *u, int port)
255{ 282{
283 struct user_evtchn *evtchn;
284 struct evtchn_close close;
256 int rc = 0; 285 int rc = 0;
257 286
258 /* 287 /*
@@ -263,35 +292,46 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port)
263 * interrupt handler yet, and our caller has already 292 * interrupt handler yet, and our caller has already
264 * serialized bind operations.) 293 * serialized bind operations.)
265 */ 294 */
266 BUG_ON(get_port_user(port) != NULL); 295
267 set_port_user(port, u); 296 evtchn = kzalloc(sizeof(*evtchn), GFP_KERNEL);
268 set_port_enabled(port, true); /* start enabled */ 297 if (!evtchn)
298 return -ENOMEM;
299
300 evtchn->user = u;
301 evtchn->port = port;
302 evtchn->enabled = true; /* start enabled */
303
304 rc = add_evtchn(u, evtchn);
305 if (rc < 0)
306 goto err;
269 307
270 rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED, 308 rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
271 u->name, (void *)(unsigned long)port); 309 u->name, evtchn);
272 if (rc >= 0) 310 if (rc < 0)
273 rc = evtchn_make_refcounted(port); 311 goto err;
274 else {
275 /* bind failed, should close the port now */
276 struct evtchn_close close;
277 close.port = port;
278 if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
279 BUG();
280 set_port_user(port, NULL);
281 }
282 312
313 rc = evtchn_make_refcounted(port);
314 return rc;
315
316err:
317 /* bind failed, should close the port now */
318 close.port = port;
319 if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
320 BUG();
321 del_evtchn(u, evtchn);
283 return rc; 322 return rc;
284} 323}
285 324
286static void evtchn_unbind_from_user(struct per_user_data *u, int port) 325static void evtchn_unbind_from_user(struct per_user_data *u,
326 struct user_evtchn *evtchn)
287{ 327{
288 int irq = irq_from_evtchn(port); 328 int irq = irq_from_evtchn(evtchn->port);
289 329
290 BUG_ON(irq < 0); 330 BUG_ON(irq < 0);
291 331
292 unbind_from_irqhandler(irq, (void *)(unsigned long)port); 332 unbind_from_irqhandler(irq, evtchn);
293 333
294 set_port_user(port, NULL); 334 del_evtchn(u, evtchn);
295} 335}
296 336
297static long evtchn_ioctl(struct file *file, 337static long evtchn_ioctl(struct file *file,
@@ -370,6 +410,7 @@ static long evtchn_ioctl(struct file *file,
370 410
371 case IOCTL_EVTCHN_UNBIND: { 411 case IOCTL_EVTCHN_UNBIND: {
372 struct ioctl_evtchn_unbind unbind; 412 struct ioctl_evtchn_unbind unbind;
413 struct user_evtchn *evtchn;
373 414
374 rc = -EFAULT; 415 rc = -EFAULT;
375 if (copy_from_user(&unbind, uarg, sizeof(unbind))) 416 if (copy_from_user(&unbind, uarg, sizeof(unbind)))
@@ -380,29 +421,27 @@ static long evtchn_ioctl(struct file *file,
380 break; 421 break;
381 422
382 rc = -ENOTCONN; 423 rc = -ENOTCONN;
383 if (get_port_user(unbind.port) != u) 424 evtchn = find_evtchn(u, unbind.port);
425 if (!evtchn)
384 break; 426 break;
385 427
386 disable_irq(irq_from_evtchn(unbind.port)); 428 disable_irq(irq_from_evtchn(unbind.port));
387 429 evtchn_unbind_from_user(u, evtchn);
388 evtchn_unbind_from_user(u, unbind.port);
389
390 rc = 0; 430 rc = 0;
391 break; 431 break;
392 } 432 }
393 433
394 case IOCTL_EVTCHN_NOTIFY: { 434 case IOCTL_EVTCHN_NOTIFY: {
395 struct ioctl_evtchn_notify notify; 435 struct ioctl_evtchn_notify notify;
436 struct user_evtchn *evtchn;
396 437
397 rc = -EFAULT; 438 rc = -EFAULT;
398 if (copy_from_user(&notify, uarg, sizeof(notify))) 439 if (copy_from_user(&notify, uarg, sizeof(notify)))
399 break; 440 break;
400 441
401 if (notify.port >= NR_EVENT_CHANNELS) { 442 rc = -ENOTCONN;
402 rc = -EINVAL; 443 evtchn = find_evtchn(u, notify.port);
403 } else if (get_port_user(notify.port) != u) { 444 if (evtchn) {
404 rc = -ENOTCONN;
405 } else {
406 notify_remote_via_evtchn(notify.port); 445 notify_remote_via_evtchn(notify.port);
407 rc = 0; 446 rc = 0;
408 } 447 }
@@ -412,9 +451,9 @@ static long evtchn_ioctl(struct file *file,
412 case IOCTL_EVTCHN_RESET: { 451 case IOCTL_EVTCHN_RESET: {
413 /* Initialise the ring to empty. Clear errors. */ 452 /* Initialise the ring to empty. Clear errors. */
414 mutex_lock(&u->ring_cons_mutex); 453 mutex_lock(&u->ring_cons_mutex);
415 spin_lock_irq(&port_user_lock); 454 spin_lock_irq(&u->ring_prod_lock);
416 u->ring_cons = u->ring_prod = u->ring_overflow = 0; 455 u->ring_cons = u->ring_prod = u->ring_overflow = 0;
417 spin_unlock_irq(&port_user_lock); 456 spin_unlock_irq(&u->ring_prod_lock);
418 mutex_unlock(&u->ring_cons_mutex); 457 mutex_unlock(&u->ring_cons_mutex);
419 rc = 0; 458 rc = 0;
420 break; 459 break;
@@ -473,6 +512,7 @@ static int evtchn_open(struct inode *inode, struct file *filp)
473 512
474 mutex_init(&u->bind_mutex); 513 mutex_init(&u->bind_mutex);
475 mutex_init(&u->ring_cons_mutex); 514 mutex_init(&u->ring_cons_mutex);
515 spin_lock_init(&u->ring_prod_lock);
476 516
477 filp->private_data = u; 517 filp->private_data = u;
478 518
@@ -481,15 +521,15 @@ static int evtchn_open(struct inode *inode, struct file *filp)
481 521
482static int evtchn_release(struct inode *inode, struct file *filp) 522static int evtchn_release(struct inode *inode, struct file *filp)
483{ 523{
484 int i;
485 struct per_user_data *u = filp->private_data; 524 struct per_user_data *u = filp->private_data;
525 struct rb_node *node;
486 526
487 for (i = 0; i < NR_EVENT_CHANNELS; i++) { 527 while ((node = u->evtchns.rb_node)) {
488 if (get_port_user(i) != u) 528 struct user_evtchn *evtchn;
489 continue;
490 529
491 disable_irq(irq_from_evtchn(i)); 530 evtchn = rb_entry(node, struct user_evtchn, node);
492 evtchn_unbind_from_user(get_port_user(i), i); 531 disable_irq(irq_from_evtchn(evtchn->port));
532 evtchn_unbind_from_user(u, evtchn);
493 } 533 }
494 534
495 free_page((unsigned long)u->ring); 535 free_page((unsigned long)u->ring);
@@ -523,12 +563,6 @@ static int __init evtchn_init(void)
523 if (!xen_domain()) 563 if (!xen_domain())
524 return -ENODEV; 564 return -ENODEV;
525 565
526 port_user = kcalloc(NR_EVENT_CHANNELS, sizeof(*port_user), GFP_KERNEL);
527 if (port_user == NULL)
528 return -ENOMEM;
529
530 spin_lock_init(&port_user_lock);
531
532 /* Create '/dev/xen/evtchn'. */ 566 /* Create '/dev/xen/evtchn'. */
533 err = misc_register(&evtchn_miscdev); 567 err = misc_register(&evtchn_miscdev);
534 if (err != 0) { 568 if (err != 0) {
@@ -543,9 +577,6 @@ static int __init evtchn_init(void)
543 577
544static void __exit evtchn_cleanup(void) 578static void __exit evtchn_cleanup(void)
545{ 579{
546 kfree(port_user);
547 port_user = NULL;
548
549 misc_deregister(&evtchn_miscdev); 580 misc_deregister(&evtchn_miscdev);
550} 581}
551 582
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index eab5427c75f5..e41c79c986ea 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -272,19 +272,12 @@ static int map_grant_pages(struct grant_map *map)
272 * with find_grant_ptes. 272 * with find_grant_ptes.
273 */ 273 */
274 for (i = 0; i < map->count; i++) { 274 for (i = 0; i < map->count; i++) {
275 unsigned level;
276 unsigned long address = (unsigned long) 275 unsigned long address = (unsigned long)
277 pfn_to_kaddr(page_to_pfn(map->pages[i])); 276 pfn_to_kaddr(page_to_pfn(map->pages[i]));
278 pte_t *ptep;
279 u64 pte_maddr = 0;
280 BUG_ON(PageHighMem(map->pages[i])); 277 BUG_ON(PageHighMem(map->pages[i]));
281 278
282 ptep = lookup_address(address, &level); 279 gnttab_set_map_op(&map->kmap_ops[i], address,
283 pte_maddr = arbitrary_virt_to_machine(ptep).maddr; 280 map->flags | GNTMAP_host_map,
284 gnttab_set_map_op(&map->kmap_ops[i], pte_maddr,
285 map->flags |
286 GNTMAP_host_map |
287 GNTMAP_contains_pte,
288 map->grants[i].ref, 281 map->grants[i].ref,
289 map->grants[i].domid); 282 map->grants[i].domid);
290 } 283 }
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 04cdeb8e3719..c4d2298893b1 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -730,9 +730,18 @@ void gnttab_request_free_callback(struct gnttab_free_callback *callback,
730 void (*fn)(void *), void *arg, u16 count) 730 void (*fn)(void *), void *arg, u16 count)
731{ 731{
732 unsigned long flags; 732 unsigned long flags;
733 struct gnttab_free_callback *cb;
734
733 spin_lock_irqsave(&gnttab_list_lock, flags); 735 spin_lock_irqsave(&gnttab_list_lock, flags);
734 if (callback->next) 736
735 goto out; 737 /* Check if the callback is already on the list */
738 cb = gnttab_free_callback_list;
739 while (cb) {
740 if (cb == callback)
741 goto out;
742 cb = cb->next;
743 }
744
736 callback->fn = fn; 745 callback->fn = fn;
737 callback->arg = arg; 746 callback->arg = arg;
738 callback->count = count; 747 callback->count = count;
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index f8e5dd701ecb..8e74590fa1bb 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -43,9 +43,10 @@ MODULE_LICENSE("GPL");
43 43
44#define PRIV_VMA_LOCKED ((void *)1) 44#define PRIV_VMA_LOCKED ((void *)1)
45 45
46#ifndef HAVE_ARCH_PRIVCMD_MMAP 46static int privcmd_vma_range_is_mapped(
47static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma); 47 struct vm_area_struct *vma,
48#endif 48 unsigned long addr,
49 unsigned long nr_pages);
49 50
50static long privcmd_ioctl_hypercall(void __user *udata) 51static long privcmd_ioctl_hypercall(void __user *udata)
51{ 52{
@@ -225,9 +226,9 @@ static long privcmd_ioctl_mmap(void __user *udata)
225 vma = find_vma(mm, msg->va); 226 vma = find_vma(mm, msg->va);
226 rc = -EINVAL; 227 rc = -EINVAL;
227 228
228 if (!vma || (msg->va != vma->vm_start) || 229 if (!vma || (msg->va != vma->vm_start) || vma->vm_private_data)
229 !privcmd_enforce_singleshot_mapping(vma))
230 goto out_up; 230 goto out_up;
231 vma->vm_private_data = PRIV_VMA_LOCKED;
231 } 232 }
232 233
233 state.va = vma->vm_start; 234 state.va = vma->vm_start;
@@ -358,7 +359,7 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
358 kfree(pages); 359 kfree(pages);
359 return -ENOMEM; 360 return -ENOMEM;
360 } 361 }
361 BUG_ON(vma->vm_private_data != PRIV_VMA_LOCKED); 362 BUG_ON(vma->vm_private_data != NULL);
362 vma->vm_private_data = pages; 363 vma->vm_private_data = pages;
363 364
364 return 0; 365 return 0;
@@ -421,19 +422,43 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
421 422
422 vma = find_vma(mm, m.addr); 423 vma = find_vma(mm, m.addr);
423 if (!vma || 424 if (!vma ||
424 vma->vm_ops != &privcmd_vm_ops || 425 vma->vm_ops != &privcmd_vm_ops) {
425 (m.addr != vma->vm_start) ||
426 ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
427 !privcmd_enforce_singleshot_mapping(vma)) {
428 up_write(&mm->mmap_sem);
429 ret = -EINVAL; 426 ret = -EINVAL;
430 goto out; 427 goto out_unlock;
431 } 428 }
432 if (xen_feature(XENFEAT_auto_translated_physmap)) { 429
433 ret = alloc_empty_pages(vma, m.num); 430 /*
434 if (ret < 0) { 431 * Caller must either:
435 up_write(&mm->mmap_sem); 432 *
436 goto out; 433 * Map the whole VMA range, which will also allocate all the
434 * pages required for the auto_translated_physmap case.
435 *
436 * Or
437 *
438 * Map unmapped holes left from a previous map attempt (e.g.,
439 * because those foreign frames were previously paged out).
440 */
441 if (vma->vm_private_data == NULL) {
442 if (m.addr != vma->vm_start ||
443 m.addr + (nr_pages << PAGE_SHIFT) != vma->vm_end) {
444 ret = -EINVAL;
445 goto out_unlock;
446 }
447 if (xen_feature(XENFEAT_auto_translated_physmap)) {
448 ret = alloc_empty_pages(vma, m.num);
449 if (ret < 0)
450 goto out_unlock;
451 } else
452 vma->vm_private_data = PRIV_VMA_LOCKED;
453 } else {
454 if (m.addr < vma->vm_start ||
455 m.addr + (nr_pages << PAGE_SHIFT) > vma->vm_end) {
456 ret = -EINVAL;
457 goto out_unlock;
458 }
459 if (privcmd_vma_range_is_mapped(vma, m.addr, nr_pages)) {
460 ret = -EINVAL;
461 goto out_unlock;
437 } 462 }
438 } 463 }
439 464
@@ -466,8 +491,11 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
466 491
467out: 492out:
468 free_page_list(&pagelist); 493 free_page_list(&pagelist);
469
470 return ret; 494 return ret;
495
496out_unlock:
497 up_write(&mm->mmap_sem);
498 goto out;
471} 499}
472 500
473static long privcmd_ioctl(struct file *file, 501static long privcmd_ioctl(struct file *file,
@@ -540,9 +568,24 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
540 return 0; 568 return 0;
541} 569}
542 570
543static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma) 571/*
572 * For MMAPBATCH*. This allows asserting the singleshot mapping
573 * on a per pfn/pte basis. Mapping calls that fail with ENOENT
 574 * can then be retried until success.
575 */
576static int is_mapped_fn(pte_t *pte, struct page *pmd_page,
577 unsigned long addr, void *data)
578{
579 return pte_none(*pte) ? 0 : -EBUSY;
580}
581
582static int privcmd_vma_range_is_mapped(
583 struct vm_area_struct *vma,
584 unsigned long addr,
585 unsigned long nr_pages)
544{ 586{
545 return !cmpxchg(&vma->vm_private_data, NULL, PRIV_VMA_LOCKED); 587 return apply_to_page_range(vma->vm_mm, addr, nr_pages << PAGE_SHIFT,
588 is_mapped_fn, NULL) != 0;
546} 589}
547 590
548const struct file_operations xen_privcmd_fops = { 591const struct file_operations xen_privcmd_fops = {
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index aadffcf7db9b..1b2277c311d2 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -506,13 +506,13 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
506 to do proper error handling. */ 506 to do proper error handling. */
507 xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, 507 xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
508 attrs); 508 attrs);
509 sgl[0].dma_length = 0; 509 sg_dma_len(sgl) = 0;
510 return DMA_ERROR_CODE; 510 return DMA_ERROR_CODE;
511 } 511 }
512 sg->dma_address = xen_phys_to_bus(map); 512 sg->dma_address = xen_phys_to_bus(map);
513 } else 513 } else
514 sg->dma_address = dev_addr; 514 sg->dma_address = dev_addr;
515 sg->dma_length = sg->length; 515 sg_dma_len(sg) = sg->length;
516 } 516 }
517 return nelems; 517 return nelems;
518} 518}
@@ -533,7 +533,7 @@ xen_swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
533 BUG_ON(dir == DMA_NONE); 533 BUG_ON(dir == DMA_NONE);
534 534
535 for_each_sg(sgl, sg, nelems, i) 535 for_each_sg(sgl, sg, nelems, i)
536 xen_unmap_single(hwdev, sg->dma_address, sg->dma_length, dir); 536 xen_unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir);
537 537
538} 538}
539EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg_attrs); 539EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg_attrs);
@@ -555,7 +555,7 @@ xen_swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
555 555
556 for_each_sg(sgl, sg, nelems, i) 556 for_each_sg(sgl, sg, nelems, i)
557 xen_swiotlb_sync_single(hwdev, sg->dma_address, 557 xen_swiotlb_sync_single(hwdev, sg->dma_address,
558 sg->dma_length, dir, target); 558 sg_dma_len(sg), dir, target);
559} 559}
560 560
561void 561void
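
The swiotlb-xen.c hunks switch from poking sg->dma_length directly to the sg_dma_len() accessor. That matters because the dma_length field only exists on architectures that set CONFIG_NEED_SG_DMA_LENGTH; the accessor hides the difference, roughly as <linux/scatterlist.h> defines it:

#ifdef CONFIG_NEED_SG_DMA_LENGTH
#define sg_dma_len(sg)          ((sg)->dma_length)
#else
#define sg_dma_len(sg)          ((sg)->length)
#endif
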
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c
index 02817a85f877..21e18c18c7a1 100644
--- a/drivers/xen/xen-selfballoon.c
+++ b/drivers/xen/xen-selfballoon.c
@@ -265,8 +265,10 @@ static ssize_t store_selfballooning(struct device *dev,
265 if (!capable(CAP_SYS_ADMIN)) 265 if (!capable(CAP_SYS_ADMIN))
266 return -EPERM; 266 return -EPERM;
267 267
268 err = strict_strtoul(buf, 10, &tmp); 268 err = kstrtoul(buf, 10, &tmp);
269 if (err || ((tmp != 0) && (tmp != 1))) 269 if (err)
270 return err;
271 if ((tmp != 0) && (tmp != 1))
270 return -EINVAL; 272 return -EINVAL;
271 273
272 xen_selfballooning_enabled = !!tmp; 274 xen_selfballooning_enabled = !!tmp;
@@ -292,8 +294,10 @@ static ssize_t store_selfballoon_interval(struct device *dev,
292 294
293 if (!capable(CAP_SYS_ADMIN)) 295 if (!capable(CAP_SYS_ADMIN))
294 return -EPERM; 296 return -EPERM;
295 err = strict_strtoul(buf, 10, &val); 297 err = kstrtoul(buf, 10, &val);
296 if (err || val == 0) 298 if (err)
299 return err;
300 if (val == 0)
297 return -EINVAL; 301 return -EINVAL;
298 selfballoon_interval = val; 302 selfballoon_interval = val;
299 return count; 303 return count;
@@ -314,8 +318,10 @@ static ssize_t store_selfballoon_downhys(struct device *dev,
314 318
315 if (!capable(CAP_SYS_ADMIN)) 319 if (!capable(CAP_SYS_ADMIN))
316 return -EPERM; 320 return -EPERM;
317 err = strict_strtoul(buf, 10, &val); 321 err = kstrtoul(buf, 10, &val);
318 if (err || val == 0) 322 if (err)
323 return err;
324 if (val == 0)
319 return -EINVAL; 325 return -EINVAL;
320 selfballoon_downhysteresis = val; 326 selfballoon_downhysteresis = val;
321 return count; 327 return count;
@@ -337,8 +343,10 @@ static ssize_t store_selfballoon_uphys(struct device *dev,
337 343
338 if (!capable(CAP_SYS_ADMIN)) 344 if (!capable(CAP_SYS_ADMIN))
339 return -EPERM; 345 return -EPERM;
340 err = strict_strtoul(buf, 10, &val); 346 err = kstrtoul(buf, 10, &val);
341 if (err || val == 0) 347 if (err)
348 return err;
349 if (val == 0)
342 return -EINVAL; 350 return -EINVAL;
343 selfballoon_uphysteresis = val; 351 selfballoon_uphysteresis = val;
344 return count; 352 return count;
@@ -360,8 +368,10 @@ static ssize_t store_selfballoon_min_usable_mb(struct device *dev,
360 368
361 if (!capable(CAP_SYS_ADMIN)) 369 if (!capable(CAP_SYS_ADMIN))
362 return -EPERM; 370 return -EPERM;
363 err = strict_strtoul(buf, 10, &val); 371 err = kstrtoul(buf, 10, &val);
364 if (err || val == 0) 372 if (err)
373 return err;
374 if (val == 0)
365 return -EINVAL; 375 return -EINVAL;
366 selfballoon_min_usable_mb = val; 376 selfballoon_min_usable_mb = val;
367 return count; 377 return count;
@@ -384,8 +394,10 @@ static ssize_t store_selfballoon_reserved_mb(struct device *dev,
384 394
385 if (!capable(CAP_SYS_ADMIN)) 395 if (!capable(CAP_SYS_ADMIN))
386 return -EPERM; 396 return -EPERM;
387 err = strict_strtoul(buf, 10, &val); 397 err = kstrtoul(buf, 10, &val);
388 if (err || val == 0) 398 if (err)
399 return err;
400 if (val == 0)
389 return -EINVAL; 401 return -EINVAL;
390 selfballoon_reserved_mb = val; 402 selfballoon_reserved_mb = val;
391 return count; 403 return count;
@@ -410,8 +422,10 @@ static ssize_t store_frontswap_selfshrinking(struct device *dev,
410 422
411 if (!capable(CAP_SYS_ADMIN)) 423 if (!capable(CAP_SYS_ADMIN))
412 return -EPERM; 424 return -EPERM;
413 err = strict_strtoul(buf, 10, &tmp); 425 err = kstrtoul(buf, 10, &tmp);
414 if (err || ((tmp != 0) && (tmp != 1))) 426 if (err)
427 return err;
428 if ((tmp != 0) && (tmp != 1))
415 return -EINVAL; 429 return -EINVAL;
416 frontswap_selfshrinking = !!tmp; 430 frontswap_selfshrinking = !!tmp;
417 if (!was_enabled && !xen_selfballooning_enabled && 431 if (!was_enabled && !xen_selfballooning_enabled &&
@@ -437,8 +451,10 @@ static ssize_t store_frontswap_inertia(struct device *dev,
437 451
438 if (!capable(CAP_SYS_ADMIN)) 452 if (!capable(CAP_SYS_ADMIN))
439 return -EPERM; 453 return -EPERM;
440 err = strict_strtoul(buf, 10, &val); 454 err = kstrtoul(buf, 10, &val);
441 if (err || val == 0) 455 if (err)
456 return err;
457 if (val == 0)
442 return -EINVAL; 458 return -EINVAL;
443 frontswap_inertia = val; 459 frontswap_inertia = val;
444 frontswap_inertia_counter = val; 460 frontswap_inertia_counter = val;
@@ -460,8 +476,10 @@ static ssize_t store_frontswap_hysteresis(struct device *dev,
460 476
461 if (!capable(CAP_SYS_ADMIN)) 477 if (!capable(CAP_SYS_ADMIN))
462 return -EPERM; 478 return -EPERM;
463 err = strict_strtoul(buf, 10, &val); 479 err = kstrtoul(buf, 10, &val);
464 if (err || val == 0) 480 if (err)
481 return err;
482 if (val == 0)
465 return -EINVAL; 483 return -EINVAL;
466 frontswap_hysteresis = val; 484 frontswap_hysteresis = val;
467 return count; 485 return count;
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 0976fc46d1e0..a5079072da66 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -48,7 +48,6 @@
48 48
49#include <linux/types.h> 49#include <linux/types.h>
50#include <linux/compiler.h> 50#include <linux/compiler.h>
51#include <linux/workqueue.h>
52 51
53#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) 52#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
54 53
@@ -61,12 +60,6 @@ struct static_key {
61#endif 60#endif
62}; 61};
63 62
64struct static_key_deferred {
65 struct static_key key;
66 unsigned long timeout;
67 struct delayed_work work;
68};
69
70# include <asm/jump_label.h> 63# include <asm/jump_label.h>
71# define HAVE_JUMP_LABEL 64# define HAVE_JUMP_LABEL
72#endif /* CC_HAVE_ASM_GOTO && CONFIG_JUMP_LABEL */ 65#endif /* CC_HAVE_ASM_GOTO && CONFIG_JUMP_LABEL */
@@ -78,6 +71,7 @@ enum jump_label_type {
78 71
79struct module; 72struct module;
80 73
74#include <linux/atomic.h>
81#ifdef HAVE_JUMP_LABEL 75#ifdef HAVE_JUMP_LABEL
82 76
83#define JUMP_LABEL_TRUE_BRANCH 1UL 77#define JUMP_LABEL_TRUE_BRANCH 1UL
@@ -119,10 +113,7 @@ extern void arch_jump_label_transform_static(struct jump_entry *entry,
119extern int jump_label_text_reserved(void *start, void *end); 113extern int jump_label_text_reserved(void *start, void *end);
120extern void static_key_slow_inc(struct static_key *key); 114extern void static_key_slow_inc(struct static_key *key);
121extern void static_key_slow_dec(struct static_key *key); 115extern void static_key_slow_dec(struct static_key *key);
122extern void static_key_slow_dec_deferred(struct static_key_deferred *key);
123extern void jump_label_apply_nops(struct module *mod); 116extern void jump_label_apply_nops(struct module *mod);
124extern void
125jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl);
126 117
127#define STATIC_KEY_INIT_TRUE ((struct static_key) \ 118#define STATIC_KEY_INIT_TRUE ((struct static_key) \
128 { .enabled = ATOMIC_INIT(1), .entries = (void *)1 }) 119 { .enabled = ATOMIC_INIT(1), .entries = (void *)1 })
@@ -131,8 +122,6 @@ jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl);
131 122
132#else /* !HAVE_JUMP_LABEL */ 123#else /* !HAVE_JUMP_LABEL */
133 124
134#include <linux/atomic.h>
135
136struct static_key { 125struct static_key {
137 atomic_t enabled; 126 atomic_t enabled;
138}; 127};
@@ -141,10 +130,6 @@ static __always_inline void jump_label_init(void)
141{ 130{
142} 131}
143 132
144struct static_key_deferred {
145 struct static_key key;
146};
147
148static __always_inline bool static_key_false(struct static_key *key) 133static __always_inline bool static_key_false(struct static_key *key)
149{ 134{
150 if (unlikely(atomic_read(&key->enabled)) > 0) 135 if (unlikely(atomic_read(&key->enabled)) > 0)
@@ -169,11 +154,6 @@ static inline void static_key_slow_dec(struct static_key *key)
169 atomic_dec(&key->enabled); 154 atomic_dec(&key->enabled);
170} 155}
171 156
172static inline void static_key_slow_dec_deferred(struct static_key_deferred *key)
173{
174 static_key_slow_dec(&key->key);
175}
176
177static inline int jump_label_text_reserved(void *start, void *end) 157static inline int jump_label_text_reserved(void *start, void *end)
178{ 158{
179 return 0; 159 return 0;
@@ -187,12 +167,6 @@ static inline int jump_label_apply_nops(struct module *mod)
187 return 0; 167 return 0;
188} 168}
189 169
190static inline void
191jump_label_rate_limit(struct static_key_deferred *key,
192 unsigned long rl)
193{
194}
195
196#define STATIC_KEY_INIT_TRUE ((struct static_key) \ 170#define STATIC_KEY_INIT_TRUE ((struct static_key) \
197 { .enabled = ATOMIC_INIT(1) }) 171 { .enabled = ATOMIC_INIT(1) })
198#define STATIC_KEY_INIT_FALSE ((struct static_key) \ 172#define STATIC_KEY_INIT_FALSE ((struct static_key) \
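
After this change jump_label.h keeps only the plain static_key API; the deferred variant moves to the new header introduced below. For context, the surviving interface is used roughly like this (all names here are illustrative, not from this patch):

  #include <linux/jump_label.h>

  static struct static_key example_key = STATIC_KEY_INIT_FALSE;

  static void example_rare_work(void) { /* illustrative slow path */ }

  static void example_fast_path(void)
  {
      /* With asm goto support this compiles to a patched no-op/jump. */
      if (static_key_false(&example_key))
          example_rare_work();
  }

  static void example_feature_enable(void)
  {
      static_key_slow_inc(&example_key);	/* flips the branch on */
  }

  static void example_feature_disable(void)
  {
      static_key_slow_dec(&example_key);
  }
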
diff --git a/include/linux/jump_label_ratelimit.h b/include/linux/jump_label_ratelimit.h
new file mode 100644
index 000000000000..113788389b3d
--- /dev/null
+++ b/include/linux/jump_label_ratelimit.h
@@ -0,0 +1,34 @@
1#ifndef _LINUX_JUMP_LABEL_RATELIMIT_H
2#define _LINUX_JUMP_LABEL_RATELIMIT_H
3
4#include <linux/jump_label.h>
5#include <linux/workqueue.h>
6
7#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
8struct static_key_deferred {
9 struct static_key key;
10 unsigned long timeout;
11 struct delayed_work work;
12};
13#endif
14
15#ifdef HAVE_JUMP_LABEL
16extern void static_key_slow_dec_deferred(struct static_key_deferred *key);
17extern void
18jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl);
19
20#else /* !HAVE_JUMP_LABEL */
21struct static_key_deferred {
22 struct static_key key;
23};
24static inline void static_key_slow_dec_deferred(struct static_key_deferred *key)
25{
26 static_key_slow_dec(&key->key);
27}
28static inline void
29jump_label_rate_limit(struct static_key_deferred *key,
30 unsigned long rl)
31{
32}
33#endif /* HAVE_JUMP_LABEL */
34#endif /* _LINUX_JUMP_LABEL_RATELIMIT_H */
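
The new ratelimit header pulls in workqueue.h itself, so jump_label.h no longer has to; callers that want the deferred, batched decrement now include it directly (as perf_event.h does below). A hedged usage sketch with illustrative names:

  #include <linux/jump_label_ratelimit.h>
  #include <linux/kernel.h>		/* for HZ */

  static struct static_key_deferred example_dkey;	/* starts disabled */

  static void example_setup(void)
  {
      /* Batch "disable" patching: defer the real decrement by ~1s. */
      jump_label_rate_limit(&example_dkey, HZ);
  }

  static void example_get(void)
  {
      static_key_slow_inc(&example_dkey.key);
  }

  static void example_put(void)
  {
      /* Goes through a delayed work item instead of patching at once. */
      static_key_slow_dec_deferred(&example_dkey);
  }
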
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c43f6eabad5b..226be8da3f85 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -48,6 +48,7 @@ struct perf_guest_info_callbacks {
48#include <linux/cpu.h> 48#include <linux/cpu.h>
49#include <linux/irq_work.h> 49#include <linux/irq_work.h>
50#include <linux/static_key.h> 50#include <linux/static_key.h>
51#include <linux/jump_label_ratelimit.h>
51#include <linux/atomic.h> 52#include <linux/atomic.h>
52#include <linux/sysfs.h> 53#include <linux/sysfs.h>
53#include <linux/perf_regs.h> 54#include <linux/perf_regs.h>
diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
index cea2c5c72d26..2841f86eae0b 100644
--- a/include/uapi/linux/kvm_para.h
+++ b/include/uapi/linux/kvm_para.h
@@ -19,6 +19,7 @@
19#define KVM_HC_MMU_OP 2 19#define KVM_HC_MMU_OP 2
20#define KVM_HC_FEATURES 3 20#define KVM_HC_FEATURES 3
21#define KVM_HC_PPC_MAP_MAGIC_PAGE 4 21#define KVM_HC_PPC_MAP_MAGIC_PAGE 4
22#define KVM_HC_KICK_CPU 5
22 23
23/* 24/*
24 * hypercalls use architecture specific 25 * hypercalls use architecture specific
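
KVM_HC_KICK_CPU is the hypercall number consumed by the paravirtual spinlock changes to arch/x86/kernel/kvm.c elsewhere in this merge: a vCPU spinning on a held lock halts, and the lock holder kicks it awake. A rough guest-side sketch, assuming the x86 kvm_hypercall2() wrapper; the flags/apicid encoding here is illustrative only:

  #include <uapi/linux/kvm_para.h>
  #include <asm/kvm_para.h>

  /* Ask the host to wake (kick) a halted vCPU; illustrative only. */
  static void example_kick_cpu(int apicid)
  {
      unsigned long flags = 0;	/* no flag bits assumed in this sketch */

      kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
  }
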
diff --git a/include/xen/balloon.h b/include/xen/balloon.h
index cc2e1a7e44ec..a4c1c6a93691 100644
--- a/include/xen/balloon.h
+++ b/include/xen/balloon.h
@@ -29,6 +29,9 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages,
29 bool highmem); 29 bool highmem);
30void free_xenballooned_pages(int nr_pages, struct page **pages); 30void free_xenballooned_pages(int nr_pages, struct page **pages);
31 31
32struct page *get_balloon_scratch_page(void);
33void put_balloon_scratch_page(void);
34
32struct device; 35struct device;
33#ifdef CONFIG_XEN_SELFBALLOONING 36#ifdef CONFIG_XEN_SELFBALLOONING
34extern int register_xen_selfballooning(struct device *dev); 37extern int register_xen_selfballooning(struct device *dev);
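
The two new balloon helpers hand out a scratch frame that other Xen code in this series uses as a temporary backing page for ballooned-out PFNs. A loose sketch of the call pattern; the comments describe intended use rather than code from this patch:

  #include <xen/balloon.h>

  static void example_use_scratch_frame(void)
  {
      struct page *page;

      page = get_balloon_scratch_page();	/* may pin the current CPU */
      /*
       * Use 'page' as a harmless target frame, e.g. for a temporary
       * mapping of a PFN whose real backing was ballooned out.
       */
      put_balloon_scratch_page();
  }
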
diff --git a/include/xen/interface/io/tpmif.h b/include/xen/interface/io/tpmif.h
new file mode 100644
index 000000000000..28e7dcd75e82
--- /dev/null
+++ b/include/xen/interface/io/tpmif.h
@@ -0,0 +1,52 @@
1/******************************************************************************
2 * tpmif.h
3 *
4 * TPM I/O interface for Xen guest OSes, v2
5 *
6 * This file is in the public domain.
7 *
8 */
9
10#ifndef __XEN_PUBLIC_IO_TPMIF_H__
11#define __XEN_PUBLIC_IO_TPMIF_H__
12
13/*
14 * Xenbus state machine
15 *
16 * Device open:
17 * 1. Both ends start in XenbusStateInitialising
18 * 2. Backend transitions to InitWait (frontend does not wait on this step)
19 * 3. Frontend populates ring-ref, event-channel, feature-protocol-v2
20 * 4. Frontend transitions to Initialised
21 * 5. Backend maps grant and event channel, verifies feature-protocol-v2
22 * 6. Backend transitions to Connected
23 * 7. Frontend verifies feature-protocol-v2, transitions to Connected
24 *
25 * Device close:
26 * 1. State is changed to XenbusStateClosing
27 * 2. Frontend transitions to Closed
28 * 3. Backend unmaps grant and event, changes state to InitWait
29 */
30
31enum vtpm_shared_page_state {
32 VTPM_STATE_IDLE, /* no contents / vTPM idle / cancel complete */
33 VTPM_STATE_SUBMIT, /* request ready / vTPM working */
34 VTPM_STATE_FINISH, /* response ready / vTPM idle */
35 VTPM_STATE_CANCEL, /* cancel requested / vTPM working */
36};
37/* The backend should only change state to IDLE or FINISH, while the
38 * frontend should only change to SUBMIT or CANCEL. */
39
40
41struct vtpm_shared_page {
42 uint32_t length; /* request/response length in bytes */
43
44 uint8_t state; /* enum vtpm_shared_page_state */
45 uint8_t locality; /* for the current request */
46 uint8_t pad;
47
48 uint8_t nr_extra_pages; /* extra pages for long packets; may be zero */
49 uint32_t extra_pages[0]; /* grant IDs; length in nr_extra_pages */
50};
51
52#endif
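
Putting the state machine and the shared-page layout together, a frontend submits a command by copying it after the header (and any extra grant IDs), setting the state to SUBMIT, and notifying the backend, then waits for FINISH. A very rough sketch; notify_backend() and wait_for_state() are hypothetical stand-ins for the event-channel plumbing, and real code also needs memory barriers around the state change:

  #include <linux/errno.h>
  #include <linux/string.h>
  #include <linux/types.h>
  #include <xen/interface/io/tpmif.h>

  /* Hypothetical helpers standing in for event-channel notify/wait. */
  void notify_backend(void);
  uint8_t wait_for_state(struct vtpm_shared_page *shr);

  /* Send one command and wait for the reply; illustrative only. */
  static int example_vtpm_send(struct vtpm_shared_page *shr,
                               const void *cmd, size_t len)
  {
      uint8_t *payload = (uint8_t *)shr + sizeof(*shr) +
                         sizeof(uint32_t) * shr->nr_extra_pages;

      if (shr->state != VTPM_STATE_IDLE && shr->state != VTPM_STATE_FINISH)
          return -EBUSY;              /* vTPM still working */

      memcpy(payload, cmd, len);      /* assumes it fits in the ring page */
      shr->length = len;
      shr->state = VTPM_STATE_SUBMIT; /* hand ownership to the backend */
      notify_backend();

      if (wait_for_state(shr) != VTPM_STATE_FINISH)
          return -EIO;
      return shr->length;             /* response starts at 'payload' */
  }
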
diff --git a/include/xen/interface/vcpu.h b/include/xen/interface/vcpu.h
index 87e6f8a48661..b05288ce3991 100644
--- a/include/xen/interface/vcpu.h
+++ b/include/xen/interface/vcpu.h
@@ -170,4 +170,6 @@ struct vcpu_register_vcpu_info {
170}; 170};
171DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_vcpu_info); 171DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_vcpu_info);
172 172
173/* Send an NMI to the specified VCPU. @extra_arg == NULL. */
174#define VCPUOP_send_nmi 11
173#endif /* __XEN_PUBLIC_VCPU_H__ */ 175#endif /* __XEN_PUBLIC_VCPU_H__ */
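
VCPUOP_send_nmi is used by the x86 Xen SMP code in this merge to deliver NMIs between vCPUs of a domain. A minimal sketch of the call, assuming the HYPERVISOR_vcpu_op() hypercall wrapper and eliding error handling:

  #include <xen/interface/vcpu.h>
  #include <asm/xen/hypercall.h>

  /* Send an NMI to another vCPU of this domain; illustrative only. */
  static int example_send_nmi(int vcpu)
  {
      /* The interface requires extra_arg == NULL for this op. */
      return HYPERVISOR_vcpu_op(VCPUOP_send_nmi, vcpu, NULL);
  }
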
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 60f48fa0fd0d..297a9247a3b3 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -13,6 +13,7 @@
13#include <linux/sort.h> 13#include <linux/sort.h>
14#include <linux/err.h> 14#include <linux/err.h>
15#include <linux/static_key.h> 15#include <linux/static_key.h>
16#include <linux/jump_label_ratelimit.h>
16 17
17#ifdef HAVE_JUMP_LABEL 18#ifdef HAVE_JUMP_LABEL
18 19
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index d23762e6652c..4e8686c7e5a4 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -870,13 +870,13 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
870 swiotlb_full(hwdev, sg->length, dir, 0); 870 swiotlb_full(hwdev, sg->length, dir, 0);
871 swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, 871 swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
872 attrs); 872 attrs);
873 sgl[0].dma_length = 0; 873 sg_dma_len(sgl) = 0;
874 return 0; 874 return 0;
875 } 875 }
876 sg->dma_address = phys_to_dma(hwdev, map); 876 sg->dma_address = phys_to_dma(hwdev, map);
877 } else 877 } else
878 sg->dma_address = dev_addr; 878 sg->dma_address = dev_addr;
879 sg->dma_length = sg->length; 879 sg_dma_len(sg) = sg->length;
880 } 880 }
881 return nelems; 881 return nelems;
882} 882}
@@ -904,7 +904,7 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
904 BUG_ON(dir == DMA_NONE); 904 BUG_ON(dir == DMA_NONE);
905 905
906 for_each_sg(sgl, sg, nelems, i) 906 for_each_sg(sgl, sg, nelems, i)
907 unmap_single(hwdev, sg->dma_address, sg->dma_length, dir); 907 unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir);
908 908
909} 909}
910EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); 910EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
@@ -934,7 +934,7 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
934 934
935 for_each_sg(sgl, sg, nelems, i) 935 for_each_sg(sgl, sg, nelems, i)
936 swiotlb_sync_single(hwdev, sg->dma_address, 936 swiotlb_sync_single(hwdev, sg->dma_address,
937 sg->dma_length, dir, target); 937 sg_dma_len(sg), dir, target);
938} 938}
939 939
940void 940void
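
Switching from the raw dma_length field to the sg_dma_len() accessor lets this code build on architectures whose struct scatterlist has no dma_length member; there the macro falls back to sg->length. A small consumer-side sketch of walking a mapped scatterlist with the accessors (names are illustrative):

  #include <linux/device.h>
  #include <linux/dma-mapping.h>
  #include <linux/scatterlist.h>

  static void example_walk_mapped_sg(struct device *dev,
                                     struct scatterlist *sgl, int nelems)
  {
      struct scatterlist *sg;
      int i;

      for_each_sg(sgl, sg, nelems, i)
          dev_dbg(dev, "seg %d: dma %#llx len %u\n", i,
                  (unsigned long long)sg_dma_address(sg),
                  sg_dma_len(sg));
  }
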