41 files changed, 1645 insertions, 571 deletions
@@ -1120,6 +1120,7 @@ D: author of userfs filesystem | |||
1120 | D: Improved mmap and munmap handling | 1120 | D: Improved mmap and munmap handling |
1121 | D: General mm minor tidyups | 1121 | D: General mm minor tidyups |
1122 | D: autofs v4 maintainer | 1122 | D: autofs v4 maintainer |
1123 | D: Xen subsystem | ||
1123 | S: 987 Alabama St | 1124 | S: 987 Alabama St |
1124 | S: San Francisco | 1125 | S: San Francisco |
1125 | S: CA, 94110 | 1126 | S: CA, 94110 |
diff --git a/Documentation/tpm/xen-tpmfront.txt b/Documentation/tpm/xen-tpmfront.txt new file mode 100644 index 000000000000..69346de87ff3 --- /dev/null +++ b/Documentation/tpm/xen-tpmfront.txt | |||
@@ -0,0 +1,113 @@ | |||
1 | Virtual TPM interface for Xen | ||
2 | |||
3 | Authors: Matthew Fioravante (JHUAPL), Daniel De Graaf (NSA) | ||
4 | |||
5 | This document describes the virtual Trusted Platform Module (vTPM) subsystem for | ||
6 | Xen. The reader is assumed to be familiar with building and installing Xen and | ||
7 | Linux, and to have a basic understanding of TPM and vTPM concepts. | ||
8 | |||
9 | INTRODUCTION | ||
10 | |||
11 | The goal of this work is to provide TPM functionality to a virtual guest | ||
12 | operating system (in Xen terms, a DomU). This allows programs to interact with | ||
13 | a TPM in a virtual system the same way they interact with a TPM on the physical | ||
14 | system. Each guest gets its own unique, emulated software TPM. However, each | ||
15 | of the vTPM's secrets (keys, NVRAM, etc.) is managed by a vTPM Manager domain, | ||
16 | which seals the secrets to the Physical TPM. If the process of creating each of | ||
17 | these domains (manager, vTPM, and guest) is trusted, the vTPM subsystem extends | ||
18 | the chain of trust rooted in the hardware TPM to virtual machines in Xen. Each | ||
19 | major component of vTPM is implemented as a separate domain, providing secure | ||
20 | separation guaranteed by the hypervisor. The vTPM domains are implemented in | ||
21 | mini-os to reduce memory and processor overhead. | ||
22 | |||
23 | This mini-os vTPM subsystem was built on top of the previous vTPM work done by | ||
24 | IBM and Intel. | ||
25 | |||
26 | |||
27 | DESIGN OVERVIEW | ||
28 | --------------- | ||
29 | |||
30 | The architecture of vTPM is described below: | ||
31 | |||
32 | +------------------+ | ||
33 | | Linux DomU | ... | ||
34 | | | ^ | | ||
35 | | v | | | ||
36 | | xen-tpmfront | | ||
37 | +------------------+ | ||
38 | | ^ | ||
39 | v | | ||
40 | +------------------+ | ||
41 | | mini-os/tpmback | | ||
42 | | | ^ | | ||
43 | | v | | | ||
44 | | vtpm-stubdom | ... | ||
45 | | | ^ | | ||
46 | | v | | | ||
47 | | mini-os/tpmfront | | ||
48 | +------------------+ | ||
49 | | ^ | ||
50 | v | | ||
51 | +------------------+ | ||
52 | | mini-os/tpmback | | ||
53 | | | ^ | | ||
54 | | v | | | ||
55 | | vtpmmgr-stubdom | | ||
56 | | | ^ | | ||
57 | | v | | | ||
58 | | mini-os/tpm_tis | | ||
59 | +------------------+ | ||
60 | | ^ | ||
61 | v | | ||
62 | +------------------+ | ||
63 | | Hardware TPM | | ||
64 | +------------------+ | ||
65 | |||
66 | * Linux DomU: The Linux-based guest that wants to use a vTPM. There may be | ||
67 | more than one of these. | ||
68 | |||
69 | * xen-tpmfront.ko: Linux kernel virtual TPM frontend driver. This driver | ||
70 | provides vTPM access to a Linux-based DomU. | ||
71 | |||
72 | * mini-os/tpmback: Mini-os TPM backend driver. The Linux frontend driver | ||
73 | connects to this backend driver to facilitate communication | ||
74 | between the Linux DomU and its vTPM. This driver is also | ||
75 | used by vtpmmgr-stubdom to communicate with vtpm-stubdom. | ||
76 | |||
77 | * vtpm-stubdom: A mini-os stub domain that implements a vTPM. There is a | ||
78 | one-to-one mapping between running vtpm-stubdom instances and | ||
79 | logical vTPMs on the system. The vTPM Platform Configuration | ||
80 | Registers (PCRs) are normally all initialized to zero. | ||
81 | |||
82 | * mini-os/tpmfront: Mini-os TPM frontend driver. The vTPM mini-os domain | ||
83 | vtpm-stubdom uses this driver to communicate with | ||
84 | vtpmmgr-stubdom. This driver is also used in mini-os | ||
85 | domains such as pv-grub that talk to the vTPM domain. | ||
86 | |||
87 | * vtpmmgr-stubdom: A mini-os domain that implements the vTPM manager. There is | ||
88 | only one vTPM manager and it should be running during the | ||
89 | entire lifetime of the machine. This domain regulates | ||
90 | access to the physical TPM on the system and secures the | ||
91 | persistent state of each vTPM. | ||
92 | |||
93 | * mini-os/tpm_tis: Mini-os TPM version 1.2 TPM Interface Specification (TIS) | ||
94 | driver. This driver is used by vtpmmgr-stubdom to talk directly to | ||
95 | the hardware TPM. Communication is facilitated by mapping | ||
96 | hardware memory pages into vtpmmgr-stubdom. | ||
97 | |||
98 | * Hardware TPM: The physical TPM that is soldered onto the motherboard. | ||
99 | |||
100 | |||
101 | INTEGRATION WITH XEN | ||
102 | -------------------- | ||
103 | |||
104 | Support for the vTPM driver was added to the libxl toolstack in Xen | ||
105 | 4.3. See the Xen documentation (docs/misc/vtpm.txt) for details on setting up | ||
106 | the vTPM and vTPM Manager stub domains. Once the stub domains are running, a | ||
107 | vTPM device is set up in the same manner as a disk or network device in the | ||
108 | domain's configuration file. | ||
109 | |||
110 | In order to use features such as IMA that require a TPM to be loaded prior to | ||
111 | the initrd, the xen-tpmfront driver must be compiled into the kernel. If not | ||
112 | using such features, the driver can be compiled as a module and will be loaded | ||
113 | as usual. | ||
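For illustration only, a guest configuration stanza for attaching a vTPM might look like the following sketch. The device and backend names here are placeholders, not a verbatim syntax reference; docs/misc/vtpm.txt in the Xen tree is authoritative.

    # Hypothetical fragment of a DomU config file; 'domu-vtpm' stands in
    # for whatever the guest's vtpm-stubdom domain is named on your system.
    vtpm = [ 'backend=domu-vtpm' ]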
diff --git a/MAINTAINERS b/MAINTAINERS index 8197fbd70a3e..94aa87dc6d2a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -9247,9 +9247,9 @@ F: drivers/media/tuners/tuner-xc2028.* | |||
9247 | 9247 | ||
9248 | XEN HYPERVISOR INTERFACE | 9248 | XEN HYPERVISOR INTERFACE |
9249 | M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | 9249 | M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> |
9250 | M: Jeremy Fitzhardinge <jeremy@goop.org> | 9250 | M: Boris Ostrovsky <boris.ostrovsky@oracle.com> |
9251 | L: xen-devel@lists.xensource.com (moderated for non-subscribers) | 9251 | M: David Vrabel <david.vrabel@citrix.com> |
9252 | L: virtualization@lists.linux-foundation.org | 9252 | L: xen-devel@lists.xenproject.org (moderated for non-subscribers) |
9253 | S: Supported | 9253 | S: Supported |
9254 | F: arch/x86/xen/ | 9254 | F: arch/x86/xen/ |
9255 | F: drivers/*/xen-*front.c | 9255 | F: drivers/*/xen-*front.c |
@@ -9260,35 +9260,35 @@ F: include/uapi/xen/ | |||
9260 | 9260 | ||
9261 | XEN HYPERVISOR ARM | 9261 | XEN HYPERVISOR ARM |
9262 | M: Stefano Stabellini <stefano.stabellini@eu.citrix.com> | 9262 | M: Stefano Stabellini <stefano.stabellini@eu.citrix.com> |
9263 | L: xen-devel@lists.xensource.com (moderated for non-subscribers) | 9263 | L: xen-devel@lists.xenproject.org (moderated for non-subscribers) |
9264 | S: Supported | 9264 | S: Supported |
9265 | F: arch/arm/xen/ | 9265 | F: arch/arm/xen/ |
9266 | F: arch/arm/include/asm/xen/ | 9266 | F: arch/arm/include/asm/xen/ |
9267 | 9267 | ||
9268 | XEN HYPERVISOR ARM64 | 9268 | XEN HYPERVISOR ARM64 |
9269 | M: Stefano Stabellini <stefano.stabellini@eu.citrix.com> | 9269 | M: Stefano Stabellini <stefano.stabellini@eu.citrix.com> |
9270 | L: xen-devel@lists.xensource.com (moderated for non-subscribers) | 9270 | L: xen-devel@lists.xenproject.org (moderated for non-subscribers) |
9271 | S: Supported | 9271 | S: Supported |
9272 | F: arch/arm64/xen/ | 9272 | F: arch/arm64/xen/ |
9273 | F: arch/arm64/include/asm/xen/ | 9273 | F: arch/arm64/include/asm/xen/ |
9274 | 9274 | ||
9275 | XEN NETWORK BACKEND DRIVER | 9275 | XEN NETWORK BACKEND DRIVER |
9276 | M: Ian Campbell <ian.campbell@citrix.com> | 9276 | M: Ian Campbell <ian.campbell@citrix.com> |
9277 | L: xen-devel@lists.xensource.com (moderated for non-subscribers) | 9277 | L: xen-devel@lists.xenproject.org (moderated for non-subscribers) |
9278 | L: netdev@vger.kernel.org | 9278 | L: netdev@vger.kernel.org |
9279 | S: Supported | 9279 | S: Supported |
9280 | F: drivers/net/xen-netback/* | 9280 | F: drivers/net/xen-netback/* |
9281 | 9281 | ||
9282 | XEN PCI SUBSYSTEM | 9282 | XEN PCI SUBSYSTEM |
9283 | M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | 9283 | M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> |
9284 | L: xen-devel@lists.xensource.com (moderated for non-subscribers) | 9284 | L: xen-devel@lists.xenproject.org (moderated for non-subscribers) |
9285 | S: Supported | 9285 | S: Supported |
9286 | F: arch/x86/pci/*xen* | 9286 | F: arch/x86/pci/*xen* |
9287 | F: drivers/pci/*xen* | 9287 | F: drivers/pci/*xen* |
9288 | 9288 | ||
9289 | XEN SWIOTLB SUBSYSTEM | 9289 | XEN SWIOTLB SUBSYSTEM |
9290 | M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | 9290 | M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> |
9291 | L: xen-devel@lists.xensource.com (moderated for non-subscribers) | 9291 | L: xen-devel@lists.xenproject.org (moderated for non-subscribers) |
9292 | S: Supported | 9292 | S: Supported |
9293 | F: arch/x86/xen/*swiotlb* | 9293 | F: arch/x86/xen/*swiotlb* |
9294 | F: drivers/xen/*swiotlb* | 9294 | F: drivers/xen/*swiotlb* |
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 8a6295c86209..83e4f959ee47 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c | |||
@@ -21,6 +21,8 @@ | |||
21 | #include <linux/of.h> | 21 | #include <linux/of.h> |
22 | #include <linux/of_irq.h> | 22 | #include <linux/of_irq.h> |
23 | #include <linux/of_address.h> | 23 | #include <linux/of_address.h> |
24 | #include <linux/cpuidle.h> | ||
25 | #include <linux/cpufreq.h> | ||
24 | 26 | ||
25 | #include <linux/mm.h> | 27 | #include <linux/mm.h> |
26 | 28 | ||
@@ -267,18 +269,28 @@ static int __init xen_guest_init(void) | |||
267 | if (!xen_initial_domain()) | 269 | if (!xen_initial_domain()) |
268 | xenbus_probe(NULL); | 270 | xenbus_probe(NULL); |
269 | 271 | ||
272 | /* | ||
273 | * Make sure board-specific code will not set up ops for | ||
274 | * cpuidle and cpufreq. | ||
275 | */ | ||
276 | disable_cpuidle(); | ||
277 | disable_cpufreq(); | ||
278 | |||
270 | return 0; | 279 | return 0; |
271 | } | 280 | } |
272 | core_initcall(xen_guest_init); | 281 | core_initcall(xen_guest_init); |
273 | 282 | ||
274 | static int __init xen_pm_init(void) | 283 | static int __init xen_pm_init(void) |
275 | { | 284 | { |
285 | if (!xen_domain()) | ||
286 | return -ENODEV; | ||
287 | |||
276 | pm_power_off = xen_power_off; | 288 | pm_power_off = xen_power_off; |
277 | arm_pm_restart = xen_restart; | 289 | arm_pm_restart = xen_restart; |
278 | 290 | ||
279 | return 0; | 291 | return 0; |
280 | } | 292 | } |
281 | subsys_initcall(xen_pm_init); | 293 | late_initcall(xen_pm_init); |
282 | 294 | ||
283 | static irqreturn_t xen_arm_callback(int irq, void *arg) | 295 | static irqreturn_t xen_arm_callback(int irq, void *arg) |
284 | { | 296 | { |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b32ebf92b0ce..b1fb846e6dac 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -632,6 +632,7 @@ config PARAVIRT_DEBUG | |||
632 | config PARAVIRT_SPINLOCKS | 632 | config PARAVIRT_SPINLOCKS |
633 | bool "Paravirtualization layer for spinlocks" | 633 | bool "Paravirtualization layer for spinlocks" |
634 | depends on PARAVIRT && SMP | 634 | depends on PARAVIRT && SMP |
635 | select UNINLINE_SPIN_UNLOCK | ||
635 | ---help--- | 636 | ---help--- |
636 | Paravirtualized spinlocks allow a pvops backend to replace the | 637 | Paravirtualized spinlocks allow a pvops backend to replace the |
637 | spinlock implementation with something virtualization-friendly | 638 | spinlock implementation with something virtualization-friendly |
@@ -656,6 +657,15 @@ config KVM_GUEST | |||
656 | underlying device model, the host provides the guest with | 657 | underlying device model, the host provides the guest with |
657 | timing infrastructure such as time of day, and system time | 658 | timing infrastructure such as time of day, and system time |
658 | 659 | ||
660 | config KVM_DEBUG_FS | ||
661 | bool "Enable debug information for KVM Guests in debugfs" | ||
662 | depends on KVM_GUEST && DEBUG_FS | ||
663 | default n | ||
664 | ---help--- | ||
665 | This option enables collection of various statistics for KVM guests. | ||
666 | Statistics are displayed in the debugfs filesystem. Enabling this option | ||
667 | may incur significant overhead. | ||
668 | |||
659 | source "arch/x86/lguest/Kconfig" | 669 | source "arch/x86/lguest/Kconfig" |
660 | 670 | ||
661 | config PARAVIRT_TIME_ACCOUNTING | 671 | config PARAVIRT_TIME_ACCOUNTING |
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 695399f2d5eb..427afcbf3d55 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h | |||
@@ -118,10 +118,20 @@ void kvm_async_pf_task_wait(u32 token); | |||
118 | void kvm_async_pf_task_wake(u32 token); | 118 | void kvm_async_pf_task_wake(u32 token); |
119 | u32 kvm_read_and_reset_pf_reason(void); | 119 | u32 kvm_read_and_reset_pf_reason(void); |
120 | extern void kvm_disable_steal_time(void); | 120 | extern void kvm_disable_steal_time(void); |
121 | #else | 121 | |
122 | #define kvm_guest_init() do { } while (0) | 122 | #ifdef CONFIG_PARAVIRT_SPINLOCKS |
123 | void __init kvm_spinlock_init(void); | ||
124 | #else /* !CONFIG_PARAVIRT_SPINLOCKS */ | ||
125 | static inline void kvm_spinlock_init(void) | ||
126 | { | ||
127 | } | ||
128 | #endif /* CONFIG_PARAVIRT_SPINLOCKS */ | ||
129 | |||
130 | #else /* CONFIG_KVM_GUEST */ | ||
131 | #define kvm_guest_init() do {} while (0) | ||
123 | #define kvm_async_pf_task_wait(T) do {} while(0) | 132 | #define kvm_async_pf_task_wait(T) do {} while(0) |
124 | #define kvm_async_pf_task_wake(T) do {} while(0) | 133 | #define kvm_async_pf_task_wake(T) do {} while(0) |
134 | |||
125 | static inline u32 kvm_read_and_reset_pf_reason(void) | 135 | static inline u32 kvm_read_and_reset_pf_reason(void) |
126 | { | 136 | { |
127 | return 0; | 137 | return 0; |
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index cfdc9ee4c900..401f350ef71b 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -712,36 +712,16 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, | |||
712 | 712 | ||
713 | #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) | 713 | #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) |
714 | 714 | ||
715 | static inline int arch_spin_is_locked(struct arch_spinlock *lock) | 715 | static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock, |
716 | __ticket_t ticket) | ||
716 | { | 717 | { |
717 | return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock); | 718 | PVOP_VCALLEE2(pv_lock_ops.lock_spinning, lock, ticket); |
718 | } | 719 | } |
719 | 720 | ||
720 | static inline int arch_spin_is_contended(struct arch_spinlock *lock) | 721 | static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock, |
722 | __ticket_t ticket) | ||
721 | { | 723 | { |
722 | return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock); | 724 | PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket); |
723 | } | ||
724 | #define arch_spin_is_contended arch_spin_is_contended | ||
725 | |||
726 | static __always_inline void arch_spin_lock(struct arch_spinlock *lock) | ||
727 | { | ||
728 | PVOP_VCALL1(pv_lock_ops.spin_lock, lock); | ||
729 | } | ||
730 | |||
731 | static __always_inline void arch_spin_lock_flags(struct arch_spinlock *lock, | ||
732 | unsigned long flags) | ||
733 | { | ||
734 | PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags); | ||
735 | } | ||
736 | |||
737 | static __always_inline int arch_spin_trylock(struct arch_spinlock *lock) | ||
738 | { | ||
739 | return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock); | ||
740 | } | ||
741 | |||
742 | static __always_inline void arch_spin_unlock(struct arch_spinlock *lock) | ||
743 | { | ||
744 | PVOP_VCALL1(pv_lock_ops.spin_unlock, lock); | ||
745 | } | 725 | } |
746 | 726 | ||
747 | #endif | 727 | #endif |
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 0db1fcac668c..04ac40e192eb 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h | |||
@@ -327,13 +327,15 @@ struct pv_mmu_ops { | |||
327 | }; | 327 | }; |
328 | 328 | ||
329 | struct arch_spinlock; | 329 | struct arch_spinlock; |
330 | #ifdef CONFIG_SMP | ||
331 | #include <asm/spinlock_types.h> | ||
332 | #else | ||
333 | typedef u16 __ticket_t; | ||
334 | #endif | ||
335 | |||
330 | struct pv_lock_ops { | 336 | struct pv_lock_ops { |
331 | int (*spin_is_locked)(struct arch_spinlock *lock); | 337 | struct paravirt_callee_save lock_spinning; |
332 | int (*spin_is_contended)(struct arch_spinlock *lock); | 338 | void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket); |
333 | void (*spin_lock)(struct arch_spinlock *lock); | ||
334 | void (*spin_lock_flags)(struct arch_spinlock *lock, unsigned long flags); | ||
335 | int (*spin_trylock)(struct arch_spinlock *lock); | ||
336 | void (*spin_unlock)(struct arch_spinlock *lock); | ||
337 | }; | 339 | }; |
338 | 340 | ||
339 | /* This contains all the paravirt structures: we get a convenient | 341 | /* This contains all the paravirt structures: we get a convenient |
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index e3ddd7db723f..8963bfeea82a 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h | |||
@@ -1,11 +1,14 @@ | |||
1 | #ifndef _ASM_X86_SPINLOCK_H | 1 | #ifndef _ASM_X86_SPINLOCK_H |
2 | #define _ASM_X86_SPINLOCK_H | 2 | #define _ASM_X86_SPINLOCK_H |
3 | 3 | ||
4 | #include <linux/jump_label.h> | ||
4 | #include <linux/atomic.h> | 5 | #include <linux/atomic.h> |
5 | #include <asm/page.h> | 6 | #include <asm/page.h> |
6 | #include <asm/processor.h> | 7 | #include <asm/processor.h> |
7 | #include <linux/compiler.h> | 8 | #include <linux/compiler.h> |
8 | #include <asm/paravirt.h> | 9 | #include <asm/paravirt.h> |
10 | #include <asm/bitops.h> | ||
11 | |||
9 | /* | 12 | /* |
10 | * Your basic SMP spinlocks, allowing only a single CPU anywhere | 13 | * Your basic SMP spinlocks, allowing only a single CPU anywhere |
11 | * | 14 | * |
@@ -34,6 +37,31 @@ | |||
34 | # define UNLOCK_LOCK_PREFIX | 37 | # define UNLOCK_LOCK_PREFIX |
35 | #endif | 38 | #endif |
36 | 39 | ||
40 | /* How long a lock should spin before we consider blocking */ | ||
41 | #define SPIN_THRESHOLD (1 << 15) | ||
42 | |||
43 | extern struct static_key paravirt_ticketlocks_enabled; | ||
44 | static __always_inline bool static_key_false(struct static_key *key); | ||
45 | |||
46 | #ifdef CONFIG_PARAVIRT_SPINLOCKS | ||
47 | |||
48 | static inline void __ticket_enter_slowpath(arch_spinlock_t *lock) | ||
49 | { | ||
50 | set_bit(0, (volatile unsigned long *)&lock->tickets.tail); | ||
51 | } | ||
52 | |||
53 | #else /* !CONFIG_PARAVIRT_SPINLOCKS */ | ||
54 | static __always_inline void __ticket_lock_spinning(arch_spinlock_t *lock, | ||
55 | __ticket_t ticket) | ||
56 | { | ||
57 | } | ||
58 | static inline void __ticket_unlock_kick(arch_spinlock_t *lock, | ||
59 | __ticket_t ticket) | ||
60 | { | ||
61 | } | ||
62 | |||
63 | #endif /* CONFIG_PARAVIRT_SPINLOCKS */ | ||
64 | |||
37 | /* | 65 | /* |
38 | * Ticket locks are conceptually two parts, one indicating the current head of | 66 | * Ticket locks are conceptually two parts, one indicating the current head of |
39 | * the queue, and the other indicating the current tail. The lock is acquired | 67 | * the queue, and the other indicating the current tail. The lock is acquired |
@@ -47,81 +75,101 @@ | |||
47 | * in the high part, because a wide xadd increment of the low part would carry | 75 | * in the high part, because a wide xadd increment of the low part would carry |
48 | * up and contaminate the high part. | 76 | * up and contaminate the high part. |
49 | */ | 77 | */ |
50 | static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) | 78 | static __always_inline void arch_spin_lock(arch_spinlock_t *lock) |
51 | { | 79 | { |
52 | register struct __raw_tickets inc = { .tail = 1 }; | 80 | register struct __raw_tickets inc = { .tail = TICKET_LOCK_INC }; |
53 | 81 | ||
54 | inc = xadd(&lock->tickets, inc); | 82 | inc = xadd(&lock->tickets, inc); |
83 | if (likely(inc.head == inc.tail)) | ||
84 | goto out; | ||
55 | 85 | ||
86 | inc.tail &= ~TICKET_SLOWPATH_FLAG; | ||
56 | for (;;) { | 87 | for (;;) { |
57 | if (inc.head == inc.tail) | 88 | unsigned count = SPIN_THRESHOLD; |
58 | break; | 89 | |
59 | cpu_relax(); | 90 | do { |
60 | inc.head = ACCESS_ONCE(lock->tickets.head); | 91 | if (ACCESS_ONCE(lock->tickets.head) == inc.tail) |
92 | goto out; | ||
93 | cpu_relax(); | ||
94 | } while (--count); | ||
95 | __ticket_lock_spinning(lock, inc.tail); | ||
61 | } | 96 | } |
62 | barrier(); /* make sure nothing creeps before the lock is taken */ | 97 | out: barrier(); /* make sure nothing creeps before the lock is taken */ |
63 | } | 98 | } |
64 | 99 | ||
65 | static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) | 100 | static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) |
66 | { | 101 | { |
67 | arch_spinlock_t old, new; | 102 | arch_spinlock_t old, new; |
68 | 103 | ||
69 | old.tickets = ACCESS_ONCE(lock->tickets); | 104 | old.tickets = ACCESS_ONCE(lock->tickets); |
70 | if (old.tickets.head != old.tickets.tail) | 105 | if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG)) |
71 | return 0; | 106 | return 0; |
72 | 107 | ||
73 | new.head_tail = old.head_tail + (1 << TICKET_SHIFT); | 108 | new.head_tail = old.head_tail + (TICKET_LOCK_INC << TICKET_SHIFT); |
74 | 109 | ||
75 | /* cmpxchg is a full barrier, so nothing can move before it */ | 110 | /* cmpxchg is a full barrier, so nothing can move before it */ |
76 | return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail; | 111 | return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail; |
77 | } | 112 | } |
78 | 113 | ||
79 | static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) | 114 | static inline void __ticket_unlock_slowpath(arch_spinlock_t *lock, |
115 | arch_spinlock_t old) | ||
80 | { | 116 | { |
81 | __add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX); | 117 | arch_spinlock_t new; |
118 | |||
119 | BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS); | ||
120 | |||
121 | /* Perform the unlock on the "before" copy */ | ||
122 | old.tickets.head += TICKET_LOCK_INC; | ||
123 | |||
124 | /* Clear the slowpath flag */ | ||
125 | new.head_tail = old.head_tail & ~(TICKET_SLOWPATH_FLAG << TICKET_SHIFT); | ||
126 | |||
127 | /* | ||
128 | * If the lock is uncontended, clear the flag - use cmpxchg in | ||
129 | * case it changes behind our back though. | ||
130 | */ | ||
131 | if (new.tickets.head != new.tickets.tail || | ||
132 | cmpxchg(&lock->head_tail, old.head_tail, | ||
133 | new.head_tail) != old.head_tail) { | ||
134 | /* | ||
135 | * Lock still has someone queued for it, so wake up an | ||
136 | * appropriate waiter. | ||
137 | */ | ||
138 | __ticket_unlock_kick(lock, old.tickets.head); | ||
139 | } | ||
82 | } | 140 | } |
83 | 141 | ||
84 | static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) | 142 | static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) |
85 | { | 143 | { |
86 | struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); | 144 | if (TICKET_SLOWPATH_FLAG && |
145 | static_key_false(¶virt_ticketlocks_enabled)) { | ||
146 | arch_spinlock_t prev; | ||
87 | 147 | ||
88 | return tmp.tail != tmp.head; | 148 | prev = *lock; |
89 | } | 149 | add_smp(&lock->tickets.head, TICKET_LOCK_INC); |
90 | 150 | ||
91 | static inline int __ticket_spin_is_contended(arch_spinlock_t *lock) | 151 | /* add_smp() is a full mb() */ |
92 | { | ||
93 | struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); | ||
94 | 152 | ||
95 | return (__ticket_t)(tmp.tail - tmp.head) > 1; | 153 | if (unlikely(lock->tickets.tail & TICKET_SLOWPATH_FLAG)) |
154 | __ticket_unlock_slowpath(lock, prev); | ||
155 | } else | ||
156 | __add(&lock->tickets.head, TICKET_LOCK_INC, UNLOCK_LOCK_PREFIX); | ||
96 | } | 157 | } |
97 | 158 | ||
98 | #ifndef CONFIG_PARAVIRT_SPINLOCKS | ||
99 | |||
100 | static inline int arch_spin_is_locked(arch_spinlock_t *lock) | 159 | static inline int arch_spin_is_locked(arch_spinlock_t *lock) |
101 | { | 160 | { |
102 | return __ticket_spin_is_locked(lock); | 161 | struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); |
103 | } | ||
104 | |||
105 | static inline int arch_spin_is_contended(arch_spinlock_t *lock) | ||
106 | { | ||
107 | return __ticket_spin_is_contended(lock); | ||
108 | } | ||
109 | #define arch_spin_is_contended arch_spin_is_contended | ||
110 | 162 | ||
111 | static __always_inline void arch_spin_lock(arch_spinlock_t *lock) | 163 | return tmp.tail != tmp.head; |
112 | { | ||
113 | __ticket_spin_lock(lock); | ||
114 | } | 164 | } |
115 | 165 | ||
116 | static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) | 166 | static inline int arch_spin_is_contended(arch_spinlock_t *lock) |
117 | { | 167 | { |
118 | return __ticket_spin_trylock(lock); | 168 | struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); |
119 | } | ||
120 | 169 | ||
121 | static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) | 170 | return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC; |
122 | { | ||
123 | __ticket_spin_unlock(lock); | ||
124 | } | 171 | } |
172 | #define arch_spin_is_contended arch_spin_is_contended | ||
125 | 173 | ||
126 | static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock, | 174 | static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock, |
127 | unsigned long flags) | 175 | unsigned long flags) |
@@ -129,8 +177,6 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock, | |||
129 | arch_spin_lock(lock); | 177 | arch_spin_lock(lock); |
130 | } | 178 | } |
131 | 179 | ||
132 | #endif /* CONFIG_PARAVIRT_SPINLOCKS */ | ||
133 | |||
134 | static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) | 180 | static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) |
135 | { | 181 | { |
136 | while (arch_spin_is_locked(lock)) | 182 | while (arch_spin_is_locked(lock)) |
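As a side note, the ticket-lock comment in the hunk above ("Ticket locks are conceptually two parts...") can be made concrete with a minimal user-space sketch using C11 atomics. This is an illustrative analogue under simplifying assumptions, not kernel code: the kernel packs head and tail into one word so a single xadd both takes a ticket and reads the current head, and the paravirtual variant replaces the bare spin with __ticket_lock_spinning() after SPIN_THRESHOLD iterations.

    /* Minimal user-space ticket lock sketch (illustrative only). */
    #include <stdatomic.h>
    #include <stdint.h>

    typedef struct {
            _Atomic uint8_t head;   /* ticket currently being served */
            _Atomic uint8_t tail;   /* next ticket to hand out */
    } ticketlock_t;

    static void ticket_lock(ticketlock_t *l)
    {
            /* take the next ticket; fetch_add returns our ticket number */
            uint8_t me = atomic_fetch_add(&l->tail, 1);

            /* spin until our ticket is served; the PV version would block
             * in __ticket_lock_spinning() instead of spinning forever */
            while (atomic_load(&l->head) != me)
                    ;
    }

    static void ticket_unlock(ticketlock_t *l)
    {
            atomic_fetch_add(&l->head, 1);  /* serve the next waiter */
    }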
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h index ad0ad07fc006..4f1bea19945b 100644 --- a/arch/x86/include/asm/spinlock_types.h +++ b/arch/x86/include/asm/spinlock_types.h | |||
@@ -1,13 +1,17 @@ | |||
1 | #ifndef _ASM_X86_SPINLOCK_TYPES_H | 1 | #ifndef _ASM_X86_SPINLOCK_TYPES_H |
2 | #define _ASM_X86_SPINLOCK_TYPES_H | 2 | #define _ASM_X86_SPINLOCK_TYPES_H |
3 | 3 | ||
4 | #ifndef __LINUX_SPINLOCK_TYPES_H | ||
5 | # error "please don't include this file directly" | ||
6 | #endif | ||
7 | |||
8 | #include <linux/types.h> | 4 | #include <linux/types.h> |
9 | 5 | ||
10 | #if (CONFIG_NR_CPUS < 256) | 6 | #ifdef CONFIG_PARAVIRT_SPINLOCKS |
7 | #define __TICKET_LOCK_INC 2 | ||
8 | #define TICKET_SLOWPATH_FLAG ((__ticket_t)1) | ||
9 | #else | ||
10 | #define __TICKET_LOCK_INC 1 | ||
11 | #define TICKET_SLOWPATH_FLAG ((__ticket_t)0) | ||
12 | #endif | ||
13 | |||
14 | #if (CONFIG_NR_CPUS < (256 / __TICKET_LOCK_INC)) | ||
11 | typedef u8 __ticket_t; | 15 | typedef u8 __ticket_t; |
12 | typedef u16 __ticketpair_t; | 16 | typedef u16 __ticketpair_t; |
13 | #else | 17 | #else |
@@ -15,6 +19,8 @@ typedef u16 __ticket_t; | |||
15 | typedef u32 __ticketpair_t; | 19 | typedef u32 __ticketpair_t; |
16 | #endif | 20 | #endif |
17 | 21 | ||
22 | #define TICKET_LOCK_INC ((__ticket_t)__TICKET_LOCK_INC) | ||
23 | |||
18 | #define TICKET_SHIFT (sizeof(__ticket_t) * 8) | 24 | #define TICKET_SHIFT (sizeof(__ticket_t) * 8) |
19 | 25 | ||
20 | typedef struct arch_spinlock { | 26 | typedef struct arch_spinlock { |
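A quick worked example of the encoding above may help (illustrative user-space C, not kernel code): with CONFIG_PARAVIRT_SPINLOCKS the ticket counter advances in steps of 2, leaving bit 0 of the tail free to act as TICKET_SLOWPATH_FLAG. That halves the usable ticket space, which is exactly why the __ticket_t width check becomes CONFIG_NR_CPUS < (256 / __TICKET_LOCK_INC).

    #include <assert.h>
    #include <stdint.h>

    #define TICKET_LOCK_INC      2u  /* PV case: tickets go 0, 2, 4, ... */
    #define TICKET_SLOWPATH_FLAG 1u  /* bit 0 of tail: "a waiter blocked" */

    int main(void)
    {
            uint8_t tail = 3 * TICKET_LOCK_INC;  /* fourth ticket handed out */

            tail |= TICKET_SLOWPATH_FLAG;        /* waiter entered slowpath */
            assert((tail & ~TICKET_SLOWPATH_FLAG) == 3 * TICKET_LOCK_INC);

            /* a u8 ticket now distinguishes 256 / 2 == 128 CPUs, not 256 */
            return 0;
    }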
diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h index ca842f2769ef..608a79d5a466 100644 --- a/arch/x86/include/asm/xen/events.h +++ b/arch/x86/include/asm/xen/events.h | |||
@@ -7,6 +7,7 @@ enum ipi_vector { | |||
7 | XEN_CALL_FUNCTION_SINGLE_VECTOR, | 7 | XEN_CALL_FUNCTION_SINGLE_VECTOR, |
8 | XEN_SPIN_UNLOCK_VECTOR, | 8 | XEN_SPIN_UNLOCK_VECTOR, |
9 | XEN_IRQ_WORK_VECTOR, | 9 | XEN_IRQ_WORK_VECTOR, |
10 | XEN_NMI_VECTOR, | ||
10 | 11 | ||
11 | XEN_NR_IPIS, | 12 | XEN_NR_IPIS, |
12 | }; | 13 | }; |
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 06fdbd987e97..94dc8ca434e0 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h | |||
@@ -23,6 +23,7 @@ | |||
23 | #define KVM_FEATURE_ASYNC_PF 4 | 23 | #define KVM_FEATURE_ASYNC_PF 4 |
24 | #define KVM_FEATURE_STEAL_TIME 5 | 24 | #define KVM_FEATURE_STEAL_TIME 5 |
25 | #define KVM_FEATURE_PV_EOI 6 | 25 | #define KVM_FEATURE_PV_EOI 6 |
26 | #define KVM_FEATURE_PV_UNHALT 7 | ||
26 | 27 | ||
27 | /* The last 8 bits are used to indicate how to interpret the flags field | 28 | /* The last 8 bits are used to indicate how to interpret the flags field |
28 | * in pvclock structure. If no bits are set, all flags are ignored. | 29 | * in pvclock structure. If no bits are set, all flags are ignored. |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index a96d32cc55b8..56e2fa4a8b13 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/sched.h> | 34 | #include <linux/sched.h> |
35 | #include <linux/slab.h> | 35 | #include <linux/slab.h> |
36 | #include <linux/kprobes.h> | 36 | #include <linux/kprobes.h> |
37 | #include <linux/debugfs.h> | ||
37 | #include <asm/timer.h> | 38 | #include <asm/timer.h> |
38 | #include <asm/cpu.h> | 39 | #include <asm/cpu.h> |
39 | #include <asm/traps.h> | 40 | #include <asm/traps.h> |
@@ -419,6 +420,7 @@ static void __init kvm_smp_prepare_boot_cpu(void) | |||
419 | WARN_ON(kvm_register_clock("primary cpu clock")); | 420 | WARN_ON(kvm_register_clock("primary cpu clock")); |
420 | kvm_guest_cpu_init(); | 421 | kvm_guest_cpu_init(); |
421 | native_smp_prepare_boot_cpu(); | 422 | native_smp_prepare_boot_cpu(); |
423 | kvm_spinlock_init(); | ||
422 | } | 424 | } |
423 | 425 | ||
424 | static void kvm_guest_cpu_online(void *dummy) | 426 | static void kvm_guest_cpu_online(void *dummy) |
@@ -523,3 +525,263 @@ static __init int activate_jump_labels(void) | |||
523 | return 0; | 525 | return 0; |
524 | } | 526 | } |
525 | arch_initcall(activate_jump_labels); | 527 | arch_initcall(activate_jump_labels); |
528 | |||
529 | #ifdef CONFIG_PARAVIRT_SPINLOCKS | ||
530 | |||
531 | /* Kick a cpu by its apicid. Used to wake up a halted vcpu */ | ||
532 | static void kvm_kick_cpu(int cpu) | ||
533 | { | ||
534 | int apicid; | ||
535 | unsigned long flags = 0; | ||
536 | |||
537 | apicid = per_cpu(x86_cpu_to_apicid, cpu); | ||
538 | kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid); | ||
539 | } | ||
540 | |||
541 | enum kvm_contention_stat { | ||
542 | TAKEN_SLOW, | ||
543 | TAKEN_SLOW_PICKUP, | ||
544 | RELEASED_SLOW, | ||
545 | RELEASED_SLOW_KICKED, | ||
546 | NR_CONTENTION_STATS | ||
547 | }; | ||
548 | |||
549 | #ifdef CONFIG_KVM_DEBUG_FS | ||
550 | #define HISTO_BUCKETS 30 | ||
551 | |||
552 | static struct kvm_spinlock_stats | ||
553 | { | ||
554 | u32 contention_stats[NR_CONTENTION_STATS]; | ||
555 | u32 histo_spin_blocked[HISTO_BUCKETS+1]; | ||
556 | u64 time_blocked; | ||
557 | } spinlock_stats; | ||
558 | |||
559 | static u8 zero_stats; | ||
560 | |||
561 | static inline void check_zero(void) | ||
562 | { | ||
563 | u8 ret; | ||
564 | u8 old; | ||
565 | |||
566 | old = ACCESS_ONCE(zero_stats); | ||
567 | if (unlikely(old)) { | ||
568 | ret = cmpxchg(&zero_stats, old, 0); | ||
569 | /* This ensures only one fellow resets the stat */ | ||
570 | if (ret == old) | ||
571 | memset(&spinlock_stats, 0, sizeof(spinlock_stats)); | ||
572 | } | ||
573 | } | ||
574 | |||
575 | static inline void add_stats(enum kvm_contention_stat var, u32 val) | ||
576 | { | ||
577 | check_zero(); | ||
578 | spinlock_stats.contention_stats[var] += val; | ||
579 | } | ||
580 | |||
581 | |||
582 | static inline u64 spin_time_start(void) | ||
583 | { | ||
584 | return sched_clock(); | ||
585 | } | ||
586 | |||
587 | static void __spin_time_accum(u64 delta, u32 *array) | ||
588 | { | ||
589 | unsigned index; | ||
590 | |||
591 | index = ilog2(delta); | ||
592 | check_zero(); | ||
593 | |||
594 | if (index < HISTO_BUCKETS) | ||
595 | array[index]++; | ||
596 | else | ||
597 | array[HISTO_BUCKETS]++; | ||
598 | } | ||
599 | |||
600 | static inline void spin_time_accum_blocked(u64 start) | ||
601 | { | ||
602 | u32 delta; | ||
603 | |||
604 | delta = sched_clock() - start; | ||
605 | __spin_time_accum(delta, spinlock_stats.histo_spin_blocked); | ||
606 | spinlock_stats.time_blocked += delta; | ||
607 | } | ||
608 | |||
609 | static struct dentry *d_spin_debug; | ||
610 | static struct dentry *d_kvm_debug; | ||
611 | |||
612 | struct dentry *kvm_init_debugfs(void) | ||
613 | { | ||
614 | d_kvm_debug = debugfs_create_dir("kvm", NULL); | ||
615 | if (!d_kvm_debug) | ||
616 | printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n"); | ||
617 | |||
618 | return d_kvm_debug; | ||
619 | } | ||
620 | |||
621 | static int __init kvm_spinlock_debugfs(void) | ||
622 | { | ||
623 | struct dentry *d_kvm; | ||
624 | |||
625 | d_kvm = kvm_init_debugfs(); | ||
626 | if (d_kvm == NULL) | ||
627 | return -ENOMEM; | ||
628 | |||
629 | d_spin_debug = debugfs_create_dir("spinlocks", d_kvm); | ||
630 | |||
631 | debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats); | ||
632 | |||
633 | debugfs_create_u32("taken_slow", 0444, d_spin_debug, | ||
634 | &spinlock_stats.contention_stats[TAKEN_SLOW]); | ||
635 | debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug, | ||
636 | &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]); | ||
637 | |||
638 | debugfs_create_u32("released_slow", 0444, d_spin_debug, | ||
639 | &spinlock_stats.contention_stats[RELEASED_SLOW]); | ||
640 | debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug, | ||
641 | &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]); | ||
642 | |||
643 | debugfs_create_u64("time_blocked", 0444, d_spin_debug, | ||
644 | &spinlock_stats.time_blocked); | ||
645 | |||
646 | debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, | ||
647 | spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); | ||
648 | |||
649 | return 0; | ||
650 | } | ||
651 | fs_initcall(kvm_spinlock_debugfs); | ||
652 | #else /* !CONFIG_KVM_DEBUG_FS */ | ||
653 | static inline void add_stats(enum kvm_contention_stat var, u32 val) | ||
654 | { | ||
655 | } | ||
656 | |||
657 | static inline u64 spin_time_start(void) | ||
658 | { | ||
659 | return 0; | ||
660 | } | ||
661 | |||
662 | static inline void spin_time_accum_blocked(u64 start) | ||
663 | { | ||
664 | } | ||
665 | #endif /* CONFIG_KVM_DEBUG_FS */ | ||
666 | |||
667 | struct kvm_lock_waiting { | ||
668 | struct arch_spinlock *lock; | ||
669 | __ticket_t want; | ||
670 | }; | ||
671 | |||
672 | /* cpus 'waiting' on a spinlock to become available */ | ||
673 | static cpumask_t waiting_cpus; | ||
674 | |||
675 | /* Track spinlock on which a cpu is waiting */ | ||
676 | static DEFINE_PER_CPU(struct kvm_lock_waiting, klock_waiting); | ||
677 | |||
678 | static void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want) | ||
679 | { | ||
680 | struct kvm_lock_waiting *w; | ||
681 | int cpu; | ||
682 | u64 start; | ||
683 | unsigned long flags; | ||
684 | |||
685 | if (in_nmi()) | ||
686 | return; | ||
687 | |||
688 | w = &__get_cpu_var(klock_waiting); | ||
689 | cpu = smp_processor_id(); | ||
690 | start = spin_time_start(); | ||
691 | |||
692 | /* | ||
693 | * Make sure an interrupt handler can't upset things in a | ||
694 | * partially setup state. | ||
695 | */ | ||
696 | local_irq_save(flags); | ||
697 | |||
698 | /* | ||
699 | * The ordering protocol on this is that the "lock" pointer | ||
700 | * may only be set non-NULL if the "want" ticket is correct. | ||
701 | * If we're updating "want", we must first clear "lock". | ||
702 | */ | ||
703 | w->lock = NULL; | ||
704 | smp_wmb(); | ||
705 | w->want = want; | ||
706 | smp_wmb(); | ||
707 | w->lock = lock; | ||
708 | |||
709 | add_stats(TAKEN_SLOW, 1); | ||
710 | |||
711 | /* | ||
712 | * This uses set_bit, which is atomic but we should not rely on its | ||
713 | * reordering guarantees, so a barrier is needed after this call. | ||
714 | */ | ||
715 | cpumask_set_cpu(cpu, &waiting_cpus); | ||
716 | |||
717 | barrier(); | ||
718 | |||
719 | /* | ||
720 | * Mark entry to slowpath before doing the pickup test to make | ||
721 | * sure we don't deadlock with an unlocker. | ||
722 | */ | ||
723 | __ticket_enter_slowpath(lock); | ||
724 | |||
725 | /* | ||
726 | * check again to make sure it didn't become free while | ||
727 | * we weren't looking. | ||
728 | */ | ||
729 | if (ACCESS_ONCE(lock->tickets.head) == want) { | ||
730 | add_stats(TAKEN_SLOW_PICKUP, 1); | ||
731 | goto out; | ||
732 | } | ||
733 | |||
734 | /* | ||
735 | * Halt until it's our turn and we are kicked. We use safe halt in the | ||
736 | * irq-enabled case to avoid hanging if the lock info is overwritten in | ||
737 | * the irq spinlock slowpath and no spurious interrupt occurs to save us. | ||
738 | */ | ||
739 | if (arch_irqs_disabled_flags(flags)) | ||
740 | halt(); | ||
741 | else | ||
742 | safe_halt(); | ||
743 | |||
744 | out: | ||
745 | cpumask_clear_cpu(cpu, &waiting_cpus); | ||
746 | w->lock = NULL; | ||
747 | local_irq_restore(flags); | ||
748 | spin_time_accum_blocked(start); | ||
749 | } | ||
750 | PV_CALLEE_SAVE_REGS_THUNK(kvm_lock_spinning); | ||
751 | |||
752 | /* Kick vcpu waiting on @lock->head to reach value @ticket */ | ||
753 | static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket) | ||
754 | { | ||
755 | int cpu; | ||
756 | |||
757 | add_stats(RELEASED_SLOW, 1); | ||
758 | for_each_cpu(cpu, &waiting_cpus) { | ||
759 | const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu); | ||
760 | if (ACCESS_ONCE(w->lock) == lock && | ||
761 | ACCESS_ONCE(w->want) == ticket) { | ||
762 | add_stats(RELEASED_SLOW_KICKED, 1); | ||
763 | kvm_kick_cpu(cpu); | ||
764 | break; | ||
765 | } | ||
766 | } | ||
767 | } | ||
768 | |||
769 | /* | ||
770 | * Set up pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present. | ||
771 | */ | ||
772 | void __init kvm_spinlock_init(void) | ||
773 | { | ||
774 | if (!kvm_para_available()) | ||
775 | return; | ||
776 | /* Does host kernel support KVM_FEATURE_PV_UNHALT? */ | ||
777 | if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) | ||
778 | return; | ||
779 | |||
780 | printk(KERN_INFO "KVM setup paravirtual spinlock\n"); | ||
781 | |||
782 | static_key_slow_inc(¶virt_ticketlocks_enabled); | ||
783 | |||
784 | pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning); | ||
785 | pv_lock_ops.unlock_kick = kvm_unlock_kick; | ||
786 | } | ||
787 | #endif /* CONFIG_PARAVIRT_SPINLOCKS */ | ||
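For reference, when CONFIG_KVM_DEBUG_FS is enabled the code above creates the following entries (assuming debugfs is mounted at the conventional /sys/kernel/debug):

    /sys/kernel/debug/kvm/spinlocks/zero_stats            # write non-zero to reset
    /sys/kernel/debug/kvm/spinlocks/taken_slow
    /sys/kernel/debug/kvm/spinlocks/taken_slow_pickup
    /sys/kernel/debug/kvm/spinlocks/released_slow
    /sys/kernel/debug/kvm/spinlocks/released_slow_kicked
    /sys/kernel/debug/kvm/spinlocks/time_blocked          # total ns spent blocked
    /sys/kernel/debug/kvm/spinlocks/histo_blocked         # log2 histogram of block times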
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 676b8c77a976..bbb6c7316341 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c | |||
@@ -4,25 +4,17 @@ | |||
4 | */ | 4 | */ |
5 | #include <linux/spinlock.h> | 5 | #include <linux/spinlock.h> |
6 | #include <linux/module.h> | 6 | #include <linux/module.h> |
7 | #include <linux/jump_label.h> | ||
7 | 8 | ||
8 | #include <asm/paravirt.h> | 9 | #include <asm/paravirt.h> |
9 | 10 | ||
10 | static inline void | ||
11 | default_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) | ||
12 | { | ||
13 | arch_spin_lock(lock); | ||
14 | } | ||
15 | |||
16 | struct pv_lock_ops pv_lock_ops = { | 11 | struct pv_lock_ops pv_lock_ops = { |
17 | #ifdef CONFIG_SMP | 12 | #ifdef CONFIG_SMP |
18 | .spin_is_locked = __ticket_spin_is_locked, | 13 | .lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop), |
19 | .spin_is_contended = __ticket_spin_is_contended, | 14 | .unlock_kick = paravirt_nop, |
20 | |||
21 | .spin_lock = __ticket_spin_lock, | ||
22 | .spin_lock_flags = default_spin_lock_flags, | ||
23 | .spin_trylock = __ticket_spin_trylock, | ||
24 | .spin_unlock = __ticket_spin_unlock, | ||
25 | #endif | 15 | #endif |
26 | }; | 16 | }; |
27 | EXPORT_SYMBOL(pv_lock_ops); | 17 | EXPORT_SYMBOL(pv_lock_ops); |
28 | 18 | ||
19 | struct static_key paravirt_ticketlocks_enabled = STATIC_KEY_INIT_FALSE; | ||
20 | EXPORT_SYMBOL(paravirt_ticketlocks_enabled); | ||
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 193097ef3d7d..15939e872db2 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -427,8 +427,7 @@ static void __init xen_init_cpuid_mask(void) | |||
427 | 427 | ||
428 | if (!xen_initial_domain()) | 428 | if (!xen_initial_domain()) |
429 | cpuid_leaf1_edx_mask &= | 429 | cpuid_leaf1_edx_mask &= |
430 | ~((1 << X86_FEATURE_APIC) | /* disable local APIC */ | 430 | ~((1 << X86_FEATURE_ACPI)); /* disable ACPI */ |
431 | (1 << X86_FEATURE_ACPI)); /* disable ACPI */ | ||
432 | 431 | ||
433 | cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_X2APIC % 32)); | 432 | cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_X2APIC % 32)); |
434 | 433 | ||
@@ -735,8 +734,7 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, | |||
735 | addr = (unsigned long)xen_int3; | 734 | addr = (unsigned long)xen_int3; |
736 | else if (addr == (unsigned long)stack_segment) | 735 | else if (addr == (unsigned long)stack_segment) |
737 | addr = (unsigned long)xen_stack_segment; | 736 | addr = (unsigned long)xen_stack_segment; |
738 | else if (addr == (unsigned long)double_fault || | 737 | else if (addr == (unsigned long)double_fault) { |
739 | addr == (unsigned long)nmi) { | ||
740 | /* Don't need to handle these */ | 738 | /* Don't need to handle these */ |
741 | return 0; | 739 | return 0; |
742 | #ifdef CONFIG_X86_MCE | 740 | #ifdef CONFIG_X86_MCE |
@@ -747,7 +745,12 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, | |||
747 | */ | 745 | */ |
748 | ; | 746 | ; |
749 | #endif | 747 | #endif |
750 | } else { | 748 | } else if (addr == (unsigned long)nmi) |
749 | /* | ||
750 | * Use the native version as well. | ||
751 | */ | ||
752 | ; | ||
753 | else { | ||
751 | /* Some other trap using IST? */ | 754 | /* Some other trap using IST? */ |
752 | if (WARN_ON(val->ist != 0)) | 755 | if (WARN_ON(val->ist != 0)) |
753 | return 0; | 756 | return 0; |
@@ -1710,6 +1713,8 @@ static void __init xen_hvm_guest_init(void) | |||
1710 | 1713 | ||
1711 | xen_hvm_init_shared_info(); | 1714 | xen_hvm_init_shared_info(); |
1712 | 1715 | ||
1716 | xen_panic_handler_init(); | ||
1717 | |||
1713 | if (xen_feature(XENFEAT_hvm_callback_vector)) | 1718 | if (xen_feature(XENFEAT_hvm_callback_vector)) |
1714 | xen_have_vector_callback = 1; | 1719 | xen_have_vector_callback = 1; |
1715 | xen_hvm_smp_init(); | 1720 | xen_hvm_smp_init(); |
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 01a4dc015ae1..0da7f863056f 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c | |||
@@ -47,23 +47,18 @@ static void xen_restore_fl(unsigned long flags) | |||
47 | /* convert from IF type flag */ | 47 | /* convert from IF type flag */ |
48 | flags = !(flags & X86_EFLAGS_IF); | 48 | flags = !(flags & X86_EFLAGS_IF); |
49 | 49 | ||
50 | /* There's a one instruction preempt window here. We need to | 50 | /* See xen_irq_enable() for why preemption must be disabled. */ |
51 | make sure we're don't switch CPUs between getting the vcpu | ||
52 | pointer and updating the mask. */ | ||
53 | preempt_disable(); | 51 | preempt_disable(); |
54 | vcpu = this_cpu_read(xen_vcpu); | 52 | vcpu = this_cpu_read(xen_vcpu); |
55 | vcpu->evtchn_upcall_mask = flags; | 53 | vcpu->evtchn_upcall_mask = flags; |
56 | preempt_enable_no_resched(); | ||
57 | |||
58 | /* Doesn't matter if we get preempted here, because any | ||
59 | pending event will get dealt with anyway. */ | ||
60 | 54 | ||
61 | if (flags == 0) { | 55 | if (flags == 0) { |
62 | preempt_check_resched(); | ||
63 | barrier(); /* unmask then check (avoid races) */ | 56 | barrier(); /* unmask then check (avoid races) */ |
64 | if (unlikely(vcpu->evtchn_upcall_pending)) | 57 | if (unlikely(vcpu->evtchn_upcall_pending)) |
65 | xen_force_evtchn_callback(); | 58 | xen_force_evtchn_callback(); |
66 | } | 59 | preempt_enable(); |
60 | } else | ||
61 | preempt_enable_no_resched(); | ||
67 | } | 62 | } |
68 | PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl); | 63 | PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl); |
69 | 64 | ||
@@ -82,10 +77,12 @@ static void xen_irq_enable(void) | |||
82 | { | 77 | { |
83 | struct vcpu_info *vcpu; | 78 | struct vcpu_info *vcpu; |
84 | 79 | ||
85 | /* We don't need to worry about being preempted here, since | 80 | /* |
86 | either a) interrupts are disabled, so no preemption, or b) | 81 | * We may be preempted as soon as vcpu->evtchn_upcall_mask is |
87 | the caller is confused and is trying to re-enable interrupts | 82 | * cleared, so disable preemption to ensure we check for |
88 | on an indeterminate processor. */ | 83 | * events on the VCPU we are still running on. |
84 | */ | ||
85 | preempt_disable(); | ||
89 | 86 | ||
90 | vcpu = this_cpu_read(xen_vcpu); | 87 | vcpu = this_cpu_read(xen_vcpu); |
91 | vcpu->evtchn_upcall_mask = 0; | 88 | vcpu->evtchn_upcall_mask = 0; |
@@ -96,6 +93,8 @@ static void xen_irq_enable(void) | |||
96 | barrier(); /* unmask then check (avoid races) */ | 93 | barrier(); /* unmask then check (avoid races) */ |
97 | if (unlikely(vcpu->evtchn_upcall_pending)) | 94 | if (unlikely(vcpu->evtchn_upcall_pending)) |
98 | xen_force_evtchn_callback(); | 95 | xen_force_evtchn_callback(); |
96 | |||
97 | preempt_enable(); | ||
99 | } | 98 | } |
100 | PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable); | 99 | PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable); |
101 | 100 | ||
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 95fb2aa5927e..8b901e8d782d 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c | |||
@@ -161,6 +161,7 @@ | |||
161 | #include <asm/xen/page.h> | 161 | #include <asm/xen/page.h> |
162 | #include <asm/xen/hypercall.h> | 162 | #include <asm/xen/hypercall.h> |
163 | #include <asm/xen/hypervisor.h> | 163 | #include <asm/xen/hypervisor.h> |
164 | #include <xen/balloon.h> | ||
164 | #include <xen/grant_table.h> | 165 | #include <xen/grant_table.h> |
165 | 166 | ||
166 | #include "multicalls.h" | 167 | #include "multicalls.h" |
@@ -967,7 +968,10 @@ int m2p_remove_override(struct page *page, | |||
967 | if (kmap_op != NULL) { | 968 | if (kmap_op != NULL) { |
968 | if (!PageHighMem(page)) { | 969 | if (!PageHighMem(page)) { |
969 | struct multicall_space mcs; | 970 | struct multicall_space mcs; |
970 | struct gnttab_unmap_grant_ref *unmap_op; | 971 | struct gnttab_unmap_and_replace *unmap_op; |
972 | struct page *scratch_page = get_balloon_scratch_page(); | ||
973 | unsigned long scratch_page_address = (unsigned long) | ||
974 | __va(page_to_pfn(scratch_page) << PAGE_SHIFT); | ||
971 | 975 | ||
972 | /* | 976 | /* |
973 | * It might be that we queued all the m2p grant table | 977 | * It might be that we queued all the m2p grant table |
@@ -986,25 +990,31 @@ int m2p_remove_override(struct page *page, | |||
986 | printk(KERN_WARNING "m2p_remove_override: " | 990 | printk(KERN_WARNING "m2p_remove_override: " |
987 | "pfn %lx mfn %lx, failed to modify kernel mappings", | 991 | "pfn %lx mfn %lx, failed to modify kernel mappings", |
988 | pfn, mfn); | 992 | pfn, mfn); |
993 | put_balloon_scratch_page(); | ||
989 | return -1; | 994 | return -1; |
990 | } | 995 | } |
991 | 996 | ||
992 | mcs = xen_mc_entry( | 997 | xen_mc_batch(); |
993 | sizeof(struct gnttab_unmap_grant_ref)); | 998 | |
999 | mcs = __xen_mc_entry( | ||
1000 | sizeof(struct gnttab_unmap_and_replace)); | ||
994 | unmap_op = mcs.args; | 1001 | unmap_op = mcs.args; |
995 | unmap_op->host_addr = kmap_op->host_addr; | 1002 | unmap_op->host_addr = kmap_op->host_addr; |
1003 | unmap_op->new_addr = scratch_page_address; | ||
996 | unmap_op->handle = kmap_op->handle; | 1004 | unmap_op->handle = kmap_op->handle; |
997 | unmap_op->dev_bus_addr = 0; | ||
998 | 1005 | ||
999 | MULTI_grant_table_op(mcs.mc, | 1006 | MULTI_grant_table_op(mcs.mc, |
1000 | GNTTABOP_unmap_grant_ref, unmap_op, 1); | 1007 | GNTTABOP_unmap_and_replace, unmap_op, 1); |
1008 | |||
1009 | mcs = __xen_mc_entry(0); | ||
1010 | MULTI_update_va_mapping(mcs.mc, scratch_page_address, | ||
1011 | pfn_pte(page_to_pfn(scratch_page), | ||
1012 | PAGE_KERNEL_RO), 0); | ||
1001 | 1013 | ||
1002 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 1014 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
1003 | 1015 | ||
1004 | set_pte_at(&init_mm, address, ptep, | ||
1005 | pfn_pte(pfn, PAGE_KERNEL)); | ||
1006 | __flush_tlb_single(address); | ||
1007 | kmap_op->host_addr = 0; | 1016 | kmap_op->host_addr = 0; |
1017 | put_balloon_scratch_page(); | ||
1008 | } | 1018 | } |
1009 | } | 1019 | } |
1010 | 1020 | ||
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 8f3eea6b80c5..09f3059cb00b 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -33,6 +33,9 @@ | |||
33 | /* These are code, but not functions. Defined in entry.S */ | 33 | /* These are code, but not functions. Defined in entry.S */ |
34 | extern const char xen_hypervisor_callback[]; | 34 | extern const char xen_hypervisor_callback[]; |
35 | extern const char xen_failsafe_callback[]; | 35 | extern const char xen_failsafe_callback[]; |
36 | #ifdef CONFIG_X86_64 | ||
37 | extern const char nmi[]; | ||
38 | #endif | ||
36 | extern void xen_sysenter_target(void); | 39 | extern void xen_sysenter_target(void); |
37 | extern void xen_syscall_target(void); | 40 | extern void xen_syscall_target(void); |
38 | extern void xen_syscall32_target(void); | 41 | extern void xen_syscall32_target(void); |
@@ -215,13 +218,19 @@ static void __init xen_set_identity_and_release_chunk( | |||
215 | unsigned long pfn; | 218 | unsigned long pfn; |
216 | 219 | ||
217 | /* | 220 | /* |
218 | * If the PFNs are currently mapped, the VA mapping also needs | 221 | * If the PFNs are currently mapped, clear the mappings |
219 | * to be updated to be 1:1. | 222 | * (except for the ISA region which must be 1:1 mapped) to |
223 | * release the refcounts (in Xen) on the original frames. | ||
220 | */ | 224 | */ |
221 | for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) | 225 | for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) { |
226 | pte_t pte = __pte_ma(0); | ||
227 | |||
228 | if (pfn < PFN_UP(ISA_END_ADDRESS)) | ||
229 | pte = mfn_pte(pfn, PAGE_KERNEL_IO); | ||
230 | |||
222 | (void)HYPERVISOR_update_va_mapping( | 231 | (void)HYPERVISOR_update_va_mapping( |
223 | (unsigned long)__va(pfn << PAGE_SHIFT), | 232 | (unsigned long)__va(pfn << PAGE_SHIFT), pte, 0); |
224 | mfn_pte(pfn, PAGE_KERNEL_IO), 0); | 233 | } |
225 | 234 | ||
226 | if (start_pfn < nr_pages) | 235 | if (start_pfn < nr_pages) |
227 | *released += xen_release_chunk( | 236 | *released += xen_release_chunk( |
@@ -547,7 +556,13 @@ void xen_enable_syscall(void) | |||
547 | } | 556 | } |
548 | #endif /* CONFIG_X86_64 */ | 557 | #endif /* CONFIG_X86_64 */ |
549 | } | 558 | } |
550 | 559 | void __cpuinit xen_enable_nmi(void) | |
560 | { | ||
561 | #ifdef CONFIG_X86_64 | ||
562 | if (register_callback(CALLBACKTYPE_nmi, nmi)) | ||
563 | BUG(); | ||
564 | #endif | ||
565 | } | ||
551 | void __init xen_arch_setup(void) | 566 | void __init xen_arch_setup(void) |
552 | { | 567 | { |
553 | xen_panic_handler_init(); | 568 | xen_panic_handler_init(); |
@@ -565,7 +580,7 @@ void __init xen_arch_setup(void) | |||
565 | 580 | ||
566 | xen_enable_sysenter(); | 581 | xen_enable_sysenter(); |
567 | xen_enable_syscall(); | 582 | xen_enable_syscall(); |
568 | 583 | xen_enable_nmi(); | |
569 | #ifdef CONFIG_ACPI | 584 | #ifdef CONFIG_ACPI |
570 | if (!(xen_start_info->flags & SIF_INITDOMAIN)) { | 585 | if (!(xen_start_info->flags & SIF_INITDOMAIN)) { |
571 | printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); | 586 | printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index b81c88e51daa..9235842cd76a 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -279,6 +279,7 @@ static void __init xen_smp_prepare_boot_cpu(void) | |||
279 | 279 | ||
280 | xen_filter_cpu_maps(); | 280 | xen_filter_cpu_maps(); |
281 | xen_setup_vcpu_info_placement(); | 281 | xen_setup_vcpu_info_placement(); |
282 | xen_init_spinlocks(); | ||
282 | } | 283 | } |
283 | 284 | ||
284 | static void __init xen_smp_prepare_cpus(unsigned int max_cpus) | 285 | static void __init xen_smp_prepare_cpus(unsigned int max_cpus) |
@@ -572,6 +573,12 @@ static inline int xen_map_vector(int vector) | |||
572 | case IRQ_WORK_VECTOR: | 573 | case IRQ_WORK_VECTOR: |
573 | xen_vector = XEN_IRQ_WORK_VECTOR; | 574 | xen_vector = XEN_IRQ_WORK_VECTOR; |
574 | break; | 575 | break; |
576 | #ifdef CONFIG_X86_64 | ||
577 | case NMI_VECTOR: | ||
578 | case APIC_DM_NMI: /* Some use that instead of NMI_VECTOR */ | ||
579 | xen_vector = XEN_NMI_VECTOR; | ||
580 | break; | ||
581 | #endif | ||
575 | default: | 582 | default: |
576 | xen_vector = -1; | 583 | xen_vector = -1; |
577 | printk(KERN_ERR "xen: vector 0x%x is not implemented\n", | 584 | printk(KERN_ERR "xen: vector 0x%x is not implemented\n", |
@@ -680,7 +687,6 @@ void __init xen_smp_init(void) | |||
680 | { | 687 | { |
681 | smp_ops = xen_smp_ops; | 688 | smp_ops = xen_smp_ops; |
682 | xen_fill_possible_map(); | 689 | xen_fill_possible_map(); |
683 | xen_init_spinlocks(); | ||
684 | } | 690 | } |
685 | 691 | ||
686 | static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) | 692 | static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) |
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index cf3caee356b3..0438b9324a72 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c | |||
@@ -17,45 +17,44 @@ | |||
17 | #include "xen-ops.h" | 17 | #include "xen-ops.h" |
18 | #include "debugfs.h" | 18 | #include "debugfs.h" |
19 | 19 | ||
20 | #ifdef CONFIG_XEN_DEBUG_FS | 20 | enum xen_contention_stat { |
21 | static struct xen_spinlock_stats | 21 | TAKEN_SLOW, |
22 | { | 22 | TAKEN_SLOW_PICKUP, |
23 | u64 taken; | 23 | TAKEN_SLOW_SPURIOUS, |
24 | u32 taken_slow; | 24 | RELEASED_SLOW, |
25 | u32 taken_slow_nested; | 25 | RELEASED_SLOW_KICKED, |
26 | u32 taken_slow_pickup; | 26 | NR_CONTENTION_STATS |
27 | u32 taken_slow_spurious; | 27 | }; |
28 | u32 taken_slow_irqenable; | ||
29 | 28 | ||
30 | u64 released; | ||
31 | u32 released_slow; | ||
32 | u32 released_slow_kicked; | ||
33 | 29 | ||
30 | #ifdef CONFIG_XEN_DEBUG_FS | ||
34 | #define HISTO_BUCKETS 30 | 31 | #define HISTO_BUCKETS 30 |
35 | u32 histo_spin_total[HISTO_BUCKETS+1]; | 32 | static struct xen_spinlock_stats |
36 | u32 histo_spin_spinning[HISTO_BUCKETS+1]; | 33 | { |
34 | u32 contention_stats[NR_CONTENTION_STATS]; | ||
37 | u32 histo_spin_blocked[HISTO_BUCKETS+1]; | 35 | u32 histo_spin_blocked[HISTO_BUCKETS+1]; |
38 | |||
39 | u64 time_total; | ||
40 | u64 time_spinning; | ||
41 | u64 time_blocked; | 36 | u64 time_blocked; |
42 | } spinlock_stats; | 37 | } spinlock_stats; |
43 | 38 | ||
44 | static u8 zero_stats; | 39 | static u8 zero_stats; |
45 | 40 | ||
46 | static unsigned lock_timeout = 1 << 10; | ||
47 | #define TIMEOUT lock_timeout | ||
48 | |||
49 | static inline void check_zero(void) | 41 | static inline void check_zero(void) |
50 | { | 42 | { |
51 | if (unlikely(zero_stats)) { | 43 | u8 ret; |
52 | memset(&spinlock_stats, 0, sizeof(spinlock_stats)); | 44 | u8 old = ACCESS_ONCE(zero_stats); |
53 | zero_stats = 0; | 45 | if (unlikely(old)) { |
46 | ret = cmpxchg(&zero_stats, old, 0); | ||
47 | /* This ensures only one fellow resets the stat */ | ||
48 | if (ret == old) | ||
49 | memset(&spinlock_stats, 0, sizeof(spinlock_stats)); | ||
54 | } | 50 | } |
55 | } | 51 | } |
56 | 52 | ||
57 | #define ADD_STATS(elem, val) \ | 53 | static inline void add_stats(enum xen_contention_stat var, u32 val) |
58 | do { check_zero(); spinlock_stats.elem += (val); } while(0) | 54 | { |
55 | check_zero(); | ||
56 | spinlock_stats.contention_stats[var] += val; | ||
57 | } | ||
59 | 58 | ||
60 | static inline u64 spin_time_start(void) | 59 | static inline u64 spin_time_start(void) |
61 | { | 60 | { |
@@ -74,22 +73,6 @@ static void __spin_time_accum(u64 delta, u32 *array) | |||
74 | array[HISTO_BUCKETS]++; | 73 | array[HISTO_BUCKETS]++; |
75 | } | 74 | } |
76 | 75 | ||
77 | static inline void spin_time_accum_spinning(u64 start) | ||
78 | { | ||
79 | u32 delta = xen_clocksource_read() - start; | ||
80 | |||
81 | __spin_time_accum(delta, spinlock_stats.histo_spin_spinning); | ||
82 | spinlock_stats.time_spinning += delta; | ||
83 | } | ||
84 | |||
85 | static inline void spin_time_accum_total(u64 start) | ||
86 | { | ||
87 | u32 delta = xen_clocksource_read() - start; | ||
88 | |||
89 | __spin_time_accum(delta, spinlock_stats.histo_spin_total); | ||
90 | spinlock_stats.time_total += delta; | ||
91 | } | ||
92 | |||
93 | static inline void spin_time_accum_blocked(u64 start) | 76 | static inline void spin_time_accum_blocked(u64 start) |
94 | { | 77 | { |
95 | u32 delta = xen_clocksource_read() - start; | 78 | u32 delta = xen_clocksource_read() - start; |
@@ -99,19 +82,15 @@ static inline void spin_time_accum_blocked(u64 start) | |||
99 | } | 82 | } |
100 | #else /* !CONFIG_XEN_DEBUG_FS */ | 83 | #else /* !CONFIG_XEN_DEBUG_FS */ |
101 | #define TIMEOUT (1 << 10) | 84 | #define TIMEOUT (1 << 10) |
102 | #define ADD_STATS(elem, val) do { (void)(val); } while(0) | 85 | static inline void add_stats(enum xen_contention_stat var, u32 val) |
86 | { | ||
87 | } | ||
103 | 88 | ||
104 | static inline u64 spin_time_start(void) | 89 | static inline u64 spin_time_start(void) |
105 | { | 90 | { |
106 | return 0; | 91 | return 0; |
107 | } | 92 | } |
108 | 93 | ||
109 | static inline void spin_time_accum_total(u64 start) | ||
110 | { | ||
111 | } | ||
112 | static inline void spin_time_accum_spinning(u64 start) | ||
113 | { | ||
114 | } | ||
115 | static inline void spin_time_accum_blocked(u64 start) | 94 | static inline void spin_time_accum_blocked(u64 start) |
116 | { | 95 | { |
117 | } | 96 | } |
@@ -134,227 +113,123 @@ typedef u16 xen_spinners_t; | |||
134 | asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory"); | 113 | asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory"); |
135 | #endif | 114 | #endif |
136 | 115 | ||
137 | struct xen_spinlock { | 116 | struct xen_lock_waiting { |
138 | unsigned char lock; /* 0 -> free; 1 -> locked */ | 117 | struct arch_spinlock *lock; |
139 | xen_spinners_t spinners; /* count of waiting cpus */ | 118 | __ticket_t want; |
140 | }; | 119 | }; |
141 | 120 | ||
142 | static int xen_spin_is_locked(struct arch_spinlock *lock) | ||
143 | { | ||
144 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
145 | |||
146 | return xl->lock != 0; | ||
147 | } | ||
148 | |||
149 | static int xen_spin_is_contended(struct arch_spinlock *lock) | ||
150 | { | ||
151 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
152 | |||
153 | /* Not strictly true; this is only the count of contended | ||
154 | lock-takers entering the slow path. */ | ||
155 | return xl->spinners != 0; | ||
156 | } | ||
157 | |||
158 | static int xen_spin_trylock(struct arch_spinlock *lock) | ||
159 | { | ||
160 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
161 | u8 old = 1; | ||
162 | |||
163 | asm("xchgb %b0,%1" | ||
164 | : "+q" (old), "+m" (xl->lock) : : "memory"); | ||
165 | |||
166 | return old == 0; | ||
167 | } | ||
168 | |||
169 | static DEFINE_PER_CPU(char *, irq_name); | ||
170 | static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; | 121 | static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; |
171 | static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners); | 122 | static DEFINE_PER_CPU(char *, irq_name); |
172 | 123 | static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting); | |
173 | /* | 124 | static cpumask_t waiting_cpus; |
174 | * Mark a cpu as interested in a lock. Returns the CPU's previous | ||
175 | * lock of interest, in case we got preempted by an interrupt. | ||
176 | */ | ||
177 | static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl) | ||
178 | { | ||
179 | struct xen_spinlock *prev; | ||
180 | |||
181 | prev = __this_cpu_read(lock_spinners); | ||
182 | __this_cpu_write(lock_spinners, xl); | ||
183 | |||
184 | wmb(); /* set lock of interest before count */ | ||
185 | |||
186 | inc_spinners(xl); | ||
187 | |||
188 | return prev; | ||
189 | } | ||
190 | |||
191 | /* | ||
192 | * Mark a cpu as no longer interested in a lock. Restores previous | ||
193 | * lock of interest (NULL for none). | ||
194 | */ | ||
195 | static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev) | ||
196 | { | ||
197 | dec_spinners(xl); | ||
198 | wmb(); /* decrement count before restoring lock */ | ||
199 | __this_cpu_write(lock_spinners, prev); | ||
200 | } | ||
201 | 125 | ||
202 | static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable) | 126 | static void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want) |
203 | { | 127 | { |
204 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
205 | struct xen_spinlock *prev; | ||
206 | int irq = __this_cpu_read(lock_kicker_irq); | 128 | int irq = __this_cpu_read(lock_kicker_irq); |
207 | int ret; | 129 | struct xen_lock_waiting *w = &__get_cpu_var(lock_waiting); |
130 | int cpu = smp_processor_id(); | ||
208 | u64 start; | 131 | u64 start; |
132 | unsigned long flags; | ||
209 | 133 | ||
210 | /* If kicker interrupts not initialized yet, just spin */ | 134 | /* If kicker interrupts not initialized yet, just spin */ |
211 | if (irq == -1) | 135 | if (irq == -1) |
212 | return 0; | 136 | return; |
213 | 137 | ||
214 | start = spin_time_start(); | 138 | start = spin_time_start(); |
215 | 139 | ||
216 | /* announce we're spinning */ | 140 | /* |
217 | prev = spinning_lock(xl); | 141 | * Make sure an interrupt handler can't upset things in a |
142 | * partially setup state. | ||
143 | */ | ||
144 | local_irq_save(flags); | ||
145 | /* | ||
146 | * We don't really care if we're overwriting some other | ||
147 | * (lock,want) pair, as that would mean that we're currently | ||
148 | * in an interrupt context, and the outer context had | ||
149 | * interrupts enabled. That has already kicked the VCPU out | ||
150 | * of xen_poll_irq(), so it will just return spuriously and | ||
151 | * retry with newly setup (lock,want). | ||
152 | * | ||
153 | * The ordering protocol on this is that the "lock" pointer | ||
154 | * may only be set non-NULL if the "want" ticket is correct. | ||
155 | * If we're updating "want", we must first clear "lock". | ||
156 | */ | ||
157 | w->lock = NULL; | ||
158 | smp_wmb(); | ||
159 | w->want = want; | ||
160 | smp_wmb(); | ||
161 | w->lock = lock; | ||
218 | 162 | ||
219 | ADD_STATS(taken_slow, 1); | 163 | /* This uses set_bit, which is atomic and therefore a barrier */ |
220 | ADD_STATS(taken_slow_nested, prev != NULL); | 164 | cpumask_set_cpu(cpu, &waiting_cpus); |
165 | add_stats(TAKEN_SLOW, 1); | ||
221 | 166 | ||
222 | do { | 167 | /* clear pending */ |
223 | unsigned long flags; | 168 | xen_clear_irq_pending(irq); |
224 | 169 | ||
225 | /* clear pending */ | 170 | /* Only check lock once pending cleared */ |
226 | xen_clear_irq_pending(irq); | 171 | barrier(); |
227 | 172 | ||
228 | /* check again make sure it didn't become free while | 173 | /* |
229 | we weren't looking */ | 174 | * Mark entry to slowpath before doing the pickup test to make |
230 | ret = xen_spin_trylock(lock); | 175 | * sure we don't deadlock with an unlocker. |
231 | if (ret) { | 176 | */ |
232 | ADD_STATS(taken_slow_pickup, 1); | 177 | __ticket_enter_slowpath(lock); |
233 | 178 | ||
234 | /* | 179 | /* |
235 | * If we interrupted another spinlock while it | 180 | * check again make sure it didn't become free while |
236 | * was blocking, make sure it doesn't block | 181 | * we weren't looking |
237 | * without rechecking the lock. | 182 | */ |
238 | */ | 183 | if (ACCESS_ONCE(lock->tickets.head) == want) { |
239 | if (prev != NULL) | 184 | add_stats(TAKEN_SLOW_PICKUP, 1); |
240 | xen_set_irq_pending(irq); | 185 | goto out; |
241 | goto out; | 186 | } |
242 | } | ||
243 | 187 | ||
244 | flags = arch_local_save_flags(); | 188 | /* Allow interrupts while blocked */ |
245 | if (irq_enable) { | 189 | local_irq_restore(flags); |
246 | ADD_STATS(taken_slow_irqenable, 1); | ||
247 | raw_local_irq_enable(); | ||
248 | } | ||
249 | 190 | ||
250 | /* | 191 | /* |
251 | * Block until irq becomes pending. If we're | 192 | * If an interrupt happens here, it will leave the wakeup irq |
252 | * interrupted at this point (after the trylock but | 193 | * pending, which will cause xen_poll_irq() to return |
253 | * before entering the block), then the nested lock | 194 | * immediately. |
254 | * handler guarantees that the irq will be left | 195 | */ |
255 | * pending if there's any chance the lock became free; | ||
256 | * xen_poll_irq() returns immediately if the irq is | ||
257 | * pending. | ||
258 | */ | ||
259 | xen_poll_irq(irq); | ||
260 | 196 | ||
261 | raw_local_irq_restore(flags); | 197 | /* Block until irq becomes pending (or perhaps a spurious wakeup) */ |
198 | xen_poll_irq(irq); | ||
199 | add_stats(TAKEN_SLOW_SPURIOUS, !xen_test_irq_pending(irq)); | ||
262 | 200 | ||
263 | ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq)); | 201 | local_irq_save(flags); |
264 | } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */ | ||
265 | 202 | ||
266 | kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); | 203 | kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); |
267 | |||
268 | out: | 204 | out: |
269 | unspinning_lock(xl, prev); | 205 | cpumask_clear_cpu(cpu, &waiting_cpus); |
270 | spin_time_accum_blocked(start); | 206 | w->lock = NULL; |
271 | |||
272 | return ret; | ||
273 | } | ||
274 | |||
275 | static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable) | ||
276 | { | ||
277 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
278 | unsigned timeout; | ||
279 | u8 oldval; | ||
280 | u64 start_spin; | ||
281 | |||
282 | ADD_STATS(taken, 1); | ||
283 | |||
284 | start_spin = spin_time_start(); | ||
285 | |||
286 | do { | ||
287 | u64 start_spin_fast = spin_time_start(); | ||
288 | |||
289 | timeout = TIMEOUT; | ||
290 | |||
291 | asm("1: xchgb %1,%0\n" | ||
292 | " testb %1,%1\n" | ||
293 | " jz 3f\n" | ||
294 | "2: rep;nop\n" | ||
295 | " cmpb $0,%0\n" | ||
296 | " je 1b\n" | ||
297 | " dec %2\n" | ||
298 | " jnz 2b\n" | ||
299 | "3:\n" | ||
300 | : "+m" (xl->lock), "=q" (oldval), "+r" (timeout) | ||
301 | : "1" (1) | ||
302 | : "memory"); | ||
303 | 207 | ||
304 | spin_time_accum_spinning(start_spin_fast); | 208 | local_irq_restore(flags); |
305 | 209 | ||
306 | } while (unlikely(oldval != 0 && | 210 | spin_time_accum_blocked(start); |
307 | (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable)))); | ||
308 | |||
309 | spin_time_accum_total(start_spin); | ||
310 | } | ||
311 | |||
312 | static void xen_spin_lock(struct arch_spinlock *lock) | ||
313 | { | ||
314 | __xen_spin_lock(lock, false); | ||
315 | } | ||
316 | |||
317 | static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags) | ||
318 | { | ||
319 | __xen_spin_lock(lock, !raw_irqs_disabled_flags(flags)); | ||
320 | } | 211 | } |
212 | PV_CALLEE_SAVE_REGS_THUNK(xen_lock_spinning); | ||
321 | 213 | ||
322 | static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl) | 214 | static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next) |
323 | { | 215 | { |
324 | int cpu; | 216 | int cpu; |
325 | 217 | ||
326 | ADD_STATS(released_slow, 1); | 218 | add_stats(RELEASED_SLOW, 1); |
219 | |||
220 | for_each_cpu(cpu, &waiting_cpus) { | ||
221 | const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu); | ||
327 | 222 | ||
328 | for_each_online_cpu(cpu) { | 223 | /* Make sure we read lock before want */ |
329 | /* XXX should mix up next cpu selection */ | 224 | if (ACCESS_ONCE(w->lock) == lock && |
330 | if (per_cpu(lock_spinners, cpu) == xl) { | 225 | ACCESS_ONCE(w->want) == next) { |
331 | ADD_STATS(released_slow_kicked, 1); | 226 | add_stats(RELEASED_SLOW_KICKED, 1); |
332 | xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); | 227 | xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); |
228 | break; | ||
333 | } | 229 | } |
334 | } | 230 | } |
335 | } | 231 | } |
336 | 232 | ||
337 | static void xen_spin_unlock(struct arch_spinlock *lock) | ||
338 | { | ||
339 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
340 | |||
341 | ADD_STATS(released, 1); | ||
342 | |||
343 | smp_wmb(); /* make sure no writes get moved after unlock */ | ||
344 | xl->lock = 0; /* release lock */ | ||
345 | |||
346 | /* | ||
347 | * Make sure unlock happens before checking for waiting | ||
348 | * spinners. We need a strong barrier to enforce the | ||
349 | * write-read ordering to different memory locations, as the | ||
350 | * CPU makes no implied guarantees about their ordering. | ||
351 | */ | ||
352 | mb(); | ||
353 | |||
354 | if (unlikely(xl->spinners)) | ||
355 | xen_spin_unlock_slow(xl); | ||
356 | } | ||
357 | |||
358 | static irqreturn_t dummy_handler(int irq, void *dev_id) | 233 | static irqreturn_t dummy_handler(int irq, void *dev_id) |
359 | { | 234 | { |
360 | BUG(); | 235 | BUG(); |
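
The hunk above swaps the old byte-lock slow path for a ticketlock one: a waiting CPU publishes a per-cpu (lock, want) pair, marks itself in waiting_cpus, and blocks in xen_poll_irq(); the unlocker walks waiting_cpus and kicks only the CPU whose ticket is next. A minimal user-space model of that publish/scan protocol (single-threaded; plain stores stand in for the smp_wmb()-ordered ones and a printf for the IPI):

        #include <stdio.h>

        #define NR_CPUS 4

        struct lock_waiting {
                void *lock;             /* lock of interest; NULL when idle */
                unsigned short want;    /* ticket this CPU is waiting for */
        };

        static struct lock_waiting waiting[NR_CPUS];

        /* Waiter: clear lock, set want, then set lock, mirroring the
         * smp_wmb()-separated stores in xen_lock_spinning() above. */
        static void publish(int cpu, void *lock, unsigned short want)
        {
                waiting[cpu].lock = NULL;
                waiting[cpu].want = want;
                waiting[cpu].lock = lock;
        }

        /* Unlocker: kick only the CPU holding the next ticket, as
         * xen_unlock_kick() does before sending XEN_SPIN_UNLOCK_VECTOR. */
        static int cpu_to_kick(void *lock, unsigned short next)
        {
                for (int cpu = 0; cpu < NR_CPUS; cpu++)
                        if (waiting[cpu].lock == lock &&
                            waiting[cpu].want == next)
                                return cpu;
                return -1;      /* nobody waiting on that ticket */
        }

        int main(void)
        {
                int lockword;
                publish(2, &lockword, 7);
                printf("kick cpu%d\n", cpu_to_kick(&lockword, 7)); /* cpu2 */
                return 0;
        }

The store order is the point: "lock" only becomes non-NULL once "want" is valid, so the unlocker can never match a stale ticket against the right lock.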
@@ -408,6 +283,8 @@ void xen_uninit_lock_cpu(int cpu) | |||
408 | per_cpu(irq_name, cpu) = NULL; | 283 | per_cpu(irq_name, cpu) = NULL; |
409 | } | 284 | } |
410 | 285 | ||
286 | static bool xen_pvspin __initdata = true; | ||
287 | |||
411 | void __init xen_init_spinlocks(void) | 288 | void __init xen_init_spinlocks(void) |
412 | { | 289 | { |
413 | /* | 290 | /* |
@@ -417,15 +294,23 @@ void __init xen_init_spinlocks(void) | |||
417 | if (xen_hvm_domain()) | 294 | if (xen_hvm_domain()) |
418 | return; | 295 | return; |
419 | 296 | ||
420 | BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t)); | 297 | if (!xen_pvspin) { |
298 | printk(KERN_DEBUG "xen: PV spinlocks disabled\n"); | ||
299 | return; | ||
300 | } | ||
421 | 301 | ||
422 | pv_lock_ops.spin_is_locked = xen_spin_is_locked; | 302 | static_key_slow_inc(¶virt_ticketlocks_enabled); |
423 | pv_lock_ops.spin_is_contended = xen_spin_is_contended; | 303 | |
424 | pv_lock_ops.spin_lock = xen_spin_lock; | 304 | pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning); |
425 | pv_lock_ops.spin_lock_flags = xen_spin_lock_flags; | 305 | pv_lock_ops.unlock_kick = xen_unlock_kick; |
426 | pv_lock_ops.spin_trylock = xen_spin_trylock; | 306 | } |
427 | pv_lock_ops.spin_unlock = xen_spin_unlock; | 307 | |
308 | static __init int xen_parse_nopvspin(char *arg) | ||
309 | { | ||
310 | xen_pvspin = false; | ||
311 | return 0; | ||
428 | } | 312 | } |
313 | early_param("xen_nopvspin", xen_parse_nopvspin); | ||
429 | 314 | ||
430 | #ifdef CONFIG_XEN_DEBUG_FS | 315 | #ifdef CONFIG_XEN_DEBUG_FS |
431 | 316 | ||
@@ -442,37 +327,21 @@ static int __init xen_spinlock_debugfs(void) | |||
442 | 327 | ||
443 | debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats); | 328 | debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats); |
444 | 329 | ||
445 | debugfs_create_u32("timeout", 0644, d_spin_debug, &lock_timeout); | ||
446 | |||
447 | debugfs_create_u64("taken", 0444, d_spin_debug, &spinlock_stats.taken); | ||
448 | debugfs_create_u32("taken_slow", 0444, d_spin_debug, | 330 | debugfs_create_u32("taken_slow", 0444, d_spin_debug, |
449 | &spinlock_stats.taken_slow); | 331 | &spinlock_stats.contention_stats[TAKEN_SLOW]); |
450 | debugfs_create_u32("taken_slow_nested", 0444, d_spin_debug, | ||
451 | &spinlock_stats.taken_slow_nested); | ||
452 | debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug, | 332 | debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug, |
453 | &spinlock_stats.taken_slow_pickup); | 333 | &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]); |
454 | debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug, | 334 | debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug, |
455 | &spinlock_stats.taken_slow_spurious); | 335 | &spinlock_stats.contention_stats[TAKEN_SLOW_SPURIOUS]); |
456 | debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug, | ||
457 | &spinlock_stats.taken_slow_irqenable); | ||
458 | 336 | ||
459 | debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released); | ||
460 | debugfs_create_u32("released_slow", 0444, d_spin_debug, | 337 | debugfs_create_u32("released_slow", 0444, d_spin_debug, |
461 | &spinlock_stats.released_slow); | 338 | &spinlock_stats.contention_stats[RELEASED_SLOW]); |
462 | debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug, | 339 | debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug, |
463 | &spinlock_stats.released_slow_kicked); | 340 | &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]); |
464 | 341 | ||
465 | debugfs_create_u64("time_spinning", 0444, d_spin_debug, | ||
466 | &spinlock_stats.time_spinning); | ||
467 | debugfs_create_u64("time_blocked", 0444, d_spin_debug, | 342 | debugfs_create_u64("time_blocked", 0444, d_spin_debug, |
468 | &spinlock_stats.time_blocked); | 343 | &spinlock_stats.time_blocked); |
469 | debugfs_create_u64("time_total", 0444, d_spin_debug, | ||
470 | &spinlock_stats.time_total); | ||
471 | 344 | ||
472 | debugfs_create_u32_array("histo_total", 0444, d_spin_debug, | ||
473 | spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1); | ||
474 | debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug, | ||
475 | spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1); | ||
476 | debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, | 345 | debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, |
477 | spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); | 346 | spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); |
478 | 347 | ||
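
Two smaller changes in this file are worth noting. check_zero() now uses cmpxchg so that when several CPUs race to honour a pending zero_stats request, exactly one of them performs the memset. A stand-alone sketch of that winner-resets idiom, with GCC's atomic builtins standing in for the kernel's cmpxchg():

        #include <stdio.h>
        #include <string.h>

        static unsigned char zero_stats;
        static unsigned int stats[8];

        static void check_zero(void)
        {
                unsigned char old = __atomic_load_n(&zero_stats,
                                                    __ATOMIC_RELAXED);

                if (old) {
                        /* only the caller whose compare-and-swap
                         * succeeds gets to reset the statistics */
                        if (__atomic_compare_exchange_n(&zero_stats, &old,
                                                        0, 0,
                                                        __ATOMIC_SEQ_CST,
                                                        __ATOMIC_SEQ_CST))
                                memset(stats, 0, sizeof(stats));
                }
        }

        int main(void)
        {
                stats[0] = 42;
                zero_stats = 1;
                check_zero();
                printf("stats[0] = %u\n", stats[0]);    /* 0 */
                return 0;
        }

The new xen_nopvspin early parameter also gives an escape hatch: booting the guest with xen_nopvspin on its command line skips the paravirt-ticketlocks static key and leaves the pv_lock_ops hooks unset.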
diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig index dbfd56446c31..94c0c74434ea 100644 --- a/drivers/char/tpm/Kconfig +++ b/drivers/char/tpm/Kconfig | |||
@@ -91,4 +91,16 @@ config TCG_ST33_I2C | |||
91 | To compile this driver as a module, choose M here; the module will be | 91 | To compile this driver as a module, choose M here; the module will be |
92 | called tpm_stm_st33_i2c. | 92 | called tpm_stm_st33_i2c. |
93 | 93 | ||
94 | config TCG_XEN | ||
95 | tristate "XEN TPM Interface" | ||
96 | depends on TCG_TPM && XEN | ||
97 | select XEN_XENBUS_FRONTEND | ||
98 | ---help--- | ||
99 | If you want to make TPM support available to a Xen user domain, | ||
100 | say Yes and it will be accessible from within Linux. See | ||
101 | the manpages for xl, xl.conf, and docs/misc/vtpm.txt in | ||
102 | the Xen source repository for more details. | ||
103 | To compile this driver as a module, choose M here; the module | ||
104 | will be called xen-tpmfront. | ||
105 | |||
94 | endif # TCG_TPM | 106 | endif # TCG_TPM |
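
Assuming a tree where the TCG_TPM and Xen options are already available (the entry's "depends on TCG_TPM && XEN" is authoritative), building the new frontend as a module reduces to the usual config fragment:

        CONFIG_XEN=y
        CONFIG_TCG_TPM=y
        CONFIG_TCG_XEN=m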
diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile index a3736c97c65a..eb41ff97d0ad 100644 --- a/drivers/char/tpm/Makefile +++ b/drivers/char/tpm/Makefile | |||
@@ -18,3 +18,4 @@ obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o | |||
18 | obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o | 18 | obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o |
19 | obj-$(CONFIG_TCG_IBMVTPM) += tpm_ibmvtpm.o | 19 | obj-$(CONFIG_TCG_IBMVTPM) += tpm_ibmvtpm.o |
20 | obj-$(CONFIG_TCG_ST33_I2C) += tpm_i2c_stm_st33.o | 20 | obj-$(CONFIG_TCG_ST33_I2C) += tpm_i2c_stm_st33.o |
21 | obj-$(CONFIG_TCG_XEN) += xen-tpmfront.o | ||
diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c new file mode 100644 index 000000000000..7a7929ba2658 --- /dev/null +++ b/drivers/char/tpm/xen-tpmfront.c | |||
@@ -0,0 +1,473 @@ | |||
1 | /* | ||
2 | * Implementation of the Xen vTPM device frontend | ||
3 | * | ||
4 | * Author: Daniel De Graaf <dgdegra@tycho.nsa.gov> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2, | ||
8 | * as published by the Free Software Foundation. | ||
9 | */ | ||
10 | #include <linux/errno.h> | ||
11 | #include <linux/err.h> | ||
12 | #include <linux/interrupt.h> | ||
13 | #include <xen/events.h> | ||
14 | #include <xen/interface/io/tpmif.h> | ||
15 | #include <xen/grant_table.h> | ||
16 | #include <xen/xenbus.h> | ||
17 | #include <xen/page.h> | ||
18 | #include "tpm.h" | ||
19 | |||
20 | struct tpm_private { | ||
21 | struct tpm_chip *chip; | ||
22 | struct xenbus_device *dev; | ||
23 | |||
24 | struct vtpm_shared_page *shr; | ||
25 | |||
26 | unsigned int evtchn; | ||
27 | int ring_ref; | ||
28 | domid_t backend_id; | ||
29 | }; | ||
30 | |||
31 | enum status_bits { | ||
32 | VTPM_STATUS_RUNNING = 0x1, | ||
33 | VTPM_STATUS_IDLE = 0x2, | ||
34 | VTPM_STATUS_RESULT = 0x4, | ||
35 | VTPM_STATUS_CANCELED = 0x8, | ||
36 | }; | ||
37 | |||
38 | static u8 vtpm_status(struct tpm_chip *chip) | ||
39 | { | ||
40 | struct tpm_private *priv = TPM_VPRIV(chip); | ||
41 | switch (priv->shr->state) { | ||
42 | case VTPM_STATE_IDLE: | ||
43 | return VTPM_STATUS_IDLE | VTPM_STATUS_CANCELED; | ||
44 | case VTPM_STATE_FINISH: | ||
45 | return VTPM_STATUS_IDLE | VTPM_STATUS_RESULT; | ||
46 | case VTPM_STATE_SUBMIT: | ||
47 | case VTPM_STATE_CANCEL: /* cancel requested, not yet canceled */ | ||
48 | return VTPM_STATUS_RUNNING; | ||
49 | default: | ||
50 | return 0; | ||
51 | } | ||
52 | } | ||
53 | |||
54 | static bool vtpm_req_canceled(struct tpm_chip *chip, u8 status) | ||
55 | { | ||
56 | return status & VTPM_STATUS_CANCELED; | ||
57 | } | ||
58 | |||
59 | static void vtpm_cancel(struct tpm_chip *chip) | ||
60 | { | ||
61 | struct tpm_private *priv = TPM_VPRIV(chip); | ||
62 | priv->shr->state = VTPM_STATE_CANCEL; | ||
63 | wmb(); | ||
64 | notify_remote_via_evtchn(priv->evtchn); | ||
65 | } | ||
66 | |||
67 | static unsigned int shr_data_offset(struct vtpm_shared_page *shr) | ||
68 | { | ||
69 | return sizeof(*shr) + sizeof(u32) * shr->nr_extra_pages; | ||
70 | } | ||
71 | |||
72 | static int vtpm_send(struct tpm_chip *chip, u8 *buf, size_t count) | ||
73 | { | ||
74 | struct tpm_private *priv = TPM_VPRIV(chip); | ||
75 | struct vtpm_shared_page *shr = priv->shr; | ||
76 | unsigned int offset = shr_data_offset(shr); | ||
77 | |||
78 | u32 ordinal; | ||
79 | unsigned long duration; | ||
80 | |||
81 | if (offset > PAGE_SIZE) | ||
82 | return -EINVAL; | ||
83 | |||
84 | if (offset + count > PAGE_SIZE) | ||
85 | return -EINVAL; | ||
86 | |||
87 | /* Wait for completion of any existing command or cancellation */ | ||
88 | if (wait_for_tpm_stat(chip, VTPM_STATUS_IDLE, chip->vendor.timeout_c, | ||
89 | &chip->vendor.read_queue, true) < 0) { | ||
90 | vtpm_cancel(chip); | ||
91 | return -ETIME; | ||
92 | } | ||
93 | |||
94 | memcpy(offset + (u8 *)shr, buf, count); | ||
95 | shr->length = count; | ||
96 | barrier(); | ||
97 | shr->state = VTPM_STATE_SUBMIT; | ||
98 | wmb(); | ||
99 | notify_remote_via_evtchn(priv->evtchn); | ||
100 | |||
101 | ordinal = be32_to_cpu(((struct tpm_input_header*)buf)->ordinal); | ||
102 | duration = tpm_calc_ordinal_duration(chip, ordinal); | ||
103 | |||
104 | if (wait_for_tpm_stat(chip, VTPM_STATUS_IDLE, duration, | ||
105 | &chip->vendor.read_queue, true) < 0) { | ||
106 | /* got a signal or timeout, try to cancel */ | ||
107 | vtpm_cancel(chip); | ||
108 | return -ETIME; | ||
109 | } | ||
110 | |||
111 | return count; | ||
112 | } | ||
113 | |||
114 | static int vtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count) | ||
115 | { | ||
116 | struct tpm_private *priv = TPM_VPRIV(chip); | ||
117 | struct vtpm_shared_page *shr = priv->shr; | ||
118 | unsigned int offset = shr_data_offset(shr); | ||
119 | size_t length = shr->length; | ||
120 | |||
121 | if (shr->state == VTPM_STATE_IDLE) | ||
122 | return -ECANCELED; | ||
123 | |||
124 | /* In theory the wait at the end of _send makes this one unnecessary */ | ||
125 | if (wait_for_tpm_stat(chip, VTPM_STATUS_RESULT, chip->vendor.timeout_c, | ||
126 | &chip->vendor.read_queue, true) < 0) { | ||
127 | vtpm_cancel(chip); | ||
128 | return -ETIME; | ||
129 | } | ||
130 | |||
131 | if (offset > PAGE_SIZE) | ||
132 | return -EIO; | ||
133 | |||
134 | if (offset + length > PAGE_SIZE) | ||
135 | length = PAGE_SIZE - offset; | ||
136 | |||
137 | if (length > count) | ||
138 | length = count; | ||
139 | |||
140 | memcpy(buf, offset + (u8 *)shr, length); | ||
141 | |||
142 | return length; | ||
143 | } | ||
144 | |||
145 | ssize_t tpm_show_locality(struct device *dev, struct device_attribute *attr, | ||
146 | char *buf) | ||
147 | { | ||
148 | struct tpm_chip *chip = dev_get_drvdata(dev); | ||
149 | struct tpm_private *priv = TPM_VPRIV(chip); | ||
150 | u8 locality = priv->shr->locality; | ||
151 | |||
152 | return sprintf(buf, "%d\n", locality); | ||
153 | } | ||
154 | |||
155 | ssize_t tpm_store_locality(struct device *dev, struct device_attribute *attr, | ||
156 | const char *buf, size_t len) | ||
157 | { | ||
158 | struct tpm_chip *chip = dev_get_drvdata(dev); | ||
159 | struct tpm_private *priv = TPM_VPRIV(chip); | ||
160 | u8 val; | ||
161 | |||
162 | int rv = kstrtou8(buf, 0, &val); | ||
163 | if (rv) | ||
164 | return rv; | ||
165 | |||
166 | priv->shr->locality = val; | ||
167 | |||
168 | return len; | ||
169 | } | ||
170 | |||
171 | static const struct file_operations vtpm_ops = { | ||
172 | .owner = THIS_MODULE, | ||
173 | .llseek = no_llseek, | ||
174 | .open = tpm_open, | ||
175 | .read = tpm_read, | ||
176 | .write = tpm_write, | ||
177 | .release = tpm_release, | ||
178 | }; | ||
179 | |||
180 | static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL); | ||
181 | static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL); | ||
182 | static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL); | ||
183 | static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL); | ||
184 | static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL); | ||
185 | static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, | ||
186 | NULL); | ||
187 | static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL); | ||
188 | static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel); | ||
189 | static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL); | ||
190 | static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL); | ||
191 | static DEVICE_ATTR(locality, S_IRUGO | S_IWUSR, tpm_show_locality, | ||
192 | tpm_store_locality); | ||
193 | |||
194 | static struct attribute *vtpm_attrs[] = { | ||
195 | &dev_attr_pubek.attr, | ||
196 | &dev_attr_pcrs.attr, | ||
197 | &dev_attr_enabled.attr, | ||
198 | &dev_attr_active.attr, | ||
199 | &dev_attr_owned.attr, | ||
200 | &dev_attr_temp_deactivated.attr, | ||
201 | &dev_attr_caps.attr, | ||
202 | &dev_attr_cancel.attr, | ||
203 | &dev_attr_durations.attr, | ||
204 | &dev_attr_timeouts.attr, | ||
205 | &dev_attr_locality.attr, | ||
206 | NULL, | ||
207 | }; | ||
208 | |||
209 | static struct attribute_group vtpm_attr_grp = { | ||
210 | .attrs = vtpm_attrs, | ||
211 | }; | ||
212 | |||
213 | #define TPM_LONG_TIMEOUT (10 * 60 * HZ) | ||
214 | |||
215 | static const struct tpm_vendor_specific tpm_vtpm = { | ||
216 | .status = vtpm_status, | ||
217 | .recv = vtpm_recv, | ||
218 | .send = vtpm_send, | ||
219 | .cancel = vtpm_cancel, | ||
220 | .req_complete_mask = VTPM_STATUS_IDLE | VTPM_STATUS_RESULT, | ||
221 | .req_complete_val = VTPM_STATUS_IDLE | VTPM_STATUS_RESULT, | ||
222 | .req_canceled = vtpm_req_canceled, | ||
223 | .attr_group = &vtpm_attr_grp, | ||
224 | .miscdev = { | ||
225 | .fops = &vtpm_ops, | ||
226 | }, | ||
227 | .duration = { | ||
228 | TPM_LONG_TIMEOUT, | ||
229 | TPM_LONG_TIMEOUT, | ||
230 | TPM_LONG_TIMEOUT, | ||
231 | }, | ||
232 | }; | ||
233 | |||
234 | static irqreturn_t tpmif_interrupt(int dummy, void *dev_id) | ||
235 | { | ||
236 | struct tpm_private *priv = dev_id; | ||
237 | |||
238 | switch (priv->shr->state) { | ||
239 | case VTPM_STATE_IDLE: | ||
240 | case VTPM_STATE_FINISH: | ||
241 | wake_up_interruptible(&priv->chip->vendor.read_queue); | ||
242 | break; | ||
243 | case VTPM_STATE_SUBMIT: | ||
244 | case VTPM_STATE_CANCEL: | ||
245 | default: | ||
246 | break; | ||
247 | } | ||
248 | return IRQ_HANDLED; | ||
249 | } | ||
250 | |||
251 | static int setup_chip(struct device *dev, struct tpm_private *priv) | ||
252 | { | ||
253 | struct tpm_chip *chip; | ||
254 | |||
255 | chip = tpm_register_hardware(dev, &tpm_vtpm); | ||
256 | if (!chip) | ||
257 | return -ENODEV; | ||
258 | |||
259 | init_waitqueue_head(&chip->vendor.read_queue); | ||
260 | |||
261 | priv->chip = chip; | ||
262 | TPM_VPRIV(chip) = priv; | ||
263 | |||
264 | return 0; | ||
265 | } | ||
266 | |||
267 | /* caller must clean up in case of errors */ | ||
268 | static int setup_ring(struct xenbus_device *dev, struct tpm_private *priv) | ||
269 | { | ||
270 | struct xenbus_transaction xbt; | ||
271 | const char *message = NULL; | ||
272 | int rv; | ||
273 | |||
274 | priv->shr = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO); | ||
275 | if (!priv->shr) { | ||
276 | xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); | ||
277 | return -ENOMEM; | ||
278 | } | ||
279 | |||
280 | rv = xenbus_grant_ring(dev, virt_to_mfn(priv->shr)); | ||
281 | if (rv < 0) | ||
282 | return rv; | ||
283 | |||
284 | priv->ring_ref = rv; | ||
285 | |||
286 | rv = xenbus_alloc_evtchn(dev, &priv->evtchn); | ||
287 | if (rv) | ||
288 | return rv; | ||
289 | |||
290 | rv = bind_evtchn_to_irqhandler(priv->evtchn, tpmif_interrupt, 0, | ||
291 | "tpmif", priv); | ||
292 | if (rv <= 0) { | ||
293 | xenbus_dev_fatal(dev, rv, "allocating TPM irq"); | ||
294 | return rv; | ||
295 | } | ||
296 | priv->chip->vendor.irq = rv; | ||
297 | |||
298 | again: | ||
299 | rv = xenbus_transaction_start(&xbt); | ||
300 | if (rv) { | ||
301 | xenbus_dev_fatal(dev, rv, "starting transaction"); | ||
302 | return rv; | ||
303 | } | ||
304 | |||
305 | rv = xenbus_printf(xbt, dev->nodename, | ||
306 | "ring-ref", "%u", priv->ring_ref); | ||
307 | if (rv) { | ||
308 | message = "writing ring-ref"; | ||
309 | goto abort_transaction; | ||
310 | } | ||
311 | |||
312 | rv = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", | ||
313 | priv->evtchn); | ||
314 | if (rv) { | ||
315 | message = "writing event-channel"; | ||
316 | goto abort_transaction; | ||
317 | } | ||
318 | |||
319 | rv = xenbus_printf(xbt, dev->nodename, "feature-protocol-v2", "1"); | ||
320 | if (rv) { | ||
321 | message = "writing feature-protocol-v2"; | ||
322 | goto abort_transaction; | ||
323 | } | ||
324 | |||
325 | rv = xenbus_transaction_end(xbt, 0); | ||
326 | if (rv == -EAGAIN) | ||
327 | goto again; | ||
328 | if (rv) { | ||
329 | xenbus_dev_fatal(dev, rv, "completing transaction"); | ||
330 | return rv; | ||
331 | } | ||
332 | |||
333 | xenbus_switch_state(dev, XenbusStateInitialised); | ||
334 | |||
335 | return 0; | ||
336 | |||
337 | abort_transaction: | ||
338 | xenbus_transaction_end(xbt, 1); | ||
339 | if (message) | ||
340 | xenbus_dev_error(dev, rv, "%s", message); | ||
341 | |||
342 | return rv; | ||
343 | } | ||
344 | |||
345 | static void ring_free(struct tpm_private *priv) | ||
346 | { | ||
347 | if (!priv) | ||
348 | return; | ||
349 | |||
350 | if (priv->ring_ref) | ||
351 | gnttab_end_foreign_access(priv->ring_ref, 0, | ||
352 | (unsigned long)priv->shr); | ||
353 | else | ||
354 | free_page((unsigned long)priv->shr); | ||
355 | |||
356 | if (priv->chip && priv->chip->vendor.irq) | ||
357 | unbind_from_irqhandler(priv->chip->vendor.irq, priv); | ||
358 | |||
359 | kfree(priv); | ||
360 | } | ||
361 | |||
362 | static int tpmfront_probe(struct xenbus_device *dev, | ||
363 | const struct xenbus_device_id *id) | ||
364 | { | ||
365 | struct tpm_private *priv; | ||
366 | int rv; | ||
367 | |||
368 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); | ||
369 | if (!priv) { | ||
370 | xenbus_dev_fatal(dev, -ENOMEM, "allocating priv structure"); | ||
371 | return -ENOMEM; | ||
372 | } | ||
373 | |||
374 | rv = setup_chip(&dev->dev, priv); | ||
375 | if (rv) { | ||
376 | kfree(priv); | ||
377 | return rv; | ||
378 | } | ||
379 | |||
380 | rv = setup_ring(dev, priv); | ||
381 | if (rv) { | ||
382 | tpm_remove_hardware(&dev->dev); | ||
383 | ring_free(priv); | ||
384 | return rv; | ||
385 | } | ||
386 | |||
387 | tpm_get_timeouts(priv->chip); | ||
388 | |||
389 | dev_set_drvdata(&dev->dev, priv->chip); | ||
390 | |||
391 | return rv; | ||
392 | } | ||
393 | |||
394 | static int tpmfront_remove(struct xenbus_device *dev) | ||
395 | { | ||
396 | struct tpm_chip *chip = dev_get_drvdata(&dev->dev); | ||
397 | struct tpm_private *priv = TPM_VPRIV(chip); | ||
398 | tpm_remove_hardware(&dev->dev); | ||
399 | ring_free(priv); | ||
400 | TPM_VPRIV(chip) = NULL; | ||
401 | return 0; | ||
402 | } | ||
403 | |||
404 | static int tpmfront_resume(struct xenbus_device *dev) | ||
405 | { | ||
406 | /* A suspend/resume/migrate will interrupt a vTPM anyway */ | ||
407 | tpmfront_remove(dev); | ||
408 | return tpmfront_probe(dev, NULL); | ||
409 | } | ||
410 | |||
411 | static void backend_changed(struct xenbus_device *dev, | ||
412 | enum xenbus_state backend_state) | ||
413 | { | ||
414 | int val; | ||
415 | |||
416 | switch (backend_state) { | ||
417 | case XenbusStateInitialised: | ||
418 | case XenbusStateConnected: | ||
419 | if (dev->state == XenbusStateConnected) | ||
420 | break; | ||
421 | |||
422 | if (xenbus_scanf(XBT_NIL, dev->otherend, | ||
423 | "feature-protocol-v2", "%d", &val) < 0) | ||
424 | val = 0; | ||
425 | if (!val) { | ||
426 | xenbus_dev_fatal(dev, -EINVAL, | ||
427 | "vTPM protocol 2 required"); | ||
428 | return; | ||
429 | } | ||
430 | xenbus_switch_state(dev, XenbusStateConnected); | ||
431 | break; | ||
432 | |||
433 | case XenbusStateClosing: | ||
434 | case XenbusStateClosed: | ||
435 | device_unregister(&dev->dev); | ||
436 | xenbus_frontend_closed(dev); | ||
437 | break; | ||
438 | default: | ||
439 | break; | ||
440 | } | ||
441 | } | ||
442 | |||
443 | static const struct xenbus_device_id tpmfront_ids[] = { | ||
444 | { "vtpm" }, | ||
445 | { "" } | ||
446 | }; | ||
447 | MODULE_ALIAS("xen:vtpm"); | ||
448 | |||
449 | static DEFINE_XENBUS_DRIVER(tpmfront, , | ||
450 | .probe = tpmfront_probe, | ||
451 | .remove = tpmfront_remove, | ||
452 | .resume = tpmfront_resume, | ||
453 | .otherend_changed = backend_changed, | ||
454 | ); | ||
455 | |||
456 | static int __init xen_tpmfront_init(void) | ||
457 | { | ||
458 | if (!xen_domain()) | ||
459 | return -ENODEV; | ||
460 | |||
461 | return xenbus_register_frontend(&tpmfront_driver); | ||
462 | } | ||
463 | module_init(xen_tpmfront_init); | ||
464 | |||
465 | static void __exit xen_tpmfront_exit(void) | ||
466 | { | ||
467 | xenbus_unregister_driver(&tpmfront_driver); | ||
468 | } | ||
469 | module_exit(xen_tpmfront_exit); | ||
470 | |||
471 | MODULE_AUTHOR("Daniel De Graaf <dgdegra@tycho.nsa.gov>"); | ||
472 | MODULE_DESCRIPTION("Xen vTPM Driver"); | ||
473 | MODULE_LICENSE("GPL"); | ||
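
In the driver above, the command/response bytes live in the shared page starting shr_data_offset() bytes in, past the header and one u32 grant reference per extra page; vtpm_send() rejects anything that would run past PAGE_SIZE, so one page bounds the whole exchange. A toy computation of that offset (the field layout is an assumption for the sketch, inferred from how the driver uses the structure):

        #include <stdio.h>
        #include <stdint.h>

        struct vtpm_shared_page {
                uint32_t length;        /* request/response length */
                uint8_t  state;         /* VTPM_STATE_* */
                uint8_t  locality;
                uint8_t  pad;
                uint8_t  nr_extra_pages; /* grant refs follow the header */
        };

        static unsigned int shr_data_offset(const struct vtpm_shared_page *shr)
        {
                /* data begins after the header plus one u32 ref per page */
                return sizeof(*shr) + sizeof(uint32_t) * shr->nr_extra_pages;
        }

        int main(void)
        {
                struct vtpm_shared_page shr = { .nr_extra_pages = 2 };

                printf("data at offset %u\n", shr_data_offset(&shr));
                /* 8-byte header + 2 * 4 bytes of refs = 16 */
                return 0;
        }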
diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index 682210d778bd..e61c36cbb866 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c | |||
@@ -208,7 +208,7 @@ static int xen_hvm_console_init(void) | |||
208 | 208 | ||
209 | info = vtermno_to_xencons(HVC_COOKIE); | 209 | info = vtermno_to_xencons(HVC_COOKIE); |
210 | if (!info) { | 210 | if (!info) { |
211 | info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO); | 211 | info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL); |
212 | if (!info) | 212 | if (!info) |
213 | return -ENOMEM; | 213 | return -ENOMEM; |
214 | } else if (info->intf != NULL) { | 214 | } else if (info->intf != NULL) { |
@@ -257,7 +257,7 @@ static int xen_pv_console_init(void) | |||
257 | 257 | ||
258 | info = vtermno_to_xencons(HVC_COOKIE); | 258 | info = vtermno_to_xencons(HVC_COOKIE); |
259 | if (!info) { | 259 | if (!info) { |
260 | info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO); | 260 | info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL); |
261 | if (!info) | 261 | if (!info) |
262 | return -ENOMEM; | 262 | return -ENOMEM; |
263 | } else if (info->intf != NULL) { | 263 | } else if (info->intf != NULL) { |
@@ -284,7 +284,7 @@ static int xen_initial_domain_console_init(void) | |||
284 | 284 | ||
285 | info = vtermno_to_xencons(HVC_COOKIE); | 285 | info = vtermno_to_xencons(HVC_COOKIE); |
286 | if (!info) { | 286 | if (!info) { |
287 | info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO); | 287 | info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL); |
288 | if (!info) | 288 | if (!info) |
289 | return -ENOMEM; | 289 | return -ENOMEM; |
290 | } | 290 | } |
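
The three hvc_xen hunks above are a pure cleanup: kzalloc() already implies __GFP_ZERO, so or-ing it into the flags changed nothing. The equivalence, modeled with libc calls purely for illustration:

        #include <stdlib.h>
        #include <string.h>

        /* kzalloc(size, flags) behaves like kmalloc(size, flags | __GFP_ZERO):
         * an allocation followed by unconditional zeroing of the buffer. */
        static void *kzalloc_model(size_t size)
        {
                void *p = malloc(size);

                if (p)
                        memset(p, 0, size);
                return p;
        }

        int main(void)
        {
                free(kzalloc_model(64));
                return 0;
        }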
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 2a2ef97697b2..3101cf6daf56 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c | |||
@@ -38,6 +38,7 @@ | |||
38 | 38 | ||
39 | #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt | 39 | #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt |
40 | 40 | ||
41 | #include <linux/cpu.h> | ||
41 | #include <linux/kernel.h> | 42 | #include <linux/kernel.h> |
42 | #include <linux/sched.h> | 43 | #include <linux/sched.h> |
43 | #include <linux/errno.h> | 44 | #include <linux/errno.h> |
@@ -52,6 +53,7 @@ | |||
52 | #include <linux/notifier.h> | 53 | #include <linux/notifier.h> |
53 | #include <linux/memory.h> | 54 | #include <linux/memory.h> |
54 | #include <linux/memory_hotplug.h> | 55 | #include <linux/memory_hotplug.h> |
56 | #include <linux/percpu-defs.h> | ||
55 | 57 | ||
56 | #include <asm/page.h> | 58 | #include <asm/page.h> |
57 | #include <asm/pgalloc.h> | 59 | #include <asm/pgalloc.h> |
@@ -90,6 +92,8 @@ EXPORT_SYMBOL_GPL(balloon_stats); | |||
90 | 92 | ||
91 | /* We increase/decrease in batches which fit in a page */ | 93 | /* We increase/decrease in batches which fit in a page */ |
92 | static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)]; | 94 | static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)]; |
95 | static DEFINE_PER_CPU(struct page *, balloon_scratch_page); | ||
96 | |||
93 | 97 | ||
94 | /* List of ballooned pages, threaded through the mem_map array. */ | 98 | /* List of ballooned pages, threaded through the mem_map array. */ |
95 | static LIST_HEAD(ballooned_pages); | 99 | static LIST_HEAD(ballooned_pages); |
@@ -412,7 +416,8 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) | |||
412 | if (xen_pv_domain() && !PageHighMem(page)) { | 416 | if (xen_pv_domain() && !PageHighMem(page)) { |
413 | ret = HYPERVISOR_update_va_mapping( | 417 | ret = HYPERVISOR_update_va_mapping( |
414 | (unsigned long)__va(pfn << PAGE_SHIFT), | 418 | (unsigned long)__va(pfn << PAGE_SHIFT), |
415 | __pte_ma(0), 0); | 419 | pfn_pte(page_to_pfn(__get_cpu_var(balloon_scratch_page)), |
420 | PAGE_KERNEL_RO), 0); | ||
416 | BUG_ON(ret); | 421 | BUG_ON(ret); |
417 | } | 422 | } |
418 | #endif | 423 | #endif |
@@ -425,7 +430,13 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) | |||
425 | /* No more mappings: invalidate P2M and add to balloon. */ | 430 | /* No more mappings: invalidate P2M and add to balloon. */ |
426 | for (i = 0; i < nr_pages; i++) { | 431 | for (i = 0; i < nr_pages; i++) { |
427 | pfn = mfn_to_pfn(frame_list[i]); | 432 | pfn = mfn_to_pfn(frame_list[i]); |
428 | __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); | 433 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { |
434 | unsigned long p; | ||
435 | struct page *pg; | ||
436 | pg = __get_cpu_var(balloon_scratch_page); | ||
437 | p = page_to_pfn(pg); | ||
438 | __set_phys_to_machine(pfn, pfn_to_mfn(p)); | ||
439 | } | ||
429 | balloon_append(pfn_to_page(pfn)); | 440 | balloon_append(pfn_to_page(pfn)); |
430 | } | 441 | } |
431 | 442 | ||
@@ -480,6 +491,18 @@ static void balloon_process(struct work_struct *work) | |||
480 | mutex_unlock(&balloon_mutex); | 491 | mutex_unlock(&balloon_mutex); |
481 | } | 492 | } |
482 | 493 | ||
494 | struct page *get_balloon_scratch_page(void) | ||
495 | { | ||
496 | struct page *ret = get_cpu_var(balloon_scratch_page); | ||
497 | BUG_ON(ret == NULL); | ||
498 | return ret; | ||
499 | } | ||
500 | |||
501 | void put_balloon_scratch_page(void) | ||
502 | { | ||
503 | put_cpu_var(balloon_scratch_page); | ||
504 | } | ||
505 | |||
483 | /* Resets the Xen limit, sets new target, and kicks off processing. */ | 506 | /* Resets the Xen limit, sets new target, and kicks off processing. */ |
484 | void balloon_set_new_target(unsigned long target) | 507 | void balloon_set_new_target(unsigned long target) |
485 | { | 508 | { |
@@ -573,13 +596,47 @@ static void __init balloon_add_region(unsigned long start_pfn, | |||
573 | } | 596 | } |
574 | } | 597 | } |
575 | 598 | ||
599 | static int __cpuinit balloon_cpu_notify(struct notifier_block *self, | ||
600 | unsigned long action, void *hcpu) | ||
601 | { | ||
602 | int cpu = (long)hcpu; | ||
603 | switch (action) { | ||
604 | case CPU_UP_PREPARE: | ||
605 | if (per_cpu(balloon_scratch_page, cpu) != NULL) | ||
606 | break; | ||
607 | per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL); | ||
608 | if (per_cpu(balloon_scratch_page, cpu) == NULL) { | ||
609 | pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu); | ||
610 | return NOTIFY_BAD; | ||
611 | } | ||
612 | break; | ||
613 | default: | ||
614 | break; | ||
615 | } | ||
616 | return NOTIFY_OK; | ||
617 | } | ||
618 | |||
619 | static struct notifier_block balloon_cpu_notifier __cpuinitdata = { | ||
620 | .notifier_call = balloon_cpu_notify, | ||
621 | }; | ||
622 | |||
576 | static int __init balloon_init(void) | 623 | static int __init balloon_init(void) |
577 | { | 624 | { |
578 | int i; | 625 | int i, cpu; |
579 | 626 | ||
580 | if (!xen_domain()) | 627 | if (!xen_domain()) |
581 | return -ENODEV; | 628 | return -ENODEV; |
582 | 629 | ||
630 | for_each_online_cpu(cpu) | ||
631 | { | ||
632 | per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL); | ||
633 | if (per_cpu(balloon_scratch_page, cpu) == NULL) { | ||
634 | pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu); | ||
635 | return -ENOMEM; | ||
636 | } | ||
637 | } | ||
638 | register_cpu_notifier(&balloon_cpu_notifier); | ||
639 | |||
583 | pr_info("Initialising balloon driver\n"); | 640 | pr_info("Initialising balloon driver\n"); |
584 | 641 | ||
585 | balloon_stats.current_pages = xen_pv_domain() | 642 | balloon_stats.current_pages = xen_pv_domain() |
@@ -616,4 +673,15 @@ static int __init balloon_init(void) | |||
616 | 673 | ||
617 | subsys_initcall(balloon_init); | 674 | subsys_initcall(balloon_init); |
618 | 675 | ||
676 | static int __init balloon_clear(void) | ||
677 | { | ||
678 | int cpu; | ||
679 | |||
680 | for_each_possible_cpu(cpu) | ||
681 | per_cpu(balloon_scratch_page, cpu) = NULL; | ||
682 | |||
683 | return 0; | ||
684 | } | ||
685 | early_initcall(balloon_clear); | ||
686 | |||
619 | MODULE_LICENSE("GPL"); | 687 | MODULE_LICENSE("GPL"); |
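
The balloon changes give ballooned-out PV pages somewhere harmless to point: rather than leaving the VA mapping and P2M entry invalid, both are redirected to a per-cpu read-only scratch page (allocated at init time and, via the CPU notifier, for late-arriving CPUs; the early_initcall first NULLs every per-cpu slot so those checks start from a known state). A simplified model of the P2M side, with an array standing in for the real phys-to-machine table:

        #include <stdio.h>

        #define NR_PFNS 16
        #define INVALID_MFN (~0UL)

        static unsigned long p2m[NR_PFNS];      /* toy pfn -> mfn table */
        static unsigned long scratch_mfn = 42;  /* the per-cpu scratch page */

        /* Old behaviour: ballooning out invalidated the P2M entry. */
        static void balloon_out_old(unsigned long pfn)
        {
                p2m[pfn] = INVALID_MFN;
        }

        /* New behaviour: the entry is aimed at the scratch page instead,
         * as decrease_reservation() now does via __set_phys_to_machine(),
         * so a stray read hits harmless memory rather than faulting. */
        static void balloon_out_new(unsigned long pfn)
        {
                p2m[pfn] = scratch_mfn;
        }

        int main(void)
        {
                balloon_out_old(3);
                balloon_out_new(4);
                printf("pfn 3 -> %#lx, pfn 4 -> %lu\n", p2m[3], p2m[4]);
                return 0;
        }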
diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 5e8be462aed5..4035e833ea26 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c | |||
@@ -56,6 +56,7 @@ | |||
56 | #include <xen/interface/hvm/params.h> | 56 | #include <xen/interface/hvm/params.h> |
57 | #include <xen/interface/physdev.h> | 57 | #include <xen/interface/physdev.h> |
58 | #include <xen/interface/sched.h> | 58 | #include <xen/interface/sched.h> |
59 | #include <xen/interface/vcpu.h> | ||
59 | #include <asm/hw_irq.h> | 60 | #include <asm/hw_irq.h> |
60 | 61 | ||
61 | /* | 62 | /* |
@@ -1212,7 +1213,17 @@ EXPORT_SYMBOL_GPL(evtchn_put); | |||
1212 | 1213 | ||
1213 | void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) | 1214 | void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) |
1214 | { | 1215 | { |
1215 | int irq = per_cpu(ipi_to_irq, cpu)[vector]; | 1216 | int irq; |
1217 | |||
1218 | #ifdef CONFIG_X86 | ||
1219 | if (unlikely(vector == XEN_NMI_VECTOR)) { | ||
1220 | int rc = HYPERVISOR_vcpu_op(VCPUOP_send_nmi, cpu, NULL); | ||
1221 | if (rc < 0) | ||
1222 | printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n", cpu, rc); | ||
1223 | return; | ||
1224 | } | ||
1225 | #endif | ||
1226 | irq = per_cpu(ipi_to_irq, cpu)[vector]; | ||
1216 | BUG_ON(irq < 0); | 1227 | BUG_ON(irq < 0); |
1217 | notify_remote_via_irq(irq); | 1228 | notify_remote_via_irq(irq); |
1218 | } | 1229 | } |
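
The xen_send_IPI_one() hunk above special-cases NMIs: they cannot be delivered as ordinary event-channel upcalls, so the code goes straight to the hypervisor with VCPUOP_send_nmi. A stubbed model of that dispatch (both delivery paths are printf stand-ins, and the XEN_NMI_VECTOR value is assumed for the sketch):

        #include <stdio.h>

        #define XEN_NMI_VECTOR 2        /* assumed value, sketch only */

        static int vcpu_op_send_nmi(int cpu)
        {
                printf("hypercall: NMI -> cpu%d\n", cpu);
                return 0;
        }

        static void notify_remote_stub(int cpu, int vector)
        {
                printf("event channel: vector %d -> cpu%d\n", vector, cpu);
        }

        /* Mirrors the branch added to xen_send_IPI_one(): NMIs take the
         * hypercall path, everything else the bound event-channel irq. */
        static void send_ipi_one(int cpu, int vector)
        {
                if (vector == XEN_NMI_VECTOR) {
                        if (vcpu_op_send_nmi(cpu) < 0)
                                fprintf(stderr,
                                        "Sending nmi to CPU%d failed\n", cpu);
                        return;
                }
                notify_remote_stub(cpu, vector);
        }

        int main(void)
        {
                send_ipi_one(1, XEN_NMI_VECTOR);
                send_ipi_one(1, 0);
                return 0;
        }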
@@ -1379,14 +1390,21 @@ static void __xen_evtchn_do_upcall(void) | |||
1379 | 1390 | ||
1380 | pending_bits = active_evtchns(cpu, s, word_idx); | 1391 | pending_bits = active_evtchns(cpu, s, word_idx); |
1381 | bit_idx = 0; /* usually scan entire word from start */ | 1392 | bit_idx = 0; /* usually scan entire word from start */ |
1393 | /* | ||
1394 | * We scan the starting word in two parts. | ||
1395 | * | ||
1396 | * 1st time: start in the middle, scanning the | ||
1397 | * upper bits. | ||
1398 | * | ||
1399 | * 2nd time: scan the whole word (not just the | ||
1400 | * parts skipped in the first pass) -- if an | ||
1401 | * event in the previously scanned bits is | ||
1402 | * pending again it would just be scanned on | ||
1403 | * the next loop anyway. | ||
1404 | */ | ||
1382 | if (word_idx == start_word_idx) { | 1405 | if (word_idx == start_word_idx) { |
1383 | /* We scan the starting word in two parts */ | ||
1384 | if (i == 0) | 1406 | if (i == 0) |
1385 | /* 1st time: start in the middle */ | ||
1386 | bit_idx = start_bit_idx; | 1407 | bit_idx = start_bit_idx; |
1387 | else | ||
1388 | /* 2nd time: mask bits done already */ | ||
1389 | bit_idx &= (1UL << start_bit_idx) - 1; | ||
1390 | } | 1408 | } |
1391 | 1409 | ||
1392 | do { | 1410 | do { |
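
The expanded comment above spells out the fairness trick in the upcall loop: the word the previous scan stopped in is visited twice, first from the saved bit position upward, then again from bit 0; bits handled on the first pass are clear by then, and anything freshly pending simply gets handled again. A single-word model of the two passes (assumes 64-bit unsigned long):

        #include <stdio.h>

        /* pending: one word of pending events; start_bit: where the
         * previous upcall stopped. Handling an event clears its bit, so
         * the full rescan on the second pass only picks up bits skipped
         * (or re-raised) the first time around. */
        static void scan_word(unsigned long pending, int start_bit)
        {
                for (int pass = 0; pass < 2; pass++)
                        for (int bit = pass ? 0 : start_bit; bit < 64; bit++)
                                if (pending & (1UL << bit)) {
                                        pending &= ~(1UL << bit);
                                        printf("pass %d: event %d\n",
                                               pass, bit);
                                }
        }

        int main(void)
        {
                /* events 0 and 2 sit below the start position, 40 above */
                scan_word(0x5UL | (1UL << 40), 8);
                return 0;
        }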
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index b6165e047f48..8b3a69a06c39 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c | |||
@@ -57,6 +57,7 @@ | |||
57 | 57 | ||
58 | struct per_user_data { | 58 | struct per_user_data { |
59 | struct mutex bind_mutex; /* serialize bind/unbind operations */ | 59 | struct mutex bind_mutex; /* serialize bind/unbind operations */ |
60 | struct rb_root evtchns; | ||
60 | 61 | ||
61 | /* Notification ring, accessed via /dev/xen/evtchn. */ | 62 | /* Notification ring, accessed via /dev/xen/evtchn. */ |
62 | #define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t)) | 63 | #define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t)) |
@@ -64,6 +65,7 @@ struct per_user_data { | |||
64 | evtchn_port_t *ring; | 65 | evtchn_port_t *ring; |
65 | unsigned int ring_cons, ring_prod, ring_overflow; | 66 | unsigned int ring_cons, ring_prod, ring_overflow; |
66 | struct mutex ring_cons_mutex; /* protect against concurrent readers */ | 67 | struct mutex ring_cons_mutex; /* protect against concurrent readers */ |
68 | spinlock_t ring_prod_lock; /* protect against concurrent interrupts */ | ||
67 | 69 | ||
68 | /* Processes wait on this queue when ring is empty. */ | 70 | /* Processes wait on this queue when ring is empty. */ |
69 | wait_queue_head_t evtchn_wait; | 71 | wait_queue_head_t evtchn_wait; |
@@ -71,54 +73,79 @@ struct per_user_data { | |||
71 | const char *name; | 73 | const char *name; |
72 | }; | 74 | }; |
73 | 75 | ||
74 | /* | 76 | struct user_evtchn { |
75 | * Who's bound to each port? This is logically an array of struct | 77 | struct rb_node node; |
76 | * per_user_data *, but we encode the current enabled-state in bit 0. | 78 | struct per_user_data *user; |
77 | */ | 79 | unsigned port; |
78 | static unsigned long *port_user; | 80 | bool enabled; |
79 | static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */ | 81 | }; |
80 | 82 | ||
81 | static inline struct per_user_data *get_port_user(unsigned port) | 83 | static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn) |
82 | { | 84 | { |
83 | return (struct per_user_data *)(port_user[port] & ~1); | 85 | struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL; |
84 | } | ||
85 | 86 | ||
86 | static inline void set_port_user(unsigned port, struct per_user_data *u) | 87 | while (*new) { |
87 | { | 88 | struct user_evtchn *this; |
88 | port_user[port] = (unsigned long)u; | 89 | |
90 | this = container_of(*new, struct user_evtchn, node); | ||
91 | |||
92 | parent = *new; | ||
93 | if (this->port < evtchn->port) | ||
94 | new = &((*new)->rb_left); | ||
95 | else if (this->port > evtchn->port) | ||
96 | new = &((*new)->rb_right); | ||
97 | else | ||
98 | return -EEXIST; | ||
99 | } | ||
100 | |||
101 | /* Add new node and rebalance tree. */ | ||
102 | rb_link_node(&evtchn->node, parent, new); | ||
103 | rb_insert_color(&evtchn->node, &u->evtchns); | ||
104 | |||
105 | return 0; | ||
89 | } | 106 | } |
90 | 107 | ||
91 | static inline bool get_port_enabled(unsigned port) | 108 | static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn) |
92 | { | 109 | { |
93 | return port_user[port] & 1; | 110 | rb_erase(&evtchn->node, &u->evtchns); |
111 | kfree(evtchn); | ||
94 | } | 112 | } |
95 | 113 | ||
96 | static inline void set_port_enabled(unsigned port, bool enabled) | 114 | static struct user_evtchn *find_evtchn(struct per_user_data *u, unsigned port) |
97 | { | 115 | { |
98 | if (enabled) | 116 | struct rb_node *node = u->evtchns.rb_node; |
99 | port_user[port] |= 1; | 117 | |
100 | else | 118 | while (node) { |
101 | port_user[port] &= ~1; | 119 | struct user_evtchn *evtchn; |
120 | |||
121 | evtchn = container_of(node, struct user_evtchn, node); | ||
122 | |||
123 | if (evtchn->port < port) | ||
124 | node = node->rb_left; | ||
125 | else if (evtchn->port > port) | ||
126 | node = node->rb_right; | ||
127 | else | ||
128 | return evtchn; | ||
129 | } | ||
130 | return NULL; | ||
102 | } | 131 | } |
103 | 132 | ||
104 | static irqreturn_t evtchn_interrupt(int irq, void *data) | 133 | static irqreturn_t evtchn_interrupt(int irq, void *data) |
105 | { | 134 | { |
106 | unsigned int port = (unsigned long)data; | 135 | struct user_evtchn *evtchn = data; |
107 | struct per_user_data *u; | 136 | struct per_user_data *u = evtchn->user; |
108 | |||
109 | spin_lock(&port_user_lock); | ||
110 | |||
111 | u = get_port_user(port); | ||
112 | 137 | ||
113 | WARN(!get_port_enabled(port), | 138 | WARN(!evtchn->enabled, |
114 | "Interrupt for port %d, but apparently not enabled; per-user %p\n", | 139 | "Interrupt for port %d, but apparently not enabled; per-user %p\n", |
115 | port, u); | 140 | evtchn->port, u); |
116 | 141 | ||
117 | disable_irq_nosync(irq); | 142 | disable_irq_nosync(irq); |
118 | set_port_enabled(port, false); | 143 | evtchn->enabled = false; |
144 | |||
145 | spin_lock(&u->ring_prod_lock); | ||
119 | 146 | ||
120 | if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) { | 147 | if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) { |
121 | u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port; | 148 | u->ring[EVTCHN_RING_MASK(u->ring_prod)] = evtchn->port; |
122 | wmb(); /* Ensure ring contents visible */ | 149 | wmb(); /* Ensure ring contents visible */ |
123 | if (u->ring_cons == u->ring_prod++) { | 150 | if (u->ring_cons == u->ring_prod++) { |
124 | wake_up_interruptible(&u->evtchn_wait); | 151 | wake_up_interruptible(&u->evtchn_wait); |
@@ -128,7 +155,7 @@ static irqreturn_t evtchn_interrupt(int irq, void *data) | |||
128 | } else | 155 | } else |
129 | u->ring_overflow = 1; | 156 | u->ring_overflow = 1; |
130 | 157 | ||
131 | spin_unlock(&port_user_lock); | 158 | spin_unlock(&u->ring_prod_lock); |
132 | 159 | ||
133 | return IRQ_HANDLED; | 160 | return IRQ_HANDLED; |
134 | } | 161 | } |
@@ -229,20 +256,20 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf, | |||
229 | if (copy_from_user(kbuf, buf, count) != 0) | 256 | if (copy_from_user(kbuf, buf, count) != 0) |
230 | goto out; | 257 | goto out; |
231 | 258 | ||
232 | spin_lock_irq(&port_user_lock); | 259 | mutex_lock(&u->bind_mutex); |
233 | 260 | ||
234 | for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) { | 261 | for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) { |
235 | unsigned port = kbuf[i]; | 262 | unsigned port = kbuf[i]; |
263 | struct user_evtchn *evtchn; | ||
236 | 264 | ||
237 | if (port < NR_EVENT_CHANNELS && | 265 | evtchn = find_evtchn(u, port); |
238 | get_port_user(port) == u && | 266 | if (evtchn && !evtchn->enabled) { |
239 | !get_port_enabled(port)) { | 267 | evtchn->enabled = true; |
240 | set_port_enabled(port, true); | ||
241 | enable_irq(irq_from_evtchn(port)); | 268 | enable_irq(irq_from_evtchn(port)); |
242 | } | 269 | } |
243 | } | 270 | } |
244 | 271 | ||
245 | spin_unlock_irq(&port_user_lock); | 272 | mutex_unlock(&u->bind_mutex); |
246 | 273 | ||
247 | rc = count; | 274 | rc = count; |
248 | 275 | ||
@@ -253,6 +280,8 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf, | |||
253 | 280 | ||
254 | static int evtchn_bind_to_user(struct per_user_data *u, int port) | 281 | static int evtchn_bind_to_user(struct per_user_data *u, int port) |
255 | { | 282 | { |
283 | struct user_evtchn *evtchn; | ||
284 | struct evtchn_close close; | ||
256 | int rc = 0; | 285 | int rc = 0; |
257 | 286 | ||
258 | /* | 287 | /* |
@@ -263,35 +292,46 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port) | |||
263 | * interrupt handler yet, and our caller has already | 292 | * interrupt handler yet, and our caller has already |
264 | * serialized bind operations.) | 293 | * serialized bind operations.) |
265 | */ | 294 | */ |
266 | BUG_ON(get_port_user(port) != NULL); | 295 | |
267 | set_port_user(port, u); | 296 | evtchn = kzalloc(sizeof(*evtchn), GFP_KERNEL); |
268 | set_port_enabled(port, true); /* start enabled */ | 297 | if (!evtchn) |
298 | return -ENOMEM; | ||
299 | |||
300 | evtchn->user = u; | ||
301 | evtchn->port = port; | ||
302 | evtchn->enabled = true; /* start enabled */ | ||
303 | |||
304 | rc = add_evtchn(u, evtchn); | ||
305 | if (rc < 0) | ||
306 | goto err; | ||
269 | 307 | ||
270 | rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED, | 308 | rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED, |
271 | u->name, (void *)(unsigned long)port); | 309 | u->name, evtchn); |
272 | if (rc >= 0) | 310 | if (rc < 0) |
273 | rc = evtchn_make_refcounted(port); | 311 | goto err; |
274 | else { | ||
275 | /* bind failed, should close the port now */ | ||
276 | struct evtchn_close close; | ||
277 | close.port = port; | ||
278 | if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) | ||
279 | BUG(); | ||
280 | set_port_user(port, NULL); | ||
281 | } | ||
282 | 312 | ||
313 | rc = evtchn_make_refcounted(port); | ||
314 | return rc; | ||
315 | |||
316 | err: | ||
317 | /* bind failed, should close the port now */ | ||
318 | close.port = port; | ||
319 | if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) | ||
320 | BUG(); | ||
321 | del_evtchn(u, evtchn); | ||
283 | return rc; | 322 | return rc; |
284 | } | 323 | } |
285 | 324 | ||
286 | static void evtchn_unbind_from_user(struct per_user_data *u, int port) | 325 | static void evtchn_unbind_from_user(struct per_user_data *u, |
326 | struct user_evtchn *evtchn) | ||
287 | { | 327 | { |
288 | int irq = irq_from_evtchn(port); | 328 | int irq = irq_from_evtchn(evtchn->port); |
289 | 329 | ||
290 | BUG_ON(irq < 0); | 330 | BUG_ON(irq < 0); |
291 | 331 | ||
292 | unbind_from_irqhandler(irq, (void *)(unsigned long)port); | 332 | unbind_from_irqhandler(irq, evtchn); |
293 | 333 | ||
294 | set_port_user(port, NULL); | 334 | del_evtchn(u, evtchn); |
295 | } | 335 | } |
296 | 336 | ||
297 | static long evtchn_ioctl(struct file *file, | 337 | static long evtchn_ioctl(struct file *file, |
@@ -370,6 +410,7 @@ static long evtchn_ioctl(struct file *file, | |||
370 | 410 | ||
371 | case IOCTL_EVTCHN_UNBIND: { | 411 | case IOCTL_EVTCHN_UNBIND: { |
372 | struct ioctl_evtchn_unbind unbind; | 412 | struct ioctl_evtchn_unbind unbind; |
413 | struct user_evtchn *evtchn; | ||
373 | 414 | ||
374 | rc = -EFAULT; | 415 | rc = -EFAULT; |
375 | if (copy_from_user(&unbind, uarg, sizeof(unbind))) | 416 | if (copy_from_user(&unbind, uarg, sizeof(unbind))) |
@@ -380,29 +421,27 @@ static long evtchn_ioctl(struct file *file, | |||
380 | break; | 421 | break; |
381 | 422 | ||
382 | rc = -ENOTCONN; | 423 | rc = -ENOTCONN; |
383 | if (get_port_user(unbind.port) != u) | 424 | evtchn = find_evtchn(u, unbind.port); |
425 | if (!evtchn) | ||
384 | break; | 426 | break; |
385 | 427 | ||
386 | disable_irq(irq_from_evtchn(unbind.port)); | 428 | disable_irq(irq_from_evtchn(unbind.port)); |
387 | 429 | evtchn_unbind_from_user(u, evtchn); | |
388 | evtchn_unbind_from_user(u, unbind.port); | ||
389 | |||
390 | rc = 0; | 430 | rc = 0; |
391 | break; | 431 | break; |
392 | } | 432 | } |
393 | 433 | ||
394 | case IOCTL_EVTCHN_NOTIFY: { | 434 | case IOCTL_EVTCHN_NOTIFY: { |
395 | struct ioctl_evtchn_notify notify; | 435 | struct ioctl_evtchn_notify notify; |
436 | struct user_evtchn *evtchn; | ||
396 | 437 | ||
397 | rc = -EFAULT; | 438 | rc = -EFAULT; |
398 | if (copy_from_user(¬ify, uarg, sizeof(notify))) | 439 | if (copy_from_user(¬ify, uarg, sizeof(notify))) |
399 | break; | 440 | break; |
400 | 441 | ||
401 | if (notify.port >= NR_EVENT_CHANNELS) { | 442 | rc = -ENOTCONN; |
402 | rc = -EINVAL; | 443 | evtchn = find_evtchn(u, notify.port); |
403 | } else if (get_port_user(notify.port) != u) { | 444 | if (evtchn) { |
404 | rc = -ENOTCONN; | ||
405 | } else { | ||
406 | notify_remote_via_evtchn(notify.port); | 445 | notify_remote_via_evtchn(notify.port); |
407 | rc = 0; | 446 | rc = 0; |
408 | } | 447 | } |
@@ -412,9 +451,9 @@ static long evtchn_ioctl(struct file *file, | |||
412 | case IOCTL_EVTCHN_RESET: { | 451 | case IOCTL_EVTCHN_RESET: { |
413 | /* Initialise the ring to empty. Clear errors. */ | 452 | /* Initialise the ring to empty. Clear errors. */ |
414 | mutex_lock(&u->ring_cons_mutex); | 453 | mutex_lock(&u->ring_cons_mutex); |
415 | spin_lock_irq(&port_user_lock); | 454 | spin_lock_irq(&u->ring_prod_lock); |
416 | u->ring_cons = u->ring_prod = u->ring_overflow = 0; | 455 | u->ring_cons = u->ring_prod = u->ring_overflow = 0; |
417 | spin_unlock_irq(&port_user_lock); | 456 | spin_unlock_irq(&u->ring_prod_lock); |
418 | mutex_unlock(&u->ring_cons_mutex); | 457 | mutex_unlock(&u->ring_cons_mutex); |
419 | rc = 0; | 458 | rc = 0; |
420 | break; | 459 | break; |
@@ -473,6 +512,7 @@ static int evtchn_open(struct inode *inode, struct file *filp) | |||
473 | 512 | ||
474 | mutex_init(&u->bind_mutex); | 513 | mutex_init(&u->bind_mutex); |
475 | mutex_init(&u->ring_cons_mutex); | 514 | mutex_init(&u->ring_cons_mutex); |
515 | spin_lock_init(&u->ring_prod_lock); | ||
476 | 516 | ||
477 | filp->private_data = u; | 517 | filp->private_data = u; |
478 | 518 | ||
@@ -481,15 +521,15 @@ static int evtchn_open(struct inode *inode, struct file *filp) | |||
481 | 521 | ||
482 | static int evtchn_release(struct inode *inode, struct file *filp) | 522 | static int evtchn_release(struct inode *inode, struct file *filp) |
483 | { | 523 | { |
484 | int i; | ||
485 | struct per_user_data *u = filp->private_data; | 524 | struct per_user_data *u = filp->private_data; |
525 | struct rb_node *node; | ||
486 | 526 | ||
487 | for (i = 0; i < NR_EVENT_CHANNELS; i++) { | 527 | while ((node = u->evtchns.rb_node)) { |
488 | if (get_port_user(i) != u) | 528 | struct user_evtchn *evtchn; |
489 | continue; | ||
490 | 529 | ||
491 | disable_irq(irq_from_evtchn(i)); | 530 | evtchn = rb_entry(node, struct user_evtchn, node); |
492 | evtchn_unbind_from_user(get_port_user(i), i); | 531 | disable_irq(irq_from_evtchn(evtchn->port)); |
532 | evtchn_unbind_from_user(u, evtchn); | ||
493 | } | 533 | } |
494 | 534 | ||
495 | free_page((unsigned long)u->ring); | 535 | free_page((unsigned long)u->ring); |
@@ -523,12 +563,6 @@ static int __init evtchn_init(void) | |||
523 | if (!xen_domain()) | 563 | if (!xen_domain()) |
524 | return -ENODEV; | 564 | return -ENODEV; |
525 | 565 | ||
526 | port_user = kcalloc(NR_EVENT_CHANNELS, sizeof(*port_user), GFP_KERNEL); | ||
527 | if (port_user == NULL) | ||
528 | return -ENOMEM; | ||
529 | |||
530 | spin_lock_init(&port_user_lock); | ||
531 | |||
532 | /* Create '/dev/xen/evtchn'. */ | 566 | /* Create '/dev/xen/evtchn'. */ |
533 | err = misc_register(&evtchn_miscdev); | 567 | err = misc_register(&evtchn_miscdev); |
534 | if (err != 0) { | 568 | if (err != 0) { |
@@ -543,9 +577,6 @@ static int __init evtchn_init(void) | |||
543 | 577 | ||
544 | static void __exit evtchn_cleanup(void) | 578 | static void __exit evtchn_cleanup(void) |
545 | { | 579 | { |
546 | kfree(port_user); | ||
547 | port_user = NULL; | ||
548 | |||
549 | misc_deregister(&evtchn_miscdev); | 580 | misc_deregister(&evtchn_miscdev); |
550 | } | 581 | } |
551 | 582 | ||
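Note: the evtchn.c hunks above replace the global, NR_EVENT_CHANNELS-sized port_user[] array with a per-open-file rb-tree of struct user_evtchn (hence the dropped kcalloc() in evtchn_init() and the dropped bound check in IOCTL_EVTCHN_NOTIFY). The add_evtchn()/find_evtchn()/del_evtchn() helpers the hunks call fall outside this excerpt; a minimal sketch of what they amount to, using the standard kernel rb-tree idiom keyed by port and assuming per_user_data gained a struct rb_root evtchns:

    static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
    {
            struct rb_node **new = &u->evtchns.rb_node, *parent = NULL;

            while (*new) {
                    struct user_evtchn *this;

                    this = rb_entry(*new, struct user_evtchn, node);

                    parent = *new;
                    if (this->port < evtchn->port)
                            new = &((*new)->rb_right);
                    else if (this->port > evtchn->port)
                            new = &((*new)->rb_left);
                    else
                            return -EEXIST; /* port already bound */
            }

            /* Link the new node in, then rebalance. */
            rb_link_node(&evtchn->node, parent, new);
            rb_insert_color(&evtchn->node, &u->evtchns);
            return 0;
    }

    static struct user_evtchn *find_evtchn(struct per_user_data *u, unsigned port)
    {
            struct rb_node *node = u->evtchns.rb_node;

            while (node) {
                    struct user_evtchn *evtchn;

                    evtchn = rb_entry(node, struct user_evtchn, node);

                    if (evtchn->port < port)
                            node = node->rb_right;
                    else if (evtchn->port > port)
                            node = node->rb_left;
                    else
                            return evtchn;
            }
            return NULL;
    }

del_evtchn() would correspondingly pair rb_erase(&evtchn->node, &u->evtchns) with kfree(evtchn).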
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index eab5427c75f5..e41c79c986ea 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c | |||
@@ -272,19 +272,12 @@ static int map_grant_pages(struct grant_map *map) | |||
272 | * with find_grant_ptes. | 272 | * with find_grant_ptes. |
273 | */ | 273 | */ |
274 | for (i = 0; i < map->count; i++) { | 274 | for (i = 0; i < map->count; i++) { |
275 | unsigned level; | ||
276 | unsigned long address = (unsigned long) | 275 | unsigned long address = (unsigned long) |
277 | pfn_to_kaddr(page_to_pfn(map->pages[i])); | 276 | pfn_to_kaddr(page_to_pfn(map->pages[i])); |
278 | pte_t *ptep; | ||
279 | u64 pte_maddr = 0; | ||
280 | BUG_ON(PageHighMem(map->pages[i])); | 277 | BUG_ON(PageHighMem(map->pages[i])); |
281 | 278 | ||
282 | ptep = lookup_address(address, &level); | 279 | gnttab_set_map_op(&map->kmap_ops[i], address, |
283 | pte_maddr = arbitrary_virt_to_machine(ptep).maddr; | 280 | map->flags | GNTMAP_host_map, |
284 | gnttab_set_map_op(&map->kmap_ops[i], pte_maddr, | ||
285 | map->flags | | ||
286 | GNTMAP_host_map | | ||
287 | GNTMAP_contains_pte, | ||
288 | map->grants[i].ref, | 281 | map->grants[i].ref, |
289 | map->grants[i].domid); | 282 | map->grants[i].domid); |
290 | } | 283 | } |
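Note: gntdev's kmap_ops previously carried the machine address of the PTE plus GNTMAP_contains_pte; they now pass the page's kernel virtual address with plain GNTMAP_host_map, so the lookup_address()/arbitrary_virt_to_machine() machinery can go. This works because gnttab_set_map_op() already encodes host_addr appropriately for each mode; the helper in include/xen/grant_table.h looks roughly like this:

    static inline void
    gnttab_set_map_op(struct gnttab_map_grant_ref *map, phys_addr_t addr,
                      uint32_t flags, grant_ref_t ref, domid_t domid)
    {
            if (flags & GNTMAP_contains_pte)
                    map->host_addr = addr;          /* machine address of the PTE */
            else if (xen_feature(XENFEAT_auto_translated_physmap))
                    map->host_addr = __pa(addr);    /* pseudo-physical address */
            else
                    map->host_addr = addr;          /* kernel virtual address */

            map->flags = flags;
            map->ref = ref;
            map->dom = domid;
    }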
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 04cdeb8e3719..c4d2298893b1 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c | |||
@@ -730,9 +730,18 @@ void gnttab_request_free_callback(struct gnttab_free_callback *callback, | |||
730 | void (*fn)(void *), void *arg, u16 count) | 730 | void (*fn)(void *), void *arg, u16 count) |
731 | { | 731 | { |
732 | unsigned long flags; | 732 | unsigned long flags; |
733 | struct gnttab_free_callback *cb; | ||
734 | |||
733 | spin_lock_irqsave(&gnttab_list_lock, flags); | 735 | spin_lock_irqsave(&gnttab_list_lock, flags); |
734 | if (callback->next) | 736 | |
735 | goto out; | 737 | /* Check if the callback is already on the list */ |
738 | cb = gnttab_free_callback_list; | ||
739 | while (cb) { | ||
740 | if (cb == callback) | ||
741 | goto out; | ||
742 | cb = cb->next; | ||
743 | } | ||
744 | |||
736 | callback->fn = fn; | 745 | callback->fn = fn; |
737 | callback->arg = arg; | 746 | callback->arg = arg; |
738 | callback->count = count; | 747 | callback->count = count; |
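Note: the old test, "if (callback->next) goto out;", treated a NULL next pointer as proof that the callback was not yet queued. But gnttab_free_callback_list is singly linked, so the entry at the tail of the list legitimately has next == NULL and could be linked in a second time, corrupting the list into a cycle. An illustration (a fragment, not kernel code):

    struct gnttab_free_callback a, b;

    b.next = NULL;
    gnttab_free_callback_list = &b;         /* list: b */

    a.next = gnttab_free_callback_list;     /* list: a -> b */
    gnttab_free_callback_list = &a;

    /* b is on the list, yet b.next == NULL, so the old check re-adds it: */
    b.next = gnttab_free_callback_list;     /* b -> a -> b: a cycle */
    gnttab_free_callback_list = &b;

Walking the list, as the new code does, is the only reliable membership test for this structure.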
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index f8e5dd701ecb..8e74590fa1bb 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c | |||
@@ -43,9 +43,10 @@ MODULE_LICENSE("GPL"); | |||
43 | 43 | ||
44 | #define PRIV_VMA_LOCKED ((void *)1) | 44 | #define PRIV_VMA_LOCKED ((void *)1) |
45 | 45 | ||
46 | #ifndef HAVE_ARCH_PRIVCMD_MMAP | 46 | static int privcmd_vma_range_is_mapped( |
47 | static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma); | 47 | struct vm_area_struct *vma, |
48 | #endif | 48 | unsigned long addr, |
49 | unsigned long nr_pages); | ||
49 | 50 | ||
50 | static long privcmd_ioctl_hypercall(void __user *udata) | 51 | static long privcmd_ioctl_hypercall(void __user *udata) |
51 | { | 52 | { |
@@ -225,9 +226,9 @@ static long privcmd_ioctl_mmap(void __user *udata) | |||
225 | vma = find_vma(mm, msg->va); | 226 | vma = find_vma(mm, msg->va); |
226 | rc = -EINVAL; | 227 | rc = -EINVAL; |
227 | 228 | ||
228 | if (!vma || (msg->va != vma->vm_start) || | 229 | if (!vma || (msg->va != vma->vm_start) || vma->vm_private_data) |
229 | !privcmd_enforce_singleshot_mapping(vma)) | ||
230 | goto out_up; | 230 | goto out_up; |
231 | vma->vm_private_data = PRIV_VMA_LOCKED; | ||
231 | } | 232 | } |
232 | 233 | ||
233 | state.va = vma->vm_start; | 234 | state.va = vma->vm_start; |
@@ -358,7 +359,7 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs) | |||
358 | kfree(pages); | 359 | kfree(pages); |
359 | return -ENOMEM; | 360 | return -ENOMEM; |
360 | } | 361 | } |
361 | BUG_ON(vma->vm_private_data != PRIV_VMA_LOCKED); | 362 | BUG_ON(vma->vm_private_data != NULL); |
362 | vma->vm_private_data = pages; | 363 | vma->vm_private_data = pages; |
363 | 364 | ||
364 | return 0; | 365 | return 0; |
@@ -421,19 +422,43 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) | |||
421 | 422 | ||
422 | vma = find_vma(mm, m.addr); | 423 | vma = find_vma(mm, m.addr); |
423 | if (!vma || | 424 | if (!vma || |
424 | vma->vm_ops != &privcmd_vm_ops || | 425 | vma->vm_ops != &privcmd_vm_ops) { |
425 | (m.addr != vma->vm_start) || | ||
426 | ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) || | ||
427 | !privcmd_enforce_singleshot_mapping(vma)) { | ||
428 | up_write(&mm->mmap_sem); | ||
429 | ret = -EINVAL; | 426 | ret = -EINVAL; |
430 | goto out; | 427 | goto out_unlock; |
431 | } | 428 | } |
432 | if (xen_feature(XENFEAT_auto_translated_physmap)) { | 429 | |
433 | ret = alloc_empty_pages(vma, m.num); | 430 | /* |
434 | if (ret < 0) { | 431 | * Caller must either: |
435 | up_write(&mm->mmap_sem); | 432 | * |
436 | goto out; | 433 | * Map the whole VMA range, which will also allocate all the |
434 | * pages required for the auto_translated_physmap case. | ||
435 | * | ||
436 | * Or | ||
437 | * | ||
438 | * Map unmapped holes left from a previous map attempt (e.g., | ||
439 | * because those foreign frames were previously paged out). | ||
440 | */ | ||
441 | if (vma->vm_private_data == NULL) { | ||
442 | if (m.addr != vma->vm_start || | ||
443 | m.addr + (nr_pages << PAGE_SHIFT) != vma->vm_end) { | ||
444 | ret = -EINVAL; | ||
445 | goto out_unlock; | ||
446 | } | ||
447 | if (xen_feature(XENFEAT_auto_translated_physmap)) { | ||
448 | ret = alloc_empty_pages(vma, m.num); | ||
449 | if (ret < 0) | ||
450 | goto out_unlock; | ||
451 | } else | ||
452 | vma->vm_private_data = PRIV_VMA_LOCKED; | ||
453 | } else { | ||
454 | if (m.addr < vma->vm_start || | ||
455 | m.addr + (nr_pages << PAGE_SHIFT) > vma->vm_end) { | ||
456 | ret = -EINVAL; | ||
457 | goto out_unlock; | ||
458 | } | ||
459 | if (privcmd_vma_range_is_mapped(vma, m.addr, nr_pages)) { | ||
460 | ret = -EINVAL; | ||
461 | goto out_unlock; | ||
437 | } | 462 | } |
438 | } | 463 | } |
439 | 464 | ||
@@ -466,8 +491,11 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) | |||
466 | 491 | ||
467 | out: | 492 | out: |
468 | free_page_list(&pagelist); | 493 | free_page_list(&pagelist); |
469 | |||
470 | return ret; | 494 | return ret; |
495 | |||
496 | out_unlock: | ||
497 | up_write(&mm->mmap_sem); | ||
498 | goto out; | ||
471 | } | 499 | } |
472 | 500 | ||
473 | static long privcmd_ioctl(struct file *file, | 501 | static long privcmd_ioctl(struct file *file, |
@@ -540,9 +568,24 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma) | |||
540 | return 0; | 568 | return 0; |
541 | } | 569 | } |
542 | 570 | ||
543 | static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma) | 571 | /* |
572 | * For MMAPBATCH*. This allows asserting the singleshot mapping | ||
573 | * on a per pfn/pte basis. Mapping calls that fail with ENOENT | ||
574 | * can then be retried until success. | ||
575 | */ | ||
576 | static int is_mapped_fn(pte_t *pte, struct page *pmd_page, | ||
577 | unsigned long addr, void *data) | ||
578 | { | ||
579 | return pte_none(*pte) ? 0 : -EBUSY; | ||
580 | } | ||
581 | |||
582 | static int privcmd_vma_range_is_mapped( | ||
583 | struct vm_area_struct *vma, | ||
584 | unsigned long addr, | ||
585 | unsigned long nr_pages) | ||
544 | { | 586 | { |
545 | return !cmpxchg(&vma->vm_private_data, NULL, PRIV_VMA_LOCKED); | 587 | return apply_to_page_range(vma->vm_mm, addr, nr_pages << PAGE_SHIFT, |
588 | is_mapped_fn, NULL) != 0; | ||
546 | } | 589 | } |
547 | 590 | ||
548 | const struct file_operations xen_privcmd_fops = { | 591 | const struct file_operations xen_privcmd_fops = { |
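Note: with the single-shot cmpxchg() gone, IOCTL_PRIVCMD_MMAPBATCH{,_V2} may be issued repeatedly against one VMA: the first call must cover the whole range, and later calls may target just the holes (pte_none() entries) left where foreign frames were paged out and reported -ENOENT. A rough user-space retry loop, assuming the usual struct privcmd_mmapbatch_v2 layout and a hypothetical retry helper:

    struct privcmd_mmapbatch_v2 batch = {
            .num  = nr_frames,
            .dom  = domid,
            .addr = (uint64_t)(uintptr_t)va, /* must span the VMA first time */
            .arr  = gfns,                    /* frames to map */
            .err  = errs,                    /* per-frame status, filled in */
    };

    rc = ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &batch);
    while (rc < 0 && errno == ENOENT) {
            /* errs[i] == -ENOENT marks paged-out frames; re-issue the
             * ioctl for just those addresses, which the per-PTE
             * privcmd_vma_range_is_mapped() check now permits. */
            rc = retry_enoent_frames(fd, &batch, errs);
    }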
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index aadffcf7db9b..1b2277c311d2 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c | |||
@@ -506,13 +506,13 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, | |||
506 | to do proper error handling. */ | 506 | to do proper error handling. */ |
507 | xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, | 507 | xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, |
508 | attrs); | 508 | attrs); |
509 | sgl[0].dma_length = 0; | 509 | sg_dma_len(sgl) = 0; |
510 | return DMA_ERROR_CODE; | 510 | return DMA_ERROR_CODE; |
511 | } | 511 | } |
512 | sg->dma_address = xen_phys_to_bus(map); | 512 | sg->dma_address = xen_phys_to_bus(map); |
513 | } else | 513 | } else |
514 | sg->dma_address = dev_addr; | 514 | sg->dma_address = dev_addr; |
515 | sg->dma_length = sg->length; | 515 | sg_dma_len(sg) = sg->length; |
516 | } | 516 | } |
517 | return nelems; | 517 | return nelems; |
518 | } | 518 | } |
@@ -533,7 +533,7 @@ xen_swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, | |||
533 | BUG_ON(dir == DMA_NONE); | 533 | BUG_ON(dir == DMA_NONE); |
534 | 534 | ||
535 | for_each_sg(sgl, sg, nelems, i) | 535 | for_each_sg(sgl, sg, nelems, i) |
536 | xen_unmap_single(hwdev, sg->dma_address, sg->dma_length, dir); | 536 | xen_unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir); |
537 | 537 | ||
538 | } | 538 | } |
539 | EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg_attrs); | 539 | EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg_attrs); |
@@ -555,7 +555,7 @@ xen_swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, | |||
555 | 555 | ||
556 | for_each_sg(sgl, sg, nelems, i) | 556 | for_each_sg(sgl, sg, nelems, i) |
557 | xen_swiotlb_sync_single(hwdev, sg->dma_address, | 557 | xen_swiotlb_sync_single(hwdev, sg->dma_address, |
558 | sg->dma_length, dir, target); | 558 | sg_dma_len(sg), dir, target); |
559 | } | 559 | } |
560 | 560 | ||
561 | void | 561 | void |
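Note: this conversion (and the matching lib/swiotlb.c hunks further down) matters because struct scatterlist only has a separate dma_length field on architectures that select CONFIG_NEED_SG_DMA_LENGTH; writing sg->dma_length directly fails to build everywhere else. The accessor hides the difference; paraphrasing include/linux/scatterlist.h:

    #ifdef CONFIG_NEED_SG_DMA_LENGTH
    #define sg_dma_len(sg)          ((sg)->dma_length)
    #else
    #define sg_dma_len(sg)          ((sg)->length)
    #endif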
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c index 02817a85f877..21e18c18c7a1 100644 --- a/drivers/xen/xen-selfballoon.c +++ b/drivers/xen/xen-selfballoon.c | |||
@@ -265,8 +265,10 @@ static ssize_t store_selfballooning(struct device *dev, | |||
265 | if (!capable(CAP_SYS_ADMIN)) | 265 | if (!capable(CAP_SYS_ADMIN)) |
266 | return -EPERM; | 266 | return -EPERM; |
267 | 267 | ||
268 | err = strict_strtoul(buf, 10, &tmp); | 268 | err = kstrtoul(buf, 10, &tmp); |
269 | if (err || ((tmp != 0) && (tmp != 1))) | 269 | if (err) |
270 | return err; | ||
271 | if ((tmp != 0) && (tmp != 1)) | ||
270 | return -EINVAL; | 272 | return -EINVAL; |
271 | 273 | ||
272 | xen_selfballooning_enabled = !!tmp; | 274 | xen_selfballooning_enabled = !!tmp; |
@@ -292,8 +294,10 @@ static ssize_t store_selfballoon_interval(struct device *dev, | |||
292 | 294 | ||
293 | if (!capable(CAP_SYS_ADMIN)) | 295 | if (!capable(CAP_SYS_ADMIN)) |
294 | return -EPERM; | 296 | return -EPERM; |
295 | err = strict_strtoul(buf, 10, &val); | 297 | err = kstrtoul(buf, 10, &val); |
296 | if (err || val == 0) | 298 | if (err) |
299 | return err; | ||
300 | if (val == 0) | ||
297 | return -EINVAL; | 301 | return -EINVAL; |
298 | selfballoon_interval = val; | 302 | selfballoon_interval = val; |
299 | return count; | 303 | return count; |
@@ -314,8 +318,10 @@ static ssize_t store_selfballoon_downhys(struct device *dev, | |||
314 | 318 | ||
315 | if (!capable(CAP_SYS_ADMIN)) | 319 | if (!capable(CAP_SYS_ADMIN)) |
316 | return -EPERM; | 320 | return -EPERM; |
317 | err = strict_strtoul(buf, 10, &val); | 321 | err = kstrtoul(buf, 10, &val); |
318 | if (err || val == 0) | 322 | if (err) |
323 | return err; | ||
324 | if (val == 0) | ||
319 | return -EINVAL; | 325 | return -EINVAL; |
320 | selfballoon_downhysteresis = val; | 326 | selfballoon_downhysteresis = val; |
321 | return count; | 327 | return count; |
@@ -337,8 +343,10 @@ static ssize_t store_selfballoon_uphys(struct device *dev, | |||
337 | 343 | ||
338 | if (!capable(CAP_SYS_ADMIN)) | 344 | if (!capable(CAP_SYS_ADMIN)) |
339 | return -EPERM; | 345 | return -EPERM; |
340 | err = strict_strtoul(buf, 10, &val); | 346 | err = kstrtoul(buf, 10, &val); |
341 | if (err || val == 0) | 347 | if (err) |
348 | return err; | ||
349 | if (val == 0) | ||
342 | return -EINVAL; | 350 | return -EINVAL; |
343 | selfballoon_uphysteresis = val; | 351 | selfballoon_uphysteresis = val; |
344 | return count; | 352 | return count; |
@@ -360,8 +368,10 @@ static ssize_t store_selfballoon_min_usable_mb(struct device *dev, | |||
360 | 368 | ||
361 | if (!capable(CAP_SYS_ADMIN)) | 369 | if (!capable(CAP_SYS_ADMIN)) |
362 | return -EPERM; | 370 | return -EPERM; |
363 | err = strict_strtoul(buf, 10, &val); | 371 | err = kstrtoul(buf, 10, &val); |
364 | if (err || val == 0) | 372 | if (err) |
373 | return err; | ||
374 | if (val == 0) | ||
365 | return -EINVAL; | 375 | return -EINVAL; |
366 | selfballoon_min_usable_mb = val; | 376 | selfballoon_min_usable_mb = val; |
367 | return count; | 377 | return count; |
@@ -384,8 +394,10 @@ static ssize_t store_selfballoon_reserved_mb(struct device *dev, | |||
384 | 394 | ||
385 | if (!capable(CAP_SYS_ADMIN)) | 395 | if (!capable(CAP_SYS_ADMIN)) |
386 | return -EPERM; | 396 | return -EPERM; |
387 | err = strict_strtoul(buf, 10, &val); | 397 | err = kstrtoul(buf, 10, &val); |
388 | if (err || val == 0) | 398 | if (err) |
399 | return err; | ||
400 | if (val == 0) | ||
389 | return -EINVAL; | 401 | return -EINVAL; |
390 | selfballoon_reserved_mb = val; | 402 | selfballoon_reserved_mb = val; |
391 | return count; | 403 | return count; |
@@ -410,8 +422,10 @@ static ssize_t store_frontswap_selfshrinking(struct device *dev, | |||
410 | 422 | ||
411 | if (!capable(CAP_SYS_ADMIN)) | 423 | if (!capable(CAP_SYS_ADMIN)) |
412 | return -EPERM; | 424 | return -EPERM; |
413 | err = strict_strtoul(buf, 10, &tmp); | 425 | err = kstrtoul(buf, 10, &tmp); |
414 | if (err || ((tmp != 0) && (tmp != 1))) | 426 | if (err) |
427 | return err; | ||
428 | if ((tmp != 0) && (tmp != 1)) | ||
415 | return -EINVAL; | 429 | return -EINVAL; |
416 | frontswap_selfshrinking = !!tmp; | 430 | frontswap_selfshrinking = !!tmp; |
417 | if (!was_enabled && !xen_selfballooning_enabled && | 431 | if (!was_enabled && !xen_selfballooning_enabled && |
@@ -437,8 +451,10 @@ static ssize_t store_frontswap_inertia(struct device *dev, | |||
437 | 451 | ||
438 | if (!capable(CAP_SYS_ADMIN)) | 452 | if (!capable(CAP_SYS_ADMIN)) |
439 | return -EPERM; | 453 | return -EPERM; |
440 | err = strict_strtoul(buf, 10, &val); | 454 | err = kstrtoul(buf, 10, &val); |
441 | if (err || val == 0) | 455 | if (err) |
456 | return err; | ||
457 | if (val == 0) | ||
442 | return -EINVAL; | 458 | return -EINVAL; |
443 | frontswap_inertia = val; | 459 | frontswap_inertia = val; |
444 | frontswap_inertia_counter = val; | 460 | frontswap_inertia_counter = val; |
@@ -460,8 +476,10 @@ static ssize_t store_frontswap_hysteresis(struct device *dev, | |||
460 | 476 | ||
461 | if (!capable(CAP_SYS_ADMIN)) | 477 | if (!capable(CAP_SYS_ADMIN)) |
462 | return -EPERM; | 478 | return -EPERM; |
463 | err = strict_strtoul(buf, 10, &val); | 479 | err = kstrtoul(buf, 10, &val); |
464 | if (err || val == 0) | 480 | if (err) |
481 | return err; | ||
482 | if (val == 0) | ||
465 | return -EINVAL; | 483 | return -EINVAL; |
466 | frontswap_hysteresis = val; | 484 | frontswap_hysteresis = val; |
467 | return count; | 485 | return count; |
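Note: strict_strtoul() was long deprecated in favour of the kstrto*() family; besides the rename, these hunks stop collapsing parse failures into -EINVAL and instead let kstrtoul()'s own error (-EINVAL or -ERANGE) propagate. The resulting store-handler idiom, sketched with a hypothetical attribute:

    static ssize_t store_example(struct device *dev,
                                 struct device_attribute *attr,
                                 const char *buf, size_t count)
    {
            unsigned long val;
            int err;

            err = kstrtoul(buf, 10, &val);  /* base 10 */
            if (err)
                    return err;             /* parse error, as-is */
            if (val == 0)
                    return -EINVAL;         /* domain check kept separate */
            example_setting = val;          /* hypothetical target variable */
            return count;
    }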
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 0976fc46d1e0..a5079072da66 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h | |||
@@ -48,7 +48,6 @@ | |||
48 | 48 | ||
49 | #include <linux/types.h> | 49 | #include <linux/types.h> |
50 | #include <linux/compiler.h> | 50 | #include <linux/compiler.h> |
51 | #include <linux/workqueue.h> | ||
52 | 51 | ||
53 | #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) | 52 | #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) |
54 | 53 | ||
@@ -61,12 +60,6 @@ struct static_key { | |||
61 | #endif | 60 | #endif |
62 | }; | 61 | }; |
63 | 62 | ||
64 | struct static_key_deferred { | ||
65 | struct static_key key; | ||
66 | unsigned long timeout; | ||
67 | struct delayed_work work; | ||
68 | }; | ||
69 | |||
70 | # include <asm/jump_label.h> | 63 | # include <asm/jump_label.h> |
71 | # define HAVE_JUMP_LABEL | 64 | # define HAVE_JUMP_LABEL |
72 | #endif /* CC_HAVE_ASM_GOTO && CONFIG_JUMP_LABEL */ | 65 | #endif /* CC_HAVE_ASM_GOTO && CONFIG_JUMP_LABEL */ |
@@ -78,6 +71,7 @@ enum jump_label_type { | |||
78 | 71 | ||
79 | struct module; | 72 | struct module; |
80 | 73 | ||
74 | #include <linux/atomic.h> | ||
81 | #ifdef HAVE_JUMP_LABEL | 75 | #ifdef HAVE_JUMP_LABEL |
82 | 76 | ||
83 | #define JUMP_LABEL_TRUE_BRANCH 1UL | 77 | #define JUMP_LABEL_TRUE_BRANCH 1UL |
@@ -119,10 +113,7 @@ extern void arch_jump_label_transform_static(struct jump_entry *entry, | |||
119 | extern int jump_label_text_reserved(void *start, void *end); | 113 | extern int jump_label_text_reserved(void *start, void *end); |
120 | extern void static_key_slow_inc(struct static_key *key); | 114 | extern void static_key_slow_inc(struct static_key *key); |
121 | extern void static_key_slow_dec(struct static_key *key); | 115 | extern void static_key_slow_dec(struct static_key *key); |
122 | extern void static_key_slow_dec_deferred(struct static_key_deferred *key); | ||
123 | extern void jump_label_apply_nops(struct module *mod); | 116 | extern void jump_label_apply_nops(struct module *mod); |
124 | extern void | ||
125 | jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl); | ||
126 | 117 | ||
127 | #define STATIC_KEY_INIT_TRUE ((struct static_key) \ | 118 | #define STATIC_KEY_INIT_TRUE ((struct static_key) \ |
128 | { .enabled = ATOMIC_INIT(1), .entries = (void *)1 }) | 119 | { .enabled = ATOMIC_INIT(1), .entries = (void *)1 }) |
@@ -131,8 +122,6 @@ jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl); | |||
131 | 122 | ||
132 | #else /* !HAVE_JUMP_LABEL */ | 123 | #else /* !HAVE_JUMP_LABEL */ |
133 | 124 | ||
134 | #include <linux/atomic.h> | ||
135 | |||
136 | struct static_key { | 125 | struct static_key { |
137 | atomic_t enabled; | 126 | atomic_t enabled; |
138 | }; | 127 | }; |
@@ -141,10 +130,6 @@ static __always_inline void jump_label_init(void) | |||
141 | { | 130 | { |
142 | } | 131 | } |
143 | 132 | ||
144 | struct static_key_deferred { | ||
145 | struct static_key key; | ||
146 | }; | ||
147 | |||
148 | static __always_inline bool static_key_false(struct static_key *key) | 133 | static __always_inline bool static_key_false(struct static_key *key) |
149 | { | 134 | { |
150 | if (unlikely(atomic_read(&key->enabled)) > 0) | 135 | if (unlikely(atomic_read(&key->enabled)) > 0) |
@@ -169,11 +154,6 @@ static inline void static_key_slow_dec(struct static_key *key) | |||
169 | atomic_dec(&key->enabled); | 154 | atomic_dec(&key->enabled); |
170 | } | 155 | } |
171 | 156 | ||
172 | static inline void static_key_slow_dec_deferred(struct static_key_deferred *key) | ||
173 | { | ||
174 | static_key_slow_dec(&key->key); | ||
175 | } | ||
176 | |||
177 | static inline int jump_label_text_reserved(void *start, void *end) | 157 | static inline int jump_label_text_reserved(void *start, void *end) |
178 | { | 158 | { |
179 | return 0; | 159 | return 0; |
@@ -187,12 +167,6 @@ static inline int jump_label_apply_nops(struct module *mod) | |||
187 | return 0; | 167 | return 0; |
188 | } | 168 | } |
189 | 169 | ||
190 | static inline void | ||
191 | jump_label_rate_limit(struct static_key_deferred *key, | ||
192 | unsigned long rl) | ||
193 | { | ||
194 | } | ||
195 | |||
196 | #define STATIC_KEY_INIT_TRUE ((struct static_key) \ | 170 | #define STATIC_KEY_INIT_TRUE ((struct static_key) \ |
197 | { .enabled = ATOMIC_INIT(1) }) | 171 | { .enabled = ATOMIC_INIT(1) }) |
198 | #define STATIC_KEY_INIT_FALSE ((struct static_key) \ | 172 | #define STATIC_KEY_INIT_FALSE ((struct static_key) \ |
diff --git a/include/linux/jump_label_ratelimit.h b/include/linux/jump_label_ratelimit.h new file mode 100644 index 000000000000..113788389b3d --- /dev/null +++ b/include/linux/jump_label_ratelimit.h | |||
@@ -0,0 +1,34 @@ | |||
1 | #ifndef _LINUX_JUMP_LABEL_RATELIMIT_H | ||
2 | #define _LINUX_JUMP_LABEL_RATELIMIT_H | ||
3 | |||
4 | #include <linux/jump_label.h> | ||
5 | #include <linux/workqueue.h> | ||
6 | |||
7 | #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) | ||
8 | struct static_key_deferred { | ||
9 | struct static_key key; | ||
10 | unsigned long timeout; | ||
11 | struct delayed_work work; | ||
12 | }; | ||
13 | #endif | ||
14 | |||
15 | #ifdef HAVE_JUMP_LABEL | ||
16 | extern void static_key_slow_dec_deferred(struct static_key_deferred *key); | ||
17 | extern void | ||
18 | jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl); | ||
19 | |||
20 | #else /* !HAVE_JUMP_LABEL */ | ||
21 | struct static_key_deferred { | ||
22 | struct static_key key; | ||
23 | }; | ||
24 | static inline void static_key_slow_dec_deferred(struct static_key_deferred *key) | ||
25 | { | ||
26 | static_key_slow_dec(&key->key); | ||
27 | } | ||
28 | static inline void | ||
29 | jump_label_rate_limit(struct static_key_deferred *key, | ||
30 | unsigned long rl) | ||
31 | { | ||
32 | } | ||
33 | #endif /* HAVE_JUMP_LABEL */ | ||
34 | #endif /* _LINUX_JUMP_LABEL_RATELIMIT_H */ | ||
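Note: the deferred-key API moves wholesale into the new header so that <linux/jump_label.h> no longer needs <linux/workqueue.h>; users of rate-limited keys now include <linux/jump_label_ratelimit.h> explicitly (see the perf_event.h hunk below). The API itself is unchanged; a usage sketch with a hypothetical key:

    #include <linux/jump_label_ratelimit.h>

    static struct static_key_deferred example_key;

    /* Enabling takes effect immediately: */
    static_key_slow_inc(&example_key.key);

    /* Disables are deferred by the configured timeout (here HZ jiffies),
     * which keeps rapid enable/disable cycles from repeatedly patching
     * the kernel text. */
    jump_label_rate_limit(&example_key, HZ);
    static_key_slow_dec_deferred(&example_key);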
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index c43f6eabad5b..226be8da3f85 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h | |||
@@ -48,6 +48,7 @@ struct perf_guest_info_callbacks { | |||
48 | #include <linux/cpu.h> | 48 | #include <linux/cpu.h> |
49 | #include <linux/irq_work.h> | 49 | #include <linux/irq_work.h> |
50 | #include <linux/static_key.h> | 50 | #include <linux/static_key.h> |
51 | #include <linux/jump_label_ratelimit.h> | ||
51 | #include <linux/atomic.h> | 52 | #include <linux/atomic.h> |
52 | #include <linux/sysfs.h> | 53 | #include <linux/sysfs.h> |
53 | #include <linux/perf_regs.h> | 54 | #include <linux/perf_regs.h> |
diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h index cea2c5c72d26..2841f86eae0b 100644 --- a/include/uapi/linux/kvm_para.h +++ b/include/uapi/linux/kvm_para.h | |||
@@ -19,6 +19,7 @@ | |||
19 | #define KVM_HC_MMU_OP 2 | 19 | #define KVM_HC_MMU_OP 2 |
20 | #define KVM_HC_FEATURES 3 | 20 | #define KVM_HC_FEATURES 3 |
21 | #define KVM_HC_PPC_MAP_MAGIC_PAGE 4 | 21 | #define KVM_HC_PPC_MAP_MAGIC_PAGE 4 |
22 | #define KVM_HC_KICK_CPU 5 | ||
22 | 23 | ||
23 | /* | 24 | /* |
24 | * hypercalls use architecture specific | 25 | * hypercalls use architecture specific |
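Note: KVM_HC_KICK_CPU is the guest-to-host hypercall introduced for the paravirtual ticketlock/unhalt work: a vCPU that halted while waiting for a lock is woken by the current lock holder. Guest-side, the call is a one-liner (a sketch; the arguments are a flags word and the target vCPU's APIC ID):

    kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);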
diff --git a/include/xen/balloon.h b/include/xen/balloon.h index cc2e1a7e44ec..a4c1c6a93691 100644 --- a/include/xen/balloon.h +++ b/include/xen/balloon.h | |||
@@ -29,6 +29,9 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages, | |||
29 | bool highmem); | 29 | bool highmem); |
30 | void free_xenballooned_pages(int nr_pages, struct page **pages); | 30 | void free_xenballooned_pages(int nr_pages, struct page **pages); |
31 | 31 | ||
32 | struct page *get_balloon_scratch_page(void); | ||
33 | void put_balloon_scratch_page(void); | ||
34 | |||
32 | struct device; | 35 | struct device; |
33 | #ifdef CONFIG_XEN_SELFBALLOONING | 36 | #ifdef CONFIG_XEN_SELFBALLOONING |
34 | extern int register_xen_selfballooning(struct device *dev); | 37 | extern int register_xen_selfballooning(struct device *dev); |
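Note: get_balloon_scratch_page()/put_balloon_scratch_page() give the balloon driver a harmless page to point stale mappings at when frames are ballooned out. Judging by the get/put naming, the pair brackets access to a per-CPU page, something like:

    struct page *page;

    page = get_balloon_scratch_page();      /* likely get_cpu_var() inside */
    /* ... retarget the mapping of a ballooned-out frame ... */
    put_balloon_scratch_page();             /* drop the per-CPU reference */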
diff --git a/include/xen/interface/io/tpmif.h b/include/xen/interface/io/tpmif.h new file mode 100644 index 000000000000..28e7dcd75e82 --- /dev/null +++ b/include/xen/interface/io/tpmif.h | |||
@@ -0,0 +1,52 @@ | |||
1 | /****************************************************************************** | ||
2 | * tpmif.h | ||
3 | * | ||
4 | * TPM I/O interface for Xen guest OSes, v2 | ||
5 | * | ||
6 | * This file is in the public domain. | ||
7 | * | ||
8 | */ | ||
9 | |||
10 | #ifndef __XEN_PUBLIC_IO_TPMIF_H__ | ||
11 | #define __XEN_PUBLIC_IO_TPMIF_H__ | ||
12 | |||
13 | /* | ||
14 | * Xenbus state machine | ||
15 | * | ||
16 | * Device open: | ||
17 | * 1. Both ends start in XenbusStateInitialising | ||
18 | * 2. Backend transitions to InitWait (frontend does not wait on this step) | ||
19 | * 3. Frontend populates ring-ref, event-channel, feature-protocol-v2 | ||
20 | * 4. Frontend transitions to Initialised | ||
21 | * 5. Backend maps grant and event channel, verifies feature-protocol-v2 | ||
22 | * 6. Backend transitions to Connected | ||
23 | * 7. Frontend verifies feature-protocol-v2, transitions to Connected | ||
24 | * | ||
25 | * Device close: | ||
26 | * 1. State is changed to XenbusStateClosing | ||
27 | * 2. Frontend transitions to Closed | ||
28 | * 3. Backend unmaps grant and event, changes state to InitWait | ||
29 | */ | ||
30 | |||
31 | enum vtpm_shared_page_state { | ||
32 | VTPM_STATE_IDLE, /* no contents / vTPM idle / cancel complete */ | ||
33 | VTPM_STATE_SUBMIT, /* request ready / vTPM working */ | ||
34 | VTPM_STATE_FINISH, /* response ready / vTPM idle */ | ||
35 | VTPM_STATE_CANCEL, /* cancel requested / vTPM working */ | ||
36 | }; | ||
37 | /* The backend should only change state to IDLE or FINISH, while the | ||
38 | * frontend should only change to SUBMIT or CANCEL. */ | ||
39 | |||
40 | |||
41 | struct vtpm_shared_page { | ||
42 | uint32_t length; /* request/response length in bytes */ | ||
43 | |||
44 | uint8_t state; /* enum vtpm_shared_page_state */ | ||
45 | uint8_t locality; /* for the current request */ | ||
46 | uint8_t pad; | ||
47 | |||
48 | uint8_t nr_extra_pages; /* extra pages for long packets; may be zero */ | ||
49 | uint32_t extra_pages[0]; /* grant IDs; length in nr_extra_pages */ | ||
50 | }; | ||
51 | |||
52 | #endif | ||
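Note: tying the state machine together, a frontend submits one command roughly as follows (shr is the mapped vtpm_shared_page, data the request area after the header; the names are illustrative):

    memcpy(data, cmd, len);                 /* command body first */
    shr->length = len;
    barrier();                              /* payload before state flip */
    shr->state = VTPM_STATE_SUBMIT;         /* hand the request to the vTPM */
    wmb();
    notify_remote_via_evtchn(evtchn);

The backend works while the state is SUBMIT (or CANCEL) and sets FINISH once shr->length bytes of response are in place; the frontend may set CANCEL to abort, then waits for IDLE.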
diff --git a/include/xen/interface/vcpu.h b/include/xen/interface/vcpu.h index 87e6f8a48661..b05288ce3991 100644 --- a/include/xen/interface/vcpu.h +++ b/include/xen/interface/vcpu.h | |||
@@ -170,4 +170,6 @@ struct vcpu_register_vcpu_info { | |||
170 | }; | 170 | }; |
171 | DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_vcpu_info); | 171 | DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_vcpu_info); |
172 | 172 | ||
173 | /* Send an NMI to the specified VCPU. @extra_arg == NULL. */ | ||
174 | #define VCPUOP_send_nmi 11 | ||
173 | #endif /* __XEN_PUBLIC_VCPU_H__ */ | 175 | #endif /* __XEN_PUBLIC_VCPU_H__ */ |
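Note: the NMI is requested through the existing vcpu_op hypercall; per the comment, the extra argument must be NULL. Sketch:

    /* Deliver an NMI to virtual CPU 'cpu' of the calling domain. */
    rc = HYPERVISOR_vcpu_op(VCPUOP_send_nmi, cpu, NULL);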
diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 60f48fa0fd0d..297a9247a3b3 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/sort.h> | 13 | #include <linux/sort.h> |
14 | #include <linux/err.h> | 14 | #include <linux/err.h> |
15 | #include <linux/static_key.h> | 15 | #include <linux/static_key.h> |
16 | #include <linux/jump_label_ratelimit.h> | ||
16 | 17 | ||
17 | #ifdef HAVE_JUMP_LABEL | 18 | #ifdef HAVE_JUMP_LABEL |
18 | 19 | ||
diff --git a/lib/swiotlb.c b/lib/swiotlb.c index d23762e6652c..4e8686c7e5a4 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c | |||
@@ -870,13 +870,13 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, | |||
870 | swiotlb_full(hwdev, sg->length, dir, 0); | 870 | swiotlb_full(hwdev, sg->length, dir, 0); |
871 | swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, | 871 | swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, |
872 | attrs); | 872 | attrs); |
873 | sgl[0].dma_length = 0; | 873 | sg_dma_len(sgl) = 0; |
874 | return 0; | 874 | return 0; |
875 | } | 875 | } |
876 | sg->dma_address = phys_to_dma(hwdev, map); | 876 | sg->dma_address = phys_to_dma(hwdev, map); |
877 | } else | 877 | } else |
878 | sg->dma_address = dev_addr; | 878 | sg->dma_address = dev_addr; |
879 | sg->dma_length = sg->length; | 879 | sg_dma_len(sg) = sg->length; |
880 | } | 880 | } |
881 | return nelems; | 881 | return nelems; |
882 | } | 882 | } |
@@ -904,7 +904,7 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, | |||
904 | BUG_ON(dir == DMA_NONE); | 904 | BUG_ON(dir == DMA_NONE); |
905 | 905 | ||
906 | for_each_sg(sgl, sg, nelems, i) | 906 | for_each_sg(sgl, sg, nelems, i) |
907 | unmap_single(hwdev, sg->dma_address, sg->dma_length, dir); | 907 | unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir); |
908 | 908 | ||
909 | } | 909 | } |
910 | EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); | 910 | EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); |
@@ -934,7 +934,7 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, | |||
934 | 934 | ||
935 | for_each_sg(sgl, sg, nelems, i) | 935 | for_each_sg(sgl, sg, nelems, i) |
936 | swiotlb_sync_single(hwdev, sg->dma_address, | 936 | swiotlb_sync_single(hwdev, sg->dma_address, |
937 | sg->dma_length, dir, target); | 937 | sg_dma_len(sg), dir, target); |
938 | } | 938 | } |
939 | 939 | ||
940 | void | 940 | void |