Diffstat (limited to 'arch/arm/kvm')
-rw-r--r--  arch/arm/kvm/Kconfig            |   72
-rw-r--r--  arch/arm/kvm/Makefile           |   23
-rw-r--r--  arch/arm/kvm/arch_timer.c       |  271
-rw-r--r--  arch/arm/kvm/arm.c              |  984
-rw-r--r--  arch/arm/kvm/coproc.c           | 1050
-rw-r--r--  arch/arm/kvm/coproc.h           |  153
-rw-r--r--  arch/arm/kvm/coproc_a15.c       |  162
-rw-r--r--  arch/arm/kvm/emulate.c          |  402
-rw-r--r--  arch/arm/kvm/guest.c            |  239
-rw-r--r--  arch/arm/kvm/handle_exit.c      |  164
-rw-r--r--  arch/arm/kvm/init.S             |  114
-rw-r--r--  arch/arm/kvm/interrupts.S       |  487
-rw-r--r--  arch/arm/kvm/interrupts_head.S  |  605
-rw-r--r--  arch/arm/kvm/mmio.c             |  146
-rw-r--r--  arch/arm/kvm/mmu.c              |  774
-rw-r--r--  arch/arm/kvm/psci.c             |  108
-rw-r--r--  arch/arm/kvm/reset.c            |   74
-rw-r--r--  arch/arm/kvm/trace.h            |  235
-rw-r--r--  arch/arm/kvm/vgic.c             | 1499
19 files changed, 7562 insertions, 0 deletions
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
new file mode 100644
index 000000000000..49dd64e579c2
--- /dev/null
+++ b/arch/arm/kvm/Kconfig
@@ -0,0 +1,72 @@
1#
2# KVM configuration
3#
4
5source "virt/kvm/Kconfig"
6
7menuconfig VIRTUALIZATION
8 bool "Virtualization"
9 ---help---
10 Say Y here to get to see options for using your Linux host to run
11 other operating systems inside virtual machines (guests).
12 This option alone does not add any kernel code.
13
14 If you say N, all options in this submenu will be skipped and
15 disabled.
16
17if VIRTUALIZATION
18
19config KVM
20 bool "Kernel-based Virtual Machine (KVM) support"
21 select PREEMPT_NOTIFIERS
22 select ANON_INODES
23 select KVM_MMIO
24 select KVM_ARM_HOST
25 depends on ARM_VIRT_EXT && ARM_LPAE
26 ---help---
27 Support hosting virtualized guest machines. You will also
28 need to select one or more of the processor modules below.
29
30 This module provides access to the hardware capabilities through
31 a character device node named /dev/kvm.
32
33 If unsure, say N.
34
35config KVM_ARM_HOST
36 bool "KVM host support for ARM cpus."
37 depends on KVM
38 depends on MMU
39 select MMU_NOTIFIER
40 ---help---
41 Provides host support for ARM processors.
42
43config KVM_ARM_MAX_VCPUS
44 int "Number maximum supported virtual CPUs per VM"
45 depends on KVM_ARM_HOST
46 default 4
47 help
48 Static number of max supported virtual CPUs per VM.
49
50 If you choose a high number, the vcpu structures will be quite
51 large, so only choose a reasonable number that you expect to
52 actually use.
53
54config KVM_ARM_VGIC
55 bool "KVM support for Virtual GIC"
56 depends on KVM_ARM_HOST && OF
57 select HAVE_KVM_IRQCHIP
58 default y
59 ---help---
60 Adds support for a hardware-assisted, in-kernel GIC emulation.
61
62config KVM_ARM_TIMER
63 bool "KVM support for Architected Timers"
64 depends on KVM_ARM_VGIC && ARM_ARCH_TIMER
65 select HAVE_KVM_IRQCHIP
66 default y
67 ---help---
68 Adds support for the Architected Timers in virtual machines.
69
70source drivers/virtio/Kconfig
71
72endif # VIRTUALIZATION
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
new file mode 100644
index 000000000000..8dc5e76cb789
--- /dev/null
+++ b/arch/arm/kvm/Makefile
@@ -0,0 +1,23 @@
1#
2# Makefile for Kernel-based Virtual Machine module
3#
4
5plus_virt := $(call as-instr,.arch_extension virt,+virt)
6ifeq ($(plus_virt),+virt)
7 plus_virt_def := -DREQUIRES_VIRT=1
8endif
9
10ccflags-y += -Ivirt/kvm -Iarch/arm/kvm
11CFLAGS_arm.o := -I. $(plus_virt_def)
12CFLAGS_mmu.o := -I.
13
14AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt)
15AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
16
17kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
18
19obj-y += kvm-arm.o init.o interrupts.o
20obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
21obj-y += coproc.o coproc_a15.o mmio.o psci.o
22obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o
23obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o
diff --git a/arch/arm/kvm/arch_timer.c b/arch/arm/kvm/arch_timer.c
new file mode 100644
index 000000000000..6ac938d46297
--- /dev/null
+++ b/arch/arm/kvm/arch_timer.c
@@ -0,0 +1,271 @@
1/*
2 * Copyright (C) 2012 ARM Ltd.
3 * Author: Marc Zyngier <marc.zyngier@arm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#include <linux/cpu.h>
20#include <linux/of_irq.h>
21#include <linux/kvm.h>
22#include <linux/kvm_host.h>
23#include <linux/interrupt.h>
24
25#include <asm/arch_timer.h>
26
27#include <asm/kvm_vgic.h>
28#include <asm/kvm_arch_timer.h>
29
30static struct timecounter *timecounter;
31static struct workqueue_struct *wqueue;
32static struct kvm_irq_level timer_irq = {
33 .level = 1,
34};
35
36static cycle_t kvm_phys_timer_read(void)
37{
38 return timecounter->cc->read(timecounter->cc);
39}
40
41static bool timer_is_armed(struct arch_timer_cpu *timer)
42{
43 return timer->armed;
44}
45
46/* timer_arm: as in "arm the timer", not as in ARM the company */
47static void timer_arm(struct arch_timer_cpu *timer, u64 ns)
48{
49 timer->armed = true;
50 hrtimer_start(&timer->timer, ktime_add_ns(ktime_get(), ns),
51 HRTIMER_MODE_ABS);
52}
53
54static void timer_disarm(struct arch_timer_cpu *timer)
55{
56 if (timer_is_armed(timer)) {
57 hrtimer_cancel(&timer->timer);
58 cancel_work_sync(&timer->expired);
59 timer->armed = false;
60 }
61}
62
63static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
64{
65 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
66
67 timer->cntv_ctl |= 1 << 1; /* Mask the interrupt in the guest */
68 kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
69 vcpu->arch.timer_cpu.irq->irq,
70 vcpu->arch.timer_cpu.irq->level);
71}
72
73static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
74{
75 struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
76
77 /*
78 * We disable the timer in the world switch and let it be
79 * handled by kvm_timer_sync_hwstate(). Getting a timer
80 * interrupt at this point is a sure sign of some major
81 * breakage.
82 */
83 pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu);
84 return IRQ_HANDLED;
85}
86
87static void kvm_timer_inject_irq_work(struct work_struct *work)
88{
89 struct kvm_vcpu *vcpu;
90
91 vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);
92 vcpu->arch.timer_cpu.armed = false;
93 kvm_timer_inject_irq(vcpu);
94}
95
96static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
97{
98 struct arch_timer_cpu *timer;
99 timer = container_of(hrt, struct arch_timer_cpu, timer);
100 queue_work(wqueue, &timer->expired);
101 return HRTIMER_NORESTART;
102}
103
104/**
105 * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu
106 * @vcpu: The vcpu pointer
107 *
108 * Disarm any pending soft timers, since the world-switch code will write the
109 * virtual timer state back to the physical CPU.
110 */
111void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
112{
113 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
114
115 /*
116 * We're about to run this vcpu again, so there is no need to
117 * keep the background timer running, as we're about to
118 * populate the CPU timer again.
119 */
120 timer_disarm(timer);
121}
122
123/**
124 * kvm_timer_sync_hwstate - sync timer state from cpu
125 * @vcpu: The vcpu pointer
126 *
127 * Check if the virtual timer was armed and either schedule a corresponding
128 * soft timer or inject directly if already expired.
129 */
130void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
131{
132 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
133 cycle_t cval, now;
134 u64 ns;
135
136 /* Check if the timer is enabled and unmasked first */
137 if ((timer->cntv_ctl & 3) != 1)
138 return;
139
140 cval = timer->cntv_cval;
141 now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
142
143 BUG_ON(timer_is_armed(timer));
144
145 if (cval <= now) {
146 /*
147 * Timer has already expired while we were not
148 * looking. Inject the interrupt and carry on.
149 */
150 kvm_timer_inject_irq(vcpu);
151 return;
152 }
153
154 ns = cyclecounter_cyc2ns(timecounter->cc, cval - now);
155 timer_arm(timer, ns);
156}
157
158void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
159{
160 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
161
162 INIT_WORK(&timer->expired, kvm_timer_inject_irq_work);
163 hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
164 timer->timer.function = kvm_timer_expire;
165 timer->irq = &timer_irq;
166}
167
168static void kvm_timer_init_interrupt(void *info)
169{
170 enable_percpu_irq(timer_irq.irq, 0);
171}
172
173
174static int kvm_timer_cpu_notify(struct notifier_block *self,
175 unsigned long action, void *cpu)
176{
177 switch (action) {
178 case CPU_STARTING:
179 case CPU_STARTING_FROZEN:
180 kvm_timer_init_interrupt(NULL);
181 break;
182 case CPU_DYING:
183 case CPU_DYING_FROZEN:
184 disable_percpu_irq(timer_irq.irq);
185 break;
186 }
187
188 return NOTIFY_OK;
189}
190
191static struct notifier_block kvm_timer_cpu_nb = {
192 .notifier_call = kvm_timer_cpu_notify,
193};
194
195static const struct of_device_id arch_timer_of_match[] = {
196 { .compatible = "arm,armv7-timer", },
197 {},
198};
199
200int kvm_timer_hyp_init(void)
201{
202 struct device_node *np;
203 unsigned int ppi;
204 int err;
205
206 timecounter = arch_timer_get_timecounter();
207 if (!timecounter)
208 return -ENODEV;
209
210 np = of_find_matching_node(NULL, arch_timer_of_match);
211 if (!np) {
212 kvm_err("kvm_arch_timer: can't find DT node\n");
213 return -ENODEV;
214 }
215
216 ppi = irq_of_parse_and_map(np, 2);
217 if (!ppi) {
218 kvm_err("kvm_arch_timer: no virtual timer interrupt\n");
219 err = -EINVAL;
220 goto out;
221 }
222
223 err = request_percpu_irq(ppi, kvm_arch_timer_handler,
224 "kvm guest timer", kvm_get_running_vcpus());
225 if (err) {
226 kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n",
227 ppi, err);
228 goto out;
229 }
230
231 timer_irq.irq = ppi;
232
233 err = register_cpu_notifier(&kvm_timer_cpu_nb);
234 if (err) {
235 kvm_err("Cannot register timer CPU notifier\n");
236 goto out_free;
237 }
238
239 wqueue = create_singlethread_workqueue("kvm_arch_timer");
240 if (!wqueue) {
241 err = -ENOMEM;
242 goto out_free;
243 }
244
245 kvm_info("%s IRQ%d\n", np->name, ppi);
246 on_each_cpu(kvm_timer_init_interrupt, NULL, 1);
247
248 goto out;
249out_free:
250 free_percpu_irq(ppi, kvm_get_running_vcpus());
251out:
252 of_node_put(np);
253 return err;
254}
255
256void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
257{
258 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
259
260 timer_disarm(timer);
261}
262
263int kvm_timer_init(struct kvm *kvm)
264{
265 if (timecounter && wqueue) {
266 kvm->arch.timer.cntvoff = kvm_phys_timer_read();
267 kvm->arch.timer.enabled = 1;
268 }
269
270 return 0;
271}
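
The (timer->cntv_ctl & 3) != 1 test in kvm_timer_sync_hwstate() above checks the two low CNTV_CTL bits: a soft timer or an injected interrupt is only needed when the virtual timer is enabled (bit 0 set) and its interrupt is not masked (bit 1 clear), matching the "Mask the interrupt in the guest" write in kvm_timer_inject_irq(). A minimal standalone sketch of the same predicate with named bits; the CNTV_CTL_* names are illustrative and not taken from this patch.

/* Sketch only: named-bit version of the CNTV_CTL test used above. */
#include <stdbool.h>
#include <stdint.h>

#define CNTV_CTL_ENABLE (1U << 0)       /* timer enabled */
#define CNTV_CTL_IMASK  (1U << 1)       /* timer interrupt masked */

/* True when the virtual timer can actually fire: enabled and not masked. */
static bool timer_can_fire(uint32_t cntv_ctl)
{
        /* Equivalent to "(cntv_ctl & 3) == 1" in kvm_timer_sync_hwstate(). */
        return (cntv_ctl & (CNTV_CTL_ENABLE | CNTV_CTL_IMASK)) == CNTV_CTL_ENABLE;
}
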
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
new file mode 100644
index 000000000000..a0dfc2a53f91
--- /dev/null
+++ b/arch/arm/kvm/arm.c
@@ -0,0 +1,984 @@
1/*
2 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
3 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2, as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18
19#include <linux/errno.h>
20#include <linux/err.h>
21#include <linux/kvm_host.h>
22#include <linux/module.h>
23#include <linux/vmalloc.h>
24#include <linux/fs.h>
25#include <linux/mman.h>
26#include <linux/sched.h>
27#include <linux/kvm.h>
28#include <trace/events/kvm.h>
29
30#define CREATE_TRACE_POINTS
31#include "trace.h"
32
33#include <asm/uaccess.h>
34#include <asm/ptrace.h>
35#include <asm/mman.h>
36#include <asm/tlbflush.h>
37#include <asm/cacheflush.h>
38#include <asm/virt.h>
39#include <asm/kvm_arm.h>
40#include <asm/kvm_asm.h>
41#include <asm/kvm_mmu.h>
42#include <asm/kvm_emulate.h>
43#include <asm/kvm_coproc.h>
44#include <asm/kvm_psci.h>
45
46#ifdef REQUIRES_VIRT
47__asm__(".arch_extension virt");
48#endif
49
50static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
51static kvm_kernel_vfp_t __percpu *kvm_host_vfp_state;
52static unsigned long hyp_default_vectors;
53
54/* Per-CPU variable containing the currently running vcpu. */
55static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);
56
57/* The VMID used in the VTTBR */
58static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
59static u8 kvm_next_vmid;
60static DEFINE_SPINLOCK(kvm_vmid_lock);
61
62static bool vgic_present;
63
64static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
65{
66 BUG_ON(preemptible());
67 __get_cpu_var(kvm_arm_running_vcpu) = vcpu;
68}
69
70/**
71 * kvm_arm_get_running_vcpu - get the vcpu running on the current CPU.
72 * Must be called from non-preemptible context
73 */
74struct kvm_vcpu *kvm_arm_get_running_vcpu(void)
75{
76 BUG_ON(preemptible());
77 return __get_cpu_var(kvm_arm_running_vcpu);
78}
79
80/**
81 * kvm_get_running_vcpus - get the per-CPU array of currently running vcpus.
82 */
83struct kvm_vcpu __percpu **kvm_get_running_vcpus(void)
84{
85 return &kvm_arm_running_vcpu;
86}
87
88int kvm_arch_hardware_enable(void *garbage)
89{
90 return 0;
91}
92
93int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
94{
95 return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
96}
97
98void kvm_arch_hardware_disable(void *garbage)
99{
100}
101
102int kvm_arch_hardware_setup(void)
103{
104 return 0;
105}
106
107void kvm_arch_hardware_unsetup(void)
108{
109}
110
111void kvm_arch_check_processor_compat(void *rtn)
112{
113 *(int *)rtn = 0;
114}
115
116void kvm_arch_sync_events(struct kvm *kvm)
117{
118}
119
120/**
121 * kvm_arch_init_vm - initializes a VM data structure
122 * @kvm: pointer to the KVM struct
123 */
124int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
125{
126 int ret = 0;
127
128 if (type)
129 return -EINVAL;
130
131 ret = kvm_alloc_stage2_pgd(kvm);
132 if (ret)
133 goto out_fail_alloc;
134
135 ret = create_hyp_mappings(kvm, kvm + 1);
136 if (ret)
137 goto out_free_stage2_pgd;
138
139 /* Mark the initial VMID generation invalid */
140 kvm->arch.vmid_gen = 0;
141
142 return ret;
143out_free_stage2_pgd:
144 kvm_free_stage2_pgd(kvm);
145out_fail_alloc:
146 return ret;
147}
148
149int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
150{
151 return VM_FAULT_SIGBUS;
152}
153
154void kvm_arch_free_memslot(struct kvm_memory_slot *free,
155 struct kvm_memory_slot *dont)
156{
157}
158
159int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
160{
161 return 0;
162}
163
164/**
165 * kvm_arch_destroy_vm - destroy the VM data structure
166 * @kvm: pointer to the KVM struct
167 */
168void kvm_arch_destroy_vm(struct kvm *kvm)
169{
170 int i;
171
172 kvm_free_stage2_pgd(kvm);
173
174 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
175 if (kvm->vcpus[i]) {
176 kvm_arch_vcpu_free(kvm->vcpus[i]);
177 kvm->vcpus[i] = NULL;
178 }
179 }
180}
181
182int kvm_dev_ioctl_check_extension(long ext)
183{
184 int r;
185 switch (ext) {
186 case KVM_CAP_IRQCHIP:
187 r = vgic_present;
188 break;
189 case KVM_CAP_USER_MEMORY:
190 case KVM_CAP_SYNC_MMU:
191 case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
192 case KVM_CAP_ONE_REG:
193 case KVM_CAP_ARM_PSCI:
194 r = 1;
195 break;
196 case KVM_CAP_COALESCED_MMIO:
197 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
198 break;
199 case KVM_CAP_ARM_SET_DEVICE_ADDR:
200 r = 1;
201 break;
202 case KVM_CAP_NR_VCPUS:
203 r = num_online_cpus();
204 break;
205 case KVM_CAP_MAX_VCPUS:
206 r = KVM_MAX_VCPUS;
207 break;
208 default:
209 r = 0;
210 break;
211 }
212 return r;
213}
214
215long kvm_arch_dev_ioctl(struct file *filp,
216 unsigned int ioctl, unsigned long arg)
217{
218 return -EINVAL;
219}
220
221int kvm_arch_set_memory_region(struct kvm *kvm,
222 struct kvm_userspace_memory_region *mem,
223 struct kvm_memory_slot old,
224 int user_alloc)
225{
226 return 0;
227}
228
229int kvm_arch_prepare_memory_region(struct kvm *kvm,
230 struct kvm_memory_slot *memslot,
231 struct kvm_memory_slot old,
232 struct kvm_userspace_memory_region *mem,
233 bool user_alloc)
234{
235 return 0;
236}
237
238void kvm_arch_commit_memory_region(struct kvm *kvm,
239 struct kvm_userspace_memory_region *mem,
240 struct kvm_memory_slot old,
241 bool user_alloc)
242{
243}
244
245void kvm_arch_flush_shadow_all(struct kvm *kvm)
246{
247}
248
249void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
250 struct kvm_memory_slot *slot)
251{
252}
253
254struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
255{
256 int err;
257 struct kvm_vcpu *vcpu;
258
259 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
260 if (!vcpu) {
261 err = -ENOMEM;
262 goto out;
263 }
264
265 err = kvm_vcpu_init(vcpu, kvm, id);
266 if (err)
267 goto free_vcpu;
268
269 err = create_hyp_mappings(vcpu, vcpu + 1);
270 if (err)
271 goto vcpu_uninit;
272
273 return vcpu;
274vcpu_uninit:
275 kvm_vcpu_uninit(vcpu);
276free_vcpu:
277 kmem_cache_free(kvm_vcpu_cache, vcpu);
278out:
279 return ERR_PTR(err);
280}
281
282int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
283{
284 return 0;
285}
286
287void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
288{
289 kvm_mmu_free_memory_caches(vcpu);
290 kvm_timer_vcpu_terminate(vcpu);
291 kmem_cache_free(kvm_vcpu_cache, vcpu);
292}
293
294void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
295{
296 kvm_arch_vcpu_free(vcpu);
297}
298
299int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
300{
301 return 0;
302}
303
304int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
305{
306 int ret;
307
308 /* Force users to call KVM_ARM_VCPU_INIT */
309 vcpu->arch.target = -1;
310
311 /* Set up VGIC */
312 ret = kvm_vgic_vcpu_init(vcpu);
313 if (ret)
314 return ret;
315
316 /* Set up the timer */
317 kvm_timer_vcpu_init(vcpu);
318
319 return 0;
320}
321
322void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
323{
324}
325
326void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
327{
328 vcpu->cpu = cpu;
329 vcpu->arch.vfp_host = this_cpu_ptr(kvm_host_vfp_state);
330
331 /*
332 * Check whether this vcpu requires the cache to be flushed on
333 * this physical CPU. This is a consequence of doing dcache
334 * operations by set/way on this vcpu. We do it here to be in
335 * a non-preemptible section.
336 */
337 if (cpumask_test_and_clear_cpu(cpu, &vcpu->arch.require_dcache_flush))
338 flush_cache_all(); /* We'd really want v7_flush_dcache_all() */
339
340 kvm_arm_set_running_vcpu(vcpu);
341}
342
343void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
344{
345 kvm_arm_set_running_vcpu(NULL);
346}
347
348int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
349 struct kvm_guest_debug *dbg)
350{
351 return -EINVAL;
352}
353
354
355int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
356 struct kvm_mp_state *mp_state)
357{
358 return -EINVAL;
359}
360
361int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
362 struct kvm_mp_state *mp_state)
363{
364 return -EINVAL;
365}
366
367/**
368 * kvm_arch_vcpu_runnable - determine if the vcpu can be scheduled
369 * @v: The VCPU pointer
370 *
371 * If the guest CPU is not waiting for interrupts or an interrupt line is
372 * asserted, the CPU is by definition runnable.
373 */
374int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
375{
376 return !!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v);
377}
378
379/* Just ensure a guest exit from a particular CPU */
380static void exit_vm_noop(void *info)
381{
382}
383
384void force_vm_exit(const cpumask_t *mask)
385{
386 smp_call_function_many(mask, exit_vm_noop, NULL, true);
387}
388
389/**
390 * need_new_vmid_gen - check that the VMID is still valid
391 * @kvm: The VM's VMID to check
392 *
393 * return true if there is a new generation of VMIDs being used
394 *
395 * The hardware supports only 256 values with the value zero reserved for the
396 * host, so we check if an assigned value belongs to a previous generation,
397 * which requires us to assign a new value. If we're the first to use a
398 * VMID for the new generation, we must flush necessary caches and TLBs on all
399 * CPUs.
400 */
401static bool need_new_vmid_gen(struct kvm *kvm)
402{
403 return unlikely(kvm->arch.vmid_gen != atomic64_read(&kvm_vmid_gen));
404}
405
406/**
407 * update_vttbr - Update the VTTBR with a valid VMID before the guest runs
408 * @kvm: The guest that we are about to run
409 *
410 * Called from kvm_arch_vcpu_ioctl_run before entering the guest to ensure the
411 * VM has a valid VMID, otherwise assigns a new one and flushes corresponding
412 * caches and TLBs.
413 */
414static void update_vttbr(struct kvm *kvm)
415{
416 phys_addr_t pgd_phys;
417 u64 vmid;
418
419 if (!need_new_vmid_gen(kvm))
420 return;
421
422 spin_lock(&kvm_vmid_lock);
423
424 /*
425 * We need to re-check the vmid_gen here to ensure that if another vcpu
426 * already allocated a valid vmid for this vm, then this vcpu should
427 * use the same vmid.
428 */
429 if (!need_new_vmid_gen(kvm)) {
430 spin_unlock(&kvm_vmid_lock);
431 return;
432 }
433
434 /* First user of a new VMID generation? */
435 if (unlikely(kvm_next_vmid == 0)) {
436 atomic64_inc(&kvm_vmid_gen);
437 kvm_next_vmid = 1;
438
439 /*
440 * On SMP we know no other CPUs can use this CPU's or each
441 * other's VMID after force_vm_exit returns since the
442 * kvm_vmid_lock blocks them from reentry to the guest.
443 */
444 force_vm_exit(cpu_all_mask);
445 /*
446 * Now broadcast TLB + ICACHE invalidation over the inner
447 * shareable domain to make sure all data structures are
448 * clean.
449 */
450 kvm_call_hyp(__kvm_flush_vm_context);
451 }
452
453 kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen);
454 kvm->arch.vmid = kvm_next_vmid;
455 kvm_next_vmid++;
456
457 /* update vttbr to be used with the new vmid */
458 pgd_phys = virt_to_phys(kvm->arch.pgd);
459 vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
460 kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK;
461 kvm->arch.vttbr |= vmid;
462
463 spin_unlock(&kvm_vmid_lock);
464}
465
466static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
467{
468 if (likely(vcpu->arch.has_run_once))
469 return 0;
470
471 vcpu->arch.has_run_once = true;
472
473 /*
474 * Initialize the VGIC before running a vcpu the first time on
475 * this VM.
476 */
477 if (irqchip_in_kernel(vcpu->kvm) &&
478 unlikely(!vgic_initialized(vcpu->kvm))) {
479 int ret = kvm_vgic_init(vcpu->kvm);
480 if (ret)
481 return ret;
482 }
483
484 /*
485 * Handle the "start in power-off" case by calling into the
486 * PSCI code.
487 */
488 if (test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) {
489 *vcpu_reg(vcpu, 0) = KVM_PSCI_FN_CPU_OFF;
490 kvm_psci_call(vcpu);
491 }
492
493 return 0;
494}
495
496static void vcpu_pause(struct kvm_vcpu *vcpu)
497{
498 wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
499
500 wait_event_interruptible(*wq, !vcpu->arch.pause);
501}
502
503/**
504 * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
505 * @vcpu: The VCPU pointer
506 * @run: The kvm_run structure pointer used for userspace state exchange
507 *
508 * This function is called through the VCPU_RUN ioctl from user space. It
509 * will execute VM code in a loop until the time slice for the process is used
510 * or some emulation is needed from user space in which case the function will
511 * return with return value 0 and with the kvm_run structure filled in with the
512 * required data for the requested emulation.
513 */
514int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
515{
516 int ret;
517 sigset_t sigsaved;
518
519 /* Make sure they initialize the vcpu with KVM_ARM_VCPU_INIT */
520 if (unlikely(vcpu->arch.target < 0))
521 return -ENOEXEC;
522
523 ret = kvm_vcpu_first_run_init(vcpu);
524 if (ret)
525 return ret;
526
527 if (run->exit_reason == KVM_EXIT_MMIO) {
528 ret = kvm_handle_mmio_return(vcpu, vcpu->run);
529 if (ret)
530 return ret;
531 }
532
533 if (vcpu->sigset_active)
534 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
535
536 ret = 1;
537 run->exit_reason = KVM_EXIT_UNKNOWN;
538 while (ret > 0) {
539 /*
540 * Check conditions before entering the guest
541 */
542 cond_resched();
543
544 update_vttbr(vcpu->kvm);
545
546 if (vcpu->arch.pause)
547 vcpu_pause(vcpu);
548
549 kvm_vgic_flush_hwstate(vcpu);
550 kvm_timer_flush_hwstate(vcpu);
551
552 local_irq_disable();
553
554 /*
555 * Re-check atomic conditions
556 */
557 if (signal_pending(current)) {
558 ret = -EINTR;
559 run->exit_reason = KVM_EXIT_INTR;
560 }
561
562 if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) {
563 local_irq_enable();
564 kvm_timer_sync_hwstate(vcpu);
565 kvm_vgic_sync_hwstate(vcpu);
566 continue;
567 }
568
569 /**************************************************************
570 * Enter the guest
571 */
572 trace_kvm_entry(*vcpu_pc(vcpu));
573 kvm_guest_enter();
574 vcpu->mode = IN_GUEST_MODE;
575
576 ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
577
578 vcpu->mode = OUTSIDE_GUEST_MODE;
579 vcpu->arch.last_pcpu = smp_processor_id();
580 kvm_guest_exit();
581 trace_kvm_exit(*vcpu_pc(vcpu));
582 /*
583 * We may have taken a host interrupt in HYP mode (ie
584 * while executing the guest). This interrupt is still
585 * pending, as we haven't serviced it yet!
586 *
587 * We're now back in SVC mode, with interrupts
588 * disabled. Enabling the interrupts now will have
589 * the effect of taking the interrupt again, in SVC
590 * mode this time.
591 */
592 local_irq_enable();
593
594 /*
595 * Back from guest
596 *************************************************************/
597
598 kvm_timer_sync_hwstate(vcpu);
599 kvm_vgic_sync_hwstate(vcpu);
600
601 ret = handle_exit(vcpu, run, ret);
602 }
603
604 if (vcpu->sigset_active)
605 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
606 return ret;
607}
608
609static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
610{
611 int bit_index;
612 bool set;
613 unsigned long *ptr;
614
615 if (number == KVM_ARM_IRQ_CPU_IRQ)
616 bit_index = __ffs(HCR_VI);
617 else /* KVM_ARM_IRQ_CPU_FIQ */
618 bit_index = __ffs(HCR_VF);
619
620 ptr = (unsigned long *)&vcpu->arch.irq_lines;
621 if (level)
622 set = test_and_set_bit(bit_index, ptr);
623 else
624 set = test_and_clear_bit(bit_index, ptr);
625
626 /*
627 * If we didn't change anything, no need to wake up or kick other CPUs
628 */
629 if (set == level)
630 return 0;
631
632 /*
633 * The vcpu irq_lines field was updated, wake up sleeping VCPUs and
634 * trigger a world-switch round on the running physical CPU to set the
635 * virtual IRQ/FIQ fields in the HCR appropriately.
636 */
637 kvm_vcpu_kick(vcpu);
638
639 return 0;
640}
641
642int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level)
643{
644 u32 irq = irq_level->irq;
645 unsigned int irq_type, vcpu_idx, irq_num;
646 int nrcpus = atomic_read(&kvm->online_vcpus);
647 struct kvm_vcpu *vcpu = NULL;
648 bool level = irq_level->level;
649
650 irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK;
651 vcpu_idx = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK;
652 irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK;
653
654 trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level);
655
656 switch (irq_type) {
657 case KVM_ARM_IRQ_TYPE_CPU:
658 if (irqchip_in_kernel(kvm))
659 return -ENXIO;
660
661 if (vcpu_idx >= nrcpus)
662 return -EINVAL;
663
664 vcpu = kvm_get_vcpu(kvm, vcpu_idx);
665 if (!vcpu)
666 return -EINVAL;
667
668 if (irq_num > KVM_ARM_IRQ_CPU_FIQ)
669 return -EINVAL;
670
671 return vcpu_interrupt_line(vcpu, irq_num, level);
672 case KVM_ARM_IRQ_TYPE_PPI:
673 if (!irqchip_in_kernel(kvm))
674 return -ENXIO;
675
676 if (vcpu_idx >= nrcpus)
677 return -EINVAL;
678
679 vcpu = kvm_get_vcpu(kvm, vcpu_idx);
680 if (!vcpu)
681 return -EINVAL;
682
683 if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS)
684 return -EINVAL;
685
686 return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level);
687 case KVM_ARM_IRQ_TYPE_SPI:
688 if (!irqchip_in_kernel(kvm))
689 return -ENXIO;
690
691 if (irq_num < VGIC_NR_PRIVATE_IRQS ||
692 irq_num > KVM_ARM_IRQ_GIC_MAX)
693 return -EINVAL;
694
695 return kvm_vgic_inject_irq(kvm, 0, irq_num, level);
696 }
697
698 return -EINVAL;
699}
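
kvm_vm_ioctl_irq_line() above unpacks the irq field of struct kvm_irq_level into an interrupt type, a vcpu index and an interrupt number. A hedged userspace sketch of building that field for the KVM_IRQ_LINE ioctl; the KVM_ARM_IRQ_* encoding macros and the open VM descriptor vm_fd are assumed to come from the uapi headers and the calling program, not from this hunk.

/* Sketch only: assert the virtual IRQ line of vcpu 0 through KVM_IRQ_LINE,
 * using the same type/vcpu/number layout decoded above. */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int raise_cpu_irq(int vm_fd)
{
        struct kvm_irq_level irq = {
                .irq   = (KVM_ARM_IRQ_TYPE_CPU << KVM_ARM_IRQ_TYPE_SHIFT) |
                         (0 << KVM_ARM_IRQ_VCPU_SHIFT) |        /* vcpu index 0 */
                         (KVM_ARM_IRQ_CPU_IRQ << KVM_ARM_IRQ_NUM_SHIFT),
                .level = 1,                                     /* assert the line */
        };

        return ioctl(vm_fd, KVM_IRQ_LINE, &irq);        /* 0 on success */
}
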
700
701long kvm_arch_vcpu_ioctl(struct file *filp,
702 unsigned int ioctl, unsigned long arg)
703{
704 struct kvm_vcpu *vcpu = filp->private_data;
705 void __user *argp = (void __user *)arg;
706
707 switch (ioctl) {
708 case KVM_ARM_VCPU_INIT: {
709 struct kvm_vcpu_init init;
710
711 if (copy_from_user(&init, argp, sizeof(init)))
712 return -EFAULT;
713
714 return kvm_vcpu_set_target(vcpu, &init);
715
716 }
717 case KVM_SET_ONE_REG:
718 case KVM_GET_ONE_REG: {
719 struct kvm_one_reg reg;
720 if (copy_from_user(&reg, argp, sizeof(reg)))
721 return -EFAULT;
722 if (ioctl == KVM_SET_ONE_REG)
723 return kvm_arm_set_reg(vcpu, &reg);
724 else
725 return kvm_arm_get_reg(vcpu, &reg);
726 }
727 case KVM_GET_REG_LIST: {
728 struct kvm_reg_list __user *user_list = argp;
729 struct kvm_reg_list reg_list;
730 unsigned n;
731
732 if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
733 return -EFAULT;
734 n = reg_list.n;
735 reg_list.n = kvm_arm_num_regs(vcpu);
736 if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
737 return -EFAULT;
738 if (n < reg_list.n)
739 return -E2BIG;
740 return kvm_arm_copy_reg_indices(vcpu, user_list->reg);
741 }
742 default:
743 return -EINVAL;
744 }
745}
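
The KVM_ARM_VCPU_INIT case above is the counterpart of the target = -1 sentinel set in kvm_arch_vcpu_init() and checked at the top of kvm_arch_vcpu_ioctl_run(). A hedged userspace sketch of the required init call; vcpu_fd and KVM_ARM_TARGET_CORTEX_A15 are assumed to be provided by the caller and the uapi headers, and are not defined in this hunk.

/* Sketch only: a vcpu must be initialized with KVM_ARM_VCPU_INIT before
 * KVM_RUN will execute guest code (see the target < 0 check above). */
#include <string.h>
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int init_vcpu(int vcpu_fd)
{
        struct kvm_vcpu_init init;

        memset(&init, 0, sizeof(init));
        init.target = KVM_ARM_TARGET_CORTEX_A15;
        /* init.features[] left at zero: no KVM_ARM_VCPU_POWER_OFF, etc. */

        return ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init);
}
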
746
747int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
748{
749 return -EINVAL;
750}
751
752static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
753 struct kvm_arm_device_addr *dev_addr)
754{
755 unsigned long dev_id, type;
756
757 dev_id = (dev_addr->id & KVM_ARM_DEVICE_ID_MASK) >>
758 KVM_ARM_DEVICE_ID_SHIFT;
759 type = (dev_addr->id & KVM_ARM_DEVICE_TYPE_MASK) >>
760 KVM_ARM_DEVICE_TYPE_SHIFT;
761
762 switch (dev_id) {
763 case KVM_ARM_DEVICE_VGIC_V2:
764 if (!vgic_present)
765 return -ENXIO;
766 return kvm_vgic_set_addr(kvm, type, dev_addr->addr);
767 default:
768 return -ENODEV;
769 }
770}
771
772long kvm_arch_vm_ioctl(struct file *filp,
773 unsigned int ioctl, unsigned long arg)
774{
775 struct kvm *kvm = filp->private_data;
776 void __user *argp = (void __user *)arg;
777
778 switch (ioctl) {
779 case KVM_CREATE_IRQCHIP: {
780 if (vgic_present)
781 return kvm_vgic_create(kvm);
782 else
783 return -ENXIO;
784 }
785 case KVM_ARM_SET_DEVICE_ADDR: {
786 struct kvm_arm_device_addr dev_addr;
787
788 if (copy_from_user(&dev_addr, argp, sizeof(dev_addr)))
789 return -EFAULT;
790 return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
791 }
792 default:
793 return -EINVAL;
794 }
795}
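
kvm_vm_ioctl_set_device_addr() above splits the 64-bit id into a device id and an address type before handing the address to kvm_vgic_set_addr(). A hedged sketch of the matching userspace call that places the VGIC distributor; the KVM_VGIC_V2_ADDR_TYPE_DIST name and the example guest-physical address are illustrative assumptions, not definitions from this patch.

/* Sketch only: tell KVM where the virtual GIC distributor lives in guest
 * physical memory, using the id layout decoded above. */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int set_vgic_dist_addr(int vm_fd)
{
        struct kvm_arm_device_addr dev = {
                .id   = (KVM_ARM_DEVICE_VGIC_V2 << KVM_ARM_DEVICE_ID_SHIFT) |
                        (KVM_VGIC_V2_ADDR_TYPE_DIST << KVM_ARM_DEVICE_TYPE_SHIFT),
                .addr = 0x2c001000ULL,  /* illustrative guest-physical base */
        };

        return ioctl(vm_fd, KVM_ARM_SET_DEVICE_ADDR, &dev);
}
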
796
797static void cpu_init_hyp_mode(void *vector)
798{
799 unsigned long long pgd_ptr;
800 unsigned long hyp_stack_ptr;
801 unsigned long stack_page;
802 unsigned long vector_ptr;
803
804 /* Switch from the HYP stub to our own HYP init vector */
805 __hyp_set_vectors((unsigned long)vector);
806
807 pgd_ptr = (unsigned long long)kvm_mmu_get_httbr();
808 stack_page = __get_cpu_var(kvm_arm_hyp_stack_page);
809 hyp_stack_ptr = stack_page + PAGE_SIZE;
810 vector_ptr = (unsigned long)__kvm_hyp_vector;
811
812 __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
813}
814
815/**
816 * Inits Hyp-mode on all online CPUs
817 */
818static int init_hyp_mode(void)
819{
820 phys_addr_t init_phys_addr;
821 int cpu;
822 int err = 0;
823
824 /*
825 * Allocate Hyp PGD and setup Hyp identity mapping
826 */
827 err = kvm_mmu_init();
828 if (err)
829 goto out_err;
830
831 /*
832 * It is probably enough to obtain the default vectors on one
833 * CPU. It's unlikely to be different on the others.
834 */
835 hyp_default_vectors = __hyp_get_vectors();
836
837 /*
838 * Allocate stack pages for Hypervisor-mode
839 */
840 for_each_possible_cpu(cpu) {
841 unsigned long stack_page;
842
843 stack_page = __get_free_page(GFP_KERNEL);
844 if (!stack_page) {
845 err = -ENOMEM;
846 goto out_free_stack_pages;
847 }
848
849 per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
850 }
851
852 /*
853 * Execute the init code on each CPU.
854 *
855 * Note: The stack is not mapped yet, so don't do anything other than
856 * initializing the hypervisor mode on each CPU using a local stack
857 * space for temporary storage.
858 */
859 init_phys_addr = virt_to_phys(__kvm_hyp_init);
860 for_each_online_cpu(cpu) {
861 smp_call_function_single(cpu, cpu_init_hyp_mode,
862 (void *)(long)init_phys_addr, 1);
863 }
864
865 /*
866 * Unmap the identity mapping
867 */
868 kvm_clear_hyp_idmap();
869
870 /*
871 * Map the Hyp-code called directly from the host
872 */
873 err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end);
874 if (err) {
875 kvm_err("Cannot map world-switch code\n");
876 goto out_free_mappings;
877 }
878
879 /*
880 * Map the Hyp stack pages
881 */
882 for_each_possible_cpu(cpu) {
883 char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
884 err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE);
885
886 if (err) {
887 kvm_err("Cannot map hyp stack\n");
888 goto out_free_mappings;
889 }
890 }
891
892 /*
893 * Map the host VFP structures
894 */
895 kvm_host_vfp_state = alloc_percpu(kvm_kernel_vfp_t);
896 if (!kvm_host_vfp_state) {
897 err = -ENOMEM;
898 kvm_err("Cannot allocate host VFP state\n");
899 goto out_free_mappings;
900 }
901
902 for_each_possible_cpu(cpu) {
903 kvm_kernel_vfp_t *vfp;
904
905 vfp = per_cpu_ptr(kvm_host_vfp_state, cpu);
906 err = create_hyp_mappings(vfp, vfp + 1);
907
908 if (err) {
909 kvm_err("Cannot map host VFP state: %d\n", err);
910 goto out_free_vfp;
911 }
912 }
913
914 /*
915 * Init HYP view of VGIC
916 */
917 err = kvm_vgic_hyp_init();
918 if (err)
919 goto out_free_vfp;
920
921#ifdef CONFIG_KVM_ARM_VGIC
922 vgic_present = true;
923#endif
924
925 /*
926 * Init HYP architected timer support
927 */
928 err = kvm_timer_hyp_init();
929 if (err)
930 goto out_free_mappings;
931
932 kvm_info("Hyp mode initialized successfully\n");
933 return 0;
934out_free_vfp:
935 free_percpu(kvm_host_vfp_state);
936out_free_mappings:
937 free_hyp_pmds();
938out_free_stack_pages:
939 for_each_possible_cpu(cpu)
940 free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
941out_err:
942 kvm_err("error initializing Hyp mode: %d\n", err);
943 return err;
944}
945
946/**
947 * Initialize Hyp-mode and memory mappings on all CPUs.
948 */
949int kvm_arch_init(void *opaque)
950{
951 int err;
952
953 if (!is_hyp_mode_available()) {
954 kvm_err("HYP mode not available\n");
955 return -ENODEV;
956 }
957
958 if (kvm_target_cpu() < 0) {
959 kvm_err("Target CPU not supported!\n");
960 return -ENODEV;
961 }
962
963 err = init_hyp_mode();
964 if (err)
965 goto out_err;
966
967 kvm_coproc_table_init();
968 return 0;
969out_err:
970 return err;
971}
972
973/* NOP: Compiling as a module not supported */
974void kvm_arch_exit(void)
975{
976}
977
978static int arm_init(void)
979{
980 int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
981 return rc;
982}
983
984module_init(arm_init);
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
new file mode 100644
index 000000000000..8eea97be1ed5
--- /dev/null
+++ b/arch/arm/kvm/coproc.c
@@ -0,0 +1,1050 @@
1/*
2 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
3 * Authors: Rusty Russell <rusty@rustcorp.com.au>
4 * Christoffer Dall <c.dall@virtualopensystems.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 */
19#include <linux/mm.h>
20#include <linux/kvm_host.h>
21#include <linux/uaccess.h>
22#include <asm/kvm_arm.h>
23#include <asm/kvm_host.h>
24#include <asm/kvm_emulate.h>
25#include <asm/kvm_coproc.h>
26#include <asm/cacheflush.h>
27#include <asm/cputype.h>
28#include <trace/events/kvm.h>
29#include <asm/vfp.h>
30#include "../vfp/vfpinstr.h"
31
32#include "trace.h"
33#include "coproc.h"
34
35
36/******************************************************************************
37 * Co-processor emulation
38 *****************************************************************************/
39
40/* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
41static u32 cache_levels;
42
43/* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
44#define CSSELR_MAX 12
45
46int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run)
47{
48 kvm_inject_undefined(vcpu);
49 return 1;
50}
51
52int kvm_handle_cp_0_13_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
53{
54 /*
55 * We can get here, if the host has been built without VFPv3 support,
56 * but the guest attempted a floating point operation.
57 */
58 kvm_inject_undefined(vcpu);
59 return 1;
60}
61
62int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
63{
64 kvm_inject_undefined(vcpu);
65 return 1;
66}
67
68int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
69{
70 kvm_inject_undefined(vcpu);
71 return 1;
72}
73
74/* See note at ARM ARM B1.14.4 */
75static bool access_dcsw(struct kvm_vcpu *vcpu,
76 const struct coproc_params *p,
77 const struct coproc_reg *r)
78{
79 unsigned long val;
80 int cpu;
81
82 if (!p->is_write)
83 return read_from_write_only(vcpu, p);
84
85 cpu = get_cpu();
86
87 cpumask_setall(&vcpu->arch.require_dcache_flush);
88 cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush);
89
90 /* If we were already preempted, take the long way around */
91 if (cpu != vcpu->arch.last_pcpu) {
92 flush_cache_all();
93 goto done;
94 }
95
96 val = *vcpu_reg(vcpu, p->Rt1);
97
98 switch (p->CRm) {
99 case 6: /* Upgrade DCISW to DCCISW, as per HCR.SWIO */
100 case 14: /* DCCISW */
101 asm volatile("mcr p15, 0, %0, c7, c14, 2" : : "r" (val));
102 break;
103
104 case 10: /* DCCSW */
105 asm volatile("mcr p15, 0, %0, c7, c10, 2" : : "r" (val));
106 break;
107 }
108
109done:
110 put_cpu();
111
112 return true;
113}
114
115/*
116 * We could trap ID_DFR0 and tell the guest we don't support performance
117 * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was
118 * NAKed, so it will read the PMCR anyway.
119 *
120 * Therefore we tell the guest we have 0 counters. Unfortunately, we
121 * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
122 * all PM registers, which doesn't crash the guest kernel at least.
123 */
124static bool pm_fake(struct kvm_vcpu *vcpu,
125 const struct coproc_params *p,
126 const struct coproc_reg *r)
127{
128 if (p->is_write)
129 return ignore_write(vcpu, p);
130 else
131 return read_zero(vcpu, p);
132}
133
134#define access_pmcr pm_fake
135#define access_pmcntenset pm_fake
136#define access_pmcntenclr pm_fake
137#define access_pmovsr pm_fake
138#define access_pmselr pm_fake
139#define access_pmceid0 pm_fake
140#define access_pmceid1 pm_fake
141#define access_pmccntr pm_fake
142#define access_pmxevtyper pm_fake
143#define access_pmxevcntr pm_fake
144#define access_pmuserenr pm_fake
145#define access_pmintenset pm_fake
146#define access_pmintenclr pm_fake
147
148/* Architected CP15 registers.
149 * Important: Must be sorted ascending by CRn, CRm, Op1, Op2
150 */
151static const struct coproc_reg cp15_regs[] = {
152 /* CSSELR: swapped by interrupt.S. */
153 { CRn( 0), CRm( 0), Op1( 2), Op2( 0), is32,
154 NULL, reset_unknown, c0_CSSELR },
155
156 /* TTBR0/TTBR1: swapped by interrupt.S. */
157 { CRm( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 },
158 { CRm( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 },
159
160 /* TTBCR: swapped by interrupt.S. */
161 { CRn( 2), CRm( 0), Op1( 0), Op2( 2), is32,
162 NULL, reset_val, c2_TTBCR, 0x00000000 },
163
164 /* DACR: swapped by interrupt.S. */
165 { CRn( 3), CRm( 0), Op1( 0), Op2( 0), is32,
166 NULL, reset_unknown, c3_DACR },
167
168 /* DFSR/IFSR/ADFSR/AIFSR: swapped by interrupt.S. */
169 { CRn( 5), CRm( 0), Op1( 0), Op2( 0), is32,
170 NULL, reset_unknown, c5_DFSR },
171 { CRn( 5), CRm( 0), Op1( 0), Op2( 1), is32,
172 NULL, reset_unknown, c5_IFSR },
173 { CRn( 5), CRm( 1), Op1( 0), Op2( 0), is32,
174 NULL, reset_unknown, c5_ADFSR },
175 { CRn( 5), CRm( 1), Op1( 0), Op2( 1), is32,
176 NULL, reset_unknown, c5_AIFSR },
177
178 /* DFAR/IFAR: swapped by interrupt.S. */
179 { CRn( 6), CRm( 0), Op1( 0), Op2( 0), is32,
180 NULL, reset_unknown, c6_DFAR },
181 { CRn( 6), CRm( 0), Op1( 0), Op2( 2), is32,
182 NULL, reset_unknown, c6_IFAR },
183 /*
184 * DC{C,I,CI}SW operations:
185 */
186 { CRn( 7), CRm( 6), Op1( 0), Op2( 2), is32, access_dcsw},
187 { CRn( 7), CRm(10), Op1( 0), Op2( 2), is32, access_dcsw},
188 { CRn( 7), CRm(14), Op1( 0), Op2( 2), is32, access_dcsw},
189 /*
190 * Dummy performance monitor implementation.
191 */
192 { CRn( 9), CRm(12), Op1( 0), Op2( 0), is32, access_pmcr},
193 { CRn( 9), CRm(12), Op1( 0), Op2( 1), is32, access_pmcntenset},
194 { CRn( 9), CRm(12), Op1( 0), Op2( 2), is32, access_pmcntenclr},
195 { CRn( 9), CRm(12), Op1( 0), Op2( 3), is32, access_pmovsr},
196 { CRn( 9), CRm(12), Op1( 0), Op2( 5), is32, access_pmselr},
197 { CRn( 9), CRm(12), Op1( 0), Op2( 6), is32, access_pmceid0},
198 { CRn( 9), CRm(12), Op1( 0), Op2( 7), is32, access_pmceid1},
199 { CRn( 9), CRm(13), Op1( 0), Op2( 0), is32, access_pmccntr},
200 { CRn( 9), CRm(13), Op1( 0), Op2( 1), is32, access_pmxevtyper},
201 { CRn( 9), CRm(13), Op1( 0), Op2( 2), is32, access_pmxevcntr},
202 { CRn( 9), CRm(14), Op1( 0), Op2( 0), is32, access_pmuserenr},
203 { CRn( 9), CRm(14), Op1( 0), Op2( 1), is32, access_pmintenset},
204 { CRn( 9), CRm(14), Op1( 0), Op2( 2), is32, access_pmintenclr},
205
206 /* PRRR/NMRR (aka MAIR0/MAIR1): swapped by interrupt.S. */
207 { CRn(10), CRm( 2), Op1( 0), Op2( 0), is32,
208 NULL, reset_unknown, c10_PRRR},
209 { CRn(10), CRm( 2), Op1( 0), Op2( 1), is32,
210 NULL, reset_unknown, c10_NMRR},
211
212 /* VBAR: swapped by interrupt.S. */
213 { CRn(12), CRm( 0), Op1( 0), Op2( 0), is32,
214 NULL, reset_val, c12_VBAR, 0x00000000 },
215
216 /* CONTEXTIDR/TPIDRURW/TPIDRURO/TPIDRPRW: swapped by interrupt.S. */
217 { CRn(13), CRm( 0), Op1( 0), Op2( 1), is32,
218 NULL, reset_val, c13_CID, 0x00000000 },
219 { CRn(13), CRm( 0), Op1( 0), Op2( 2), is32,
220 NULL, reset_unknown, c13_TID_URW },
221 { CRn(13), CRm( 0), Op1( 0), Op2( 3), is32,
222 NULL, reset_unknown, c13_TID_URO },
223 { CRn(13), CRm( 0), Op1( 0), Op2( 4), is32,
224 NULL, reset_unknown, c13_TID_PRIV },
225
226 /* CNTKCTL: swapped by interrupt.S. */
227 { CRn(14), CRm( 1), Op1( 0), Op2( 0), is32,
228 NULL, reset_val, c14_CNTKCTL, 0x00000000 },
229};
230
231/* Target specific emulation tables */
232static struct kvm_coproc_target_table *target_tables[KVM_ARM_NUM_TARGETS];
233
234void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table)
235{
236 target_tables[table->target] = table;
237}
238
239/* Get specific register table for this target. */
240static const struct coproc_reg *get_target_table(unsigned target, size_t *num)
241{
242 struct kvm_coproc_target_table *table;
243
244 table = target_tables[target];
245 *num = table->num;
246 return table->table;
247}
248
249static const struct coproc_reg *find_reg(const struct coproc_params *params,
250 const struct coproc_reg table[],
251 unsigned int num)
252{
253 unsigned int i;
254
255 for (i = 0; i < num; i++) {
256 const struct coproc_reg *r = &table[i];
257
258 if (params->is_64bit != r->is_64)
259 continue;
260 if (params->CRn != r->CRn)
261 continue;
262 if (params->CRm != r->CRm)
263 continue;
264 if (params->Op1 != r->Op1)
265 continue;
266 if (params->Op2 != r->Op2)
267 continue;
268
269 return r;
270 }
271 return NULL;
272}
273
274static int emulate_cp15(struct kvm_vcpu *vcpu,
275 const struct coproc_params *params)
276{
277 size_t num;
278 const struct coproc_reg *table, *r;
279
280 trace_kvm_emulate_cp15_imp(params->Op1, params->Rt1, params->CRn,
281 params->CRm, params->Op2, params->is_write);
282
283 table = get_target_table(vcpu->arch.target, &num);
284
285 /* Search target-specific then generic table. */
286 r = find_reg(params, table, num);
287 if (!r)
288 r = find_reg(params, cp15_regs, ARRAY_SIZE(cp15_regs));
289
290 if (likely(r)) {
291 /* If we don't have an accessor, we should never get here! */
292 BUG_ON(!r->access);
293
294 if (likely(r->access(vcpu, params, r))) {
295 /* Skip instruction, since it was emulated */
296 kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
297 return 1;
298 }
299 /* If access function fails, it should complain. */
300 } else {
301 kvm_err("Unsupported guest CP15 access at: %08lx\n",
302 *vcpu_pc(vcpu));
303 print_cp_instr(params);
304 }
305 kvm_inject_undefined(vcpu);
306 return 1;
307}
308
309/**
310 * kvm_handle_cp15_64 -- handles a mrrc/mcrr trap on a guest CP15 access
311 * @vcpu: The VCPU pointer
312 * @run: The kvm_run struct
313 */
314int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
315{
316 struct coproc_params params;
317
318 params.CRm = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf;
319 params.Rt1 = (kvm_vcpu_get_hsr(vcpu) >> 5) & 0xf;
320 params.is_write = ((kvm_vcpu_get_hsr(vcpu) & 1) == 0);
321 params.is_64bit = true;
322
323 params.Op1 = (kvm_vcpu_get_hsr(vcpu) >> 16) & 0xf;
324 params.Op2 = 0;
325 params.Rt2 = (kvm_vcpu_get_hsr(vcpu) >> 10) & 0xf;
326 params.CRn = 0;
327
328 return emulate_cp15(vcpu, &params);
329}
330
331static void reset_coproc_regs(struct kvm_vcpu *vcpu,
332 const struct coproc_reg *table, size_t num)
333{
334 unsigned long i;
335
336 for (i = 0; i < num; i++)
337 if (table[i].reset)
338 table[i].reset(vcpu, &table[i]);
339}
340
341/**
342 * kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access
343 * @vcpu: The VCPU pointer
344 * @run: The kvm_run struct
345 */
346int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
347{
348 struct coproc_params params;
349
350 params.CRm = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf;
351 params.Rt1 = (kvm_vcpu_get_hsr(vcpu) >> 5) & 0xf;
352 params.is_write = ((kvm_vcpu_get_hsr(vcpu) & 1) == 0);
353 params.is_64bit = false;
354
355 params.CRn = (kvm_vcpu_get_hsr(vcpu) >> 10) & 0xf;
356 params.Op1 = (kvm_vcpu_get_hsr(vcpu) >> 14) & 0x7;
357 params.Op2 = (kvm_vcpu_get_hsr(vcpu) >> 17) & 0x7;
358 params.Rt2 = 0;
359
360 return emulate_cp15(vcpu, &params);
361}
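
As a worked example of the decode above (looking only at the low HSR bits used here): a guest executing mcr p15, 0, r3, c7, c10, 2 (DCCSW, routed to access_dcsw by the cp15_regs table) presents CRn=7, CRm=10, Op1=0, Op2=2, Rt=3 and a clear direction bit, i.e. a write. A small standalone sketch of the same bit-slicing on a hand-built value; the 0x41c74 constant is illustrative and carries no EC/IL bits.

/* Sketch only: the field extraction of kvm_handle_cp15_32(), applied to a
 * hand-built ISS value for "mcr p15, 0, r3, c7, c10, 2" (DCCSW). */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint32_t hsr = 0x41c74; /* Op2=2, Op1=0, CRn=7, Rt=3, CRm=10, write */

        printf("CRn=%u CRm=%u Op1=%u Op2=%u Rt=%u %s\n",
               (hsr >> 10) & 0xf, (hsr >> 1) & 0xf,
               (hsr >> 14) & 0x7, (hsr >> 17) & 0x7,
               (hsr >> 5) & 0xf,
               (hsr & 1) ? "read (mrc)" : "write (mcr)");
        return 0;       /* prints: CRn=7 CRm=10 Op1=0 Op2=2 Rt=3 write (mcr) */
}
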
362
363/******************************************************************************
364 * Userspace API
365 *****************************************************************************/
366
367static bool index_to_params(u64 id, struct coproc_params *params)
368{
369 switch (id & KVM_REG_SIZE_MASK) {
370 case KVM_REG_SIZE_U32:
371 /* Any unused index bits means it's not valid. */
372 if (id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK
373 | KVM_REG_ARM_COPROC_MASK
374 | KVM_REG_ARM_32_CRN_MASK
375 | KVM_REG_ARM_CRM_MASK
376 | KVM_REG_ARM_OPC1_MASK
377 | KVM_REG_ARM_32_OPC2_MASK))
378 return false;
379
380 params->is_64bit = false;
381 params->CRn = ((id & KVM_REG_ARM_32_CRN_MASK)
382 >> KVM_REG_ARM_32_CRN_SHIFT);
383 params->CRm = ((id & KVM_REG_ARM_CRM_MASK)
384 >> KVM_REG_ARM_CRM_SHIFT);
385 params->Op1 = ((id & KVM_REG_ARM_OPC1_MASK)
386 >> KVM_REG_ARM_OPC1_SHIFT);
387 params->Op2 = ((id & KVM_REG_ARM_32_OPC2_MASK)
388 >> KVM_REG_ARM_32_OPC2_SHIFT);
389 return true;
390 case KVM_REG_SIZE_U64:
391 /* Any unused index bits means it's not valid. */
392 if (id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK
393 | KVM_REG_ARM_COPROC_MASK
394 | KVM_REG_ARM_CRM_MASK
395 | KVM_REG_ARM_OPC1_MASK))
396 return false;
397 params->is_64bit = true;
398 params->CRm = ((id & KVM_REG_ARM_CRM_MASK)
399 >> KVM_REG_ARM_CRM_SHIFT);
400 params->Op1 = ((id & KVM_REG_ARM_OPC1_MASK)
401 >> KVM_REG_ARM_OPC1_SHIFT);
402 params->Op2 = 0;
403 params->CRn = 0;
404 return true;
405 default:
406 return false;
407 }
408}
409
410/* Decode an index value, and find the cp15 coproc_reg entry. */
411static const struct coproc_reg *index_to_coproc_reg(struct kvm_vcpu *vcpu,
412 u64 id)
413{
414 size_t num;
415 const struct coproc_reg *table, *r;
416 struct coproc_params params;
417
418 /* We only do cp15 for now. */
419 if ((id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT != 15)
420 return NULL;
421
422 if (!index_to_params(id, &params))
423 return NULL;
424
425 table = get_target_table(vcpu->arch.target, &num);
426 r = find_reg(&params, table, num);
427 if (!r)
428 r = find_reg(&params, cp15_regs, ARRAY_SIZE(cp15_regs));
429
430 /* Not saved in the cp15 array? */
431 if (r && !r->reg)
432 r = NULL;
433
434 return r;
435}
436
437/*
438 * These are the invariant cp15 registers: we let the guest see the host
439 * versions of these, so they're part of the guest state.
440 *
441 * A future CPU may provide a mechanism to present different values to
442 * the guest, or a future kvm may trap them.
443 */
444/* Unfortunately, there's no register-argument for mrc, so generate. */
445#define FUNCTION_FOR32(crn, crm, op1, op2, name) \
446 static void get_##name(struct kvm_vcpu *v, \
447 const struct coproc_reg *r) \
448 { \
449 u32 val; \
450 \
451 asm volatile("mrc p15, " __stringify(op1) \
452 ", %0, c" __stringify(crn) \
453 ", c" __stringify(crm) \
454 ", " __stringify(op2) "\n" : "=r" (val)); \
455 ((struct coproc_reg *)r)->val = val; \
456 }
457
458FUNCTION_FOR32(0, 0, 0, 0, MIDR)
459FUNCTION_FOR32(0, 0, 0, 1, CTR)
460FUNCTION_FOR32(0, 0, 0, 2, TCMTR)
461FUNCTION_FOR32(0, 0, 0, 3, TLBTR)
462FUNCTION_FOR32(0, 0, 0, 6, REVIDR)
463FUNCTION_FOR32(0, 1, 0, 0, ID_PFR0)
464FUNCTION_FOR32(0, 1, 0, 1, ID_PFR1)
465FUNCTION_FOR32(0, 1, 0, 2, ID_DFR0)
466FUNCTION_FOR32(0, 1, 0, 3, ID_AFR0)
467FUNCTION_FOR32(0, 1, 0, 4, ID_MMFR0)
468FUNCTION_FOR32(0, 1, 0, 5, ID_MMFR1)
469FUNCTION_FOR32(0, 1, 0, 6, ID_MMFR2)
470FUNCTION_FOR32(0, 1, 0, 7, ID_MMFR3)
471FUNCTION_FOR32(0, 2, 0, 0, ID_ISAR0)
472FUNCTION_FOR32(0, 2, 0, 1, ID_ISAR1)
473FUNCTION_FOR32(0, 2, 0, 2, ID_ISAR2)
474FUNCTION_FOR32(0, 2, 0, 3, ID_ISAR3)
475FUNCTION_FOR32(0, 2, 0, 4, ID_ISAR4)
476FUNCTION_FOR32(0, 2, 0, 5, ID_ISAR5)
477FUNCTION_FOR32(0, 0, 1, 1, CLIDR)
478FUNCTION_FOR32(0, 0, 1, 7, AIDR)
479
480/* ->val is filled in by kvm_invariant_coproc_table_init() */
481static struct coproc_reg invariant_cp15[] = {
482 { CRn( 0), CRm( 0), Op1( 0), Op2( 0), is32, NULL, get_MIDR },
483 { CRn( 0), CRm( 0), Op1( 0), Op2( 1), is32, NULL, get_CTR },
484 { CRn( 0), CRm( 0), Op1( 0), Op2( 2), is32, NULL, get_TCMTR },
485 { CRn( 0), CRm( 0), Op1( 0), Op2( 3), is32, NULL, get_TLBTR },
486 { CRn( 0), CRm( 0), Op1( 0), Op2( 6), is32, NULL, get_REVIDR },
487
488 { CRn( 0), CRm( 1), Op1( 0), Op2( 0), is32, NULL, get_ID_PFR0 },
489 { CRn( 0), CRm( 1), Op1( 0), Op2( 1), is32, NULL, get_ID_PFR1 },
490 { CRn( 0), CRm( 1), Op1( 0), Op2( 2), is32, NULL, get_ID_DFR0 },
491 { CRn( 0), CRm( 1), Op1( 0), Op2( 3), is32, NULL, get_ID_AFR0 },
492 { CRn( 0), CRm( 1), Op1( 0), Op2( 4), is32, NULL, get_ID_MMFR0 },
493 { CRn( 0), CRm( 1), Op1( 0), Op2( 5), is32, NULL, get_ID_MMFR1 },
494 { CRn( 0), CRm( 1), Op1( 0), Op2( 6), is32, NULL, get_ID_MMFR2 },
495 { CRn( 0), CRm( 1), Op1( 0), Op2( 7), is32, NULL, get_ID_MMFR3 },
496
497 { CRn( 0), CRm( 2), Op1( 0), Op2( 0), is32, NULL, get_ID_ISAR0 },
498 { CRn( 0), CRm( 2), Op1( 0), Op2( 1), is32, NULL, get_ID_ISAR1 },
499 { CRn( 0), CRm( 2), Op1( 0), Op2( 2), is32, NULL, get_ID_ISAR2 },
500 { CRn( 0), CRm( 2), Op1( 0), Op2( 3), is32, NULL, get_ID_ISAR3 },
501 { CRn( 0), CRm( 2), Op1( 0), Op2( 4), is32, NULL, get_ID_ISAR4 },
502 { CRn( 0), CRm( 2), Op1( 0), Op2( 5), is32, NULL, get_ID_ISAR5 },
503
504 { CRn( 0), CRm( 0), Op1( 1), Op2( 1), is32, NULL, get_CLIDR },
505 { CRn( 0), CRm( 0), Op1( 1), Op2( 7), is32, NULL, get_AIDR },
506};
507
508static int reg_from_user(void *val, const void __user *uaddr, u64 id)
509{
510 /* This Just Works because we are little endian. */
511 if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0)
512 return -EFAULT;
513 return 0;
514}
515
516static int reg_to_user(void __user *uaddr, const void *val, u64 id)
517{
518 /* This Just Works because we are little endian. */
519 if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0)
520 return -EFAULT;
521 return 0;
522}
523
524static int get_invariant_cp15(u64 id, void __user *uaddr)
525{
526 struct coproc_params params;
527 const struct coproc_reg *r;
528
529 if (!index_to_params(id, &params))
530 return -ENOENT;
531
532 r = find_reg(&params, invariant_cp15, ARRAY_SIZE(invariant_cp15));
533 if (!r)
534 return -ENOENT;
535
536 return reg_to_user(uaddr, &r->val, id);
537}
538
539static int set_invariant_cp15(u64 id, void __user *uaddr)
540{
541 struct coproc_params params;
542 const struct coproc_reg *r;
543 int err;
544 u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */
545
546 if (!index_to_params(id, &params))
547 return -ENOENT;
548 r = find_reg(&params, invariant_cp15, ARRAY_SIZE(invariant_cp15));
549 if (!r)
550 return -ENOENT;
551
552 err = reg_from_user(&val, uaddr, id);
553 if (err)
554 return err;
555
556 /* This is what we mean by invariant: you can't change it. */
557 if (r->val != val)
558 return -EINVAL;
559
560 return 0;
561}
562
563static bool is_valid_cache(u32 val)
564{
565 u32 level, ctype;
566
567 if (val >= CSSELR_MAX)
568 return false;
569
570 /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */
571 level = (val >> 1);
572 ctype = (cache_levels >> (level * 3)) & 7;
573
574 switch (ctype) {
575 case 0: /* No cache */
576 return false;
577 case 1: /* Instruction cache only */
578 return (val & 1);
579 case 2: /* Data cache only */
580 case 4: /* Unified cache */
581 return !(val & 1);
582 case 3: /* Separate instruction and data caches */
583 return true;
584 default: /* Reserved: we can't know instruction or data. */
585 return false;
586 }
587}
588
589/* Which cache CCSIDR represents depends on CSSELR value. */
590static u32 get_ccsidr(u32 csselr)
591{
592 u32 ccsidr;
593
594 /* Make sure no one else changes CSSELR during this! */
595 local_irq_disable();
596 /* Put value into CSSELR */
597 asm volatile("mcr p15, 2, %0, c0, c0, 0" : : "r" (csselr));
598 isb();
599 /* Read result out of CCSIDR */
600 asm volatile("mrc p15, 1, %0, c0, c0, 0" : "=r" (ccsidr));
601 local_irq_enable();
602
603 return ccsidr;
604}
605
606static int demux_c15_get(u64 id, void __user *uaddr)
607{
608 u32 val;
609 u32 __user *uval = uaddr;
610
611 /* Fail if we have unknown bits set. */
612 if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
613 | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1)))
614 return -ENOENT;
615
616 switch (id & KVM_REG_ARM_DEMUX_ID_MASK) {
617 case KVM_REG_ARM_DEMUX_ID_CCSIDR:
618 if (KVM_REG_SIZE(id) != 4)
619 return -ENOENT;
620 val = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
621 >> KVM_REG_ARM_DEMUX_VAL_SHIFT;
622 if (!is_valid_cache(val))
623 return -ENOENT;
624
625 return put_user(get_ccsidr(val), uval);
626 default:
627 return -ENOENT;
628 }
629}
630
631static int demux_c15_set(u64 id, void __user *uaddr)
632{
633 u32 val, newval;
634 u32 __user *uval = uaddr;
635
636 /* Fail if we have unknown bits set. */
637 if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
638 | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1)))
639 return -ENOENT;
640
641 switch (id & KVM_REG_ARM_DEMUX_ID_MASK) {
642 case KVM_REG_ARM_DEMUX_ID_CCSIDR:
643 if (KVM_REG_SIZE(id) != 4)
644 return -ENOENT;
645 val = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
646 >> KVM_REG_ARM_DEMUX_VAL_SHIFT;
647 if (!is_valid_cache(val))
648 return -ENOENT;
649
650 if (get_user(newval, uval))
651 return -EFAULT;
652
653 /* This is also invariant: you can't change it. */
654 if (newval != get_ccsidr(val))
655 return -EINVAL;
656 return 0;
657 default:
658 return -ENOENT;
659 }
660}
661
662#ifdef CONFIG_VFPv3
663static const int vfp_sysregs[] = { KVM_REG_ARM_VFP_FPEXC,
664 KVM_REG_ARM_VFP_FPSCR,
665 KVM_REG_ARM_VFP_FPINST,
666 KVM_REG_ARM_VFP_FPINST2,
667 KVM_REG_ARM_VFP_MVFR0,
668 KVM_REG_ARM_VFP_MVFR1,
669 KVM_REG_ARM_VFP_FPSID };
670
671static unsigned int num_fp_regs(void)
672{
673 if (((fmrx(MVFR0) & MVFR0_A_SIMD_MASK) >> MVFR0_A_SIMD_BIT) == 2)
674 return 32;
675 else
676 return 16;
677}
678
679static unsigned int num_vfp_regs(void)
680{
681 /* Normal FP regs + control regs. */
682 return num_fp_regs() + ARRAY_SIZE(vfp_sysregs);
683}
684
685static int copy_vfp_regids(u64 __user *uindices)
686{
687 unsigned int i;
688 const u64 u32reg = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_VFP;
689 const u64 u64reg = KVM_REG_ARM | KVM_REG_SIZE_U64 | KVM_REG_ARM_VFP;
690
691 for (i = 0; i < num_fp_regs(); i++) {
692 if (put_user((u64reg | KVM_REG_ARM_VFP_BASE_REG) + i,
693 uindices))
694 return -EFAULT;
695 uindices++;
696 }
697
698 for (i = 0; i < ARRAY_SIZE(vfp_sysregs); i++) {
699 if (put_user(u32reg | vfp_sysregs[i], uindices))
700 return -EFAULT;
701 uindices++;
702 }
703
704 return num_vfp_regs();
705}
706
707static int vfp_get_reg(const struct kvm_vcpu *vcpu, u64 id, void __user *uaddr)
708{
709 u32 vfpid = (id & KVM_REG_ARM_VFP_MASK);
710 u32 val;
711
712 /* Fail if we have unknown bits set. */
713 if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
714 | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1)))
715 return -ENOENT;
716
717 if (vfpid < num_fp_regs()) {
718 if (KVM_REG_SIZE(id) != 8)
719 return -ENOENT;
720 return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpregs[vfpid],
721 id);
722 }
723
724 /* FP control registers are all 32 bit. */
725 if (KVM_REG_SIZE(id) != 4)
726 return -ENOENT;
727
728 switch (vfpid) {
729 case KVM_REG_ARM_VFP_FPEXC:
730 return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpexc, id);
731 case KVM_REG_ARM_VFP_FPSCR:
732 return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpscr, id);
733 case KVM_REG_ARM_VFP_FPINST:
734 return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpinst, id);
735 case KVM_REG_ARM_VFP_FPINST2:
736 return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpinst2, id);
737 case KVM_REG_ARM_VFP_MVFR0:
738 val = fmrx(MVFR0);
739 return reg_to_user(uaddr, &val, id);
740 case KVM_REG_ARM_VFP_MVFR1:
741 val = fmrx(MVFR1);
742 return reg_to_user(uaddr, &val, id);
743 case KVM_REG_ARM_VFP_FPSID:
744 val = fmrx(FPSID);
745 return reg_to_user(uaddr, &val, id);
746 default:
747 return -ENOENT;
748 }
749}
750
751static int vfp_set_reg(struct kvm_vcpu *vcpu, u64 id, const void __user *uaddr)
752{
753 u32 vfpid = (id & KVM_REG_ARM_VFP_MASK);
754 u32 val;
755
756 /* Fail if we have unknown bits set. */
757 if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
758 | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1)))
759 return -ENOENT;
760
761 if (vfpid < num_fp_regs()) {
762 if (KVM_REG_SIZE(id) != 8)
763 return -ENOENT;
764 return reg_from_user(&vcpu->arch.vfp_guest.fpregs[vfpid],
765 uaddr, id);
766 }
767
768 /* FP control registers are all 32 bit. */
769 if (KVM_REG_SIZE(id) != 4)
770 return -ENOENT;
771
772 switch (vfpid) {
773 case KVM_REG_ARM_VFP_FPEXC:
774 return reg_from_user(&vcpu->arch.vfp_guest.fpexc, uaddr, id);
775 case KVM_REG_ARM_VFP_FPSCR:
776 return reg_from_user(&vcpu->arch.vfp_guest.fpscr, uaddr, id);
777 case KVM_REG_ARM_VFP_FPINST:
778 return reg_from_user(&vcpu->arch.vfp_guest.fpinst, uaddr, id);
779 case KVM_REG_ARM_VFP_FPINST2:
780 return reg_from_user(&vcpu->arch.vfp_guest.fpinst2, uaddr, id);
781 /* These are invariant. */
782 case KVM_REG_ARM_VFP_MVFR0:
783 if (reg_from_user(&val, uaddr, id))
784 return -EFAULT;
785 if (val != fmrx(MVFR0))
786 return -EINVAL;
787 return 0;
788 case KVM_REG_ARM_VFP_MVFR1:
789 if (reg_from_user(&val, uaddr, id))
790 return -EFAULT;
791 if (val != fmrx(MVFR1))
792 return -EINVAL;
793 return 0;
794 case KVM_REG_ARM_VFP_FPSID:
795 if (reg_from_user(&val, uaddr, id))
796 return -EFAULT;
797 if (val != fmrx(FPSID))
798 return -EINVAL;
799 return 0;
800 default:
801 return -ENOENT;
802 }
803}
804#else /* !CONFIG_VFPv3 */
805static unsigned int num_vfp_regs(void)
806{
807 return 0;
808}
809
810static int copy_vfp_regids(u64 __user *uindices)
811{
812 return 0;
813}
814
815static int vfp_get_reg(const struct kvm_vcpu *vcpu, u64 id, void __user *uaddr)
816{
817 return -ENOENT;
818}
819
820static int vfp_set_reg(struct kvm_vcpu *vcpu, u64 id, const void __user *uaddr)
821{
822 return -ENOENT;
823}
824#endif /* !CONFIG_VFPv3 */
825
826int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
827{
828 const struct coproc_reg *r;
829 void __user *uaddr = (void __user *)(long)reg->addr;
830
831 if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
832 return demux_c15_get(reg->id, uaddr);
833
834 if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_VFP)
835 return vfp_get_reg(vcpu, reg->id, uaddr);
836
837 r = index_to_coproc_reg(vcpu, reg->id);
838 if (!r)
839 return get_invariant_cp15(reg->id, uaddr);
840
841 /* Note: copies two regs if size is 64 bit. */
842 return reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id);
843}
844
845int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
846{
847 const struct coproc_reg *r;
848 void __user *uaddr = (void __user *)(long)reg->addr;
849
850 if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
851 return demux_c15_set(reg->id, uaddr);
852
853 if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_VFP)
854 return vfp_set_reg(vcpu, reg->id, uaddr);
855
856 r = index_to_coproc_reg(vcpu, reg->id);
857 if (!r)
858 return set_invariant_cp15(reg->id, uaddr);
859
860 /* Note: copies two regs if size is 64 bit */
861 return reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id);
862}
863
864static unsigned int num_demux_regs(void)
865{
866 unsigned int i, count = 0;
867
868 for (i = 0; i < CSSELR_MAX; i++)
869 if (is_valid_cache(i))
870 count++;
871
872 return count;
873}
874
875static int write_demux_regids(u64 __user *uindices)
876{
877 u64 val = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX;
878 unsigned int i;
879
880 val |= KVM_REG_ARM_DEMUX_ID_CCSIDR;
881 for (i = 0; i < CSSELR_MAX; i++) {
882 if (!is_valid_cache(i))
883 continue;
884 if (put_user(val | i, uindices))
885 return -EFAULT;
886 uindices++;
887 }
888 return 0;
889}
890
891static u64 cp15_to_index(const struct coproc_reg *reg)
892{
893 u64 val = KVM_REG_ARM | (15 << KVM_REG_ARM_COPROC_SHIFT);
894 if (reg->is_64) {
895 val |= KVM_REG_SIZE_U64;
896 val |= (reg->Op1 << KVM_REG_ARM_OPC1_SHIFT);
897 val |= (reg->CRm << KVM_REG_ARM_CRM_SHIFT);
898 } else {
899 val |= KVM_REG_SIZE_U32;
900 val |= (reg->Op1 << KVM_REG_ARM_OPC1_SHIFT);
901 val |= (reg->Op2 << KVM_REG_ARM_32_OPC2_SHIFT);
902 val |= (reg->CRm << KVM_REG_ARM_CRM_SHIFT);
903 val |= (reg->CRn << KVM_REG_ARM_32_CRN_SHIFT);
904 }
905 return val;
906}
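
The same encoding can be rebuilt from userspace when constructing register ids by hand. A sketch, assuming the KVM_REG_ARM_* field shift macros from the ARM uapi header are available; the helper name is ours:

#include <stdint.h>
#include <linux/kvm.h>

/* Build the ONE_REG id of a 32-bit cp15 register, mirroring cp15_to_index(). */
static uint64_t cp15_reg32_id(uint32_t crn, uint32_t crm,
			      uint32_t op1, uint32_t op2)
{
	return KVM_REG_ARM | KVM_REG_SIZE_U32 |
	       (15 << KVM_REG_ARM_COPROC_SHIFT) |
	       (crn << KVM_REG_ARM_32_CRN_SHIFT) |
	       (crm << KVM_REG_ARM_CRM_SHIFT) |
	       (op1 << KVM_REG_ARM_OPC1_SHIFT) |
	       (op2 << KVM_REG_ARM_32_OPC2_SHIFT);
}

/* For example, cp15_reg32_id(0, 0, 0, 5) addresses the guest MPIDR. */
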
907
908static bool copy_reg_to_user(const struct coproc_reg *reg, u64 __user **uind)
909{
910 if (!*uind)
911 return true;
912
913 if (put_user(cp15_to_index(reg), *uind))
914 return false;
915
916 (*uind)++;
917 return true;
918}
919
920/* Assumed ordered tables, see kvm_coproc_table_init. */
921static int walk_cp15(struct kvm_vcpu *vcpu, u64 __user *uind)
922{
923 const struct coproc_reg *i1, *i2, *end1, *end2;
924 unsigned int total = 0;
925 size_t num;
926
927 /* We check for duplicates here, to allow arch-specific overrides. */
928 i1 = get_target_table(vcpu->arch.target, &num);
929 end1 = i1 + num;
930 i2 = cp15_regs;
931 end2 = cp15_regs + ARRAY_SIZE(cp15_regs);
932
933 BUG_ON(i1 == end1 || i2 == end2);
934
935 /* Walk carefully, as both tables may refer to the same register. */
936 while (i1 || i2) {
937 int cmp = cmp_reg(i1, i2);
938 /* target-specific overrides generic entry. */
939 if (cmp <= 0) {
940 /* Ignore registers we trap but don't save. */
941 if (i1->reg) {
942 if (!copy_reg_to_user(i1, &uind))
943 return -EFAULT;
944 total++;
945 }
946 } else {
947 /* Ignore registers we trap but don't save. */
948 if (i2->reg) {
949 if (!copy_reg_to_user(i2, &uind))
950 return -EFAULT;
951 total++;
952 }
953 }
954
955 if (cmp <= 0 && ++i1 == end1)
956 i1 = NULL;
957 if (cmp >= 0 && ++i2 == end2)
958 i2 = NULL;
959 }
960 return total;
961}
962
963unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu)
964{
965 return ARRAY_SIZE(invariant_cp15)
966 + num_demux_regs()
967 + num_vfp_regs()
968 + walk_cp15(vcpu, (u64 __user *)NULL);
969}
970
971int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
972{
973 unsigned int i;
974 int err;
975
976 /* Then give them all the invariant registers' indices. */
977 for (i = 0; i < ARRAY_SIZE(invariant_cp15); i++) {
978 if (put_user(cp15_to_index(&invariant_cp15[i]), uindices))
979 return -EFAULT;
980 uindices++;
981 }
982
983 err = walk_cp15(vcpu, uindices);
984 if (err < 0)
985 return err;
986 uindices += err;
987
988 err = copy_vfp_regids(uindices);
989 if (err < 0)
990 return err;
991 uindices += err;
992
993 return write_demux_regids(uindices);
994}
995
996void kvm_coproc_table_init(void)
997{
998 unsigned int i;
999
1000 /* Make sure tables are unique and in order. */
1001 for (i = 1; i < ARRAY_SIZE(cp15_regs); i++)
1002 BUG_ON(cmp_reg(&cp15_regs[i-1], &cp15_regs[i]) >= 0);
1003
1004 /* We abuse the reset function to overwrite the table itself. */
1005 for (i = 0; i < ARRAY_SIZE(invariant_cp15); i++)
1006 invariant_cp15[i].reset(NULL, &invariant_cp15[i]);
1007
1008 /*
1009 * CLIDR format is awkward, so clean it up. See ARM B4.1.20:
1010 *
1011 * If software reads the Cache Type fields from Ctype1
1012 * upwards, once it has seen a value of 0b000, no caches
1013 * exist at further-out levels of the hierarchy. So, for
1014 * example, if Ctype3 is the first Cache Type field with a
1015 * value of 0b000, the values of Ctype4 to Ctype7 must be
1016 * ignored.
1017 */
1018 asm volatile("mrc p15, 1, %0, c0, c0, 1" : "=r" (cache_levels));
1019 for (i = 0; i < 7; i++)
1020 if (((cache_levels >> (i*3)) & 7) == 0)
1021 break;
1022 /* Clear all higher bits. */
1023 cache_levels &= (1 << (i*3))-1;
1024}
1025
1026/**
1027 * kvm_reset_coprocs - sets cp15 registers to reset value
1028 * @vcpu: The VCPU pointer
1029 *
1030 * This function finds the right table above and sets the registers on the
1031 * virtual CPU struct to their architecturally defined reset values.
1032 */
1033void kvm_reset_coprocs(struct kvm_vcpu *vcpu)
1034{
1035 size_t num;
1036 const struct coproc_reg *table;
1037
1038 /* Catch someone adding a register without putting in reset entry. */
1039 memset(vcpu->arch.cp15, 0x42, sizeof(vcpu->arch.cp15));
1040
1041 /* Generic chip reset first (so target could override). */
1042 reset_coproc_regs(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs));
1043
1044 table = get_target_table(vcpu->arch.target, &num);
1045 reset_coproc_regs(vcpu, table, num);
1046
1047 for (num = 1; num < NR_CP15_REGS; num++)
1048 if (vcpu->arch.cp15[num] == 0x42424242)
1049 panic("Didn't reset vcpu->arch.cp15[%zi]", num);
1050}
diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
new file mode 100644
index 000000000000..b7301d3e4799
--- /dev/null
+++ b/arch/arm/kvm/coproc.h
@@ -0,0 +1,153 @@
1/*
2 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
3 * Authors: Christoffer Dall <c.dall@virtualopensystems.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2, as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18
19#ifndef __ARM_KVM_COPROC_LOCAL_H__
20#define __ARM_KVM_COPROC_LOCAL_H__
21
22struct coproc_params {
23 unsigned long CRn;
24 unsigned long CRm;
25 unsigned long Op1;
26 unsigned long Op2;
27 unsigned long Rt1;
28 unsigned long Rt2;
29 bool is_64bit;
30 bool is_write;
31};
32
33struct coproc_reg {
34 /* MRC/MCR/MRRC/MCRR instruction which accesses it. */
35 unsigned long CRn;
36 unsigned long CRm;
37 unsigned long Op1;
38 unsigned long Op2;
39
40 bool is_64;
41
42 /* Trapped access from guest, if non-NULL. */
43 bool (*access)(struct kvm_vcpu *,
44 const struct coproc_params *,
45 const struct coproc_reg *);
46
47 /* Initialization for vcpu. */
48 void (*reset)(struct kvm_vcpu *, const struct coproc_reg *);
49
50 /* Index into vcpu->arch.cp15[], or 0 if we don't need to save it. */
51 unsigned long reg;
52
53 /* Value (usually reset value) */
54 u64 val;
55};
56
57static inline void print_cp_instr(const struct coproc_params *p)
58{
59 /* Look, we even formatted it for you to paste into the table! */
60 if (p->is_64bit) {
61 kvm_pr_unimpl(" { CRm(%2lu), Op1(%2lu), is64, func_%s },\n",
62 p->CRm, p->Op1, p->is_write ? "write" : "read");
63 } else {
64 kvm_pr_unimpl(" { CRn(%2lu), CRm(%2lu), Op1(%2lu), Op2(%2lu), is32,"
65 " func_%s },\n",
66 p->CRn, p->CRm, p->Op1, p->Op2,
67 p->is_write ? "write" : "read");
68 }
69}
70
71static inline bool ignore_write(struct kvm_vcpu *vcpu,
72 const struct coproc_params *p)
73{
74 return true;
75}
76
77static inline bool read_zero(struct kvm_vcpu *vcpu,
78 const struct coproc_params *p)
79{
80 *vcpu_reg(vcpu, p->Rt1) = 0;
81 return true;
82}
83
84static inline bool write_to_read_only(struct kvm_vcpu *vcpu,
85 const struct coproc_params *params)
86{
87 kvm_debug("CP15 write to read-only register at: %08lx\n",
88 *vcpu_pc(vcpu));
89 print_cp_instr(params);
90 return false;
91}
92
93static inline bool read_from_write_only(struct kvm_vcpu *vcpu,
94 const struct coproc_params *params)
95{
96 kvm_debug("CP15 read to write-only register at: %08lx\n",
97 *vcpu_pc(vcpu));
98 print_cp_instr(params);
99 return false;
100}
101
102/* Reset functions */
103static inline void reset_unknown(struct kvm_vcpu *vcpu,
104 const struct coproc_reg *r)
105{
106 BUG_ON(!r->reg);
107 BUG_ON(r->reg >= ARRAY_SIZE(vcpu->arch.cp15));
108 vcpu->arch.cp15[r->reg] = 0xdecafbad;
109}
110
111static inline void reset_val(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
112{
113 BUG_ON(!r->reg);
114 BUG_ON(r->reg >= ARRAY_SIZE(vcpu->arch.cp15));
115 vcpu->arch.cp15[r->reg] = r->val;
116}
117
118static inline void reset_unknown64(struct kvm_vcpu *vcpu,
119 const struct coproc_reg *r)
120{
121 BUG_ON(!r->reg);
122 BUG_ON(r->reg + 1 >= ARRAY_SIZE(vcpu->arch.cp15));
123
124 vcpu->arch.cp15[r->reg] = 0xdecafbad;
125 vcpu->arch.cp15[r->reg+1] = 0xd0c0ffee;
126}
127
128static inline int cmp_reg(const struct coproc_reg *i1,
129 const struct coproc_reg *i2)
130{
131 BUG_ON(i1 == i2);
132 if (!i1)
133 return 1;
134 else if (!i2)
135 return -1;
136 if (i1->CRn != i2->CRn)
137 return i1->CRn - i2->CRn;
138 if (i1->CRm != i2->CRm)
139 return i1->CRm - i2->CRm;
140 if (i1->Op1 != i2->Op1)
141 return i1->Op1 - i2->Op1;
142 return i1->Op2 - i2->Op2;
143}
144
145
146#define CRn(_x) .CRn = _x
147#define CRm(_x) .CRm = _x
148#define Op1(_x) .Op1 = _x
149#define Op2(_x) .Op2 = _x
150#define is64 .is_64 = true
151#define is32 .is_64 = false
152
153#endif /* __ARM_KVM_COPROC_LOCAL_H__ */
diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c
new file mode 100644
index 000000000000..685063a6d0cf
--- /dev/null
+++ b/arch/arm/kvm/coproc_a15.c
@@ -0,0 +1,162 @@
1/*
2 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
3 * Authors: Rusty Russell <rusty@rustcorp.com.au>
4 * Christoffer Dall <c.dall@virtualopensystems.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 */
19#include <linux/kvm_host.h>
20#include <asm/cputype.h>
21#include <asm/kvm_arm.h>
22#include <asm/kvm_host.h>
23#include <asm/kvm_emulate.h>
24#include <asm/kvm_coproc.h>
25#include <linux/init.h>
26
27static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
28{
29 /*
30 * Compute guest MPIDR:
31 * (Even if we present only one VCPU to the guest on an SMP
32 * host we don't set the U bit in the MPIDR, or vice versa, as
33 * revealing the underlying hardware properties is likely to
34 * be the best choice).
35 */
36 vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & ~MPIDR_LEVEL_MASK)
37 | (vcpu->vcpu_id & MPIDR_LEVEL_MASK);
38}
39
40#include "coproc.h"
41
42/* A15 TRM 4.3.28: RO WI */
43static bool access_actlr(struct kvm_vcpu *vcpu,
44 const struct coproc_params *p,
45 const struct coproc_reg *r)
46{
47 if (p->is_write)
48 return ignore_write(vcpu, p);
49
50 *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR];
51 return true;
52}
53
54/* A15 TRM 4.3.60: R/O. */
55static bool access_cbar(struct kvm_vcpu *vcpu,
56 const struct coproc_params *p,
57 const struct coproc_reg *r)
58{
59 if (p->is_write)
60 return write_to_read_only(vcpu, p);
61 return read_zero(vcpu, p);
62}
63
64/* A15 TRM 4.3.48: R/O WI. */
65static bool access_l2ctlr(struct kvm_vcpu *vcpu,
66 const struct coproc_params *p,
67 const struct coproc_reg *r)
68{
69 if (p->is_write)
70 return ignore_write(vcpu, p);
71
72 *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR];
73 return true;
74}
75
76static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
77{
78 u32 l2ctlr, ncores;
79
80 asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
81 l2ctlr &= ~(3 << 24);
82 ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1;
83 l2ctlr |= (ncores & 3) << 24;
84
85 vcpu->arch.cp15[c9_L2CTLR] = l2ctlr;
86}
87
88static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
89{
90 u32 actlr;
91
92 /* ACTLR contains SMP bit: make sure you create all cpus first! */
93 asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr));
94 /* Make the SMP bit consistent with the guest configuration */
95 if (atomic_read(&vcpu->kvm->online_vcpus) > 1)
96 actlr |= 1U << 6;
97 else
98 actlr &= ~(1U << 6);
99
100 vcpu->arch.cp15[c1_ACTLR] = actlr;
101}
102
103/* A15 TRM 4.3.49: R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). */
104static bool access_l2ectlr(struct kvm_vcpu *vcpu,
105 const struct coproc_params *p,
106 const struct coproc_reg *r)
107{
108 if (p->is_write)
109 return ignore_write(vcpu, p);
110
111 *vcpu_reg(vcpu, p->Rt1) = 0;
112 return true;
113}
114
115/*
116 * A15-specific CP15 registers.
117 * Important: Must be sorted ascending by CRn, CRm, Op1, Op2
118 */
119static const struct coproc_reg a15_regs[] = {
120 /* MPIDR: we use VMPIDR for guest access. */
121 { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32,
122 NULL, reset_mpidr, c0_MPIDR },
123
124 /* SCTLR: swapped by interrupts.S. */
125 { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
126 NULL, reset_val, c1_SCTLR, 0x00C50078 },
127 /* ACTLR: trapped by HCR.TAC bit. */
128 { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32,
129 access_actlr, reset_actlr, c1_ACTLR },
130 /* CPACR: swapped by interrupts.S. */
131 { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32,
132 NULL, reset_val, c1_CPACR, 0x00000000 },
133
134 /*
135 * L2CTLR access (guest wants to know #CPUs).
136 */
137 { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32,
138 access_l2ctlr, reset_l2ctlr, c9_L2CTLR },
139 { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr},
140
141 /* The Configuration Base Address Register. */
142 { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
143};
144
145static struct kvm_coproc_target_table a15_target_table = {
146 .target = KVM_ARM_TARGET_CORTEX_A15,
147 .table = a15_regs,
148 .num = ARRAY_SIZE(a15_regs),
149};
150
151static int __init coproc_a15_init(void)
152{
153 unsigned int i;
154
155 for (i = 1; i < ARRAY_SIZE(a15_regs); i++)
156 BUG_ON(cmp_reg(&a15_regs[i-1],
157 &a15_regs[i]) >= 0);
158
159 kvm_register_target_coproc_table(&a15_target_table);
160 return 0;
161}
162late_initcall(coproc_a15_init);
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
new file mode 100644
index 000000000000..bdede9e7da51
--- /dev/null
+++ b/arch/arm/kvm/emulate.c
@@ -0,0 +1,402 @@
1/*
2 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
3 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2, as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18
19#include <linux/mm.h>
20#include <linux/kvm_host.h>
21#include <asm/kvm_arm.h>
22#include <asm/kvm_emulate.h>
23#include <asm/opcodes.h>
24#include <trace/events/kvm.h>
25
26#include "trace.h"
27
28#define VCPU_NR_MODES 6
29#define VCPU_REG_OFFSET_USR 0
30#define VCPU_REG_OFFSET_FIQ 1
31#define VCPU_REG_OFFSET_IRQ 2
32#define VCPU_REG_OFFSET_SVC 3
33#define VCPU_REG_OFFSET_ABT 4
34#define VCPU_REG_OFFSET_UND 5
35#define REG_OFFSET(_reg) \
36 (offsetof(struct kvm_regs, _reg) / sizeof(u32))
37
38#define USR_REG_OFFSET(_num) REG_OFFSET(usr_regs.uregs[_num])
39
40static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][15] = {
41 /* USR/SYS Registers */
42 [VCPU_REG_OFFSET_USR] = {
43 USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
44 USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
45 USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
46 USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
47 USR_REG_OFFSET(12), USR_REG_OFFSET(13), USR_REG_OFFSET(14),
48 },
49
50 /* FIQ Registers */
51 [VCPU_REG_OFFSET_FIQ] = {
52 USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
53 USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
54 USR_REG_OFFSET(6), USR_REG_OFFSET(7),
55 REG_OFFSET(fiq_regs[0]), /* r8 */
56 REG_OFFSET(fiq_regs[1]), /* r9 */
57 REG_OFFSET(fiq_regs[2]), /* r10 */
58 REG_OFFSET(fiq_regs[3]), /* r11 */
59 REG_OFFSET(fiq_regs[4]), /* r12 */
60 REG_OFFSET(fiq_regs[5]), /* r13 */
61 REG_OFFSET(fiq_regs[6]), /* r14 */
62 },
63
64 /* IRQ Registers */
65 [VCPU_REG_OFFSET_IRQ] = {
66 USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
67 USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
68 USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
69 USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
70 USR_REG_OFFSET(12),
71 REG_OFFSET(irq_regs[0]), /* r13 */
72 REG_OFFSET(irq_regs[1]), /* r14 */
73 },
74
75 /* SVC Registers */
76 [VCPU_REG_OFFSET_SVC] = {
77 USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
78 USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
79 USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
80 USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
81 USR_REG_OFFSET(12),
82 REG_OFFSET(svc_regs[0]), /* r13 */
83 REG_OFFSET(svc_regs[1]), /* r14 */
84 },
85
86 /* ABT Registers */
87 [VCPU_REG_OFFSET_ABT] = {
88 USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
89 USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
90 USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
91 USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
92 USR_REG_OFFSET(12),
93 REG_OFFSET(abt_regs[0]), /* r13 */
94 REG_OFFSET(abt_regs[1]), /* r14 */
95 },
96
97 /* UND Registers */
98 [VCPU_REG_OFFSET_UND] = {
99 USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
100 USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
101 USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
102 USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
103 USR_REG_OFFSET(12),
104 REG_OFFSET(und_regs[0]), /* r13 */
105 REG_OFFSET(und_regs[1]), /* r14 */
106 },
107};
108
109/*
110 * Return a pointer to the register number valid in the current mode of
111 * the virtual CPU.
112 */
113unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num)
114{
115 unsigned long *reg_array = (unsigned long *)&vcpu->arch.regs;
116 unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK;
117
118 switch (mode) {
119 case USR_MODE...SVC_MODE:
120 mode &= ~MODE32_BIT; /* 0 ... 3 */
121 break;
122
123 case ABT_MODE:
124 mode = VCPU_REG_OFFSET_ABT;
125 break;
126
127 case UND_MODE:
128 mode = VCPU_REG_OFFSET_UND;
129 break;
130
131 case SYSTEM_MODE:
132 mode = VCPU_REG_OFFSET_USR;
133 break;
134
135 default:
136 BUG();
137 }
138
139 return reg_array + vcpu_reg_offsets[mode][reg_num];
140}
141
142/*
143 * Return the SPSR for the current mode of the virtual CPU.
144 */
145unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu)
146{
147 unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK;
148 switch (mode) {
149 case SVC_MODE:
150 return &vcpu->arch.regs.KVM_ARM_SVC_spsr;
151 case ABT_MODE:
152 return &vcpu->arch.regs.KVM_ARM_ABT_spsr;
153 case UND_MODE:
154 return &vcpu->arch.regs.KVM_ARM_UND_spsr;
155 case IRQ_MODE:
156 return &vcpu->arch.regs.KVM_ARM_IRQ_spsr;
157 case FIQ_MODE:
158 return &vcpu->arch.regs.KVM_ARM_FIQ_spsr;
159 default:
160 BUG();
161 }
162}
163
164/*
165 * A conditional instruction is allowed to trap, even though it
166 * wouldn't be executed. So let's re-implement the hardware, in
167 * software!
168 */
169bool kvm_condition_valid(struct kvm_vcpu *vcpu)
170{
171 unsigned long cpsr, cond, insn;
172
173 /*
174 * Exception Code 0 can only happen if we set HCR.TGE to 1, to
175 * catch undefined instructions, and then we won't get past
176 * the arm_exit_handlers test anyway.
177 */
178 BUG_ON(!kvm_vcpu_trap_get_class(vcpu));
179
180 /* Top two bits non-zero? Unconditional. */
181 if (kvm_vcpu_get_hsr(vcpu) >> 30)
182 return true;
183
184 cpsr = *vcpu_cpsr(vcpu);
185
186 /* Is condition field valid? */
187 if ((kvm_vcpu_get_hsr(vcpu) & HSR_CV) >> HSR_CV_SHIFT)
188 cond = (kvm_vcpu_get_hsr(vcpu) & HSR_COND) >> HSR_COND_SHIFT;
189 else {
190 /* This can happen in Thumb mode: examine IT state. */
191 unsigned long it;
192
193 it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3);
194
195 /* it == 0 => unconditional. */
196 if (it == 0)
197 return true;
198
199 /* The cond for this insn works out as the top 4 bits. */
200 cond = (it >> 4);
201 }
202
203 /* Shift makes it look like an ARM-mode instruction */
204 insn = cond << 28;
205 return arm_check_condition(insn, cpsr) != ARM_OPCODE_CONDTEST_FAIL;
206}
207
208/**
209 * adjust_itstate - adjust ITSTATE when emulating instructions in IT-block
210 * @vcpu: The VCPU pointer
211 *
212 * When exceptions occur while instructions are executed in Thumb IF-THEN
213 * blocks, the ITSTATE field of the CPSR is not advanced (updated), so we have
214 * to do this little bit of work manually. The fields map like this:
215 *
216 * IT[7:0] -> CPSR[26:25],CPSR[15:10]
217 */
218static void kvm_adjust_itstate(struct kvm_vcpu *vcpu)
219{
220 unsigned long itbits, cond;
221 unsigned long cpsr = *vcpu_cpsr(vcpu);
222 bool is_arm = !(cpsr & PSR_T_BIT);
223
224 BUG_ON(is_arm && (cpsr & PSR_IT_MASK));
225
226 if (!(cpsr & PSR_IT_MASK))
227 return;
228
229 cond = (cpsr & 0xe000) >> 13;
230 itbits = (cpsr & 0x1c00) >> (10 - 2);
231 itbits |= (cpsr & (0x3 << 25)) >> 25;
232
233 /* Perform ITAdvance (see page A-52 in ARM DDI 0406C) */
234 if ((itbits & 0x7) == 0)
235 itbits = cond = 0;
236 else
237 itbits = (itbits << 1) & 0x1f;
238
239 cpsr &= ~PSR_IT_MASK;
240 cpsr |= cond << 13;
241 cpsr |= (itbits & 0x1c) << (10 - 2);
242 cpsr |= (itbits & 0x3) << 25;
243 *vcpu_cpsr(vcpu) = cpsr;
244}
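
A standalone sketch of the same ITAdvance step, working on the architectural 8-bit IT value (base condition in IT[7:5], mask in IT[4:0]) rather than its scattered CPSR encoding; the example start value is an assumption describing a block with two conditional instructions left:

#include <stdio.h>

static unsigned int it_advance(unsigned int it)
{
	unsigned int cond = (it >> 5) & 0x7;	/* IT[7:5]: base condition */
	unsigned int mask = it & 0x1f;		/* IT[4:0]: remaining mask */

	if ((mask & 0x7) == 0)			/* last instruction of the block */
		cond = mask = 0;
	else
		mask = (mask << 1) & 0x1f;

	return (cond << 5) | mask;
}

int main(void)
{
	unsigned int it = 0x64;	/* assumed: two conditional instructions remain */

	while (it) {
		printf("ITSTATE %02x, condition for next insn: %x\n", it, it >> 4);
		it = it_advance(it);
	}
	printf("IT block finished\n");
	return 0;
}
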
245
246/**
247 * kvm_skip_instr - skip a trapped instruction and proceed to the next
248 * @vcpu: The vcpu pointer
249 */
250void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
251{
252 bool is_thumb;
253
254 is_thumb = !!(*vcpu_cpsr(vcpu) & PSR_T_BIT);
255 if (is_thumb && !is_wide_instr)
256 *vcpu_pc(vcpu) += 2;
257 else
258 *vcpu_pc(vcpu) += 4;
259 kvm_adjust_itstate(vcpu);
260}
261
262
263/******************************************************************************
264 * Inject exceptions into the guest
265 */
266
267static u32 exc_vector_base(struct kvm_vcpu *vcpu)
268{
269 u32 sctlr = vcpu->arch.cp15[c1_SCTLR];
270 u32 vbar = vcpu->arch.cp15[c12_VBAR];
271
272 if (sctlr & SCTLR_V)
273 return 0xffff0000;
274 else /* always have security exceptions */
275 return vbar;
276}
277
278/**
279 * kvm_inject_undefined - inject an undefined exception into the guest
280 * @vcpu: The VCPU to receive the undefined exception
281 *
282 * It is assumed that this code is called from the VCPU thread and that the
283 * VCPU therefore is not currently executing guest code.
284 *
285 * Modelled after TakeUndefInstrException() pseudocode.
286 */
287void kvm_inject_undefined(struct kvm_vcpu *vcpu)
288{
289 unsigned long new_lr_value;
290 unsigned long new_spsr_value;
291 unsigned long cpsr = *vcpu_cpsr(vcpu);
292 u32 sctlr = vcpu->arch.cp15[c1_SCTLR];
293 bool is_thumb = (cpsr & PSR_T_BIT);
294 u32 vect_offset = 4;
295 u32 return_offset = (is_thumb) ? 2 : 4;
296
297 new_spsr_value = cpsr;
298 new_lr_value = *vcpu_pc(vcpu) - return_offset;
299
300 *vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | UND_MODE;
301 *vcpu_cpsr(vcpu) |= PSR_I_BIT;
302 *vcpu_cpsr(vcpu) &= ~(PSR_IT_MASK | PSR_J_BIT | PSR_E_BIT | PSR_T_BIT);
303
304 if (sctlr & SCTLR_TE)
305 *vcpu_cpsr(vcpu) |= PSR_T_BIT;
306 if (sctlr & SCTLR_EE)
307 *vcpu_cpsr(vcpu) |= PSR_E_BIT;
308
309 /* Note: These now point to UND banked copies */
310 *vcpu_spsr(vcpu) = cpsr;
311 *vcpu_reg(vcpu, 14) = new_lr_value;
312
313 /* Branch to exception vector */
314 *vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset;
315}
316
317/*
318 * Modelled after TakeDataAbortException() and TakePrefetchAbortException
319 * pseudocode.
320 */
321static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr)
322{
323 unsigned long new_lr_value;
324 unsigned long new_spsr_value;
325 unsigned long cpsr = *vcpu_cpsr(vcpu);
326 u32 sctlr = vcpu->arch.cp15[c1_SCTLR];
327 bool is_thumb = (cpsr & PSR_T_BIT);
328 u32 vect_offset;
329 u32 return_offset = (is_thumb) ? 4 : 0;
330 bool is_lpae;
331
332 new_spsr_value = cpsr;
333 new_lr_value = *vcpu_pc(vcpu) + return_offset;
334
335 *vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | ABT_MODE;
336 *vcpu_cpsr(vcpu) |= PSR_I_BIT | PSR_A_BIT;
337 *vcpu_cpsr(vcpu) &= ~(PSR_IT_MASK | PSR_J_BIT | PSR_E_BIT | PSR_T_BIT);
338
339 if (sctlr & SCTLR_TE)
340 *vcpu_cpsr(vcpu) |= PSR_T_BIT;
341 if (sctlr & SCTLR_EE)
342 *vcpu_cpsr(vcpu) |= PSR_E_BIT;
343
344 /* Note: These now point to ABT banked copies */
345 *vcpu_spsr(vcpu) = cpsr;
346 *vcpu_reg(vcpu, 14) = new_lr_value;
347
348 if (is_pabt)
349 vect_offset = 12;
350 else
351 vect_offset = 16;
352
353 /* Branch to exception vector */
354 *vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset;
355
356 if (is_pabt) {
357 /* Set IFAR and IFSR */
358 vcpu->arch.cp15[c6_IFAR] = addr;
359 is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31);
360 /* Always give debug fault for now - should give guest a clue */
361 if (is_lpae)
362 vcpu->arch.cp15[c5_IFSR] = 1 << 9 | 0x22;
363 else
364 vcpu->arch.cp15[c5_IFSR] = 2;
365 } else { /* !iabt */
366 /* Set DFAR and DFSR */
367 vcpu->arch.cp15[c6_DFAR] = addr;
368 is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31);
369 /* Always give debug fault for now - should give guest a clue */
370 if (is_lpae)
371 vcpu->arch.cp15[c5_DFSR] = 1 << 9 | 0x22;
372 else
373 vcpu->arch.cp15[c5_DFSR] = 2;
374 }
375
376}
377
378/**
379 * kvm_inject_dabt - inject a data abort into the guest
380 * @vcpu: The VCPU to receive the undefined exception
381 * @addr: The address to report in the DFAR
382 *
383 * It is assumed that this code is called from the VCPU thread and that the
384 * VCPU therefore is not currently executing guest code.
385 */
386void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
387{
388 inject_abt(vcpu, false, addr);
389}
390
391/**
392 * kvm_inject_pabt - inject a prefetch abort into the guest
393 * @vcpu: The VCPU to receive the undefined exception
394 * @addr: The address to report in the DFAR
395 *
396 * It is assumed that this code is called from the VCPU thread and that the
397 * VCPU therefore is not currently executing guest code.
398 */
399void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
400{
401 inject_abt(vcpu, true, addr);
402}
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
new file mode 100644
index 000000000000..152d03612181
--- /dev/null
+++ b/arch/arm/kvm/guest.c
@@ -0,0 +1,239 @@
1/*
2 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
3 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2, as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18
19#include <linux/errno.h>
20#include <linux/err.h>
21#include <linux/kvm_host.h>
22#include <linux/module.h>
23#include <linux/vmalloc.h>
24#include <linux/fs.h>
25#include <asm/cputype.h>
26#include <asm/uaccess.h>
27#include <asm/kvm.h>
28#include <asm/kvm_asm.h>
29#include <asm/kvm_emulate.h>
30#include <asm/kvm_coproc.h>
31
32#define VM_STAT(x) { #x, offsetof(struct kvm, stat.x), KVM_STAT_VM }
33#define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU }
34
35struct kvm_stats_debugfs_item debugfs_entries[] = {
36 { NULL }
37};
38
39int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
40{
41 return 0;
42}
43
44static u64 core_reg_offset_from_id(u64 id)
45{
46 return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE);
47}
48
49static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
50{
51 u32 __user *uaddr = (u32 __user *)(long)reg->addr;
52 struct kvm_regs *regs = &vcpu->arch.regs;
53 u64 off;
54
55 if (KVM_REG_SIZE(reg->id) != 4)
56 return -ENOENT;
57
58 /* Our ID is an index into the kvm_regs struct. */
59 off = core_reg_offset_from_id(reg->id);
60 if (off >= sizeof(*regs) / KVM_REG_SIZE(reg->id))
61 return -ENOENT;
62
63 return put_user(((u32 *)regs)[off], uaddr);
64}
65
66static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
67{
68 u32 __user *uaddr = (u32 __user *)(long)reg->addr;
69 struct kvm_regs *regs = &vcpu->arch.regs;
70 u64 off, val;
71
72 if (KVM_REG_SIZE(reg->id) != 4)
73 return -ENOENT;
74
75 /* Our ID is an index into the kvm_regs struct. */
76 off = core_reg_offset_from_id(reg->id);
77 if (off >= sizeof(*regs) / KVM_REG_SIZE(reg->id))
78 return -ENOENT;
79
80 if (get_user(val, uaddr) != 0)
81 return -EFAULT;
82
83 if (off == KVM_REG_ARM_CORE_REG(usr_regs.ARM_cpsr)) {
84 unsigned long mode = val & MODE_MASK;
85 switch (mode) {
86 case USR_MODE:
87 case FIQ_MODE:
88 case IRQ_MODE:
89 case SVC_MODE:
90 case ABT_MODE:
91 case UND_MODE:
92 break;
93 default:
94 return -EINVAL;
95 }
96 }
97
98 ((u32 *)regs)[off] = val;
99 return 0;
100}
101
102int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
103{
104 return -EINVAL;
105}
106
107int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
108{
109 return -EINVAL;
110}
111
112static unsigned long num_core_regs(void)
113{
114 return sizeof(struct kvm_regs) / sizeof(u32);
115}
116
117/**
118 * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG
119 *
120 * This is for all registers.
121 */
122unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
123{
124 return num_core_regs() + kvm_arm_num_coproc_regs(vcpu);
125}
126
127/**
128 * kvm_arm_copy_reg_indices - get indices of all registers.
129 *
130 * We do core registers right here, then we append coproc regs.
131 */
132int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
133{
134 unsigned int i;
135 const u64 core_reg = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE;
136
137 for (i = 0; i < sizeof(struct kvm_regs)/sizeof(u32); i++) {
138 if (put_user(core_reg | i, uindices))
139 return -EFAULT;
140 uindices++;
141 }
142
143 return kvm_arm_copy_coproc_indices(vcpu, uindices);
144}
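
For completeness, this is how userspace typically consumes these indices: enumerate them with KVM_GET_REG_LIST, then read each one back with KVM_GET_ONE_REG. A sketch, assuming an ARM build, <linux/kvm.h>, and a vcpu_fd from KVM_CREATE_VCPU; only the 32-bit registers are dumped here and the helper name is ours:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static void dump_u32_regs(int vcpu_fd)
{
	struct kvm_reg_list probe = { .n = 0 };
	struct kvm_reg_list *list;
	uint64_t i;

	/* With n == 0 the ioctl fails with E2BIG but reports the real count. */
	if (ioctl(vcpu_fd, KVM_GET_REG_LIST, &probe) < 0 && errno != E2BIG)
		return;

	list = malloc(sizeof(*list) + probe.n * sizeof(uint64_t));
	if (!list)
		return;
	list->n = probe.n;

	if (ioctl(vcpu_fd, KVM_GET_REG_LIST, list) == 0) {
		for (i = 0; i < list->n; i++) {
			uint32_t val;
			struct kvm_one_reg reg = {
				.id   = list->reg[i],
				.addr = (uintptr_t)&val,
			};

			if ((reg.id & KVM_REG_SIZE_MASK) != KVM_REG_SIZE_U32)
				continue;	/* skip the 64-bit cp15 pairs */
			if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) == 0)
				printf("%016llx = %08x\n",
				       (unsigned long long)reg.id, val);
		}
	}
	free(list);
}
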
145
146int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
147{
148 /* We currently use nothing arch-specific in upper 32 bits */
149 if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM >> 32)
150 return -EINVAL;
151
152 /* Register group 16 means we want a core register. */
153 if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
154 return get_core_reg(vcpu, reg);
155
156 return kvm_arm_coproc_get_reg(vcpu, reg);
157}
158
159int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
160{
161 /* We currently use nothing arch-specific in upper 32 bits */
162 if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM >> 32)
163 return -EINVAL;
164
165 /* Register group 16 means we set a core register. */
166 if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
167 return set_core_reg(vcpu, reg);
168
169 return kvm_arm_coproc_set_reg(vcpu, reg);
170}
171
172int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
173 struct kvm_sregs *sregs)
174{
175 return -EINVAL;
176}
177
178int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
179 struct kvm_sregs *sregs)
180{
181 return -EINVAL;
182}
183
184int __attribute_const__ kvm_target_cpu(void)
185{
186 unsigned long implementor = read_cpuid_implementor();
187 unsigned long part_number = read_cpuid_part_number();
188
189 if (implementor != ARM_CPU_IMP_ARM)
190 return -EINVAL;
191
192 switch (part_number) {
193 case ARM_CPU_PART_CORTEX_A15:
194 return KVM_ARM_TARGET_CORTEX_A15;
195 default:
196 return -EINVAL;
197 }
198}
199
200int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
201 const struct kvm_vcpu_init *init)
202{
203 unsigned int i;
204
205 /* We can only do a cortex A15 for now. */
206 if (init->target != kvm_target_cpu())
207 return -EINVAL;
208
209 vcpu->arch.target = init->target;
210 bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
211
212 /* -ENOENT for unknown features, -EINVAL for invalid combinations. */
213 for (i = 0; i < sizeof(init->features) * 8; i++) {
214 if (test_bit(i, (void *)init->features)) {
215 if (i >= KVM_VCPU_MAX_FEATURES)
216 return -ENOENT;
217 set_bit(i, vcpu->arch.features);
218 }
219 }
220
221 /* Now we know what it is, we can reset it. */
222 return kvm_reset_vcpu(vcpu);
223}
224
225int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
226{
227 return -EINVAL;
228}
229
230int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
231{
232 return -EINVAL;
233}
234
235int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
236 struct kvm_translation *tr)
237{
238 return -EINVAL;
239}
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
new file mode 100644
index 000000000000..26ad17310a1e
--- /dev/null
+++ b/arch/arm/kvm/handle_exit.c
@@ -0,0 +1,164 @@
1/*
2 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
3 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2, as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18
19#include <linux/kvm.h>
20#include <linux/kvm_host.h>
21#include <asm/kvm_emulate.h>
22#include <asm/kvm_coproc.h>
23#include <asm/kvm_mmu.h>
24#include <asm/kvm_psci.h>
25#include <trace/events/kvm.h>
26
27#include "trace.h"
28
29
30
31typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
32
33static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
34{
35 /* SVC called from Hyp mode should never get here */
36 kvm_debug("SVC called from Hyp mode shouldn't go here\n");
37 BUG();
38 return -EINVAL; /* Squash warning */
39}
40
41static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
42{
43 trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
44 kvm_vcpu_hvc_get_imm(vcpu));
45
46 if (kvm_psci_call(vcpu))
47 return 1;
48
49 kvm_inject_undefined(vcpu);
50 return 1;
51}
52
53static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
54{
55 if (kvm_psci_call(vcpu))
56 return 1;
57
58 kvm_inject_undefined(vcpu);
59 return 1;
60}
61
62static int handle_pabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
63{
64 /* The hypervisor should never cause aborts */
65 kvm_err("Prefetch Abort taken from Hyp mode at %#08lx (HSR: %#08x)\n",
66 kvm_vcpu_get_hfar(vcpu), kvm_vcpu_get_hsr(vcpu));
67 return -EFAULT;
68}
69
70static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
71{
72 /* This is either an error in the world-switch code or an external abort */
73 kvm_err("Data Abort taken from Hyp mode at %#08lx (HSR: %#08x)\n",
74 kvm_vcpu_get_hfar(vcpu), kvm_vcpu_get_hsr(vcpu));
75 return -EFAULT;
76}
77
78/**
79 * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest
80 * @vcpu: the vcpu pointer
81 * @run: the kvm_run structure pointer
82 *
83 * Blocks the VCPU on its wait queue, which halts execution of world-switches
84 * and schedules other host processes until there is an incoming IRQ or FIQ
85 * to the VM.
86 */
87static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
88{
89 trace_kvm_wfi(*vcpu_pc(vcpu));
90 kvm_vcpu_block(vcpu);
91 return 1;
92}
93
94static exit_handle_fn arm_exit_handlers[] = {
95 [HSR_EC_WFI] = kvm_handle_wfi,
96 [HSR_EC_CP15_32] = kvm_handle_cp15_32,
97 [HSR_EC_CP15_64] = kvm_handle_cp15_64,
98 [HSR_EC_CP14_MR] = kvm_handle_cp14_access,
99 [HSR_EC_CP14_LS] = kvm_handle_cp14_load_store,
100 [HSR_EC_CP14_64] = kvm_handle_cp14_access,
101 [HSR_EC_CP_0_13] = kvm_handle_cp_0_13_access,
102 [HSR_EC_CP10_ID] = kvm_handle_cp10_id,
103 [HSR_EC_SVC_HYP] = handle_svc_hyp,
104 [HSR_EC_HVC] = handle_hvc,
105 [HSR_EC_SMC] = handle_smc,
106 [HSR_EC_IABT] = kvm_handle_guest_abort,
107 [HSR_EC_IABT_HYP] = handle_pabt_hyp,
108 [HSR_EC_DABT] = kvm_handle_guest_abort,
109 [HSR_EC_DABT_HYP] = handle_dabt_hyp,
110};
111
112static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
113{
114 u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
115
116 if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
117 !arm_exit_handlers[hsr_ec]) {
118 kvm_err("Unkown exception class: hsr: %#08x\n",
119 (unsigned int)kvm_vcpu_get_hsr(vcpu));
120 BUG();
121 }
122
123 return arm_exit_handlers[hsr_ec];
124}
125
126/*
127 * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
128 * proper exit to userspace.
129 */
130int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
131 int exception_index)
132{
133 exit_handle_fn exit_handler;
134
135 switch (exception_index) {
136 case ARM_EXCEPTION_IRQ:
137 return 1;
138 case ARM_EXCEPTION_UNDEFINED:
139 kvm_err("Undefined exception in Hyp mode at: %#08lx\n",
140 kvm_vcpu_get_hyp_pc(vcpu));
141 BUG();
142 panic("KVM: Hypervisor undefined exception!\n");
143 case ARM_EXCEPTION_DATA_ABORT:
144 case ARM_EXCEPTION_PREF_ABORT:
145 case ARM_EXCEPTION_HVC:
146 /*
147 * See ARM ARM B1.14.1: "Hyp traps on instructions
148 * that fail their condition code check"
149 */
150 if (!kvm_condition_valid(vcpu)) {
151 kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
152 return 1;
153 }
154
155 exit_handler = kvm_get_exit_handler(vcpu);
156
157 return exit_handler(vcpu, run);
158 default:
159 kvm_pr_unimpl("Unsupported exception type: %d",
160 exception_index);
161 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
162 return 0;
163 }
164}
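
A heavily simplified sketch of how a run loop can consume this return convention (the real loop lives in arch/arm/kvm/arm.c and additionally deals with signals, the vgic and the timer; the function name is ours and the declarations are assumed to be in scope):

static int run_guest(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
	int ret = 1;

	while (ret > 0) {
		/* World-switch into the guest; returns an exception index. */
		int exception_index = kvm_call_hyp(__kvm_vcpu_run, vcpu);

		ret = handle_exit(vcpu, run, exception_index);
	}

	/*
	 * ret == 0: run->exit_reason says why we return to userspace.
	 * ret <  0: error, propagated out of the KVM_RUN ioctl.
	 */
	return ret;
}
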
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
new file mode 100644
index 000000000000..9f37a79b880b
--- /dev/null
+++ b/arch/arm/kvm/init.S
@@ -0,0 +1,114 @@
1/*
2 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
3 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2, as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18
19#include <linux/linkage.h>
20#include <asm/unified.h>
21#include <asm/asm-offsets.h>
22#include <asm/kvm_asm.h>
23#include <asm/kvm_arm.h>
24
25/********************************************************************
26 * Hypervisor initialization
27 * - should be called with:
28 * r0,r1 = Hypervisor pgd pointer
29 * r2 = top of Hyp stack (kernel VA)
30 * r3 = pointer to hyp vectors
31 */
32
33 .text
34 .pushsection .hyp.idmap.text,"ax"
35 .align 5
36__kvm_hyp_init:
37 .globl __kvm_hyp_init
38
39 @ Hyp-mode exception vector
40 W(b) .
41 W(b) .
42 W(b) .
43 W(b) .
44 W(b) .
45 W(b) __do_hyp_init
46 W(b) .
47 W(b) .
48
49__do_hyp_init:
50 @ Set the HTTBR to point to the hypervisor PGD pointer passed
51 mcrr p15, 4, r0, r1, c2
52
53 @ Set the HTCR and VTCR to the same shareability and cacheability
54 @ settings as the non-secure TTBCR and with T0SZ == 0.
55 mrc p15, 4, r0, c2, c0, 2 @ HTCR
56 ldr r12, =HTCR_MASK
57 bic r0, r0, r12
58 mrc p15, 0, r1, c2, c0, 2 @ TTBCR
59 and r1, r1, #(HTCR_MASK & ~TTBCR_T0SZ)
60 orr r0, r0, r1
61 mcr p15, 4, r0, c2, c0, 2 @ HTCR
62
63 mrc p15, 4, r1, c2, c1, 2 @ VTCR
64 ldr r12, =VTCR_MASK
65 bic r1, r1, r12
66 bic r0, r0, #(~VTCR_HTCR_SH) @ clear non-reusable HTCR bits
67 orr r1, r0, r1
68 orr r1, r1, #(KVM_VTCR_SL0 | KVM_VTCR_T0SZ | KVM_VTCR_S)
69 mcr p15, 4, r1, c2, c1, 2 @ VTCR
70
71 @ Use the same memory attributes for hyp. accesses as the kernel
72 @ (copy MAIRx to HMAIRx).
73 mrc p15, 0, r0, c10, c2, 0
74 mcr p15, 4, r0, c10, c2, 0
75 mrc p15, 0, r0, c10, c2, 1
76 mcr p15, 4, r0, c10, c2, 1
77
78 @ Set the HSCTLR to:
79 @ - ARM/THUMB exceptions: Kernel config (Thumb-2 kernel)
80 @ - Endianness: Kernel config
81 @ - Fast Interrupt Features: Kernel config
82 @ - Write permission implies XN: disabled
83 @ - Instruction cache: enabled
84 @ - Data/Unified cache: enabled
85 @ - Memory alignment checks: enabled
86 @ - MMU: enabled (this code must be run from an identity mapping)
87 mrc p15, 4, r0, c1, c0, 0 @ HSCTLR
88 ldr r12, =HSCTLR_MASK
89 bic r0, r0, r12
90 mrc p15, 0, r1, c1, c0, 0 @ SCTLR
91 ldr r12, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C)
92 and r1, r1, r12
93 ARM( ldr r12, =(HSCTLR_M | HSCTLR_A) )
94 THUMB( ldr r12, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE) )
95 orr r1, r1, r12
96 orr r0, r0, r1
97 isb
98 mcr p15, 4, r0, c1, c0, 0 @ HSCTLR
99 isb
100
101 @ Set stack pointer and return to the kernel
102 mov sp, r2
103
104 @ Set HVBAR to point to the HYP vectors
105 mcr p15, 4, r3, c12, c0, 0 @ HVBAR
106
107 eret
108
109 .ltorg
110
111 .globl __kvm_hyp_init_end
112__kvm_hyp_init_end:
113
114 .popsection
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
new file mode 100644
index 000000000000..f7793df62f58
--- /dev/null
+++ b/arch/arm/kvm/interrupts.S
@@ -0,0 +1,487 @@
1/*
2 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
3 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2, as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18
19#include <linux/linkage.h>
20#include <linux/const.h>
21#include <asm/unified.h>
22#include <asm/page.h>
23#include <asm/ptrace.h>
24#include <asm/asm-offsets.h>
25#include <asm/kvm_asm.h>
26#include <asm/kvm_arm.h>
27#include <asm/vfpmacros.h>
28#include "interrupts_head.S"
29
30 .text
31
32__kvm_hyp_code_start:
33 .globl __kvm_hyp_code_start
34
35/********************************************************************
36 * Flush per-VMID TLBs
37 *
38 * void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
39 *
40 * We rely on the hardware to broadcast the TLB invalidation to all CPUs
41 * inside the inner-shareable domain (which is the case for all v7
42 * implementations). If we come across a non-IS SMP implementation, we'll
43 * have to use an IPI based mechanism. Until then, we stick to the simple
44 * hardware assisted version.
45 *
46 * As v7 does not support flushing per IPA, just nuke the whole TLB
47 * instead, ignoring the ipa value.
48 */
49ENTRY(__kvm_tlb_flush_vmid_ipa)
50 push {r2, r3}
51
52 add r0, r0, #KVM_VTTBR
53 ldrd r2, r3, [r0]
54 mcrr p15, 6, r2, r3, c2 @ Write VTTBR
55 isb
56 mcr p15, 0, r0, c8, c3, 0 @ TLBIALLIS (rt ignored)
57 dsb
58 isb
59 mov r2, #0
60 mov r3, #0
61 mcrr p15, 6, r2, r3, c2 @ Back to VMID #0
62 isb @ Not necessary if followed by eret
63
64 pop {r2, r3}
65 bx lr
66ENDPROC(__kvm_tlb_flush_vmid_ipa)
67
68/********************************************************************
69 * Flush TLBs and instruction caches of all CPUs inside the inner-shareable
70 * domain, for all VMIDs
71 *
72 * void __kvm_flush_vm_context(void);
73 */
74ENTRY(__kvm_flush_vm_context)
75 mov r0, #0 @ rn parameter for c15 flushes is SBZ
76
77 /* Invalidate NS Non-Hyp TLB Inner Shareable (TLBIALLNSNHIS) */
78 mcr p15, 4, r0, c8, c3, 4
79 /* Invalidate instruction caches Inner Shareable (ICIALLUIS) */
80 mcr p15, 0, r0, c7, c1, 0
81 dsb
82 isb @ Not necessary if followed by eret
83
84 bx lr
85ENDPROC(__kvm_flush_vm_context)
86
87
88/********************************************************************
89 * Hypervisor world-switch code
90 *
91 *
92 * int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
93 */
94ENTRY(__kvm_vcpu_run)
95 @ Save the vcpu pointer
96 mcr p15, 4, vcpu, c13, c0, 2 @ HTPIDR
97
98 save_host_regs
99
100 restore_vgic_state
101 restore_timer_state
102
103 @ Store hardware CP15 state and load guest state
104 read_cp15_state store_to_vcpu = 0
105 write_cp15_state read_from_vcpu = 1
106
107 @ If the host kernel has not been configured with VFPv3 support,
108 @ then it is safer if we prevent guests from using it as well.
109#ifdef CONFIG_VFPv3
110 @ Set FPEXC_EN so the guest doesn't trap floating point instructions
111 VFPFMRX r2, FPEXC @ VMRS
112 push {r2}
113 orr r2, r2, #FPEXC_EN
114 VFPFMXR FPEXC, r2 @ VMSR
115#endif
116
117 @ Configure Hyp-role
118 configure_hyp_role vmentry
119
120 @ Trap coprocessor CRx accesses
121 set_hstr vmentry
122 set_hcptr vmentry, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11))
123 set_hdcr vmentry
124
125 @ Write configured ID register into MIDR alias
126 ldr r1, [vcpu, #VCPU_MIDR]
127 mcr p15, 4, r1, c0, c0, 0
128
129 @ Write guest view of MPIDR into VMPIDR
130 ldr r1, [vcpu, #CP15_OFFSET(c0_MPIDR)]
131 mcr p15, 4, r1, c0, c0, 5
132
133 @ Set up guest memory translation
134 ldr r1, [vcpu, #VCPU_KVM]
135 add r1, r1, #KVM_VTTBR
136 ldrd r2, r3, [r1]
137 mcrr p15, 6, r2, r3, c2 @ Write VTTBR
138
139 @ We're all done, just restore the GPRs and go to the guest
140 restore_guest_regs
141 clrex @ Clear exclusive monitor
142 eret
143
144__kvm_vcpu_return:
145 /*
146 * return convention:
147 * guest r0, r1, r2 saved on the stack
148 * r0: vcpu pointer
149 * r1: exception code
150 */
151 save_guest_regs
152
153 @ Set VMID == 0
154 mov r2, #0
155 mov r3, #0
156 mcrr p15, 6, r2, r3, c2 @ Write VTTBR
157
158 @ Don't trap coprocessor accesses for host kernel
159 set_hstr vmexit
160 set_hdcr vmexit
161 set_hcptr vmexit, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11))
162
163#ifdef CONFIG_VFPv3
164 @ Save floating point registers if we let the guest use them.
165 tst r2, #(HCPTR_TCP(10) | HCPTR_TCP(11))
166 bne after_vfp_restore
167
168 @ Switch VFP/NEON hardware state to the host's
169 add r7, vcpu, #VCPU_VFP_GUEST
170 store_vfp_state r7
171 add r7, vcpu, #VCPU_VFP_HOST
172 ldr r7, [r7]
173 restore_vfp_state r7
174
175after_vfp_restore:
176 @ Restore FPEXC_EN which we clobbered on entry
177 pop {r2}
178 VFPFMXR FPEXC, r2
179#endif
180
181 @ Reset Hyp-role
182 configure_hyp_role vmexit
183
184 @ Let host read hardware MIDR
185 mrc p15, 0, r2, c0, c0, 0
186 mcr p15, 4, r2, c0, c0, 0
187
188 @ Back to hardware MPIDR
189 mrc p15, 0, r2, c0, c0, 5
190 mcr p15, 4, r2, c0, c0, 5
191
192 @ Store guest CP15 state and restore host state
193 read_cp15_state store_to_vcpu = 1
194 write_cp15_state read_from_vcpu = 0
195
196 save_timer_state
197 save_vgic_state
198
199 restore_host_regs
200 clrex @ Clear exclusive monitor
201 mov r0, r1 @ Return the return code
202 mov r1, #0 @ Clear upper bits in return value
203 bx lr @ return to IOCTL
204
205/********************************************************************
206 * Call function in Hyp mode
207 *
208 *
209 * u64 kvm_call_hyp(void *hypfn, ...);
210 *
211 * This is not really a variadic function in the classic C-way and care must
212 * be taken when calling this to ensure parameters are passed in registers
213 * only, since the stack will change between the caller and the callee.
214 *
215 * Call the function with the first argument containing a pointer to the
216 * function you wish to call in Hyp mode, and subsequent arguments will be
217 * passed as r0, r1, and r2 (a maximum of 3 arguments in addition to the
218 * function pointer can be passed). The function being called must be mapped
219 * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are
220 * passed in r0 and r1.
221 *
222 * The calling convention follows the standard AAPCS:
223 * r0 - r3: caller save
224 * r12: caller save
225 * rest: callee save
226 */
227ENTRY(kvm_call_hyp)
228 hvc #0
229 bx lr
230
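
As a usage illustration, a host-side C wrapper around one of the Hyp functions above might look like the sketch below; the wrapper name is ours, and it assumes the usual KVM/ARM declarations from asm/kvm_host.h and asm/kvm_asm.h:

#include <linux/kvm_host.h>
#include <asm/kvm_asm.h>

static void flush_guest_tlb(struct kvm *kvm, phys_addr_t ipa)
{
	/*
	 * Compiles down to an HVC #0: the Hyp vector recognises VMID 0,
	 * calls the function originally passed in r0 with its arguments
	 * shifted down into r0-r2, and erets back to SVC.
	 */
	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
}
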
231/********************************************************************
232 * Hypervisor exception vector and handlers
233 *
234 *
235 * The KVM/ARM Hypervisor ABI is defined as follows:
236 *
237 * Entry to Hyp mode from the host kernel will happen _only_ when an HVC
238 * instruction is issued since all traps are disabled when running the host
239 * kernel as per the Hyp-mode initialization at boot time.
240 *
241 * HVC instructions cause a trap to the vector page + offset 0x14 (see hyp_hvc
242 * below) when the HVC instruction is called from SVC mode (i.e. a guest or the
243 * host kernel) and they cause a trap to the vector page + offset 0x8 when HVC
244 * instructions are called from within Hyp-mode.
245 *
246 * Hyp-ABI: Calling HYP-mode functions from host (in SVC mode):
247 * Switching to Hyp mode is done through a simple HVC #0 instruction. The
248 * exception vector code will check that the HVC comes from VMID==0 and if
249 * so will push the necessary state (SPSR, lr_usr) on the Hyp stack.
250 * - r0 contains a pointer to a HYP function
251 * - r1, r2, and r3 contain arguments to the above function.
252 * - The HYP function will be called with its arguments in r0, r1 and r2.
253 * On HYP function return, we return directly to SVC.
254 *
255 * Note that the above is used to execute code in Hyp-mode from a host-kernel
256 * point of view, and is a different concept from performing a world-switch and
257 * executing guest code in SVC mode (with a VMID != 0).
258 */
259
260/* Handle undef, svc, pabt, or dabt by crashing with a user notice */
261.macro bad_exception exception_code, panic_str
262 push {r0-r2}
263 mrrc p15, 6, r0, r1, c2 @ Read VTTBR
264 lsr r1, r1, #16
265 ands r1, r1, #0xff
266 beq 99f
267
268 load_vcpu @ Load VCPU pointer
269 .if \exception_code == ARM_EXCEPTION_DATA_ABORT
270 mrc p15, 4, r2, c5, c2, 0 @ HSR
271 mrc p15, 4, r1, c6, c0, 0 @ HDFAR
272 str r2, [vcpu, #VCPU_HSR]
273 str r1, [vcpu, #VCPU_HxFAR]
274 .endif
275 .if \exception_code == ARM_EXCEPTION_PREF_ABORT
276 mrc p15, 4, r2, c5, c2, 0 @ HSR
277 mrc p15, 4, r1, c6, c0, 2 @ HIFAR
278 str r2, [vcpu, #VCPU_HSR]
279 str r1, [vcpu, #VCPU_HxFAR]
280 .endif
281 mov r1, #\exception_code
282 b __kvm_vcpu_return
283
284 @ We were in the host already. Let's craft a panic-ing return to SVC.
28599: mrs r2, cpsr
286 bic r2, r2, #MODE_MASK
287 orr r2, r2, #SVC_MODE
288THUMB( orr r2, r2, #PSR_T_BIT )
289 msr spsr_cxsf, r2
290 mrs r1, ELR_hyp
291 ldr r2, =BSYM(panic)
292 msr ELR_hyp, r2
293 ldr r0, =\panic_str
294 eret
295.endm
296
297 .text
298
299 .align 5
300__kvm_hyp_vector:
301 .globl __kvm_hyp_vector
302
303 @ Hyp-mode exception vector
304 W(b) hyp_reset
305 W(b) hyp_undef
306 W(b) hyp_svc
307 W(b) hyp_pabt
308 W(b) hyp_dabt
309 W(b) hyp_hvc
310 W(b) hyp_irq
311 W(b) hyp_fiq
312
313 .align
314hyp_reset:
315 b hyp_reset
316
317 .align
318hyp_undef:
319 bad_exception ARM_EXCEPTION_UNDEFINED, und_die_str
320
321 .align
322hyp_svc:
323 bad_exception ARM_EXCEPTION_HVC, svc_die_str
324
325 .align
326hyp_pabt:
327 bad_exception ARM_EXCEPTION_PREF_ABORT, pabt_die_str
328
329 .align
330hyp_dabt:
331 bad_exception ARM_EXCEPTION_DATA_ABORT, dabt_die_str
332
333 .align
334hyp_hvc:
335 /*
336	 * Getting here is either because of a trap from a guest or from calling
337 * HVC from the host kernel, which means "switch to Hyp mode".
338 */
339 push {r0, r1, r2}
340
341 @ Check syndrome register
342 mrc p15, 4, r1, c5, c2, 0 @ HSR
343 lsr r0, r1, #HSR_EC_SHIFT
344#ifdef CONFIG_VFPv3
345 cmp r0, #HSR_EC_CP_0_13
346 beq switch_to_guest_vfp
347#endif
348 cmp r0, #HSR_EC_HVC
349 bne guest_trap @ Not HVC instr.
350
351 /*
352 * Let's check if the HVC came from VMID 0 and allow simple
353 * switch to Hyp mode
354 */
355 mrrc p15, 6, r0, r2, c2
356 lsr r2, r2, #16
357 and r2, r2, #0xff
358 cmp r2, #0
359 bne guest_trap @ Guest called HVC
360
361host_switch_to_hyp:
362 pop {r0, r1, r2}
363
364 push {lr}
365 mrs lr, SPSR
366 push {lr}
367
368 mov lr, r0
369 mov r0, r1
370 mov r1, r2
371 mov r2, r3
372
373THUMB( orr lr, #1)
374 blx lr @ Call the HYP function
375
376 pop {lr}
377 msr SPSR_csxf, lr
378 pop {lr}
379 eret
380
381guest_trap:
382 load_vcpu @ Load VCPU pointer to r0
383 str r1, [vcpu, #VCPU_HSR]
384
385 @ Check if we need the fault information
386 lsr r1, r1, #HSR_EC_SHIFT
387 cmp r1, #HSR_EC_IABT
388 mrceq p15, 4, r2, c6, c0, 2 @ HIFAR
389 beq 2f
390 cmp r1, #HSR_EC_DABT
391 bne 1f
392 mrc p15, 4, r2, c6, c0, 0 @ HDFAR
393
3942: str r2, [vcpu, #VCPU_HxFAR]
395
396 /*
397 * B3.13.5 Reporting exceptions taken to the Non-secure PL2 mode:
398 *
399 * Abort on the stage 2 translation for a memory access from a
400 * Non-secure PL1 or PL0 mode:
401 *
402 * For any Access flag fault or Translation fault, and also for any
403 * Permission fault on the stage 2 translation of a memory access
404 * made as part of a translation table walk for a stage 1 translation,
405 * the HPFAR holds the IPA that caused the fault. Otherwise, the HPFAR
406 * is UNKNOWN.
407 */
408
409 /* Check for permission fault, and S1PTW */
410 mrc p15, 4, r1, c5, c2, 0 @ HSR
411 and r0, r1, #HSR_FSC_TYPE
412 cmp r0, #FSC_PERM
413 tsteq r1, #(1 << 7) @ S1PTW
414 mrcne p15, 4, r2, c6, c0, 4 @ HPFAR
415 bne 3f
416
417 /* Resolve IPA using the xFAR */
418 mcr p15, 0, r2, c7, c8, 0 @ ATS1CPR
419 isb
420 mrrc p15, 0, r0, r1, c7 @ PAR
421 tst r0, #1
422 bne 4f @ Failed translation
423 ubfx r2, r0, #12, #20
424 lsl r2, r2, #4
425 orr r2, r2, r1, lsl #24
426
4273: load_vcpu @ Load VCPU pointer to r0
428 str r2, [r0, #VCPU_HPFAR]
429
4301: mov r1, #ARM_EXCEPTION_HVC
431 b __kvm_vcpu_return
432
4334: pop {r0, r1, r2} @ Failed translation, return to guest
434 eret
435
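The PAR-to-HPFAR fix-up above is compact; read as C it amounts to the sketch below (par_lo/par_hi are the two words of the 64-bit PAR read, names invented for illustration). The failed-translation case, PAR bit 0 set, is the branch to label 4 above, and the result is stored in HPFAR layout, i.e. IPA[39:12] in bits [31:4]:

	/* Illustrative only; not part of the patch. */
	static u32 par_to_hpfar(u32 par_lo, u32 par_hi)
	{
		u32 hpfar;

		hpfar  = ((par_lo >> 12) & 0xfffff) << 4; /* IPA[31:12] -> HPFAR[23:4]  */
		hpfar |= (par_hi & 0xff) << 24;           /* IPA[39:32] -> HPFAR[31:24] */
		return hpfar;                             /* stored at VCPU_HPFAR above */
	}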
436/*
437 * If VFPv3 support is not available, then we will not switch the VFP
438 * registers; however cp10 and cp11 accesses will still trap and fallback
439 * to the regular coprocessor emulation code, which currently will
440 * inject an undefined exception to the guest.
441 */
442#ifdef CONFIG_VFPv3
443switch_to_guest_vfp:
444 load_vcpu @ Load VCPU pointer to r0
445 push {r3-r7}
446
447 @ NEON/VFP used. Turn on VFP access.
448 set_hcptr vmexit, (HCPTR_TCP(10) | HCPTR_TCP(11))
449
450 @ Switch VFP/NEON hardware state to the guest's
451 add r7, r0, #VCPU_VFP_HOST
452 ldr r7, [r7]
453 store_vfp_state r7
454 add r7, r0, #VCPU_VFP_GUEST
455 restore_vfp_state r7
456
457 pop {r3-r7}
458 pop {r0-r2}
459 eret
460#endif
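Taken together with the HCPTR check in __kvm_vcpu_return earlier in this file, guest VFP/NEON state is switched lazily: cp10/cp11 accesses trap until the guest actually touches the FP unit, and the exit path only switches the state back if that trap fired. A rough C-level summary of the policy (all helper and field names are invented for this sketch, not real kernel API):

	static void lazy_vfp_on_first_guest_use(struct vcpu_sketch *vcpu)
	{
		hcptr_clear(HCPTR_TCP(10) | HCPTR_TCP(11)); /* stop trapping FP access */
		vfp_save(vcpu->host_vfp);                   /* hardware state is the host's */
		vfp_restore(&vcpu->guest_vfp);
	}

	static void lazy_vfp_on_exit(struct vcpu_sketch *vcpu, u32 hcptr_at_exit)
	{
		/* traps still set => the guest never used FP, nothing to switch back */
		if (hcptr_at_exit & (HCPTR_TCP(10) | HCPTR_TCP(11)))
			return;
		vfp_save(&vcpu->guest_vfp);
		vfp_restore(vcpu->host_vfp);
	}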
461
462 .align
463hyp_irq:
464 push {r0, r1, r2}
465 mov r1, #ARM_EXCEPTION_IRQ
466 load_vcpu @ Load VCPU pointer to r0
467 b __kvm_vcpu_return
468
469 .align
470hyp_fiq:
471 b hyp_fiq
472
473 .ltorg
474
475__kvm_hyp_code_end:
476 .globl __kvm_hyp_code_end
477
478 .section ".rodata"
479
480und_die_str:
481 .ascii "unexpected undefined exception in Hyp mode at: %#08x"
482pabt_die_str:
483 .ascii "unexpected prefetch abort in Hyp mode at: %#08x"
484dabt_die_str:
485 .ascii "unexpected data abort in Hyp mode at: %#08x"
486svc_die_str:
487 .ascii "unexpected HVC/SVC trap in Hyp mode at: %#08x"
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
new file mode 100644
index 000000000000..3c8f2f0b4c5e
--- /dev/null
+++ b/arch/arm/kvm/interrupts_head.S
@@ -0,0 +1,605 @@
1#include <linux/irqchip/arm-gic.h>
2
3#define VCPU_USR_REG(_reg_nr) (VCPU_USR_REGS + (_reg_nr * 4))
4#define VCPU_USR_SP (VCPU_USR_REG(13))
5#define VCPU_USR_LR (VCPU_USR_REG(14))
6#define CP15_OFFSET(_cp15_reg_idx) (VCPU_CP15 + (_cp15_reg_idx * 4))
7
8/*
9 * Many of these macros need to access the VCPU structure, which is always
10 * held in r0. These macros should never clobber r1, as it is used to hold the
11 * exception code on the return path (except of course the macro that switches
12 * all the registers before the final jump to the VM).
13 */
14vcpu .req r0 @ vcpu pointer always in r0
15
16/* Clobbers {r2-r6} */
17.macro store_vfp_state vfp_base
18 @ The VFPFMRX and VFPFMXR macros are the VMRS and VMSR instructions
19 VFPFMRX r2, FPEXC
20 @ Make sure VFP is enabled so we can touch the registers.
21 orr r6, r2, #FPEXC_EN
22 VFPFMXR FPEXC, r6
23
24 VFPFMRX r3, FPSCR
25 tst r2, #FPEXC_EX @ Check for VFP Subarchitecture
26 beq 1f
27	@ If FPEXC_EX is 0, then FPINST/FPINST2 reads are unpredictable, so
28 @ we only need to save them if FPEXC_EX is set.
29 VFPFMRX r4, FPINST
30 tst r2, #FPEXC_FP2V
31 VFPFMRX r5, FPINST2, ne @ vmrsne
32 bic r6, r2, #FPEXC_EX @ FPEXC_EX disable
33 VFPFMXR FPEXC, r6
341:
35 VFPFSTMIA \vfp_base, r6 @ Save VFP registers
36 stm \vfp_base, {r2-r5} @ Save FPEXC, FPSCR, FPINST, FPINST2
37.endm
38
39/* Assume FPEXC_EN is on and FPEXC_EX is off, clobbers {r2-r6} */
40.macro restore_vfp_state vfp_base
41 VFPFLDMIA \vfp_base, r6 @ Load VFP registers
42 ldm \vfp_base, {r2-r5} @ Load FPEXC, FPSCR, FPINST, FPINST2
43
44 VFPFMXR FPSCR, r3
45 tst r2, #FPEXC_EX @ Check for VFP Subarchitecture
46 beq 1f
47 VFPFMXR FPINST, r4
48 tst r2, #FPEXC_FP2V
49 VFPFMXR FPINST2, r5, ne
501:
51 VFPFMXR FPEXC, r2 @ FPEXC (last, in case !EN)
52.endm
53
54/* These are simply for the macros to work - values don't have meaning */
55.equ usr, 0
56.equ svc, 1
57.equ abt, 2
58.equ und, 3
59.equ irq, 4
60.equ fiq, 5
61
62.macro push_host_regs_mode mode
63 mrs r2, SP_\mode
64 mrs r3, LR_\mode
65 mrs r4, SPSR_\mode
66 push {r2, r3, r4}
67.endm
68
69/*
70 * Store all host persistent registers on the stack.
71 * Clobbers all registers, in all modes, except r0 and r1.
72 */
73.macro save_host_regs
74 /* Hyp regs. Only ELR_hyp (SPSR_hyp already saved) */
75 mrs r2, ELR_hyp
76 push {r2}
77
78 /* usr regs */
79 push {r4-r12} @ r0-r3 are always clobbered
80 mrs r2, SP_usr
81 mov r3, lr
82 push {r2, r3}
83
84 push_host_regs_mode svc
85 push_host_regs_mode abt
86 push_host_regs_mode und
87 push_host_regs_mode irq
88
89 /* fiq regs */
90 mrs r2, r8_fiq
91 mrs r3, r9_fiq
92 mrs r4, r10_fiq
93 mrs r5, r11_fiq
94 mrs r6, r12_fiq
95 mrs r7, SP_fiq
96 mrs r8, LR_fiq
97 mrs r9, SPSR_fiq
98 push {r2-r9}
99.endm
100
101.macro pop_host_regs_mode mode
102 pop {r2, r3, r4}
103 msr SP_\mode, r2
104 msr LR_\mode, r3
105 msr SPSR_\mode, r4
106.endm
107
108/*
109 * Restore all host registers from the stack.
110 * Clobbers all registers, in all modes, except r0 and r1.
111 */
112.macro restore_host_regs
113 pop {r2-r9}
114 msr r8_fiq, r2
115 msr r9_fiq, r3
116 msr r10_fiq, r4
117 msr r11_fiq, r5
118 msr r12_fiq, r6
119 msr SP_fiq, r7
120 msr LR_fiq, r8
121 msr SPSR_fiq, r9
122
123 pop_host_regs_mode irq
124 pop_host_regs_mode und
125 pop_host_regs_mode abt
126 pop_host_regs_mode svc
127
128 pop {r2, r3}
129 msr SP_usr, r2
130 mov lr, r3
131 pop {r4-r12}
132
133 pop {r2}
134 msr ELR_hyp, r2
135.endm
136
137/*
138 * Restore SP, LR and SPSR for a given mode. offset is the offset of
139 * this mode's registers from the VCPU base.
140 *
141 * Assumes vcpu pointer in vcpu reg
142 *
143 * Clobbers r1, r2, r3, r4.
144 */
145.macro restore_guest_regs_mode mode, offset
146 add r1, vcpu, \offset
147 ldm r1, {r2, r3, r4}
148 msr SP_\mode, r2
149 msr LR_\mode, r3
150 msr SPSR_\mode, r4
151.endm
152
153/*
154 * Restore all guest registers from the vcpu struct.
155 *
156 * Assumes vcpu pointer in vcpu reg
157 *
158 * Clobbers *all* registers.
159 */
160.macro restore_guest_regs
161 restore_guest_regs_mode svc, #VCPU_SVC_REGS
162 restore_guest_regs_mode abt, #VCPU_ABT_REGS
163 restore_guest_regs_mode und, #VCPU_UND_REGS
164 restore_guest_regs_mode irq, #VCPU_IRQ_REGS
165
166 add r1, vcpu, #VCPU_FIQ_REGS
167 ldm r1, {r2-r9}
168 msr r8_fiq, r2
169 msr r9_fiq, r3
170 msr r10_fiq, r4
171 msr r11_fiq, r5
172 msr r12_fiq, r6
173 msr SP_fiq, r7
174 msr LR_fiq, r8
175 msr SPSR_fiq, r9
176
177 @ Load return state
178 ldr r2, [vcpu, #VCPU_PC]
179 ldr r3, [vcpu, #VCPU_CPSR]
180 msr ELR_hyp, r2
181 msr SPSR_cxsf, r3
182
183 @ Load user registers
184 ldr r2, [vcpu, #VCPU_USR_SP]
185 ldr r3, [vcpu, #VCPU_USR_LR]
186 msr SP_usr, r2
187 mov lr, r3
188 add vcpu, vcpu, #(VCPU_USR_REGS)
189 ldm vcpu, {r0-r12}
190.endm
191
192/*
193 * Save SP, LR and SPSR for a given mode. offset is the offset of
194 * this mode's registers from the VCPU base.
195 *
196 * Assumes vcpu pointer in vcpu reg
197 *
198 * Clobbers r2, r3, r4, r5.
199 */
200.macro save_guest_regs_mode mode, offset
201 add r2, vcpu, \offset
202 mrs r3, SP_\mode
203 mrs r4, LR_\mode
204 mrs r5, SPSR_\mode
205 stm r2, {r3, r4, r5}
206.endm
207
208/*
209 * Save all guest registers to the vcpu struct
210 * Expects guest's r0, r1, r2 on the stack.
211 *
212 * Assumes vcpu pointer in vcpu reg
213 *
214 * Clobbers r2, r3, r4, r5.
215 */
216.macro save_guest_regs
217 @ Store usr registers
218 add r2, vcpu, #VCPU_USR_REG(3)
219 stm r2, {r3-r12}
220 add r2, vcpu, #VCPU_USR_REG(0)
221 pop {r3, r4, r5} @ r0, r1, r2
222 stm r2, {r3, r4, r5}
223 mrs r2, SP_usr
224 mov r3, lr
225 str r2, [vcpu, #VCPU_USR_SP]
226 str r3, [vcpu, #VCPU_USR_LR]
227
228 @ Store return state
229 mrs r2, ELR_hyp
230 mrs r3, spsr
231 str r2, [vcpu, #VCPU_PC]
232 str r3, [vcpu, #VCPU_CPSR]
233
234 @ Store other guest registers
235 save_guest_regs_mode svc, #VCPU_SVC_REGS
236 save_guest_regs_mode abt, #VCPU_ABT_REGS
237 save_guest_regs_mode und, #VCPU_UND_REGS
238 save_guest_regs_mode irq, #VCPU_IRQ_REGS
239.endm
240
241/* Reads cp15 registers from hardware and stores them in memory
242 * @store_to_vcpu: If 0, registers are written in-order to the stack,
243 * otherwise to the VCPU struct pointed to by vcpup
244 *
245 * Assumes vcpu pointer in vcpu reg
246 *
247 * Clobbers r2 - r12
248 */
249.macro read_cp15_state store_to_vcpu
250 mrc p15, 0, r2, c1, c0, 0 @ SCTLR
251 mrc p15, 0, r3, c1, c0, 2 @ CPACR
252 mrc p15, 0, r4, c2, c0, 2 @ TTBCR
253 mrc p15, 0, r5, c3, c0, 0 @ DACR
254 mrrc p15, 0, r6, r7, c2 @ TTBR 0
255 mrrc p15, 1, r8, r9, c2 @ TTBR 1
256 mrc p15, 0, r10, c10, c2, 0 @ PRRR
257 mrc p15, 0, r11, c10, c2, 1 @ NMRR
258 mrc p15, 2, r12, c0, c0, 0 @ CSSELR
259
260 .if \store_to_vcpu == 0
261 push {r2-r12} @ Push CP15 registers
262 .else
263 str r2, [vcpu, #CP15_OFFSET(c1_SCTLR)]
264 str r3, [vcpu, #CP15_OFFSET(c1_CPACR)]
265 str r4, [vcpu, #CP15_OFFSET(c2_TTBCR)]
266 str r5, [vcpu, #CP15_OFFSET(c3_DACR)]
267 add r2, vcpu, #CP15_OFFSET(c2_TTBR0)
268 strd r6, r7, [r2]
269 add r2, vcpu, #CP15_OFFSET(c2_TTBR1)
270 strd r8, r9, [r2]
271 str r10, [vcpu, #CP15_OFFSET(c10_PRRR)]
272 str r11, [vcpu, #CP15_OFFSET(c10_NMRR)]
273 str r12, [vcpu, #CP15_OFFSET(c0_CSSELR)]
274 .endif
275
276 mrc p15, 0, r2, c13, c0, 1 @ CID
277 mrc p15, 0, r3, c13, c0, 2 @ TID_URW
278 mrc p15, 0, r4, c13, c0, 3 @ TID_URO
279 mrc p15, 0, r5, c13, c0, 4 @ TID_PRIV
280 mrc p15, 0, r6, c5, c0, 0 @ DFSR
281 mrc p15, 0, r7, c5, c0, 1 @ IFSR
282 mrc p15, 0, r8, c5, c1, 0 @ ADFSR
283 mrc p15, 0, r9, c5, c1, 1 @ AIFSR
284 mrc p15, 0, r10, c6, c0, 0 @ DFAR
285 mrc p15, 0, r11, c6, c0, 2 @ IFAR
286 mrc p15, 0, r12, c12, c0, 0 @ VBAR
287
288 .if \store_to_vcpu == 0
289 push {r2-r12} @ Push CP15 registers
290 .else
291 str r2, [vcpu, #CP15_OFFSET(c13_CID)]
292 str r3, [vcpu, #CP15_OFFSET(c13_TID_URW)]
293 str r4, [vcpu, #CP15_OFFSET(c13_TID_URO)]
294 str r5, [vcpu, #CP15_OFFSET(c13_TID_PRIV)]
295 str r6, [vcpu, #CP15_OFFSET(c5_DFSR)]
296 str r7, [vcpu, #CP15_OFFSET(c5_IFSR)]
297 str r8, [vcpu, #CP15_OFFSET(c5_ADFSR)]
298 str r9, [vcpu, #CP15_OFFSET(c5_AIFSR)]
299 str r10, [vcpu, #CP15_OFFSET(c6_DFAR)]
300 str r11, [vcpu, #CP15_OFFSET(c6_IFAR)]
301 str r12, [vcpu, #CP15_OFFSET(c12_VBAR)]
302 .endif
303
304 mrc p15, 0, r2, c14, c1, 0 @ CNTKCTL
305
306 .if \store_to_vcpu == 0
307 push {r2}
308 .else
309 str r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
310 .endif
311.endm
312
313/*
314 * Reads cp15 registers from memory and writes them to hardware
315 * @read_from_vcpu: If 0, registers are read in-order from the stack,
316 * otherwise from the VCPU struct pointed to by vcpup
317 *
318 * Assumes vcpu pointer in vcpu reg
319 */
320.macro write_cp15_state read_from_vcpu
321 .if \read_from_vcpu == 0
322 pop {r2}
323 .else
324 ldr r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
325 .endif
326
327 mcr p15, 0, r2, c14, c1, 0 @ CNTKCTL
328
329 .if \read_from_vcpu == 0
330 pop {r2-r12}
331 .else
332 ldr r2, [vcpu, #CP15_OFFSET(c13_CID)]
333 ldr r3, [vcpu, #CP15_OFFSET(c13_TID_URW)]
334 ldr r4, [vcpu, #CP15_OFFSET(c13_TID_URO)]
335 ldr r5, [vcpu, #CP15_OFFSET(c13_TID_PRIV)]
336 ldr r6, [vcpu, #CP15_OFFSET(c5_DFSR)]
337 ldr r7, [vcpu, #CP15_OFFSET(c5_IFSR)]
338 ldr r8, [vcpu, #CP15_OFFSET(c5_ADFSR)]
339 ldr r9, [vcpu, #CP15_OFFSET(c5_AIFSR)]
340 ldr r10, [vcpu, #CP15_OFFSET(c6_DFAR)]
341 ldr r11, [vcpu, #CP15_OFFSET(c6_IFAR)]
342 ldr r12, [vcpu, #CP15_OFFSET(c12_VBAR)]
343 .endif
344
345 mcr p15, 0, r2, c13, c0, 1 @ CID
346 mcr p15, 0, r3, c13, c0, 2 @ TID_URW
347 mcr p15, 0, r4, c13, c0, 3 @ TID_URO
348 mcr p15, 0, r5, c13, c0, 4 @ TID_PRIV
349 mcr p15, 0, r6, c5, c0, 0 @ DFSR
350 mcr p15, 0, r7, c5, c0, 1 @ IFSR
351 mcr p15, 0, r8, c5, c1, 0 @ ADFSR
352 mcr p15, 0, r9, c5, c1, 1 @ AIFSR
353 mcr p15, 0, r10, c6, c0, 0 @ DFAR
354 mcr p15, 0, r11, c6, c0, 2 @ IFAR
355 mcr p15, 0, r12, c12, c0, 0 @ VBAR
356
357 .if \read_from_vcpu == 0
358 pop {r2-r12}
359 .else
360 ldr r2, [vcpu, #CP15_OFFSET(c1_SCTLR)]
361 ldr r3, [vcpu, #CP15_OFFSET(c1_CPACR)]
362 ldr r4, [vcpu, #CP15_OFFSET(c2_TTBCR)]
363 ldr r5, [vcpu, #CP15_OFFSET(c3_DACR)]
364 add r12, vcpu, #CP15_OFFSET(c2_TTBR0)
365 ldrd r6, r7, [r12]
366 add r12, vcpu, #CP15_OFFSET(c2_TTBR1)
367 ldrd r8, r9, [r12]
368 ldr r10, [vcpu, #CP15_OFFSET(c10_PRRR)]
369 ldr r11, [vcpu, #CP15_OFFSET(c10_NMRR)]
370 ldr r12, [vcpu, #CP15_OFFSET(c0_CSSELR)]
371 .endif
372
373 mcr p15, 0, r2, c1, c0, 0 @ SCTLR
374 mcr p15, 0, r3, c1, c0, 2 @ CPACR
375 mcr p15, 0, r4, c2, c0, 2 @ TTBCR
376 mcr p15, 0, r5, c3, c0, 0 @ DACR
377 mcrr p15, 0, r6, r7, c2 @ TTBR 0
378 mcrr p15, 1, r8, r9, c2 @ TTBR 1
379 mcr p15, 0, r10, c10, c2, 0 @ PRRR
380 mcr p15, 0, r11, c10, c2, 1 @ NMRR
381 mcr p15, 2, r12, c0, c0, 0 @ CSSELR
382.endm
383
384/*
385 * Save the VGIC CPU state into memory
386 *
387 * Assumes vcpu pointer in vcpu reg
388 */
389.macro save_vgic_state
390#ifdef CONFIG_KVM_ARM_VGIC
391 /* Get VGIC VCTRL base into r2 */
392 ldr r2, [vcpu, #VCPU_KVM]
393 ldr r2, [r2, #KVM_VGIC_VCTRL]
394 cmp r2, #0
395 beq 2f
396
397 /* Compute the address of struct vgic_cpu */
398 add r11, vcpu, #VCPU_VGIC_CPU
399
400 /* Save all interesting registers */
401 ldr r3, [r2, #GICH_HCR]
402 ldr r4, [r2, #GICH_VMCR]
403 ldr r5, [r2, #GICH_MISR]
404 ldr r6, [r2, #GICH_EISR0]
405 ldr r7, [r2, #GICH_EISR1]
406 ldr r8, [r2, #GICH_ELRSR0]
407 ldr r9, [r2, #GICH_ELRSR1]
408 ldr r10, [r2, #GICH_APR]
409
410 str r3, [r11, #VGIC_CPU_HCR]
411 str r4, [r11, #VGIC_CPU_VMCR]
412 str r5, [r11, #VGIC_CPU_MISR]
413 str r6, [r11, #VGIC_CPU_EISR]
414 str r7, [r11, #(VGIC_CPU_EISR + 4)]
415 str r8, [r11, #VGIC_CPU_ELRSR]
416 str r9, [r11, #(VGIC_CPU_ELRSR + 4)]
417 str r10, [r11, #VGIC_CPU_APR]
418
419 /* Clear GICH_HCR */
420 mov r5, #0
421 str r5, [r2, #GICH_HCR]
422
423 /* Save list registers */
424 add r2, r2, #GICH_LR0
425 add r3, r11, #VGIC_CPU_LR
426 ldr r4, [r11, #VGIC_CPU_NR_LR]
4271: ldr r6, [r2], #4
428 str r6, [r3], #4
429 subs r4, r4, #1
430 bne 1b
4312:
432#endif
433.endm
434
435/*
436 * Restore the VGIC CPU state from memory
437 *
438 * Assumes vcpu pointer in vcpu reg
439 */
440.macro restore_vgic_state
441#ifdef CONFIG_KVM_ARM_VGIC
442 /* Get VGIC VCTRL base into r2 */
443 ldr r2, [vcpu, #VCPU_KVM]
444 ldr r2, [r2, #KVM_VGIC_VCTRL]
445 cmp r2, #0
446 beq 2f
447
448 /* Compute the address of struct vgic_cpu */
449 add r11, vcpu, #VCPU_VGIC_CPU
450
451 /* We only restore a minimal set of registers */
452 ldr r3, [r11, #VGIC_CPU_HCR]
453 ldr r4, [r11, #VGIC_CPU_VMCR]
454 ldr r8, [r11, #VGIC_CPU_APR]
455
456 str r3, [r2, #GICH_HCR]
457 str r4, [r2, #GICH_VMCR]
458 str r8, [r2, #GICH_APR]
459
460 /* Restore list registers */
461 add r2, r2, #GICH_LR0
462 add r3, r11, #VGIC_CPU_LR
463 ldr r4, [r11, #VGIC_CPU_NR_LR]
4641: ldr r6, [r3], #4
465 str r6, [r2], #4
466 subs r4, r4, #1
467 bne 1b
4682:
469#endif
470.endm
471
472#define CNTHCTL_PL1PCTEN (1 << 0)
473#define CNTHCTL_PL1PCEN (1 << 1)
474
475/*
476 * Save the timer state onto the VCPU and allow physical timer/counter access
477 * for the host.
478 *
479 * Assumes vcpu pointer in vcpu reg
480 * Clobbers r2-r5
481 */
482.macro save_timer_state
483#ifdef CONFIG_KVM_ARM_TIMER
484 ldr r4, [vcpu, #VCPU_KVM]
485 ldr r2, [r4, #KVM_TIMER_ENABLED]
486 cmp r2, #0
487 beq 1f
488
489 mrc p15, 0, r2, c14, c3, 1 @ CNTV_CTL
490 str r2, [vcpu, #VCPU_TIMER_CNTV_CTL]
491 bic r2, #1 @ Clear ENABLE
492 mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL
493 isb
494
495 mrrc p15, 3, r2, r3, c14 @ CNTV_CVAL
496 ldr r4, =VCPU_TIMER_CNTV_CVAL
497 add r5, vcpu, r4
498 strd r2, r3, [r5]
499
5001:
501#endif
502 @ Allow physical timer/counter access for the host
503 mrc p15, 4, r2, c14, c1, 0 @ CNTHCTL
504 orr r2, r2, #(CNTHCTL_PL1PCEN | CNTHCTL_PL1PCTEN)
505 mcr p15, 4, r2, c14, c1, 0 @ CNTHCTL
506.endm
507
508/*
509 * Load the timer state from the VCPU and deny physical timer/counter access
510 * for the host.
511 *
512 * Assumes vcpu pointer in vcpu reg
513 * Clobbers r2-r5
514 */
515.macro restore_timer_state
516 @ Disallow physical timer access for the guest
517 @ Physical counter access is allowed
518 mrc p15, 4, r2, c14, c1, 0 @ CNTHCTL
519 orr r2, r2, #CNTHCTL_PL1PCTEN
520 bic r2, r2, #CNTHCTL_PL1PCEN
521 mcr p15, 4, r2, c14, c1, 0 @ CNTHCTL
522
523#ifdef CONFIG_KVM_ARM_TIMER
524 ldr r4, [vcpu, #VCPU_KVM]
525 ldr r2, [r4, #KVM_TIMER_ENABLED]
526 cmp r2, #0
527 beq 1f
528
529 ldr r2, [r4, #KVM_TIMER_CNTVOFF]
530 ldr r3, [r4, #(KVM_TIMER_CNTVOFF + 4)]
531 mcrr p15, 4, r2, r3, c14 @ CNTVOFF
532
533 ldr r4, =VCPU_TIMER_CNTV_CVAL
534 add r5, vcpu, r4
535 ldrd r2, r3, [r5]
536 mcrr p15, 3, r2, r3, c14 @ CNTV_CVAL
537 isb
538
539 ldr r2, [vcpu, #VCPU_TIMER_CNTV_CTL]
540 and r2, r2, #3
541 mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL
5421:
543#endif
544.endm
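The CNTHCTL handling in the two macros above can be summarized by the following C sketch (the accessors wrap the same cp15 encoding, p15, 4, <Rt>, c14, c1, 0, and only make sense when executed in Hyp mode; this is illustrative, not kernel API):

	static inline u32 read_cnthctl(void)
	{
		u32 val;

		asm volatile("mrc p15, 4, %0, c14, c1, 0" : "=r" (val));
		return val;
	}

	static inline void write_cnthctl(u32 val)
	{
		asm volatile("mcr p15, 4, %0, c14, c1, 0" : : "r" (val));
	}

	static inline void timer_allow_host_phys_access(void)	/* save_timer_state */
	{
		write_cnthctl(read_cnthctl() | CNTHCTL_PL1PCEN | CNTHCTL_PL1PCTEN);
	}

	static inline void timer_restrict_guest_phys_access(void) /* restore_timer_state */
	{
		u32 val = read_cnthctl();

		val |= CNTHCTL_PL1PCTEN;	/* physical counter reads allowed */
		val &= ~CNTHCTL_PL1PCEN;	/* physical timer accesses trap to Hyp */
		write_cnthctl(val);
	}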
545
546.equ vmentry, 0
547.equ vmexit, 1
548
549/* Configures the HSTR (Hyp System Trap Register) on entry/return
550 * (hardware reset value is 0) */
551.macro set_hstr operation
552 mrc p15, 4, r2, c1, c1, 3
553 ldr r3, =HSTR_T(15)
554 .if \operation == vmentry
555 orr r2, r2, r3 @ Trap CR{15}
556 .else
557 bic r2, r2, r3 @ Don't trap any CRx accesses
558 .endif
559 mcr p15, 4, r2, c1, c1, 3
560.endm
561
562/* Configures the HCPTR (Hyp Coprocessor Trap Register) on entry/return
563 * (hardware reset value is 0). Keep previous value in r2. */
564.macro set_hcptr operation, mask
565 mrc p15, 4, r2, c1, c1, 2
566 ldr r3, =\mask
567 .if \operation == vmentry
568 orr r3, r2, r3 @ Trap coproc-accesses defined in mask
569 .else
570 bic r3, r2, r3 @ Don't trap defined coproc-accesses
571 .endif
572 mcr p15, 4, r3, c1, c1, 2
573.endm
574
575/* Configures the HDCR (Hyp Debug Configuration Register) on entry/return
576 * (hardware reset value is 0) */
577.macro set_hdcr operation
578 mrc p15, 4, r2, c1, c1, 1
579 ldr r3, =(HDCR_TPM|HDCR_TPMCR)
580 .if \operation == vmentry
581 orr r2, r2, r3 @ Trap some perfmon accesses
582 .else
583 bic r2, r2, r3 @ Don't trap any perfmon accesses
584 .endif
585 mcr p15, 4, r2, c1, c1, 1
586.endm
587
588/* Enable/Disable: stage-2 trans., trap interrupts, trap wfi, trap smc */
589.macro configure_hyp_role operation
590 mrc p15, 4, r2, c1, c1, 0 @ HCR
591 bic r2, r2, #HCR_VIRT_EXCP_MASK
592 ldr r3, =HCR_GUEST_MASK
593 .if \operation == vmentry
594 orr r2, r2, r3
595 ldr r3, [vcpu, #VCPU_IRQ_LINES]
596 orr r2, r2, r3
597 .else
598 bic r2, r2, r3
599 .endif
600 mcr p15, 4, r2, c1, c1, 0
601.endm
602
603.macro load_vcpu
604 mrc p15, 4, vcpu, c13, c0, 2 @ HTPIDR
605.endm
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
new file mode 100644
index 000000000000..72a12f2171b2
--- /dev/null
+++ b/arch/arm/kvm/mmio.c
@@ -0,0 +1,146 @@
1/*
2 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
3 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2, as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18
19#include <linux/kvm_host.h>
20#include <asm/kvm_mmio.h>
21#include <asm/kvm_emulate.h>
22#include <trace/events/kvm.h>
23
24#include "trace.h"
25
26/**
27 * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation
28 * @vcpu: The VCPU pointer
29 * @run: The VCPU run struct containing the mmio data
30 *
31 * This should only be called after returning from userspace for MMIO load
32 * emulation.
33 */
34int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
35{
36 unsigned long *dest;
37 unsigned int len;
38 int mask;
39
40 if (!run->mmio.is_write) {
41 dest = vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt);
42 *dest = 0;
43
44 len = run->mmio.len;
45 if (len > sizeof(unsigned long))
46 return -EINVAL;
47
48 memcpy(dest, run->mmio.data, len);
49
50 trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
51 *((u64 *)run->mmio.data));
52
53 if (vcpu->arch.mmio_decode.sign_extend &&
54 len < sizeof(unsigned long)) {
55 mask = 1U << ((len * 8) - 1);
56 *dest = (*dest ^ mask) - mask;
57 }
58 }
59
60 return 0;
61}
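The (*dest ^ mask) - mask line above is the usual branch-free sign-extension idiom. A small worked example for a 1-byte load of the value 0x80 (purely illustrative, 32-bit unsigned long as on ARM):

	unsigned long dest = 0x80;		/* byte read from run->mmio.data */
	int mask = 1U << ((1 * 8) - 1);		/* 0x80, the sign bit for len == 1 */

	dest = (dest ^ mask) - mask;		/* 0x80 ^ 0x80 = 0; 0 - 0x80 wraps to
						 * 0xffffff80, i.e. -128 sign-extended */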
62
63static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
64 struct kvm_exit_mmio *mmio)
65{
66	unsigned long rt; int len;	/* signed: kvm_vcpu_dabt_get_as() may return -EFAULT */
67 bool is_write, sign_extend;
68
69 if (kvm_vcpu_dabt_isextabt(vcpu)) {
70 /* cache operation on I/O addr, tell guest unsupported */
71 kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
72 return 1;
73 }
74
75 if (kvm_vcpu_dabt_iss1tw(vcpu)) {
76 /* page table accesses IO mem: tell guest to fix its TTBR */
77 kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
78 return 1;
79 }
80
81 len = kvm_vcpu_dabt_get_as(vcpu);
82 if (unlikely(len < 0))
83 return len;
84
85 is_write = kvm_vcpu_dabt_iswrite(vcpu);
86 sign_extend = kvm_vcpu_dabt_issext(vcpu);
87 rt = kvm_vcpu_dabt_get_rd(vcpu);
88
89 if (kvm_vcpu_reg_is_pc(vcpu, rt)) {
90 /* IO memory trying to read/write pc */
91 kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
92 return 1;
93 }
94
95 mmio->is_write = is_write;
96 mmio->phys_addr = fault_ipa;
97 mmio->len = len;
98 vcpu->arch.mmio_decode.sign_extend = sign_extend;
99 vcpu->arch.mmio_decode.rt = rt;
100
101 /*
102 * The MMIO instruction is emulated and should not be re-executed
103 * in the guest.
104 */
105 kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
106 return 0;
107}
108
109int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
110 phys_addr_t fault_ipa)
111{
112 struct kvm_exit_mmio mmio;
113 unsigned long rt;
114 int ret;
115
116 /*
117 * Prepare MMIO operation. First stash it in a private
118 * structure that we can use for in-kernel emulation. If the
119 * kernel can't handle it, copy it into run->mmio and let user
120 * space do its magic.
121 */
122
123 if (kvm_vcpu_dabt_isvalid(vcpu)) {
124 ret = decode_hsr(vcpu, fault_ipa, &mmio);
125 if (ret)
126 return ret;
127 } else {
128 kvm_err("load/store instruction decoding not implemented\n");
129 return -ENOSYS;
130 }
131
132 rt = vcpu->arch.mmio_decode.rt;
133 trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE :
134 KVM_TRACE_MMIO_READ_UNSATISFIED,
135 mmio.len, fault_ipa,
136 (mmio.is_write) ? *vcpu_reg(vcpu, rt) : 0);
137
138 if (mmio.is_write)
139 memcpy(mmio.data, vcpu_reg(vcpu, rt), mmio.len);
140
141 if (vgic_handle_mmio(vcpu, run, &mmio))
142 return 1;
143
144 kvm_prepare_mmio(run, &mmio);
145 return 0;
146}
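When neither the in-kernel VGIC nor this code completes the access, kvm_prepare_mmio() exposes it to userspace through run->mmio, and the data written back by userspace is folded into the guest register by kvm_handle_mmio_return() above on the next KVM_RUN. A hedged sketch of the userspace half of that protocol (vcpu_fd and run are the usual KVM vcpu file descriptor and mmap'ed kvm_run; device_read()/device_write() are hypothetical device-model helpers):

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* VMM run-loop fragment handling KVM_EXIT_MMIO. */
	static void run_vcpu(int vcpu_fd, struct kvm_run *run)
	{
		while (ioctl(vcpu_fd, KVM_RUN, 0) == 0) {
			if (run->exit_reason != KVM_EXIT_MMIO)
				continue;
			if (run->mmio.is_write)
				device_write(run->mmio.phys_addr,
					     run->mmio.data, run->mmio.len);
			else
				/* result is copied into the guest register by
				 * kvm_handle_mmio_return() on the next KVM_RUN */
				device_read(run->mmio.phys_addr,
					    run->mmio.data, run->mmio.len);
		}
	}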
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
new file mode 100644
index 000000000000..2f12e4056408
--- /dev/null
+++ b/arch/arm/kvm/mmu.c
@@ -0,0 +1,774 @@
1/*
2 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
3 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2, as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18
19#include <linux/mman.h>
20#include <linux/kvm_host.h>
21#include <linux/io.h>
22#include <trace/events/kvm.h>
23#include <asm/pgalloc.h>
24#include <asm/cacheflush.h>
25#include <asm/kvm_arm.h>
26#include <asm/kvm_mmu.h>
27#include <asm/kvm_mmio.h>
28#include <asm/kvm_asm.h>
29#include <asm/kvm_emulate.h>
30
31#include "trace.h"
32
33extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
34
35static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
36
37static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
38{
39 kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
40}
41
42static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
43 int min, int max)
44{
45 void *page;
46
47 BUG_ON(max > KVM_NR_MEM_OBJS);
48 if (cache->nobjs >= min)
49 return 0;
50 while (cache->nobjs < max) {
51 page = (void *)__get_free_page(PGALLOC_GFP);
52 if (!page)
53 return -ENOMEM;
54 cache->objects[cache->nobjs++] = page;
55 }
56 return 0;
57}
58
59static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
60{
61 while (mc->nobjs)
62 free_page((unsigned long)mc->objects[--mc->nobjs]);
63}
64
65static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
66{
67 void *p;
68
69 BUG_ON(!mc || !mc->nobjs);
70 p = mc->objects[--mc->nobjs];
71 return p;
72}
73
74static void free_ptes(pmd_t *pmd, unsigned long addr)
75{
76 pte_t *pte;
77 unsigned int i;
78
79 for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_SIZE) {
80 if (!pmd_none(*pmd) && pmd_table(*pmd)) {
81 pte = pte_offset_kernel(pmd, addr);
82 pte_free_kernel(NULL, pte);
83 }
84 pmd++;
85 }
86}
87
88static void free_hyp_pgd_entry(unsigned long addr)
89{
90 pgd_t *pgd;
91 pud_t *pud;
92 pmd_t *pmd;
93 unsigned long hyp_addr = KERN_TO_HYP(addr);
94
95 pgd = hyp_pgd + pgd_index(hyp_addr);
96 pud = pud_offset(pgd, hyp_addr);
97
98 if (pud_none(*pud))
99 return;
100 BUG_ON(pud_bad(*pud));
101
102 pmd = pmd_offset(pud, hyp_addr);
103 free_ptes(pmd, addr);
104 pmd_free(NULL, pmd);
105 pud_clear(pud);
106}
107
108/**
109 * free_hyp_pmds - free the Hyp-mode level-2 tables and their child level-3 tables
110 *
111 * Assumes this is a page table used strictly in Hyp-mode and therefore contains
112 * either mappings in the kernel memory area (above PAGE_OFFSET), or
113 * device mappings in the vmalloc range (from VMALLOC_START to VMALLOC_END).
114 */
115void free_hyp_pmds(void)
116{
117 unsigned long addr;
118
119 mutex_lock(&kvm_hyp_pgd_mutex);
120 for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
121 free_hyp_pgd_entry(addr);
122 for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
123 free_hyp_pgd_entry(addr);
124 mutex_unlock(&kvm_hyp_pgd_mutex);
125}
126
127static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
128 unsigned long end)
129{
130 pte_t *pte;
131 unsigned long addr;
132 struct page *page;
133
134 for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
135 unsigned long hyp_addr = KERN_TO_HYP(addr);
136
137 pte = pte_offset_kernel(pmd, hyp_addr);
138 BUG_ON(!virt_addr_valid(addr));
139 page = virt_to_page(addr);
140 kvm_set_pte(pte, mk_pte(page, PAGE_HYP));
141 }
142}
143
144static void create_hyp_io_pte_mappings(pmd_t *pmd, unsigned long start,
145 unsigned long end,
146 unsigned long *pfn_base)
147{
148 pte_t *pte;
149 unsigned long addr;
150
151 for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
152 unsigned long hyp_addr = KERN_TO_HYP(addr);
153
154 pte = pte_offset_kernel(pmd, hyp_addr);
155 BUG_ON(pfn_valid(*pfn_base));
156 kvm_set_pte(pte, pfn_pte(*pfn_base, PAGE_HYP_DEVICE));
157 (*pfn_base)++;
158 }
159}
160
161static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
162 unsigned long end, unsigned long *pfn_base)
163{
164 pmd_t *pmd;
165 pte_t *pte;
166 unsigned long addr, next;
167
168 for (addr = start; addr < end; addr = next) {
169 unsigned long hyp_addr = KERN_TO_HYP(addr);
170 pmd = pmd_offset(pud, hyp_addr);
171
172 BUG_ON(pmd_sect(*pmd));
173
174 if (pmd_none(*pmd)) {
175 pte = pte_alloc_one_kernel(NULL, hyp_addr);
176 if (!pte) {
177 kvm_err("Cannot allocate Hyp pte\n");
178 return -ENOMEM;
179 }
180 pmd_populate_kernel(NULL, pmd, pte);
181 }
182
183 next = pmd_addr_end(addr, end);
184
185 /*
186 * If pfn_base is NULL, we map kernel pages into HYP with the
187 * virtual address. Otherwise, this is considered an I/O
188 * mapping and we map the physical region starting at
189 * *pfn_base to [start, end[.
190 */
191 if (!pfn_base)
192 create_hyp_pte_mappings(pmd, addr, next);
193 else
194 create_hyp_io_pte_mappings(pmd, addr, next, pfn_base);
195 }
196
197 return 0;
198}
199
200static int __create_hyp_mappings(void *from, void *to, unsigned long *pfn_base)
201{
202 unsigned long start = (unsigned long)from;
203 unsigned long end = (unsigned long)to;
204 pgd_t *pgd;
205 pud_t *pud;
206 pmd_t *pmd;
207 unsigned long addr, next;
208 int err = 0;
209
210 if (start >= end)
211 return -EINVAL;
212 /* Check for a valid kernel memory mapping */
213 if (!pfn_base && (!virt_addr_valid(from) || !virt_addr_valid(to - 1)))
214 return -EINVAL;
215 /* Check for a valid kernel IO mapping */
216 if (pfn_base && (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1)))
217 return -EINVAL;
218
219 mutex_lock(&kvm_hyp_pgd_mutex);
220 for (addr = start; addr < end; addr = next) {
221 unsigned long hyp_addr = KERN_TO_HYP(addr);
222 pgd = hyp_pgd + pgd_index(hyp_addr);
223 pud = pud_offset(pgd, hyp_addr);
224
225 if (pud_none_or_clear_bad(pud)) {
226 pmd = pmd_alloc_one(NULL, hyp_addr);
227 if (!pmd) {
228 kvm_err("Cannot allocate Hyp pmd\n");
229 err = -ENOMEM;
230 goto out;
231 }
232 pud_populate(NULL, pud, pmd);
233 }
234
235 next = pgd_addr_end(addr, end);
236 err = create_hyp_pmd_mappings(pud, addr, next, pfn_base);
237 if (err)
238 goto out;
239 }
240out:
241 mutex_unlock(&kvm_hyp_pgd_mutex);
242 return err;
243}
244
245/**
246 * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
247 * @from: The virtual kernel start address of the range
248 * @to: The virtual kernel end address of the range (exclusive)
249 *
250 * The same virtual address as the kernel virtual address is also used
251 * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying
252 * physical pages.
253 *
254 * Note: Wrapping around zero in the "to" address is not supported.
255 */
256int create_hyp_mappings(void *from, void *to)
257{
258 return __create_hyp_mappings(from, to, NULL);
259}
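As a usage sketch (the real caller is init_hyp_mode() in arch/arm/kvm/arm.c; the symbols and error handling here are only illustrative), the Hyp code itself and a per-cpu stack page are mapped this way at their kernel VAs, modulo HYP_PAGE_OFFSET:

	err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end);
	if (err) {
		kvm_err("Cannot map world-switch code\n");
		return err;
	}

	/* one stack page per CPU, again at its kernel VA (hypothetical variable) */
	err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE);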
260
261/**
262 * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode
263 * @from: The kernel start VA of the range
264 * @to: The kernel end VA of the range (exclusive)
265 * @addr: The physical start address which gets mapped
266 *
267 * The resulting HYP VA is the same as the kernel VA, modulo
268 * HYP_PAGE_OFFSET.
269 */
270int create_hyp_io_mappings(void *from, void *to, phys_addr_t addr)
271{
272 unsigned long pfn = __phys_to_pfn(addr);
273 return __create_hyp_mappings(from, to, &pfn);
274}
275
276/**
277 * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
278 * @kvm: The KVM struct pointer for the VM.
279 *
280 * Allocates only the 1st level table, of the size defined by S2_PGD_ORDER
281 * (which can support either full 40-bit input addresses or be limited to
282 * 32-bit input addresses). Clears the allocated pages.
283 *
284 * Note we don't need locking here as this is only called when the VM is
285 * created, which can only be done once.
286 */
287int kvm_alloc_stage2_pgd(struct kvm *kvm)
288{
289 pgd_t *pgd;
290
291 if (kvm->arch.pgd != NULL) {
292 kvm_err("kvm_arch already initialized?\n");
293 return -EINVAL;
294 }
295
296 pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER);
297 if (!pgd)
298 return -ENOMEM;
299
300 /* stage-2 pgd must be aligned to its size */
301 VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1));
302
303 memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
304 kvm_clean_pgd(pgd);
305 kvm->arch.pgd = pgd;
306
307 return 0;
308}
309
310static void clear_pud_entry(pud_t *pud)
311{
312 pmd_t *pmd_table = pmd_offset(pud, 0);
313 pud_clear(pud);
314 pmd_free(NULL, pmd_table);
315 put_page(virt_to_page(pud));
316}
317
318static void clear_pmd_entry(pmd_t *pmd)
319{
320 pte_t *pte_table = pte_offset_kernel(pmd, 0);
321 pmd_clear(pmd);
322 pte_free_kernel(NULL, pte_table);
323 put_page(virt_to_page(pmd));
324}
325
326static bool pmd_empty(pmd_t *pmd)
327{
328 struct page *pmd_page = virt_to_page(pmd);
329 return page_count(pmd_page) == 1;
330}
331
332static void clear_pte_entry(pte_t *pte)
333{
334 if (pte_present(*pte)) {
335 kvm_set_pte(pte, __pte(0));
336 put_page(virt_to_page(pte));
337 }
338}
339
340static bool pte_empty(pte_t *pte)
341{
342 struct page *pte_page = virt_to_page(pte);
343 return page_count(pte_page) == 1;
344}
345
346/**
347 * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
348 * @kvm: The VM pointer
349 * @start: The intermediate physical base address of the range to unmap
350 * @size: The size of the area to unmap
351 *
352 * Clear a range of stage-2 mappings, lowering the various ref-counts. Must
353 * be called while holding mmu_lock (unless for freeing the stage2 pgd before
354 * destroying the VM), otherwise another faulting VCPU may come in and mess
355 * with things behind our backs.
356 */
357static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
358{
359 pgd_t *pgd;
360 pud_t *pud;
361 pmd_t *pmd;
362 pte_t *pte;
363 phys_addr_t addr = start, end = start + size;
364 u64 range;
365
366 while (addr < end) {
367 pgd = kvm->arch.pgd + pgd_index(addr);
368 pud = pud_offset(pgd, addr);
369 if (pud_none(*pud)) {
370 addr += PUD_SIZE;
371 continue;
372 }
373
374 pmd = pmd_offset(pud, addr);
375 if (pmd_none(*pmd)) {
376 addr += PMD_SIZE;
377 continue;
378 }
379
380 pte = pte_offset_kernel(pmd, addr);
381 clear_pte_entry(pte);
382 range = PAGE_SIZE;
383
384 /* If we emptied the pte, walk back up the ladder */
385 if (pte_empty(pte)) {
386 clear_pmd_entry(pmd);
387 range = PMD_SIZE;
388 if (pmd_empty(pmd)) {
389 clear_pud_entry(pud);
390 range = PUD_SIZE;
391 }
392 }
393
394 addr += range;
395 }
396}
397
398/**
399 * kvm_free_stage2_pgd - free all stage-2 tables
400 * @kvm: The KVM struct pointer for the VM.
401 *
402 * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
403 * underlying level-2 and level-3 tables before freeing the actual level-1 table
404 * and setting the struct pointer to NULL.
405 *
406 * Note we don't need locking here as this is only called when the VM is
407 * destroyed, which can only be done once.
408 */
409void kvm_free_stage2_pgd(struct kvm *kvm)
410{
411 if (kvm->arch.pgd == NULL)
412 return;
413
414 unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
415 free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
416 kvm->arch.pgd = NULL;
417}
418
419
420static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
421 phys_addr_t addr, const pte_t *new_pte, bool iomap)
422{
423 pgd_t *pgd;
424 pud_t *pud;
425 pmd_t *pmd;
426 pte_t *pte, old_pte;
427
428 /* Create 2nd stage page table mapping - Level 1 */
429 pgd = kvm->arch.pgd + pgd_index(addr);
430 pud = pud_offset(pgd, addr);
431 if (pud_none(*pud)) {
432 if (!cache)
433 return 0; /* ignore calls from kvm_set_spte_hva */
434 pmd = mmu_memory_cache_alloc(cache);
435 pud_populate(NULL, pud, pmd);
436 get_page(virt_to_page(pud));
437 }
438
439 pmd = pmd_offset(pud, addr);
440
441 /* Create 2nd stage page table mapping - Level 2 */
442 if (pmd_none(*pmd)) {
443 if (!cache)
444 return 0; /* ignore calls from kvm_set_spte_hva */
445 pte = mmu_memory_cache_alloc(cache);
446 kvm_clean_pte(pte);
447 pmd_populate_kernel(NULL, pmd, pte);
448 get_page(virt_to_page(pmd));
449 }
450
451 pte = pte_offset_kernel(pmd, addr);
452
453 if (iomap && pte_present(*pte))
454 return -EFAULT;
455
456 /* Create 2nd stage page table mapping - Level 3 */
457 old_pte = *pte;
458 kvm_set_pte(pte, *new_pte);
459 if (pte_present(old_pte))
460 kvm_tlb_flush_vmid_ipa(kvm, addr);
461 else
462 get_page(virt_to_page(pte));
463
464 return 0;
465}
466
467/**
468 * kvm_phys_addr_ioremap - map a device range to guest IPA
469 *
470 * @kvm: The KVM pointer
471 * @guest_ipa: The IPA at which to insert the mapping
472 * @pa: The physical address of the device
473 * @size: The size of the mapping
474 */
475int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
476 phys_addr_t pa, unsigned long size)
477{
478 phys_addr_t addr, end;
479 int ret = 0;
480 unsigned long pfn;
481 struct kvm_mmu_memory_cache cache = { 0, };
482
483 end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK;
484 pfn = __phys_to_pfn(pa);
485
486 for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
487 pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
488 kvm_set_s2pte_writable(&pte);
489
490 ret = mmu_topup_memory_cache(&cache, 2, 2);
491 if (ret)
492 goto out;
493 spin_lock(&kvm->mmu_lock);
494 ret = stage2_set_pte(kvm, &cache, addr, &pte, true);
495 spin_unlock(&kvm->mmu_lock);
496 if (ret)
497 goto out;
498
499 pfn++;
500 }
501
502out:
503 mmu_free_memory_cache(&cache);
504 return ret;
505}
506
507static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
508 gfn_t gfn, struct kvm_memory_slot *memslot,
509 unsigned long fault_status)
510{
511 pte_t new_pte;
512 pfn_t pfn;
513 int ret;
514 bool write_fault, writable;
515 unsigned long mmu_seq;
516 struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
517
518 write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
519 if (fault_status == FSC_PERM && !write_fault) {
520 kvm_err("Unexpected L2 read permission error\n");
521 return -EFAULT;
522 }
523
524 /* We need minimum second+third level pages */
525 ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
526 if (ret)
527 return ret;
528
529 mmu_seq = vcpu->kvm->mmu_notifier_seq;
530 /*
531 * Ensure the read of mmu_notifier_seq happens before we call
532 * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
533	 * the page we just got a reference to getting unmapped before we have a
534	 * chance to grab the mmu_lock, which ensures that if the page gets
535	 * unmapped afterwards, the call to kvm_unmap_hva will take it away
536 * from us again properly. This smp_rmb() interacts with the smp_wmb()
537 * in kvm_mmu_notifier_invalidate_<page|range_end>.
538 */
539 smp_rmb();
540
541 pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable);
542 if (is_error_pfn(pfn))
543 return -EFAULT;
544
545 new_pte = pfn_pte(pfn, PAGE_S2);
546 coherent_icache_guest_page(vcpu->kvm, gfn);
547
548 spin_lock(&vcpu->kvm->mmu_lock);
549 if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
550 goto out_unlock;
551 if (writable) {
552 kvm_set_s2pte_writable(&new_pte);
553 kvm_set_pfn_dirty(pfn);
554 }
555 stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false);
556
557out_unlock:
558 spin_unlock(&vcpu->kvm->mmu_lock);
559 kvm_release_pfn_clean(pfn);
560 return 0;
561}
562
563/**
564 * kvm_handle_guest_abort - handles all 2nd stage aborts
565 * @vcpu: the VCPU pointer
566 * @run: the kvm_run structure
567 *
568 * Any abort that gets to the host is almost guaranteed to be caused by a
569 * missing second stage translation table entry, which can mean either that the
570 * guest simply needs more memory and we must allocate an appropriate page, or
571 * that the guest tried to access I/O memory, which is emulated by user
572 * space. The distinction is based on the IPA causing the fault and whether this
573 * memory region has been registered as standard RAM by user space.
574 */
575int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
576{
577 unsigned long fault_status;
578 phys_addr_t fault_ipa;
579 struct kvm_memory_slot *memslot;
580 bool is_iabt;
581 gfn_t gfn;
582 int ret, idx;
583
584 is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
585 fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
586
587 trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
588 kvm_vcpu_get_hfar(vcpu), fault_ipa);
589
590 /* Check the stage-2 fault is trans. fault or write fault */
591 fault_status = kvm_vcpu_trap_get_fault(vcpu);
592 if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
593 kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n",
594 kvm_vcpu_trap_get_class(vcpu), fault_status);
595 return -EFAULT;
596 }
597
598 idx = srcu_read_lock(&vcpu->kvm->srcu);
599
600 gfn = fault_ipa >> PAGE_SHIFT;
601 if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
602 if (is_iabt) {
603 /* Prefetch Abort on I/O address */
604 kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
605 ret = 1;
606 goto out_unlock;
607 }
608
609 if (fault_status != FSC_FAULT) {
610 kvm_err("Unsupported fault status on io memory: %#lx\n",
611 fault_status);
612 ret = -EFAULT;
613 goto out_unlock;
614 }
615
616 /*
617 * The IPA is reported as [MAX:12], so we need to
618 * complement it with the bottom 12 bits from the
619 * faulting VA. This is always 12 bits, irrespective
620 * of the page size.
621 */
622 fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
623 ret = io_mem_abort(vcpu, run, fault_ipa);
624 goto out_unlock;
625 }
626
627 memslot = gfn_to_memslot(vcpu->kvm, gfn);
628
629 ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status);
630 if (ret == 0)
631 ret = 1;
632out_unlock:
633 srcu_read_unlock(&vcpu->kvm->srcu, idx);
634 return ret;
635}
636
637static void handle_hva_to_gpa(struct kvm *kvm,
638 unsigned long start,
639 unsigned long end,
640 void (*handler)(struct kvm *kvm,
641 gpa_t gpa, void *data),
642 void *data)
643{
644 struct kvm_memslots *slots;
645 struct kvm_memory_slot *memslot;
646
647 slots = kvm_memslots(kvm);
648
649 /* we only care about the pages that the guest sees */
650 kvm_for_each_memslot(memslot, slots) {
651 unsigned long hva_start, hva_end;
652 gfn_t gfn, gfn_end;
653
654 hva_start = max(start, memslot->userspace_addr);
655 hva_end = min(end, memslot->userspace_addr +
656 (memslot->npages << PAGE_SHIFT));
657 if (hva_start >= hva_end)
658 continue;
659
660 /*
661 * {gfn(page) | page intersects with [hva_start, hva_end)} =
662 * {gfn_start, gfn_start+1, ..., gfn_end-1}.
663 */
664 gfn = hva_to_gfn_memslot(hva_start, memslot);
665 gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
666
667 for (; gfn < gfn_end; ++gfn) {
668 gpa_t gpa = gfn << PAGE_SHIFT;
669 handler(kvm, gpa, data);
670 }
671 }
672}
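A concrete worked example of the clamping and gfn computation above (all numbers invented): a 16-page memslot at userspace_addr 0x40000000 backing base_gfn 0x80000, asked to handle the hva range [0x40003800, 0x40004800):

	unsigned long hva_start = 0x40003800;	/* already >= userspace_addr       */
	unsigned long hva_end   = 0x40004800;	/* already <= end of the memslot   */
	gfn_t gfn     = 0x80000 + ((hva_start - 0x40000000) >> 12);		/* 0x80003 */
	gfn_t gfn_end = 0x80000 + ((hva_end + 4096 - 1 - 0x40000000) >> 12);	/* 0x80005 */
	/* the handler runs for gfn 0x80003 and 0x80004, i.e. for every
	 * guest page that intersects the hva range */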
673
674static void kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
675{
676 unmap_stage2_range(kvm, gpa, PAGE_SIZE);
677 kvm_tlb_flush_vmid_ipa(kvm, gpa);
678}
679
680int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
681{
682 unsigned long end = hva + PAGE_SIZE;
683
684 if (!kvm->arch.pgd)
685 return 0;
686
687 trace_kvm_unmap_hva(hva);
688 handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL);
689 return 0;
690}
691
692int kvm_unmap_hva_range(struct kvm *kvm,
693 unsigned long start, unsigned long end)
694{
695 if (!kvm->arch.pgd)
696 return 0;
697
698 trace_kvm_unmap_hva_range(start, end);
699 handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
700 return 0;
701}
702
703static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
704{
705 pte_t *pte = (pte_t *)data;
706
707 stage2_set_pte(kvm, NULL, gpa, pte, false);
708}
709
710
711void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
712{
713 unsigned long end = hva + PAGE_SIZE;
714 pte_t stage2_pte;
715
716 if (!kvm->arch.pgd)
717 return;
718
719 trace_kvm_set_spte_hva(hva);
720 stage2_pte = pfn_pte(pte_pfn(pte), PAGE_S2);
721 handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
722}
723
724void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
725{
726 mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
727}
728
729phys_addr_t kvm_mmu_get_httbr(void)
730{
731 VM_BUG_ON(!virt_addr_valid(hyp_pgd));
732 return virt_to_phys(hyp_pgd);
733}
734
735int kvm_mmu_init(void)
736{
737 if (!hyp_pgd) {
738 kvm_err("Hyp mode PGD not allocated\n");
739 return -ENOMEM;
740 }
741
742 return 0;
743}
744
745/**
746 * kvm_clear_hyp_idmap - remove all idmaps from the hyp pgd
747 *
748 * Free the underlying pmds for all pgds in range and clear the pgds (but
749 * don't free them) afterwards.
750 */
751void kvm_clear_hyp_idmap(void)
752{
753 unsigned long addr, end;
754 unsigned long next;
755 pgd_t *pgd = hyp_pgd;
756 pud_t *pud;
757 pmd_t *pmd;
758
759 addr = virt_to_phys(__hyp_idmap_text_start);
760 end = virt_to_phys(__hyp_idmap_text_end);
761
762 pgd += pgd_index(addr);
763 do {
764 next = pgd_addr_end(addr, end);
765 if (pgd_none_or_clear_bad(pgd))
766 continue;
767 pud = pud_offset(pgd, addr);
768 pmd = pmd_offset(pud, addr);
769
770 pud_clear(pud);
771 kvm_clean_pmd_entry(pmd);
772 pmd_free(NULL, (pmd_t *)((unsigned long)pmd & PAGE_MASK));
773 } while (pgd++, addr = next, addr < end);
774}
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
new file mode 100644
index 000000000000..7ee5bb7a3667
--- /dev/null
+++ b/arch/arm/kvm/psci.c
@@ -0,0 +1,108 @@
1/*
2 * Copyright (C) 2012 - ARM Ltd
3 * Author: Marc Zyngier <marc.zyngier@arm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include <linux/kvm_host.h>
19#include <linux/wait.h>
20
21#include <asm/kvm_emulate.h>
22#include <asm/kvm_psci.h>
23
24/*
25 * This is an implementation of the Power State Coordination Interface
26 * as described in ARM document number ARM DEN 0022A.
27 */
28
29static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
30{
31 vcpu->arch.pause = true;
32}
33
34static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
35{
36 struct kvm *kvm = source_vcpu->kvm;
37 struct kvm_vcpu *vcpu;
38 wait_queue_head_t *wq;
39 unsigned long cpu_id;
40 phys_addr_t target_pc;
41
42 cpu_id = *vcpu_reg(source_vcpu, 1);
43 if (vcpu_mode_is_32bit(source_vcpu))
44 cpu_id &= ~((u32) 0);
45
46 if (cpu_id >= atomic_read(&kvm->online_vcpus))
47 return KVM_PSCI_RET_INVAL;
48
49 target_pc = *vcpu_reg(source_vcpu, 2);
50
51 vcpu = kvm_get_vcpu(kvm, cpu_id);
52
53 wq = kvm_arch_vcpu_wq(vcpu);
54 if (!waitqueue_active(wq))
55 return KVM_PSCI_RET_INVAL;
56
57 kvm_reset_vcpu(vcpu);
58
59 /* Gracefully handle Thumb2 entry point */
60 if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) {
61 target_pc &= ~((phys_addr_t) 1);
62 vcpu_set_thumb(vcpu);
63 }
64
65 *vcpu_pc(vcpu) = target_pc;
66 vcpu->arch.pause = false;
67 smp_mb(); /* Make sure the above is visible */
68
69 wake_up_interruptible(wq);
70
71 return KVM_PSCI_RET_SUCCESS;
72}
73
74/**
75 * kvm_psci_call - handle PSCI call if r0 value is in range
76 * @vcpu: Pointer to the VCPU struct
77 *
78 * Handle PSCI calls from guests through traps from HVC or SMC instructions.
79 * The calling convention is similar to SMC calls to the secure world where
80 * the function number is placed in r0. This function returns true if the
81 * function number specified in r0 is within the PSCI range, and false
82 * otherwise.
83 */
84bool kvm_psci_call(struct kvm_vcpu *vcpu)
85{
86 unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
87 unsigned long val;
88
89 switch (psci_fn) {
90 case KVM_PSCI_FN_CPU_OFF:
91 kvm_psci_vcpu_off(vcpu);
92 val = KVM_PSCI_RET_SUCCESS;
93 break;
94 case KVM_PSCI_FN_CPU_ON:
95 val = kvm_psci_vcpu_on(vcpu);
96 break;
97 case KVM_PSCI_FN_CPU_SUSPEND:
98 case KVM_PSCI_FN_MIGRATE:
99 val = KVM_PSCI_RET_NI;
100 break;
101
102 default:
103 return false;
104 }
105
106 *vcpu_reg(vcpu, 0) = val;
107 return true;
108}
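For reference, a hypothetical guest-side sketch of the convention handled above, issued via HVC (function id in r0, target CPU in r1, entry point in r2, result back in r0; the KVM_PSCI_FN_CPU_ON constant is the uapi value this handler matches, shown here only for readability):

	static long guest_psci_cpu_on(unsigned long cpu, unsigned long entry_point)
	{
		register unsigned long r0 asm("r0") = KVM_PSCI_FN_CPU_ON;
		register unsigned long r1 asm("r1") = cpu;
		register unsigned long r2 asm("r2") = entry_point;

		asm volatile(".arch_extension virt\n\thvc #0"
			     : "+r" (r0)
			     : "r" (r1), "r" (r2)
			     : "memory");
		return r0;	/* KVM_PSCI_RET_SUCCESS or KVM_PSCI_RET_INVAL */
	}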
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
new file mode 100644
index 000000000000..b80256b554cd
--- /dev/null
+++ b/arch/arm/kvm/reset.c
@@ -0,0 +1,74 @@
1/*
2 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
3 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2, as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18#include <linux/compiler.h>
19#include <linux/errno.h>
20#include <linux/sched.h>
21#include <linux/kvm_host.h>
22#include <linux/kvm.h>
23
24#include <asm/unified.h>
25#include <asm/ptrace.h>
26#include <asm/cputype.h>
27#include <asm/kvm_arm.h>
28#include <asm/kvm_coproc.h>
29
30/******************************************************************************
31 * Cortex-A15 Reset Values
32 */
33
34static const int a15_max_cpu_idx = 3;
35
36static struct kvm_regs a15_regs_reset = {
37 .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
38};
39
40
41/*******************************************************************************
42 * Exported reset function
43 */
44
45/**
46 * kvm_reset_vcpu - sets core registers and cp15 registers to reset value
47 * @vcpu: The VCPU pointer
48 *
49 * This function finds the right table above and sets the registers on the
50 * virtual CPU struct to their architecturally defined reset values.
51 */
52int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
53{
54 struct kvm_regs *cpu_reset;
55
56 switch (vcpu->arch.target) {
57 case KVM_ARM_TARGET_CORTEX_A15:
58 if (vcpu->vcpu_id > a15_max_cpu_idx)
59 return -EINVAL;
60 cpu_reset = &a15_regs_reset;
61 vcpu->arch.midr = read_cpuid_id();
62 break;
63 default:
64 return -ENODEV;
65 }
66
67 /* Reset core registers */
68 memcpy(&vcpu->arch.regs, cpu_reset, sizeof(vcpu->arch.regs));
69
70 /* Reset CP15 registers */
71 kvm_reset_coprocs(vcpu);
72
73 return 0;
74}
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
new file mode 100644
index 000000000000..a8e73ed5ad5b
--- /dev/null
+++ b/arch/arm/kvm/trace.h
@@ -0,0 +1,235 @@
1#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
2#define _TRACE_KVM_H
3
4#include <linux/tracepoint.h>
5
6#undef TRACE_SYSTEM
7#define TRACE_SYSTEM kvm
8
9/*
10 * Tracepoints for entry/exit to guest
11 */
12TRACE_EVENT(kvm_entry,
13 TP_PROTO(unsigned long vcpu_pc),
14 TP_ARGS(vcpu_pc),
15
16 TP_STRUCT__entry(
17 __field( unsigned long, vcpu_pc )
18 ),
19
20 TP_fast_assign(
21 __entry->vcpu_pc = vcpu_pc;
22 ),
23
24 TP_printk("PC: 0x%08lx", __entry->vcpu_pc)
25);
26
27TRACE_EVENT(kvm_exit,
28 TP_PROTO(unsigned long vcpu_pc),
29 TP_ARGS(vcpu_pc),
30
31 TP_STRUCT__entry(
32 __field( unsigned long, vcpu_pc )
33 ),
34
35 TP_fast_assign(
36 __entry->vcpu_pc = vcpu_pc;
37 ),
38
39 TP_printk("PC: 0x%08lx", __entry->vcpu_pc)
40);
41
42TRACE_EVENT(kvm_guest_fault,
43 TP_PROTO(unsigned long vcpu_pc, unsigned long hsr,
44 unsigned long hxfar,
45 unsigned long long ipa),
46 TP_ARGS(vcpu_pc, hsr, hxfar, ipa),
47
48 TP_STRUCT__entry(
49 __field( unsigned long, vcpu_pc )
50 __field( unsigned long, hsr )
51 __field( unsigned long, hxfar )
52 __field( unsigned long long, ipa )
53 ),
54
55 TP_fast_assign(
56 __entry->vcpu_pc = vcpu_pc;
57 __entry->hsr = hsr;
58 __entry->hxfar = hxfar;
59 __entry->ipa = ipa;
60 ),
61
62 TP_printk("guest fault at PC %#08lx (hxfar %#08lx, "
63		  "ipa %#16llx, hsr %#08lx)",
64 __entry->vcpu_pc, __entry->hxfar,
65 __entry->ipa, __entry->hsr)
66);
67
68TRACE_EVENT(kvm_irq_line,
69 TP_PROTO(unsigned int type, int vcpu_idx, int irq_num, int level),
70 TP_ARGS(type, vcpu_idx, irq_num, level),
71
72 TP_STRUCT__entry(
73 __field( unsigned int, type )
74 __field( int, vcpu_idx )
75 __field( int, irq_num )
76 __field( int, level )
77 ),
78
79 TP_fast_assign(
80 __entry->type = type;
81 __entry->vcpu_idx = vcpu_idx;
82 __entry->irq_num = irq_num;
83 __entry->level = level;
84 ),
85
86 TP_printk("Inject %s interrupt (%d), vcpu->idx: %d, num: %d, level: %d",
87 (__entry->type == KVM_ARM_IRQ_TYPE_CPU) ? "CPU" :
88 (__entry->type == KVM_ARM_IRQ_TYPE_PPI) ? "VGIC PPI" :
89 (__entry->type == KVM_ARM_IRQ_TYPE_SPI) ? "VGIC SPI" : "UNKNOWN",
90 __entry->type, __entry->vcpu_idx, __entry->irq_num, __entry->level)
91);
92
93TRACE_EVENT(kvm_mmio_emulate,
94 TP_PROTO(unsigned long vcpu_pc, unsigned long instr,
95 unsigned long cpsr),
96 TP_ARGS(vcpu_pc, instr, cpsr),
97
98 TP_STRUCT__entry(
99 __field( unsigned long, vcpu_pc )
100 __field( unsigned long, instr )
101 __field( unsigned long, cpsr )
102 ),
103
104 TP_fast_assign(
105 __entry->vcpu_pc = vcpu_pc;
106 __entry->instr = instr;
107 __entry->cpsr = cpsr;
108 ),
109
110 TP_printk("Emulate MMIO at: 0x%08lx (instr: %08lx, cpsr: %08lx)",
111 __entry->vcpu_pc, __entry->instr, __entry->cpsr)
112);
113
114/* Architecturally implementation defined CP15 register access */
115TRACE_EVENT(kvm_emulate_cp15_imp,
116 TP_PROTO(unsigned long Op1, unsigned long Rt1, unsigned long CRn,
117 unsigned long CRm, unsigned long Op2, bool is_write),
118 TP_ARGS(Op1, Rt1, CRn, CRm, Op2, is_write),
119
120 TP_STRUCT__entry(
121 __field( unsigned int, Op1 )
122 __field( unsigned int, Rt1 )
123 __field( unsigned int, CRn )
124 __field( unsigned int, CRm )
125 __field( unsigned int, Op2 )
126 __field( bool, is_write )
127 ),
128
129 TP_fast_assign(
130 __entry->is_write = is_write;
131 __entry->Op1 = Op1;
132 __entry->Rt1 = Rt1;
133 __entry->CRn = CRn;
134 __entry->CRm = CRm;
135 __entry->Op2 = Op2;
136 ),
137
138 TP_printk("Implementation defined CP15: %s\tp15, %u, r%u, c%u, c%u, %u",
139 (__entry->is_write) ? "mcr" : "mrc",
140 __entry->Op1, __entry->Rt1, __entry->CRn,
141 __entry->CRm, __entry->Op2)
142);
143
144TRACE_EVENT(kvm_wfi,
145 TP_PROTO(unsigned long vcpu_pc),
146 TP_ARGS(vcpu_pc),
147
148 TP_STRUCT__entry(
149 __field( unsigned long, vcpu_pc )
150 ),
151
152 TP_fast_assign(
153 __entry->vcpu_pc = vcpu_pc;
154 ),
155
156 TP_printk("guest executed wfi at: 0x%08lx", __entry->vcpu_pc)
157);
158
159TRACE_EVENT(kvm_unmap_hva,
160 TP_PROTO(unsigned long hva),
161 TP_ARGS(hva),
162
163 TP_STRUCT__entry(
164 __field( unsigned long, hva )
165 ),
166
167 TP_fast_assign(
168 __entry->hva = hva;
169 ),
170
171 TP_printk("mmu notifier unmap hva: %#08lx", __entry->hva)
172);
173
174TRACE_EVENT(kvm_unmap_hva_range,
175 TP_PROTO(unsigned long start, unsigned long end),
176 TP_ARGS(start, end),
177
178 TP_STRUCT__entry(
179 __field( unsigned long, start )
180 __field( unsigned long, end )
181 ),
182
183 TP_fast_assign(
184 __entry->start = start;
185 __entry->end = end;
186 ),
187
188 TP_printk("mmu notifier unmap range: %#08lx -- %#08lx",
189 __entry->start, __entry->end)
190);
191
192TRACE_EVENT(kvm_set_spte_hva,
193 TP_PROTO(unsigned long hva),
194 TP_ARGS(hva),
195
196 TP_STRUCT__entry(
197 __field( unsigned long, hva )
198 ),
199
200 TP_fast_assign(
201 __entry->hva = hva;
202 ),
203
204 TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva)
205);
206
207TRACE_EVENT(kvm_hvc,
208 TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm),
209 TP_ARGS(vcpu_pc, r0, imm),
210
211 TP_STRUCT__entry(
212 __field( unsigned long, vcpu_pc )
213 __field( unsigned long, r0 )
214 __field( unsigned long, imm )
215 ),
216
217 TP_fast_assign(
218 __entry->vcpu_pc = vcpu_pc;
219 __entry->r0 = r0;
220 __entry->imm = imm;
221 ),
222
223	  TP_printk("HVC at 0x%08lx (r0: 0x%08lx, imm: 0x%lx)",
224 __entry->vcpu_pc, __entry->r0, __entry->imm)
225);
226
227#endif /* _TRACE_KVM_H */
228
229#undef TRACE_INCLUDE_PATH
230#define TRACE_INCLUDE_PATH arch/arm/kvm
231#undef TRACE_INCLUDE_FILE
232#define TRACE_INCLUDE_FILE trace
233
234/* This part must be outside protection */
235#include <trace/define_trace.h>
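These TRACE_EVENT() definitions only declare the events; in the standard tracepoint scheme exactly one C file defines CREATE_TRACE_POINTS before including the header so that the generated trace_*() helpers are instantiated, and every other user simply includes it and calls them. A minimal sketch of such a user follows; the function name and call site are invented for illustration and not taken from this series.

#define CREATE_TRACE_POINTS
#include "trace.h"

/* Hypothetical call site: fire the kvm_wfi event with the current guest PC. */
static void example_trace_wfi(unsigned long guest_pc)
{
	trace_kvm_wfi(guest_pc);
}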
diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
new file mode 100644
index 000000000000..17c5ac7d10ed
--- /dev/null
+++ b/arch/arm/kvm/vgic.c
@@ -0,0 +1,1499 @@
1/*
2 * Copyright (C) 2012 ARM Ltd.
3 * Author: Marc Zyngier <marc.zyngier@arm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#include <linux/cpu.h>
20#include <linux/kvm.h>
21#include <linux/kvm_host.h>
22#include <linux/interrupt.h>
23#include <linux/io.h>
24#include <linux/of.h>
25#include <linux/of_address.h>
26#include <linux/of_irq.h>
27
28#include <linux/irqchip/arm-gic.h>
29
30#include <asm/kvm_emulate.h>
31#include <asm/kvm_arm.h>
32#include <asm/kvm_mmu.h>
33
34/*
35 * How the whole thing works (courtesy of Christoffer Dall):
36 *
37 * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if
38 * something is pending
39 * - VGIC pending interrupts are stored on the vgic.irq_state vgic
40 * bitmap (this bitmap is updated by both user land ioctls and guest
41 * mmio ops, and other in-kernel peripherals such as the
42 * arch. timers) and indicate the 'wire' state.
43 * - Every time the bitmap changes, the irq_pending_on_cpu oracle is
44 * recalculated
45 * - To calculate the oracle, we need info for each cpu from
46 * compute_pending_for_cpu, which considers:
47 * - PPI: dist->irq_state & dist->irq_enable
48 * - SPI: dist->irq_state & dist->irq_enable & dist->irq_spi_target
49 *   - irq_spi_target is a 'formatted' version of the GICD_ITARGETSRn
50 * registers, stored on each vcpu. We only keep one bit of
51 * information per interrupt, making sure that only one vcpu can
52 * accept the interrupt.
53 * - The same is true when injecting an interrupt, except that we only
54 * consider a single interrupt at a time. The irq_spi_cpu array
55 * contains the target CPU for each SPI.
56 *
57 * The handling of level interrupts adds some extra complexity. We
58 * need to track when the interrupt has been EOIed, so we can sample
59 * the 'line' again. This is achieved as such:
60 *
61 * - When a level interrupt is moved onto a vcpu, the corresponding
62 * bit in irq_active is set. As long as this bit is set, the line
63 * will be ignored for further interrupts. The interrupt is injected
64 * into the vcpu with the GICH_LR_EOI bit set (generate a
65 * maintenance interrupt on EOI).
66 * - When the interrupt is EOIed, the maintenance interrupt fires,
67 *   and clears the corresponding bit in irq_active. This allows the
68 * interrupt line to be sampled again.
69 */
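The pending "oracle" described above reduces to a few bitwise ANDs per vcpu. The stand-alone user-space model below is an illustrative sketch only; the demo_* names and the 32/32 interrupt split are invented for brevity.

#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

struct demo_dist {
	uint32_t state_private;		/* banked 'wire' state: SGIs + PPIs */
	uint32_t enabled_private;
	uint32_t state_shared;		/* global 'wire' state: SPIs */
	uint32_t enabled_shared;
	uint32_t spi_target;		/* one bit per SPI: routed to this cpu? */
};

/* Mirrors compute_pending_for_cpu(): an enabled pending private interrupt,
 * or an enabled pending SPI that targets this cpu. */
static bool demo_pending_for_cpu(const struct demo_dist *d)
{
	uint32_t pend_private = d->state_private & d->enabled_private;
	uint32_t pend_shared = d->state_shared & d->enabled_shared & d->spi_target;

	return pend_private || pend_shared;
}

int main(void)
{
	struct demo_dist d = {
		.state_shared	= 1u << 5,	/* SPI 37 (32 + 5) asserted... */
		.enabled_shared	= 1u << 5,
		.spi_target	= 0,		/* ...but routed to another cpu */
	};

	printf("pending: %d\n", demo_pending_for_cpu(&d));	/* 0 */
	d.spi_target = 1u << 5;					/* now routed here */
	printf("pending: %d\n", demo_pending_for_cpu(&d));	/* 1 */
	return 0;
}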
70
71#define VGIC_ADDR_UNDEF (-1)
72#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF)
73
74/* Physical address of vgic virtual cpu interface */
75static phys_addr_t vgic_vcpu_base;
76
77/* Virtual control interface base address */
78static void __iomem *vgic_vctrl_base;
79
80static struct device_node *vgic_node;
81
82#define ACCESS_READ_VALUE (1 << 0)
83#define ACCESS_READ_RAZ (0 << 0)
84#define ACCESS_READ_MASK(x) ((x) & (1 << 0))
85#define ACCESS_WRITE_IGNORED (0 << 1)
86#define ACCESS_WRITE_SETBIT (1 << 1)
87#define ACCESS_WRITE_CLEARBIT (2 << 1)
88#define ACCESS_WRITE_VALUE (3 << 1)
89#define ACCESS_WRITE_MASK(x) ((x) & (3 << 1))
90
91static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
92static void vgic_update_state(struct kvm *kvm);
93static void vgic_kick_vcpus(struct kvm *kvm);
94static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
95static u32 vgic_nr_lr;
96
97static unsigned int vgic_maint_irq;
98
99static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
100 int cpuid, u32 offset)
101{
102 offset >>= 2;
103 if (!offset)
104 return x->percpu[cpuid].reg;
105 else
106 return x->shared.reg + offset - 1;
107}
108
109static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x,
110 int cpuid, int irq)
111{
112 if (irq < VGIC_NR_PRIVATE_IRQS)
113 return test_bit(irq, x->percpu[cpuid].reg_ul);
114
115 return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared.reg_ul);
116}
117
118static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
119 int irq, int val)
120{
121 unsigned long *reg;
122
123 if (irq < VGIC_NR_PRIVATE_IRQS) {
124 reg = x->percpu[cpuid].reg_ul;
125 } else {
126 reg = x->shared.reg_ul;
127 irq -= VGIC_NR_PRIVATE_IRQS;
128 }
129
130 if (val)
131 set_bit(irq, reg);
132 else
133 clear_bit(irq, reg);
134}
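The vgic_bitmap helpers above split every per-interrupt flag into a banked private part (SGIs and PPIs, IDs 0-31, one copy per vcpu) and a single shared part (SPIs, IDs 32 and up). A stand-alone model of that indexing, with invented sizes and names:

#include <stdio.h>
#include <stdbool.h>

#define DEMO_NR_PRIVATE	32	/* SGIs + PPIs, banked per vcpu */
#define DEMO_NR_SHARED	96	/* SPIs, one copy for the whole VM */
#define DEMO_NR_CPUS	4

static bool demo_private[DEMO_NR_CPUS][DEMO_NR_PRIVATE];
static bool demo_shared[DEMO_NR_SHARED];

static void demo_set_irq(int cpu, int irq, bool val)
{
	if (irq < DEMO_NR_PRIVATE)
		demo_private[cpu][irq] = val;			/* banked copy */
	else
		demo_shared[irq - DEMO_NR_PRIVATE] = val;	/* global copy */
}

int main(void)
{
	demo_set_irq(1, 27, true);	/* PPI 27: only visible on vcpu1 */
	demo_set_irq(0, 40, true);	/* SPI 40: shared by every vcpu */
	printf("vcpu0 PPI27=%d vcpu1 PPI27=%d SPI40=%d\n",
	       demo_private[0][27], demo_private[1][27],
	       demo_shared[40 - DEMO_NR_PRIVATE]);
	return 0;
}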
135
136static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid)
137{
138 if (unlikely(cpuid >= VGIC_MAX_CPUS))
139 return NULL;
140 return x->percpu[cpuid].reg_ul;
141}
142
143static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x)
144{
145 return x->shared.reg_ul;
146}
147
148static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset)
149{
150 offset >>= 2;
151 BUG_ON(offset > (VGIC_NR_IRQS / 4));
152 if (offset < 4)
153 return x->percpu[cpuid] + offset;
154 else
155 return x->shared + offset - 8;
156}
157
158#define VGIC_CFG_LEVEL 0
159#define VGIC_CFG_EDGE 1
160
161static bool vgic_irq_is_edge(struct kvm_vcpu *vcpu, int irq)
162{
163 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
164 int irq_val;
165
166 irq_val = vgic_bitmap_get_irq_val(&dist->irq_cfg, vcpu->vcpu_id, irq);
167 return irq_val == VGIC_CFG_EDGE;
168}
169
170static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq)
171{
172 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
173
174 return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq);
175}
176
177static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq)
178{
179 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
180
181 return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq);
182}
183
184static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq)
185{
186 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
187
188 vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1);
189}
190
191static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
192{
193 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
194
195 vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0);
196}
197
198static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
199{
200 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
201
202 return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq);
203}
204
205static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq)
206{
207 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
208
209 vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 1);
210}
211
212static void vgic_dist_irq_clear(struct kvm_vcpu *vcpu, int irq)
213{
214 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
215
216 vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 0);
217}
218
219static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq)
220{
221 if (irq < VGIC_NR_PRIVATE_IRQS)
222 set_bit(irq, vcpu->arch.vgic_cpu.pending_percpu);
223 else
224 set_bit(irq - VGIC_NR_PRIVATE_IRQS,
225 vcpu->arch.vgic_cpu.pending_shared);
226}
227
228static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
229{
230 if (irq < VGIC_NR_PRIVATE_IRQS)
231 clear_bit(irq, vcpu->arch.vgic_cpu.pending_percpu);
232 else
233 clear_bit(irq - VGIC_NR_PRIVATE_IRQS,
234 vcpu->arch.vgic_cpu.pending_shared);
235}
236
237static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
238{
239 return *((u32 *)mmio->data) & mask;
240}
241
242static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value)
243{
244 *((u32 *)mmio->data) = value & mask;
245}
246
247/**
248 * vgic_reg_access - access vgic register
249 * @mmio: pointer to the data describing the mmio access
250 * @reg: pointer to the virtual backing of vgic distributor data
251 * @offset: least significant 2 bits used for word offset
252 * @mode: ACCESS_ mode (see defines above)
253 *
254 * Helper to make vgic register access easier using one of the access
255 * modes defined for vgic register access
256 * (read,raz,write-ignored,setbit,clearbit,write)
257 */
258static void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg,
259 phys_addr_t offset, int mode)
260{
261 int word_offset = (offset & 3) * 8;
262 u32 mask = (1UL << (mmio->len * 8)) - 1;
263 u32 regval;
264
265 /*
266 * Any alignment fault should have been delivered to the guest
267 * directly (ARM ARM B3.12.7 "Prioritization of aborts").
268 */
269
270 if (reg) {
271 regval = *reg;
272 } else {
273 BUG_ON(mode != (ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED));
274 regval = 0;
275 }
276
277 if (mmio->is_write) {
278 u32 data = mmio_data_read(mmio, mask) << word_offset;
279 switch (ACCESS_WRITE_MASK(mode)) {
280 case ACCESS_WRITE_IGNORED:
281 return;
282
283 case ACCESS_WRITE_SETBIT:
284 regval |= data;
285 break;
286
287 case ACCESS_WRITE_CLEARBIT:
288 regval &= ~data;
289 break;
290
291 case ACCESS_WRITE_VALUE:
292 regval = (regval & ~(mask << word_offset)) | data;
293 break;
294 }
295 *reg = regval;
296 } else {
297 switch (ACCESS_READ_MASK(mode)) {
298 case ACCESS_READ_RAZ:
299 regval = 0;
300 /* fall through */
301
302 case ACCESS_READ_VALUE:
303 mmio_data_write(mmio, mask, regval >> word_offset);
304 }
305 }
306}
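The mode argument packs one read behaviour (bit 0) and one write behaviour (bits 2:1), so a handler can describe a register such as a set-enable register as "read the value, set bits on write" with a single flag word. The stand-alone model below reuses the same encoding; the demo_* names are invented.

#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

/* Same encoding as the ACCESS_* flags above. */
#define DEMO_READ_RAZ		(0 << 0)
#define DEMO_READ_VALUE		(1 << 0)
#define DEMO_WRITE_IGNORED	(0 << 1)
#define DEMO_WRITE_SETBIT	(1 << 1)
#define DEMO_WRITE_CLEARBIT	(2 << 1)
#define DEMO_WRITE_VALUE	(3 << 1)

static uint32_t demo_access(uint32_t reg, uint32_t data, bool is_write, int mode)
{
	if (!is_write)
		return (mode & 1) ? reg : 0;		/* VALUE vs RAZ */

	switch (mode & (3 << 1)) {
	case DEMO_WRITE_SETBIT:		return reg | data;
	case DEMO_WRITE_CLEARBIT:	return reg & ~data;
	case DEMO_WRITE_VALUE:		return data;
	default:			return reg;	/* write ignored */
	}
}

int main(void)
{
	uint32_t enable_reg = 0x0000000f;
	int mode = DEMO_READ_VALUE | DEMO_WRITE_SETBIT;

	enable_reg = demo_access(enable_reg, 1u << 8, true, mode);
	printf("after set-bit write: %#x\n", enable_reg);	/* 0x10f */
	printf("read back:           %#x\n",
	       demo_access(enable_reg, 0, false, mode));
	return 0;
}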
307
308static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
309 struct kvm_exit_mmio *mmio, phys_addr_t offset)
310{
311 u32 reg;
312 u32 word_offset = offset & 3;
313
314 switch (offset & ~3) {
315 case 0: /* CTLR */
316 reg = vcpu->kvm->arch.vgic.enabled;
317 vgic_reg_access(mmio, &reg, word_offset,
318 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
319 if (mmio->is_write) {
320 vcpu->kvm->arch.vgic.enabled = reg & 1;
321 vgic_update_state(vcpu->kvm);
322 return true;
323 }
324 break;
325
326 case 4: /* TYPER */
327 reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
328 reg |= (VGIC_NR_IRQS >> 5) - 1;
329 vgic_reg_access(mmio, &reg, word_offset,
330 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
331 break;
332
333 case 8: /* IIDR */
334 reg = 0x4B00043B;
335 vgic_reg_access(mmio, &reg, word_offset,
336 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
337 break;
338 }
339
340 return false;
341}
342
343static bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu,
344 struct kvm_exit_mmio *mmio, phys_addr_t offset)
345{
346 vgic_reg_access(mmio, NULL, offset,
347 ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
348 return false;
349}
350
351static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu,
352 struct kvm_exit_mmio *mmio,
353 phys_addr_t offset)
354{
355 u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
356 vcpu->vcpu_id, offset);
357 vgic_reg_access(mmio, reg, offset,
358 ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
359 if (mmio->is_write) {
360 vgic_update_state(vcpu->kvm);
361 return true;
362 }
363
364 return false;
365}
366
367static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu,
368 struct kvm_exit_mmio *mmio,
369 phys_addr_t offset)
370{
371 u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
372 vcpu->vcpu_id, offset);
373 vgic_reg_access(mmio, reg, offset,
374 ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
375 if (mmio->is_write) {
376 if (offset < 4) /* Force SGI enabled */
377 *reg |= 0xffff;
378 vgic_retire_disabled_irqs(vcpu);
379 vgic_update_state(vcpu->kvm);
380 return true;
381 }
382
383 return false;
384}
385
386static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
387 struct kvm_exit_mmio *mmio,
388 phys_addr_t offset)
389{
390 u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
391 vcpu->vcpu_id, offset);
392 vgic_reg_access(mmio, reg, offset,
393 ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
394 if (mmio->is_write) {
395 vgic_update_state(vcpu->kvm);
396 return true;
397 }
398
399 return false;
400}
401
402static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
403 struct kvm_exit_mmio *mmio,
404 phys_addr_t offset)
405{
406 u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
407 vcpu->vcpu_id, offset);
408 vgic_reg_access(mmio, reg, offset,
409 ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
410 if (mmio->is_write) {
411 vgic_update_state(vcpu->kvm);
412 return true;
413 }
414
415 return false;
416}
417
418static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu,
419 struct kvm_exit_mmio *mmio,
420 phys_addr_t offset)
421{
422 u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority,
423 vcpu->vcpu_id, offset);
424 vgic_reg_access(mmio, reg, offset,
425 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
426 return false;
427}
428
429#define GICD_ITARGETSR_SIZE 32
430#define GICD_CPUTARGETS_BITS 8
431#define GICD_IRQS_PER_ITARGETSR (GICD_ITARGETSR_SIZE / GICD_CPUTARGETS_BITS)
432static u32 vgic_get_target_reg(struct kvm *kvm, int irq)
433{
434 struct vgic_dist *dist = &kvm->arch.vgic;
435 struct kvm_vcpu *vcpu;
436 int i, c;
437 unsigned long *bmap;
438 u32 val = 0;
439
440 irq -= VGIC_NR_PRIVATE_IRQS;
441
442 kvm_for_each_vcpu(c, vcpu, kvm) {
443 bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]);
444 for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++)
445 if (test_bit(irq + i, bmap))
446 val |= 1 << (c + i * 8);
447 }
448
449 return val;
450}
451
452static void vgic_set_target_reg(struct kvm *kvm, u32 val, int irq)
453{
454 struct vgic_dist *dist = &kvm->arch.vgic;
455 struct kvm_vcpu *vcpu;
456 int i, c;
457 unsigned long *bmap;
458 u32 target;
459
460 irq -= VGIC_NR_PRIVATE_IRQS;
461
462 /*
463 * Pick the LSB in each byte. This ensures we target exactly
464 * one vcpu per IRQ. If the byte is null, assume we target
465 * CPU0.
466 */
467 for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) {
468 int shift = i * GICD_CPUTARGETS_BITS;
469 target = ffs((val >> shift) & 0xffU);
470 target = target ? (target - 1) : 0;
471 dist->irq_spi_cpu[irq + i] = target;
472 kvm_for_each_vcpu(c, vcpu, kvm) {
473 bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]);
474 if (c == target)
475 set_bit(irq + i, bmap);
476 else
477 clear_bit(irq + i, bmap);
478 }
479 }
480}
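Because the model allows only one target vcpu per SPI, vgic_set_target_reg() keeps just the lowest set bit of each GICD_ITARGETSRn byte, falling back to CPU0 when the byte is zero. A stand-alone demonstration of that rule (the helper name is invented):

#include <stdio.h>
#include <stdint.h>
#include <strings.h>	/* ffs() */

static int demo_pick_target(uint8_t targets_byte)
{
	int target = ffs(targets_byte);

	return target ? target - 1 : 0;
}

int main(void)
{
	printf("0x01 -> CPU%d\n", demo_pick_target(0x01));	/* CPU0 */
	printf("0x06 -> CPU%d\n", demo_pick_target(0x06));	/* CPU1: lowest of {1, 2} */
	printf("0x00 -> CPU%d\n", demo_pick_target(0x00));	/* CPU0 by convention */
	return 0;
}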
481
482static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu,
483 struct kvm_exit_mmio *mmio,
484 phys_addr_t offset)
485{
486 u32 reg;
487
488 /* We treat the banked interrupts targets as read-only */
489 if (offset < 32) {
490 u32 roreg = 1 << vcpu->vcpu_id;
491 roreg |= roreg << 8;
492 roreg |= roreg << 16;
493
494 vgic_reg_access(mmio, &roreg, offset,
495 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
496 return false;
497 }
498
499 reg = vgic_get_target_reg(vcpu->kvm, offset & ~3U);
500 vgic_reg_access(mmio, &reg, offset,
501 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
502 if (mmio->is_write) {
503 vgic_set_target_reg(vcpu->kvm, reg, offset & ~3U);
504 vgic_update_state(vcpu->kvm);
505 return true;
506 }
507
508 return false;
509}
510
511static u32 vgic_cfg_expand(u16 val)
512{
513 u32 res = 0;
514 int i;
515
516 /*
517 * Turn a 16bit value like abcd...mnop into a 32bit word
518 * a0b0c0d0...m0n0o0p0, which is what the HW cfg register is.
519 */
520 for (i = 0; i < 16; i++)
521 res |= ((val >> i) & VGIC_CFG_EDGE) << (2 * i + 1);
522
523 return res;
524}
525
526static u16 vgic_cfg_compress(u32 val)
527{
528 u16 res = 0;
529 int i;
530
531 /*
532 * Turn a 32bit word a0b0c0d0...m0n0o0p0 into 16bit value like
533 * abcd...mnop which is what we really care about.
534 */
535 for (i = 0; i < 16; i++)
536 res |= ((val >> (i * 2 + 1)) & VGIC_CFG_EDGE) << i;
537
538 return res;
539}
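Only the upper bit of each 2-bit ICFGR field carries information, so the code stores 16 one-bit flags and expands or compresses them on access. A quick stand-alone round-trip check of the same bit shuffling (illustrative only):

#include <stdint.h>
#include <stdio.h>
#include <assert.h>

/* Same transform as vgic_cfg_expand(): abcd...p -> a0b0c0d0...p0. */
static uint32_t demo_cfg_expand(uint16_t val)
{
	uint32_t res = 0;
	int i;

	for (i = 0; i < 16; i++)
		res |= (uint32_t)((val >> i) & 1) << (2 * i + 1);
	return res;
}

/* Inverse transform, as in vgic_cfg_compress(). */
static uint16_t demo_cfg_compress(uint32_t val)
{
	uint16_t res = 0;
	int i;

	for (i = 0; i < 16; i++)
		res |= ((val >> (2 * i + 1)) & 1) << i;
	return res;
}

int main(void)
{
	uint16_t cfg = 0xA5F0;

	assert(demo_cfg_compress(demo_cfg_expand(cfg)) == cfg);
	printf("0x%04x expands to 0x%08x\n", (unsigned)cfg,
	       (unsigned)demo_cfg_expand(cfg));
	return 0;
}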
540
541/*
542 * The distributor uses 2 bits per IRQ for the CFG register, but the
543 * LSB is always 0. As such, we only keep the upper bit, and use the
544 * two above functions to compress/expand the bits
545 */
546static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
547 struct kvm_exit_mmio *mmio, phys_addr_t offset)
548{
549 u32 val;
550 u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
551 vcpu->vcpu_id, offset >> 1);
552 if (offset & 2)
553 val = *reg >> 16;
554 else
555 val = *reg & 0xffff;
556
557 val = vgic_cfg_expand(val);
558 vgic_reg_access(mmio, &val, offset,
559 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
560 if (mmio->is_write) {
561 if (offset < 4) {
562 *reg = ~0U; /* Force PPIs/SGIs to 1 */
563 return false;
564 }
565
566 val = vgic_cfg_compress(val);
567 if (offset & 2) {
568 *reg &= 0xffff;
569 *reg |= val << 16;
570 } else {
571 *reg &= 0xffff << 16;
572 *reg |= val;
573 }
574 }
575
576 return false;
577}
578
579static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
580 struct kvm_exit_mmio *mmio, phys_addr_t offset)
581{
582 u32 reg;
583 vgic_reg_access(mmio, &reg, offset,
584 ACCESS_READ_RAZ | ACCESS_WRITE_VALUE);
585 if (mmio->is_write) {
586 vgic_dispatch_sgi(vcpu, reg);
587 vgic_update_state(vcpu->kvm);
588 return true;
589 }
590
591 return false;
592}
593
594/*
595 * I would have liked to use the kvm_bus_io_*() API instead, but it
596 * cannot cope with banked registers (only the VM pointer is passed
597 * around, and we need the vcpu). One of these days, someone please
598 * fix it!
599 */
600struct mmio_range {
601 phys_addr_t base;
602 unsigned long len;
603 bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
604 phys_addr_t offset);
605};
606
607static const struct mmio_range vgic_ranges[] = {
608 {
609 .base = GIC_DIST_CTRL,
610 .len = 12,
611 .handle_mmio = handle_mmio_misc,
612 },
613 {
614 .base = GIC_DIST_IGROUP,
615 .len = VGIC_NR_IRQS / 8,
616 .handle_mmio = handle_mmio_raz_wi,
617 },
618 {
619 .base = GIC_DIST_ENABLE_SET,
620 .len = VGIC_NR_IRQS / 8,
621 .handle_mmio = handle_mmio_set_enable_reg,
622 },
623 {
624 .base = GIC_DIST_ENABLE_CLEAR,
625 .len = VGIC_NR_IRQS / 8,
626 .handle_mmio = handle_mmio_clear_enable_reg,
627 },
628 {
629 .base = GIC_DIST_PENDING_SET,
630 .len = VGIC_NR_IRQS / 8,
631 .handle_mmio = handle_mmio_set_pending_reg,
632 },
633 {
634 .base = GIC_DIST_PENDING_CLEAR,
635 .len = VGIC_NR_IRQS / 8,
636 .handle_mmio = handle_mmio_clear_pending_reg,
637 },
638 {
639 .base = GIC_DIST_ACTIVE_SET,
640 .len = VGIC_NR_IRQS / 8,
641 .handle_mmio = handle_mmio_raz_wi,
642 },
643 {
644 .base = GIC_DIST_ACTIVE_CLEAR,
645 .len = VGIC_NR_IRQS / 8,
646 .handle_mmio = handle_mmio_raz_wi,
647 },
648 {
649 .base = GIC_DIST_PRI,
650 .len = VGIC_NR_IRQS,
651 .handle_mmio = handle_mmio_priority_reg,
652 },
653 {
654 .base = GIC_DIST_TARGET,
655 .len = VGIC_NR_IRQS,
656 .handle_mmio = handle_mmio_target_reg,
657 },
658 {
659 .base = GIC_DIST_CONFIG,
660 .len = VGIC_NR_IRQS / 4,
661 .handle_mmio = handle_mmio_cfg_reg,
662 },
663 {
664 .base = GIC_DIST_SOFTINT,
665 .len = 4,
666 .handle_mmio = handle_mmio_sgi_reg,
667 },
668 {}
669};
670
671static const
672struct mmio_range *find_matching_range(const struct mmio_range *ranges,
673 struct kvm_exit_mmio *mmio,
674 phys_addr_t base)
675{
676 const struct mmio_range *r = ranges;
677 phys_addr_t addr = mmio->phys_addr - base;
678
679 while (r->len) {
680 if (addr >= r->base &&
681 (addr + mmio->len) <= (r->base + r->len))
682 return r;
683 r++;
684 }
685
686 return NULL;
687}
688
689/**
690 * vgic_handle_mmio - handle an in-kernel MMIO access
691 * @vcpu: pointer to the vcpu performing the access
692 * @run: pointer to the kvm_run structure
693 * @mmio: pointer to the data describing the access
694 *
695 * returns true if the MMIO access has been performed in kernel space,
696 * and false if it needs to be emulated in user space.
697 */
698bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
699 struct kvm_exit_mmio *mmio)
700{
701 const struct mmio_range *range;
702 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
703 unsigned long base = dist->vgic_dist_base;
704 bool updated_state;
705 unsigned long offset;
706
707 if (!irqchip_in_kernel(vcpu->kvm) ||
708 mmio->phys_addr < base ||
709 (mmio->phys_addr + mmio->len) > (base + KVM_VGIC_V2_DIST_SIZE))
710 return false;
711
712 /* We don't support ldrd / strd or ldm / stm to the emulated vgic */
713 if (mmio->len > 4) {
714 kvm_inject_dabt(vcpu, mmio->phys_addr);
715 return true;
716 }
717
718 range = find_matching_range(vgic_ranges, mmio, base);
719 if (unlikely(!range || !range->handle_mmio)) {
720 pr_warn("Unhandled access %d %08llx %d\n",
721 mmio->is_write, mmio->phys_addr, mmio->len);
722 return false;
723 }
724
725 spin_lock(&vcpu->kvm->arch.vgic.lock);
726 offset = mmio->phys_addr - range->base - base;
727 updated_state = range->handle_mmio(vcpu, mmio, offset);
728 spin_unlock(&vcpu->kvm->arch.vgic.lock);
729 kvm_prepare_mmio(run, mmio);
730 kvm_handle_mmio_return(vcpu, run);
731
732 if (updated_state)
733 vgic_kick_vcpus(vcpu->kvm);
734
735 return true;
736}
737
738static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
739{
740 struct kvm *kvm = vcpu->kvm;
741 struct vgic_dist *dist = &kvm->arch.vgic;
742 int nrcpus = atomic_read(&kvm->online_vcpus);
743 u8 target_cpus;
744 int sgi, mode, c, vcpu_id;
745
746 vcpu_id = vcpu->vcpu_id;
747
748 sgi = reg & 0xf;
749 target_cpus = (reg >> 16) & 0xff;
750 mode = (reg >> 24) & 3;
751
752 switch (mode) {
753 case 0:
754 if (!target_cpus)
755 return;
756		break;
757 case 1:
758 target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff;
759 break;
760
761 case 2:
762 target_cpus = 1 << vcpu_id;
763 break;
764 }
765
766 kvm_for_each_vcpu(c, vcpu, kvm) {
767 if (target_cpus & 1) {
768 /* Flag the SGI as pending */
769 vgic_dist_irq_set(vcpu, sgi);
770 dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id;
771 kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
772 }
773
774 target_cpus >>= 1;
775 }
776}
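The GICD_SGIR value decoded above carries the SGI number in bits [3:0], the target list in bits [23:16] and the target-list filter in bits [25:24]. A stand-alone decoder mirroring the three filter modes (field layout taken from the code above; names are invented):

#include <stdint.h>
#include <stdio.h>

/* Return the effective set of target cpus for a GICD_SGIR write. */
static uint8_t demo_sgi_targets(uint32_t reg, int self, int nr_cpus)
{
	uint8_t target_cpus = (reg >> 16) & 0xff;
	int mode = (reg >> 24) & 3;

	switch (mode) {
	case 0:				/* use the target list as written */
		break;
	case 1:				/* every cpu except the sender */
		target_cpus = ((1 << nr_cpus) - 1) & ~(1 << self) & 0xff;
		break;
	case 2:				/* only the sender itself */
		target_cpus = 1 << self;
		break;
	}
	return target_cpus;
}

int main(void)
{
	uint32_t reg = (0 << 24) | (0x04 << 16) | 3;	/* SGI3 to CPU2 only */

	printf("SGI%u -> targets %#x\n", reg & 0xf,
	       (unsigned)demo_sgi_targets(reg, 0, 4));		/* 0x4 */
	printf("broadcast from CPU0 -> targets %#x\n",
	       (unsigned)demo_sgi_targets(1u << 24, 0, 4));	/* 0xe */
	return 0;
}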
777
778static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
779{
780 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
781 unsigned long *pending, *enabled, *pend_percpu, *pend_shared;
782 unsigned long pending_private, pending_shared;
783 int vcpu_id;
784
785 vcpu_id = vcpu->vcpu_id;
786 pend_percpu = vcpu->arch.vgic_cpu.pending_percpu;
787 pend_shared = vcpu->arch.vgic_cpu.pending_shared;
788
789 pending = vgic_bitmap_get_cpu_map(&dist->irq_state, vcpu_id);
790 enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
791 bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS);
792
793 pending = vgic_bitmap_get_shared_map(&dist->irq_state);
794 enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
795 bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS);
796 bitmap_and(pend_shared, pend_shared,
797 vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
798 VGIC_NR_SHARED_IRQS);
799
800 pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS);
801 pending_shared = find_first_bit(pend_shared, VGIC_NR_SHARED_IRQS);
802 return (pending_private < VGIC_NR_PRIVATE_IRQS ||
803 pending_shared < VGIC_NR_SHARED_IRQS);
804}
805
806/*
807 * Update the interrupt state and determine which CPUs have pending
808 * interrupts. Must be called with distributor lock held.
809 */
810static void vgic_update_state(struct kvm *kvm)
811{
812 struct vgic_dist *dist = &kvm->arch.vgic;
813 struct kvm_vcpu *vcpu;
814 int c;
815
816 if (!dist->enabled) {
817 set_bit(0, &dist->irq_pending_on_cpu);
818 return;
819 }
820
821 kvm_for_each_vcpu(c, vcpu, kvm) {
822 if (compute_pending_for_cpu(vcpu)) {
823 pr_debug("CPU%d has pending interrupts\n", c);
824 set_bit(c, &dist->irq_pending_on_cpu);
825 }
826 }
827}
828
829#define LR_CPUID(lr) \
830 (((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT)
831#define MK_LR_PEND(src, irq) \
832 (GICH_LR_PENDING_BIT | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq))
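A GICv2 list register packs the virtual interrupt ID in its low bits, the requesting CPU ID (for SGIs) just above it, and the state bits near the top; that is what LR_CPUID() and MK_LR_PEND() manipulate. The sketch below uses locally defined constants chosen to match the GICv2 layout; they are assumptions for illustration, not the kernel's GICH_LR_* definitions.

#include <stdint.h>
#include <stdio.h>

/* Assumed layout: virtual id [9:0], source cpu id [12:10], pending bit 28. */
#define DEMO_LR_VIRTUALID_MASK	0x3ffu
#define DEMO_LR_CPUID_SHIFT	10
#define DEMO_LR_CPUID_MASK	(0x7u << DEMO_LR_CPUID_SHIFT)
#define DEMO_LR_PENDING_BIT	(1u << 28)

static uint32_t demo_mk_lr_pend(uint32_t src, uint32_t irq)
{
	return DEMO_LR_PENDING_BIT | (src << DEMO_LR_CPUID_SHIFT) | irq;
}

int main(void)
{
	uint32_t lr = demo_mk_lr_pend(2, 5);	/* SGI5 from vcpu2, pending */

	printf("lr=%#010x irq=%u src=%u pending=%d\n", lr,
	       lr & DEMO_LR_VIRTUALID_MASK,
	       (lr & DEMO_LR_CPUID_MASK) >> DEMO_LR_CPUID_SHIFT,
	       !!(lr & DEMO_LR_PENDING_BIT));
	return 0;
}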
833
834/*
835 * An interrupt may have been disabled after being made pending on the
836 * CPU interface (the classic case is a timer running while we're
837 * rebooting the guest - the interrupt would kick as soon as the CPU
838 * interface gets enabled, with deadly consequences).
839 *
840 * The solution is to examine already active LRs, and check the
841 * interrupt is still enabled. If not, just retire it.
842 */
843static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
844{
845 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
846 int lr;
847
848 for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
849 int irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
850
851 if (!vgic_irq_is_enabled(vcpu, irq)) {
852 vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
853 clear_bit(lr, vgic_cpu->lr_used);
854 vgic_cpu->vgic_lr[lr] &= ~GICH_LR_STATE;
855 if (vgic_irq_is_active(vcpu, irq))
856 vgic_irq_clear_active(vcpu, irq);
857 }
858 }
859}
860
861/*
862 * Queue an interrupt to a CPU virtual interface. Return true on success,
863 * or false if it wasn't possible to queue it.
864 */
865static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
866{
867 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
868 int lr;
869
870 /* Sanitize the input... */
871 BUG_ON(sgi_source_id & ~7);
872 BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS);
873 BUG_ON(irq >= VGIC_NR_IRQS);
874
875 kvm_debug("Queue IRQ%d\n", irq);
876
877 lr = vgic_cpu->vgic_irq_lr_map[irq];
878
879 /* Do we have an active interrupt for the same CPUID? */
880 if (lr != LR_EMPTY &&
881 (LR_CPUID(vgic_cpu->vgic_lr[lr]) == sgi_source_id)) {
882 kvm_debug("LR%d piggyback for IRQ%d %x\n",
883 lr, irq, vgic_cpu->vgic_lr[lr]);
884 BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
885 vgic_cpu->vgic_lr[lr] |= GICH_LR_PENDING_BIT;
886 return true;
887 }
888
889 /* Try to use another LR for this interrupt */
890 lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used,
891 vgic_cpu->nr_lr);
892 if (lr >= vgic_cpu->nr_lr)
893 return false;
894
895 kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
896 vgic_cpu->vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq);
897 vgic_cpu->vgic_irq_lr_map[irq] = lr;
898 set_bit(lr, vgic_cpu->lr_used);
899
900 if (!vgic_irq_is_edge(vcpu, irq))
901 vgic_cpu->vgic_lr[lr] |= GICH_LR_EOI;
902
903 return true;
904}
905
906static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
907{
908 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
909 unsigned long sources;
910 int vcpu_id = vcpu->vcpu_id;
911 int c;
912
913 sources = dist->irq_sgi_sources[vcpu_id][irq];
914
915 for_each_set_bit(c, &sources, VGIC_MAX_CPUS) {
916 if (vgic_queue_irq(vcpu, c, irq))
917 clear_bit(c, &sources);
918 }
919
920 dist->irq_sgi_sources[vcpu_id][irq] = sources;
921
922 /*
923 * If the sources bitmap has been cleared it means that we
924 * could queue all the SGIs onto link registers (see the
925 * clear_bit above), and therefore we are done with them in
926 * our emulated gic and can get rid of them.
927 */
928 if (!sources) {
929 vgic_dist_irq_clear(vcpu, irq);
930 vgic_cpu_irq_clear(vcpu, irq);
931 return true;
932 }
933
934 return false;
935}
936
937static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
938{
939 if (vgic_irq_is_active(vcpu, irq))
940 return true; /* level interrupt, already queued */
941
942 if (vgic_queue_irq(vcpu, 0, irq)) {
943 if (vgic_irq_is_edge(vcpu, irq)) {
944 vgic_dist_irq_clear(vcpu, irq);
945 vgic_cpu_irq_clear(vcpu, irq);
946 } else {
947 vgic_irq_set_active(vcpu, irq);
948 }
949
950 return true;
951 }
952
953 return false;
954}
955
956/*
957 * Fill the list registers with pending interrupts before running the
958 * guest.
959 */
960static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
961{
962 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
963 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
964 int i, vcpu_id;
965 int overflow = 0;
966
967 vcpu_id = vcpu->vcpu_id;
968
969 /*
970 * We may not have any pending interrupt, or the interrupts
971 * may have been serviced from another vcpu. In all cases,
972 * move along.
973 */
974 if (!kvm_vgic_vcpu_pending_irq(vcpu)) {
975 pr_debug("CPU%d has no pending interrupt\n", vcpu_id);
976 goto epilog;
977 }
978
979 /* SGIs */
980 for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) {
981 if (!vgic_queue_sgi(vcpu, i))
982 overflow = 1;
983 }
984
985 /* PPIs */
986 for_each_set_bit_from(i, vgic_cpu->pending_percpu, VGIC_NR_PRIVATE_IRQS) {
987 if (!vgic_queue_hwirq(vcpu, i))
988 overflow = 1;
989 }
990
991 /* SPIs */
992 for_each_set_bit(i, vgic_cpu->pending_shared, VGIC_NR_SHARED_IRQS) {
993 if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS))
994 overflow = 1;
995 }
996
997epilog:
998 if (overflow) {
999 vgic_cpu->vgic_hcr |= GICH_HCR_UIE;
1000 } else {
1001 vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE;
1002 /*
1003 * We're about to run this VCPU, and we've consumed
1004 * everything the distributor had in store for
1005 * us. Claim we don't have anything pending. We'll
1006 * adjust that if needed while exiting.
1007 */
1008 clear_bit(vcpu_id, &dist->irq_pending_on_cpu);
1009 }
1010}
1011
1012static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
1013{
1014 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1015 bool level_pending = false;
1016
1017 kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr);
1018
1019 if (vgic_cpu->vgic_misr & GICH_MISR_EOI) {
1020 /*
1021 * Some level interrupts have been EOIed. Clear their
1022 * active bit.
1023 */
1024 int lr, irq;
1025
1026 for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr,
1027 vgic_cpu->nr_lr) {
1028 irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
1029
1030 vgic_irq_clear_active(vcpu, irq);
1031 vgic_cpu->vgic_lr[lr] &= ~GICH_LR_EOI;
1032
1033 /* Any additional pending interrupt? */
1034 if (vgic_dist_irq_is_pending(vcpu, irq)) {
1035 vgic_cpu_irq_set(vcpu, irq);
1036 level_pending = true;
1037 } else {
1038 vgic_cpu_irq_clear(vcpu, irq);
1039 }
1040
1041 /*
1042 * Despite being EOIed, the LR may not have
1043 * been marked as empty.
1044 */
1045 set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr);
1046 vgic_cpu->vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT;
1047 }
1048 }
1049
1050 if (vgic_cpu->vgic_misr & GICH_MISR_U)
1051 vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE;
1052
1053 return level_pending;
1054}
1055
1056/*
1057 * Sync back the VGIC state after a guest run. The distributor lock is
1058 * needed so we don't get preempted in the middle of the state processing.
1059 */
1060static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
1061{
1062 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1063 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1064 int lr, pending;
1065 bool level_pending;
1066
1067 level_pending = vgic_process_maintenance(vcpu);
1068
1069 /* Clear mappings for empty LRs */
1070 for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr,
1071 vgic_cpu->nr_lr) {
1072 int irq;
1073
1074 if (!test_and_clear_bit(lr, vgic_cpu->lr_used))
1075 continue;
1076
1077 irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
1078
1079 BUG_ON(irq >= VGIC_NR_IRQS);
1080 vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
1081 }
1082
1083 /* Check if we still have something up our sleeve... */
1084 pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_elrsr,
1085 vgic_cpu->nr_lr);
1086 if (level_pending || pending < vgic_cpu->nr_lr)
1087 set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
1088}
1089
1090void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
1091{
1092 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1093
1094 if (!irqchip_in_kernel(vcpu->kvm))
1095 return;
1096
1097 spin_lock(&dist->lock);
1098 __kvm_vgic_flush_hwstate(vcpu);
1099 spin_unlock(&dist->lock);
1100}
1101
1102void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
1103{
1104 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1105
1106 if (!irqchip_in_kernel(vcpu->kvm))
1107 return;
1108
1109 spin_lock(&dist->lock);
1110 __kvm_vgic_sync_hwstate(vcpu);
1111 spin_unlock(&dist->lock);
1112}
1113
1114int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
1115{
1116 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1117
1118 if (!irqchip_in_kernel(vcpu->kvm))
1119 return 0;
1120
1121 return test_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
1122}
1123
1124static void vgic_kick_vcpus(struct kvm *kvm)
1125{
1126 struct kvm_vcpu *vcpu;
1127 int c;
1128
1129 /*
1130 * We've injected an interrupt, time to find out who deserves
1131 * a good kick...
1132 */
1133 kvm_for_each_vcpu(c, vcpu, kvm) {
1134 if (kvm_vgic_vcpu_pending_irq(vcpu))
1135 kvm_vcpu_kick(vcpu);
1136 }
1137}
1138
1139static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
1140{
1141 int is_edge = vgic_irq_is_edge(vcpu, irq);
1142 int state = vgic_dist_irq_is_pending(vcpu, irq);
1143
1144 /*
1145 * Only inject an interrupt if:
1146 * - edge triggered and we have a rising edge
1147 * - level triggered and we change level
1148 */
1149 if (is_edge)
1150 return level > state;
1151 else
1152 return level != state;
1153}
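The check above reduces to a small truth table: an edge-triggered line injects only on a rising edge, while a level-triggered line injects on any change of level. A stand-alone rendering of that table (names are invented):

#include <stdio.h>
#include <stdbool.h>

/* Same rule as vgic_validate_injection(). */
static bool demo_should_inject(bool is_edge, int level, int state)
{
	return is_edge ? (level > state) : (level != state);
}

int main(void)
{
	const char *kind[] = { "level", "edge " };
	int e, state, level;

	for (e = 0; e <= 1; e++)
		for (state = 0; state <= 1; state++)
			for (level = 0; level <= 1; level++)
				printf("%s state=%d level=%d -> inject=%d\n",
				       kind[e], state, level,
				       demo_should_inject(e, level, state));
	return 0;
}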
1154
1155static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
1156 unsigned int irq_num, bool level)
1157{
1158 struct vgic_dist *dist = &kvm->arch.vgic;
1159 struct kvm_vcpu *vcpu;
1160 int is_edge, is_level;
1161 int enabled;
1162 bool ret = true;
1163
1164 spin_lock(&dist->lock);
1165
1166 vcpu = kvm_get_vcpu(kvm, cpuid);
1167 is_edge = vgic_irq_is_edge(vcpu, irq_num);
1168 is_level = !is_edge;
1169
1170 if (!vgic_validate_injection(vcpu, irq_num, level)) {
1171 ret = false;
1172 goto out;
1173 }
1174
1175 if (irq_num >= VGIC_NR_PRIVATE_IRQS) {
1176 cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS];
1177 vcpu = kvm_get_vcpu(kvm, cpuid);
1178 }
1179
1180 kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid);
1181
1182 if (level)
1183 vgic_dist_irq_set(vcpu, irq_num);
1184 else
1185 vgic_dist_irq_clear(vcpu, irq_num);
1186
1187 enabled = vgic_irq_is_enabled(vcpu, irq_num);
1188
1189 if (!enabled) {
1190 ret = false;
1191 goto out;
1192 }
1193
1194 if (is_level && vgic_irq_is_active(vcpu, irq_num)) {
1195 /*
1196 * Level interrupt in progress, will be picked up
1197 * when EOId.
1198 */
1199 ret = false;
1200 goto out;
1201 }
1202
1203 if (level) {
1204 vgic_cpu_irq_set(vcpu, irq_num);
1205 set_bit(cpuid, &dist->irq_pending_on_cpu);
1206 }
1207
1208out:
1209 spin_unlock(&dist->lock);
1210
1211 return ret;
1212}
1213
1214/**
1215 * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
1216 * @kvm: The VM structure pointer
1217 * @cpuid: The CPU for PPIs
1218 * @irq_num: The IRQ number that is assigned to the device
1219 * @level: Edge-triggered: true: to trigger the interrupt
1220 * false: to ignore the call
1221 *		  Level-sensitive: true: activates an interrupt
1222 * false: deactivates an interrupt
1223 *
1224 * The GIC is not concerned with devices being active-LOW or active-HIGH for
1225 * level-sensitive interrupts. You can think of the level parameter as 1
1226 * being HIGH and 0 being LOW and all devices being active-HIGH.
1227 */
1228int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
1229 bool level)
1230{
1231 if (vgic_update_irq_state(kvm, cpuid, irq_num, level))
1232 vgic_kick_vcpus(kvm);
1233
1234 return 0;
1235}
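For illustration, an in-kernel device model would drive its output line through the function above roughly as sketched below; the helper name and the PPI number are hypothetical, and the snippet assumes it sits in a file that, like this one, already includes <linux/kvm_host.h>.

/* Hypothetical device output line wired to PPI 27 of its vcpu. */
static void example_device_update_line(struct kvm_vcpu *vcpu, bool asserted)
{
	kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, 27, asserted);
}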
1236
1237static irqreturn_t vgic_maintenance_handler(int irq, void *data)
1238{
1239 /*
1240 * We cannot rely on the vgic maintenance interrupt to be
1241 * delivered synchronously. This means we can only use it to
1242 * exit the VM, and we perform the handling of EOIed
1243 * interrupts on the exit path (see vgic_process_maintenance).
1244 */
1245 return IRQ_HANDLED;
1246}
1247
1248int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
1249{
1250 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1251 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1252 int i;
1253
1254 if (!irqchip_in_kernel(vcpu->kvm))
1255 return 0;
1256
1257 if (vcpu->vcpu_id >= VGIC_MAX_CPUS)
1258 return -EBUSY;
1259
1260 for (i = 0; i < VGIC_NR_IRQS; i++) {
1261 if (i < VGIC_NR_PPIS)
1262 vgic_bitmap_set_irq_val(&dist->irq_enabled,
1263 vcpu->vcpu_id, i, 1);
1264 if (i < VGIC_NR_PRIVATE_IRQS)
1265 vgic_bitmap_set_irq_val(&dist->irq_cfg,
1266 vcpu->vcpu_id, i, VGIC_CFG_EDGE);
1267
1268 vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY;
1269 }
1270
1271 /*
1272 * By forcing VMCR to zero, the GIC will restore the binary
1273 * points to their reset values. Anything else resets to zero
1274 * anyway.
1275 */
1276 vgic_cpu->vgic_vmcr = 0;
1277
1278 vgic_cpu->nr_lr = vgic_nr_lr;
1279 vgic_cpu->vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */
1280
1281 return 0;
1282}
1283
1284static void vgic_init_maintenance_interrupt(void *info)
1285{
1286 enable_percpu_irq(vgic_maint_irq, 0);
1287}
1288
1289static int vgic_cpu_notify(struct notifier_block *self,
1290 unsigned long action, void *cpu)
1291{
1292 switch (action) {
1293 case CPU_STARTING:
1294 case CPU_STARTING_FROZEN:
1295 vgic_init_maintenance_interrupt(NULL);
1296 break;
1297 case CPU_DYING:
1298 case CPU_DYING_FROZEN:
1299 disable_percpu_irq(vgic_maint_irq);
1300 break;
1301 }
1302
1303 return NOTIFY_OK;
1304}
1305
1306static struct notifier_block vgic_cpu_nb = {
1307 .notifier_call = vgic_cpu_notify,
1308};
1309
1310int kvm_vgic_hyp_init(void)
1311{
1312 int ret;
1313 struct resource vctrl_res;
1314 struct resource vcpu_res;
1315
1316 vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic");
1317 if (!vgic_node) {
1318 kvm_err("error: no compatible vgic node in DT\n");
1319 return -ENODEV;
1320 }
1321
1322 vgic_maint_irq = irq_of_parse_and_map(vgic_node, 0);
1323 if (!vgic_maint_irq) {
1324 kvm_err("error getting vgic maintenance irq from DT\n");
1325 ret = -ENXIO;
1326 goto out;
1327 }
1328
1329 ret = request_percpu_irq(vgic_maint_irq, vgic_maintenance_handler,
1330 "vgic", kvm_get_running_vcpus());
1331 if (ret) {
1332 kvm_err("Cannot register interrupt %d\n", vgic_maint_irq);
1333 goto out;
1334 }
1335
1336 ret = register_cpu_notifier(&vgic_cpu_nb);
1337 if (ret) {
1338 kvm_err("Cannot register vgic CPU notifier\n");
1339 goto out_free_irq;
1340 }
1341
1342 ret = of_address_to_resource(vgic_node, 2, &vctrl_res);
1343 if (ret) {
1344 kvm_err("Cannot obtain VCTRL resource\n");
1345 goto out_free_irq;
1346 }
1347
1348 vgic_vctrl_base = of_iomap(vgic_node, 2);
1349 if (!vgic_vctrl_base) {
1350 kvm_err("Cannot ioremap VCTRL\n");
1351 ret = -ENOMEM;
1352 goto out_free_irq;
1353 }
1354
1355 vgic_nr_lr = readl_relaxed(vgic_vctrl_base + GICH_VTR);
1356 vgic_nr_lr = (vgic_nr_lr & 0x3f) + 1;
1357
1358 ret = create_hyp_io_mappings(vgic_vctrl_base,
1359 vgic_vctrl_base + resource_size(&vctrl_res),
1360 vctrl_res.start);
1361 if (ret) {
1362 kvm_err("Cannot map VCTRL into hyp\n");
1363 goto out_unmap;
1364 }
1365
1366 kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
1367 vctrl_res.start, vgic_maint_irq);
1368 on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
1369
1370 if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
1371 kvm_err("Cannot obtain VCPU resource\n");
1372 ret = -ENXIO;
1373 goto out_unmap;
1374 }
1375 vgic_vcpu_base = vcpu_res.start;
1376
1377 goto out;
1378
1379out_unmap:
1380 iounmap(vgic_vctrl_base);
1381out_free_irq:
1382 free_percpu_irq(vgic_maint_irq, kvm_get_running_vcpus());
1383out:
1384 of_node_put(vgic_node);
1385 return ret;
1386}
1387
1388int kvm_vgic_init(struct kvm *kvm)
1389{
1390 int ret = 0, i;
1391
1392 mutex_lock(&kvm->lock);
1393
1394 if (vgic_initialized(kvm))
1395 goto out;
1396
1397 if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
1398 IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) {
1399 kvm_err("Need to set vgic cpu and dist addresses first\n");
1400 ret = -ENXIO;
1401 goto out;
1402 }
1403
1404 ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
1405 vgic_vcpu_base, KVM_VGIC_V2_CPU_SIZE);
1406 if (ret) {
1407 kvm_err("Unable to remap VGIC CPU to VCPU\n");
1408 goto out;
1409 }
1410
1411 for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4)
1412 vgic_set_target_reg(kvm, 0, i);
1413
1414 kvm_timer_init(kvm);
1415 kvm->arch.vgic.ready = true;
1416out:
1417 mutex_unlock(&kvm->lock);
1418 return ret;
1419}
1420
1421int kvm_vgic_create(struct kvm *kvm)
1422{
1423 int ret = 0;
1424
1425 mutex_lock(&kvm->lock);
1426
1427 if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) {
1428 ret = -EEXIST;
1429 goto out;
1430 }
1431
1432 spin_lock_init(&kvm->arch.vgic.lock);
1433 kvm->arch.vgic.vctrl_base = vgic_vctrl_base;
1434 kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
1435 kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
1436
1437out:
1438 mutex_unlock(&kvm->lock);
1439 return ret;
1440}
1441
1442static bool vgic_ioaddr_overlap(struct kvm *kvm)
1443{
1444 phys_addr_t dist = kvm->arch.vgic.vgic_dist_base;
1445 phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base;
1446
1447 if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu))
1448 return 0;
1449 if ((dist <= cpu && dist + KVM_VGIC_V2_DIST_SIZE > cpu) ||
1450 (cpu <= dist && cpu + KVM_VGIC_V2_CPU_SIZE > dist))
1451 return -EBUSY;
1452 return 0;
1453}
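The test above is the usual half-open interval overlap check: two ranges [a, a+len_a) and [b, b+len_b) collide iff each one starts before the other ends. A stand-alone version with made-up addresses and sizes:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

static bool demo_ranges_overlap(uint64_t a, uint64_t len_a,
				uint64_t b, uint64_t len_b)
{
	return (a <= b && a + len_a > b) || (b <= a && b + len_b > a);
}

int main(void)
{
	/* 4K distributor at 0x8000000, cpu interface right after it: ok */
	printf("%d\n", demo_ranges_overlap(0x8000000, 0x1000, 0x8001000, 0x2000));
	/* cpu interface moved down into the distributor page: overlap */
	printf("%d\n", demo_ranges_overlap(0x8000000, 0x1000, 0x8000800, 0x2000));
	return 0;
}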
1454
1455static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
1456 phys_addr_t addr, phys_addr_t size)
1457{
1458 int ret;
1459
1460 if (!IS_VGIC_ADDR_UNDEF(*ioaddr))
1461 return -EEXIST;
1462 if (addr + size < addr)
1463 return -EINVAL;
1464
1465 ret = vgic_ioaddr_overlap(kvm);
1466 if (ret)
1467 return ret;
1468 *ioaddr = addr;
1469 return ret;
1470}
1471
1472int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
1473{
1474 int r = 0;
1475 struct vgic_dist *vgic = &kvm->arch.vgic;
1476
1477 if (addr & ~KVM_PHYS_MASK)
1478 return -E2BIG;
1479
1480 if (addr & (SZ_4K - 1))
1481 return -EINVAL;
1482
1483 mutex_lock(&kvm->lock);
1484 switch (type) {
1485 case KVM_VGIC_V2_ADDR_TYPE_DIST:
1486 r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base,
1487 addr, KVM_VGIC_V2_DIST_SIZE);
1488 break;
1489 case KVM_VGIC_V2_ADDR_TYPE_CPU:
1490 r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base,
1491 addr, KVM_VGIC_V2_CPU_SIZE);
1492 break;
1493 default:
1494 r = -ENODEV;
1495 }
1496
1497 mutex_unlock(&kvm->lock);
1498 return r;
1499}