Diffstat (limited to 'arch')
-rw-r--r--  arch/alpha/kernel/Makefile | 2
-rw-r--r--  arch/alpha/kernel/binfmt_loader.c | 51
-rw-r--r--  arch/alpha/kernel/init_task.c | 1
-rw-r--r--  arch/arm/kernel/init_task.c | 1
-rw-r--r--  arch/avr32/kernel/init_task.c | 1
-rw-r--r--  arch/blackfin/kernel/init_task.c | 1
-rw-r--r--  arch/cris/kernel/process.c | 1
-rw-r--r--  arch/frv/kernel/init_task.c | 1
-rw-r--r--  arch/h8300/kernel/init_task.c | 1
-rw-r--r--  arch/ia64/Kconfig | 3
-rw-r--r--  arch/ia64/include/asm/kvm.h | 6
-rw-r--r--  arch/ia64/include/asm/kvm_host.h | 198
-rw-r--r--  arch/ia64/kernel/init_task.c | 1
-rw-r--r--  arch/ia64/kernel/time.c | 18
-rw-r--r--  arch/ia64/kvm/Makefile | 6
-rw-r--r--  arch/ia64/kvm/asm-offsets.c | 11
-rw-r--r--  arch/ia64/kvm/kvm-ia64.c | 110
-rw-r--r--  arch/ia64/kvm/kvm_lib.c | 15
-rw-r--r--  arch/ia64/kvm/kvm_minstate.h | 4
-rw-r--r--  arch/ia64/kvm/misc.h | 3
-rw-r--r--  arch/ia64/kvm/mmio.c | 38
-rw-r--r--  arch/ia64/kvm/process.c | 29
-rw-r--r--  arch/ia64/kvm/vcpu.c | 76
-rw-r--r--  arch/ia64/kvm/vcpu.h | 5
-rw-r--r--  arch/ia64/kvm/vmm.c | 29
-rw-r--r--  arch/ia64/kvm/vmm_ivt.S | 1469
-rw-r--r--  arch/ia64/kvm/vtlb.c | 4
-rw-r--r--  arch/m32r/kernel/init_task.c | 1
-rw-r--r--  arch/m68k/Kconfig | 1
-rw-r--r--  arch/m68k/kernel/process.c | 1
-rw-r--r--  arch/m68knommu/kernel/init_task.c | 1
-rw-r--r--  arch/mips/kernel/init_task.c | 1
-rw-r--r--  arch/mn10300/kernel/init_task.c | 1
-rw-r--r--  arch/parisc/kernel/init_task.c | 1
-rw-r--r--  arch/powerpc/include/asm/disassemble.h | 80
-rw-r--r--  arch/powerpc/include/asm/kvm_44x.h | 61
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h | 116
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h | 83
-rw-r--r--  arch/powerpc/include/asm/mmu-44x.h | 1
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c | 21
-rw-r--r--  arch/powerpc/kernel/init_task.c | 1
-rw-r--r--  arch/powerpc/kernel/process.c | 1
-rw-r--r--  arch/powerpc/kernel/time.c | 18
-rw-r--r--  arch/powerpc/kvm/44x.c | 228
-rw-r--r--  arch/powerpc/kvm/44x_emulate.c | 371
-rw-r--r--  arch/powerpc/kvm/44x_tlb.c | 463
-rw-r--r--  arch/powerpc/kvm/44x_tlb.h | 26
-rw-r--r--  arch/powerpc/kvm/Kconfig | 28
-rw-r--r--  arch/powerpc/kvm/Makefile | 12
-rw-r--r--  arch/powerpc/kvm/booke.c (renamed from arch/powerpc/kvm/booke_guest.c) | 418
-rw-r--r--  arch/powerpc/kvm/booke.h | 60
-rw-r--r--  arch/powerpc/kvm/booke_host.c | 83
-rw-r--r--  arch/powerpc/kvm/booke_interrupts.S | 72
-rw-r--r--  arch/powerpc/kvm/emulate.c | 447
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 130
-rw-r--r--  arch/powerpc/kvm/timing.c | 239
-rw-r--r--  arch/powerpc/kvm/timing.h | 102
-rw-r--r--  arch/powerpc/oprofile/cell/spu_task_sync.c | 2
-rw-r--r--  arch/s390/include/asm/cpu.h | 7
-rw-r--r--  arch/s390/include/asm/cputime.h | 42
-rw-r--r--  arch/s390/include/asm/lowcore.h | 49
-rw-r--r--  arch/s390/include/asm/system.h | 4
-rw-r--r--  arch/s390/include/asm/thread_info.h | 2
-rw-r--r--  arch/s390/include/asm/timer.h | 16
-rw-r--r--  arch/s390/include/asm/vdso.h | 15
-rw-r--r--  arch/s390/kernel/asm-offsets.c | 5
-rw-r--r--  arch/s390/kernel/entry.S | 5
-rw-r--r--  arch/s390/kernel/entry64.S | 50
-rw-r--r--  arch/s390/kernel/head64.S | 2
-rw-r--r--  arch/s390/kernel/init_task.c | 1
-rw-r--r--  arch/s390/kernel/process.c | 43
-rw-r--r--  arch/s390/kernel/s390_ext.c | 2
-rw-r--r--  arch/s390/kernel/setup.c | 2
-rw-r--r--  arch/s390/kernel/smp.c | 34
-rw-r--r--  arch/s390/kernel/vdso.c | 123
-rw-r--r--  arch/s390/kernel/vdso64/clock_getres.S | 5
-rw-r--r--  arch/s390/kernel/vdso64/clock_gettime.S | 39
-rw-r--r--  arch/s390/kernel/vtime.c | 486
-rw-r--r--  arch/s390/kvm/kvm-s390.c | 41
-rw-r--r--  arch/sh/kernel/init_task.c | 1
-rw-r--r--  arch/sparc/kernel/init_task.c | 1
-rw-r--r--  arch/um/kernel/init_task.c | 1
-rw-r--r--  arch/x86/Kconfig | 13
-rw-r--r--  arch/x86/ia32/ia32_signal.c | 3
-rw-r--r--  arch/x86/ia32/ipc32.c | 1
-rw-r--r--  arch/x86/ia32/sys_ia32.c | 2
-rw-r--r--  arch/x86/include/asm/amd_iommu_types.h | 61
-rw-r--r--  arch/x86/include/asm/apic.h | 3
-rw-r--r--  arch/x86/include/asm/efi.h | 1
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 47
-rw-r--r--  arch/x86/include/asm/kvm_x86_emulate.h | 11
-rw-r--r--  arch/x86/include/asm/mpspec.h | 2
-rw-r--r--  arch/x86/include/asm/mtrr.h | 25
-rw-r--r--  arch/x86/include/asm/pci_x86.h (renamed from arch/x86/pci/pci.h) | 17
-rw-r--r--  arch/x86/include/asm/svm.h (renamed from arch/x86/kvm/svm.h) | 0
-rw-r--r--  arch/x86/include/asm/sys_ia32.h | 101
-rw-r--r--  arch/x86/include/asm/uv/uv_bau.h | 46
-rw-r--r--  arch/x86/include/asm/virtext.h | 132
-rw-r--r--  arch/x86/include/asm/vmx.h (renamed from arch/x86/kvm/vmx.h) | 27
-rw-r--r--  arch/x86/kernel/amd_iommu.c | 666
-rw-r--r--  arch/x86/kernel/amd_iommu_init.c | 19
-rw-r--r--  arch/x86/kernel/apic.c | 8
-rw-r--r--  arch/x86/kernel/bios_uv.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/generic.c | 12
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c | 10
-rw-r--r--  arch/x86/kernel/cpu/mtrr/mtrr.h | 18
-rw-r--r--  arch/x86/kernel/cpuid.c | 6
-rw-r--r--  arch/x86/kernel/crash.c | 18
-rw-r--r--  arch/x86/kernel/early_printk.c | 2
-rw-r--r--  arch/x86/kernel/genx2apic_phys.c | 4
-rw-r--r--  arch/x86/kernel/head64.c | 2
-rw-r--r--  arch/x86/kernel/init_task.c | 1
-rw-r--r--  arch/x86/kernel/io_apic.c | 20
-rw-r--r--  arch/x86/kernel/kvmclock.c | 10
-rw-r--r--  arch/x86/kernel/ldt.c | 4
-rw-r--r--  arch/x86/kernel/mmconf-fam10h_64.c | 3
-rw-r--r--  arch/x86/kernel/mpparse.c | 10
-rw-r--r--  arch/x86/kernel/nmi.c | 3
-rw-r--r--  arch/x86/kernel/pci-gart_64.c | 2
-rw-r--r--  arch/x86/kernel/reboot.c | 64
-rw-r--r--  arch/x86/kernel/tlb_uv.c | 9
-rw-r--r--  arch/x86/kernel/traps.c | 33
-rw-r--r--  arch/x86/kernel/xsave.c | 2
-rw-r--r--  arch/x86/kvm/Makefile | 4
-rw-r--r--  arch/x86/kvm/i8254.c | 19
-rw-r--r--  arch/x86/kvm/i8259.c | 52
-rw-r--r--  arch/x86/kvm/irq.h | 6
-rw-r--r--  arch/x86/kvm/kvm_svm.h | 2
-rw-r--r--  arch/x86/kvm/lapic.c | 58
-rw-r--r--  arch/x86/kvm/mmu.c | 444
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 44
-rw-r--r--  arch/x86/kvm/svm.c | 48
-rw-r--r--  arch/x86/kvm/vmx.c | 350
-rw-r--r--  arch/x86/kvm/x86.c | 120
-rw-r--r--  arch/x86/kvm/x86_emulate.c | 297
-rw-r--r--  arch/x86/mach-default/setup.c | 15
-rw-r--r--  arch/x86/mm/init_32.c | 8
-rw-r--r--  arch/x86/pci/acpi.c | 2
-rw-r--r--  arch/x86/pci/amd_bus.c | 2
-rw-r--r--  arch/x86/pci/common.c | 3
-rw-r--r--  arch/x86/pci/direct.c | 2
-rw-r--r--  arch/x86/pci/early.c | 2
-rw-r--r--  arch/x86/pci/fixup.c | 3
-rw-r--r--  arch/x86/pci/i386.c | 2
-rw-r--r--  arch/x86/pci/init.c | 2
-rw-r--r--  arch/x86/pci/irq.c | 3
-rw-r--r--  arch/x86/pci/legacy.c | 2
-rw-r--r--  arch/x86/pci/mmconfig-shared.c | 3
-rw-r--r--  arch/x86/pci/mmconfig_32.c | 2
-rw-r--r--  arch/x86/pci/mmconfig_64.c | 3
-rw-r--r--  arch/x86/pci/numaq_32.c | 2
-rw-r--r--  arch/x86/pci/olpc.c | 2
-rw-r--r--  arch/x86/pci/pcbios.c | 5
-rw-r--r--  arch/x86/pci/visws.c | 3
-rw-r--r--  arch/x86/xen/time.c | 10
-rw-r--r--  arch/xtensa/kernel/init_task.c | 1
156 files changed, 5929 insertions, 3143 deletions
diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile
index ac706c1d7ada..b4697759a123 100644
--- a/arch/alpha/kernel/Makefile
+++ b/arch/alpha/kernel/Makefile
@@ -8,7 +8,7 @@ EXTRA_CFLAGS := -Werror -Wno-sign-compare
8 8
9obj-y := entry.o traps.o process.o init_task.o osf_sys.o irq.o \ 9obj-y := entry.o traps.o process.o init_task.o osf_sys.o irq.o \
10 irq_alpha.o signal.o setup.o ptrace.o time.o \ 10 irq_alpha.o signal.o setup.o ptrace.o time.o \
11 alpha_ksyms.o systbls.o err_common.o io.o 11 alpha_ksyms.o systbls.o err_common.o io.o binfmt_loader.o
12 12
13obj-$(CONFIG_VGA_HOSE) += console.o 13obj-$(CONFIG_VGA_HOSE) += console.o
14obj-$(CONFIG_SMP) += smp.o 14obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/alpha/kernel/binfmt_loader.c b/arch/alpha/kernel/binfmt_loader.c
new file mode 100644
index 000000000000..4a0af906b00a
--- /dev/null
+++ b/arch/alpha/kernel/binfmt_loader.c
@@ -0,0 +1,51 @@
1#include <linux/init.h>
2#include <linux/fs.h>
3#include <linux/file.h>
4#include <linux/mm_types.h>
5#include <linux/binfmts.h>
6#include <linux/a.out.h>
7
8static int load_binary(struct linux_binprm *bprm, struct pt_regs *regs)
9{
10 struct exec *eh = (struct exec *)bprm->buf;
11 unsigned long loader;
12 struct file *file;
13 int retval;
14
15 if (eh->fh.f_magic != 0x183 || (eh->fh.f_flags & 0x3000) != 0x3000)
16 return -ENOEXEC;
17
18 if (bprm->loader)
19 return -ENOEXEC;
20
21 allow_write_access(bprm->file);
22 fput(bprm->file);
23 bprm->file = NULL;
24
25 loader = bprm->vma->vm_end - sizeof(void *);
26
27 file = open_exec("/sbin/loader");
28 retval = PTR_ERR(file);
29 if (IS_ERR(file))
30 return retval;
31
32 /* Remember if the application is TASO. */
33 bprm->taso = eh->ah.entry < 0x100000000UL;
34
35 bprm->file = file;
36 bprm->loader = loader;
37 retval = prepare_binprm(bprm);
38 if (retval < 0)
39 return retval;
40 return search_binary_handler(bprm,regs);
41}
42
43static struct linux_binfmt loader_format = {
44 .load_binary = load_binary,
45};
46
47static int __init init_loader_binfmt(void)
48{
49 return register_binfmt(&loader_format);
50}
51arch_initcall(init_loader_binfmt);
diff --git a/arch/alpha/kernel/init_task.c b/arch/alpha/kernel/init_task.c
index 1f762189fa64..c2938e574a40 100644
--- a/arch/alpha/kernel/init_task.c
+++ b/arch/alpha/kernel/init_task.c
@@ -8,7 +8,6 @@
8#include <asm/uaccess.h> 8#include <asm/uaccess.h>
9 9
10 10
11static struct fs_struct init_fs = INIT_FS;
12static struct signal_struct init_signals = INIT_SIGNALS(init_signals); 11static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
13static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 12static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
14struct mm_struct init_mm = INIT_MM(init_mm); 13struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/arm/kernel/init_task.c b/arch/arm/kernel/init_task.c
index 0bbf80625395..e859af349467 100644
--- a/arch/arm/kernel/init_task.c
+++ b/arch/arm/kernel/init_task.c
@@ -12,7 +12,6 @@
12 12
13#include <asm/pgtable.h> 13#include <asm/pgtable.h>
14 14
15static struct fs_struct init_fs = INIT_FS;
16static struct signal_struct init_signals = INIT_SIGNALS(init_signals); 15static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
17static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 16static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
18struct mm_struct init_mm = INIT_MM(init_mm); 17struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/avr32/kernel/init_task.c b/arch/avr32/kernel/init_task.c
index 44058469c6ec..993d56ee3cf3 100644
--- a/arch/avr32/kernel/init_task.c
+++ b/arch/avr32/kernel/init_task.c
@@ -13,7 +13,6 @@
13 13
14#include <asm/pgtable.h> 14#include <asm/pgtable.h>
15 15
16static struct fs_struct init_fs = INIT_FS;
17static struct signal_struct init_signals = INIT_SIGNALS(init_signals); 16static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
18static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 17static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
19struct mm_struct init_mm = INIT_MM(init_mm); 18struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/blackfin/kernel/init_task.c b/arch/blackfin/kernel/init_task.c
index 6bdba7b21109..2c228c020978 100644
--- a/arch/blackfin/kernel/init_task.c
+++ b/arch/blackfin/kernel/init_task.c
@@ -33,7 +33,6 @@
33#include <linux/mqueue.h> 33#include <linux/mqueue.h>
34#include <linux/fs.h> 34#include <linux/fs.h>
35 35
36static struct fs_struct init_fs = INIT_FS;
37static struct signal_struct init_signals = INIT_SIGNALS(init_signals); 36static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
38static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 37static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
39 38
diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c
index 5933656db5a2..60816e876455 100644
--- a/arch/cris/kernel/process.c
+++ b/arch/cris/kernel/process.c
@@ -37,7 +37,6 @@
37 * setup. 37 * setup.
38 */ 38 */
39 39
40static struct fs_struct init_fs = INIT_FS;
41static struct signal_struct init_signals = INIT_SIGNALS(init_signals); 40static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
42static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 41static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
43struct mm_struct init_mm = INIT_MM(init_mm); 42struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/frv/kernel/init_task.c b/arch/frv/kernel/init_task.c
index e2198815b630..29429a8b7f6a 100644
--- a/arch/frv/kernel/init_task.c
+++ b/arch/frv/kernel/init_task.c
@@ -10,7 +10,6 @@
10#include <asm/pgtable.h> 10#include <asm/pgtable.h>
11 11
12 12
13static struct fs_struct init_fs = INIT_FS;
14static struct signal_struct init_signals = INIT_SIGNALS(init_signals); 13static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
15static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 14static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
16struct mm_struct init_mm = INIT_MM(init_mm); 15struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/h8300/kernel/init_task.c b/arch/h8300/kernel/init_task.c
index 93a4899e46c2..cb5dc552da97 100644
--- a/arch/h8300/kernel/init_task.c
+++ b/arch/h8300/kernel/init_task.c
@@ -12,7 +12,6 @@
12#include <asm/uaccess.h> 12#include <asm/uaccess.h>
13#include <asm/pgtable.h> 13#include <asm/pgtable.h>
14 14
15static struct fs_struct init_fs = INIT_FS;
16static struct signal_struct init_signals = INIT_SIGNALS(init_signals); 15static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
17static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 16static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
18struct mm_struct init_mm = INIT_MM(init_mm); 17struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 7fa8f615ba6e..3d31636cbafb 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -687,3 +687,6 @@ config IRQ_PER_CPU
687 687
688config IOMMU_HELPER 688config IOMMU_HELPER
689 def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB) 689 def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB)
690
691config IOMMU_API
692 def_bool (DMAR)
diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h
index f38472ac2267..68aa6da807c1 100644
--- a/arch/ia64/include/asm/kvm.h
+++ b/arch/ia64/include/asm/kvm.h
@@ -166,8 +166,6 @@ struct saved_vpd {
166}; 166};
167 167
168struct kvm_regs { 168struct kvm_regs {
169 char *saved_guest;
170 char *saved_stack;
171 struct saved_vpd vpd; 169 struct saved_vpd vpd;
172 /*Arch-regs*/ 170 /*Arch-regs*/
173 int mp_state; 171 int mp_state;
@@ -200,6 +198,10 @@ struct kvm_regs {
200 unsigned long fp_psr; /*used for lazy float register */ 198 unsigned long fp_psr; /*used for lazy float register */
201 unsigned long saved_gp; 199 unsigned long saved_gp;
202 /*for phycial emulation */ 200 /*for phycial emulation */
201
202 union context saved_guest;
203
204 unsigned long reserved[64]; /* for future use */
203}; 205};
204 206
205struct kvm_sregs { 207struct kvm_sregs {
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index c60d324da540..348663661659 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -23,17 +23,6 @@
23#ifndef __ASM_KVM_HOST_H 23#ifndef __ASM_KVM_HOST_H
24#define __ASM_KVM_HOST_H 24#define __ASM_KVM_HOST_H
25 25
26
27#include <linux/types.h>
28#include <linux/mm.h>
29#include <linux/kvm.h>
30#include <linux/kvm_para.h>
31#include <linux/kvm_types.h>
32
33#include <asm/pal.h>
34#include <asm/sal.h>
35
36#define KVM_MAX_VCPUS 4
37#define KVM_MEMORY_SLOTS 32 26#define KVM_MEMORY_SLOTS 32
38/* memory slots that does not exposed to userspace */ 27/* memory slots that does not exposed to userspace */
39#define KVM_PRIVATE_MEM_SLOTS 4 28#define KVM_PRIVATE_MEM_SLOTS 4
@@ -50,70 +39,132 @@
50#define EXIT_REASON_EXTERNAL_INTERRUPT 6 39#define EXIT_REASON_EXTERNAL_INTERRUPT 6
51#define EXIT_REASON_IPI 7 40#define EXIT_REASON_IPI 7
52#define EXIT_REASON_PTC_G 8 41#define EXIT_REASON_PTC_G 8
42#define EXIT_REASON_DEBUG 20
53 43
54/*Define vmm address space and vm data space.*/ 44/*Define vmm address space and vm data space.*/
55#define KVM_VMM_SIZE (16UL<<20) 45#define KVM_VMM_SIZE (__IA64_UL_CONST(16)<<20)
56#define KVM_VMM_SHIFT 24 46#define KVM_VMM_SHIFT 24
57#define KVM_VMM_BASE 0xD000000000000000UL 47#define KVM_VMM_BASE 0xD000000000000000
58#define VMM_SIZE (8UL<<20) 48#define VMM_SIZE (__IA64_UL_CONST(8)<<20)
59 49
60/* 50/*
61 * Define vm_buffer, used by PAL Services, base address. 51 * Define vm_buffer, used by PAL Services, base address.
62 * Note: vmbuffer is in the VMM-BLOCK, the size must be < 8M 52 * Note: vm_buffer is in the VMM-BLOCK, the size must be < 8M
63 */ 53 */
64#define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE) 54#define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE)
65#define KVM_VM_BUFFER_SIZE (8UL<<20) 55#define KVM_VM_BUFFER_SIZE (__IA64_UL_CONST(8)<<20)
66 56
67/*Define Virtual machine data layout.*/ 57/*
68#define KVM_VM_DATA_SHIFT 24 58 * kvm guest's data area looks as follow:
69#define KVM_VM_DATA_SIZE (1UL << KVM_VM_DATA_SHIFT) 59 *
70#define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VMM_SIZE) 60 * +----------------------+ ------- KVM_VM_DATA_SIZE
71 61 * | vcpu[n]'s data | | ___________________KVM_STK_OFFSET
72 62 * | | | / |
73#define KVM_P2M_BASE KVM_VM_DATA_BASE 63 * | .......... | | /vcpu's struct&stack |
74#define KVM_P2M_OFS 0 64 * | .......... | | /---------------------|---- 0
75#define KVM_P2M_SIZE (8UL << 20) 65 * | vcpu[5]'s data | | / vpd |
76 66 * | vcpu[4]'s data | |/-----------------------|
77#define KVM_VHPT_BASE (KVM_P2M_BASE + KVM_P2M_SIZE) 67 * | vcpu[3]'s data | / vtlb |
78#define KVM_VHPT_OFS KVM_P2M_SIZE 68 * | vcpu[2]'s data | /|------------------------|
79#define KVM_VHPT_BLOCK_SIZE (2UL << 20) 69 * | vcpu[1]'s data |/ | vhpt |
80#define VHPT_SHIFT 18 70 * | vcpu[0]'s data |____________________________|
81#define VHPT_SIZE (1UL << VHPT_SHIFT) 71 * +----------------------+ |
82#define VHPT_NUM_ENTRIES (1<<(VHPT_SHIFT-5)) 72 * | memory dirty log | |
83 73 * +----------------------+ |
84#define KVM_VTLB_BASE (KVM_VHPT_BASE+KVM_VHPT_BLOCK_SIZE) 74 * | vm's data struct | |
85#define KVM_VTLB_OFS (KVM_VHPT_OFS+KVM_VHPT_BLOCK_SIZE) 75 * +----------------------+ |
86#define KVM_VTLB_BLOCK_SIZE (1UL<<20) 76 * | | |
87#define VTLB_SHIFT 17 77 * | | |
88#define VTLB_SIZE (1UL<<VTLB_SHIFT) 78 * | | |
89#define VTLB_NUM_ENTRIES (1<<(VTLB_SHIFT-5)) 79 * | | |
90 80 * | | |
91#define KVM_VPD_BASE (KVM_VTLB_BASE+KVM_VTLB_BLOCK_SIZE) 81 * | | |
92#define KVM_VPD_OFS (KVM_VTLB_OFS+KVM_VTLB_BLOCK_SIZE) 82 * | | |
93#define KVM_VPD_BLOCK_SIZE (2UL<<20) 83 * | vm's p2m table | |
94#define VPD_SHIFT 16 84 * | | |
95#define VPD_SIZE (1UL<<VPD_SHIFT) 85 * | | |
96 86 * | | | |
97#define KVM_VCPU_BASE (KVM_VPD_BASE+KVM_VPD_BLOCK_SIZE) 87 * vm's data->| | | |
98#define KVM_VCPU_OFS (KVM_VPD_OFS+KVM_VPD_BLOCK_SIZE) 88 * +----------------------+ ------- 0
99#define KVM_VCPU_BLOCK_SIZE (2UL<<20) 89 * To support large memory, needs to increase the size of p2m.
100#define VCPU_SHIFT 18 90 * To support more vcpus, needs to ensure it has enough space to
101#define VCPU_SIZE (1UL<<VCPU_SHIFT) 91 * hold vcpus' data.
102#define MAX_VCPU_NUM KVM_VCPU_BLOCK_SIZE/VCPU_SIZE 92 */
103 93
104#define KVM_VM_BASE (KVM_VCPU_BASE+KVM_VCPU_BLOCK_SIZE) 94#define KVM_VM_DATA_SHIFT 26
105#define KVM_VM_OFS (KVM_VCPU_OFS+KVM_VCPU_BLOCK_SIZE) 95#define KVM_VM_DATA_SIZE (__IA64_UL_CONST(1) << KVM_VM_DATA_SHIFT)
106#define KVM_VM_BLOCK_SIZE (1UL<<19) 96#define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VM_DATA_SIZE)
107 97
108#define KVM_MEM_DIRTY_LOG_BASE (KVM_VM_BASE+KVM_VM_BLOCK_SIZE) 98#define KVM_P2M_BASE KVM_VM_DATA_BASE
109#define KVM_MEM_DIRTY_LOG_OFS (KVM_VM_OFS+KVM_VM_BLOCK_SIZE) 99#define KVM_P2M_SIZE (__IA64_UL_CONST(24) << 20)
110#define KVM_MEM_DIRTY_LOG_SIZE (1UL<<19) 100
111 101#define VHPT_SHIFT 16
112/* Get vpd, vhpt, tlb, vcpu, base*/ 102#define VHPT_SIZE (__IA64_UL_CONST(1) << VHPT_SHIFT)
113#define VPD_ADDR(n) (KVM_VPD_BASE+n*VPD_SIZE) 103#define VHPT_NUM_ENTRIES (__IA64_UL_CONST(1) << (VHPT_SHIFT-5))
114#define VHPT_ADDR(n) (KVM_VHPT_BASE+n*VHPT_SIZE) 104
115#define VTLB_ADDR(n) (KVM_VTLB_BASE+n*VTLB_SIZE) 105#define VTLB_SHIFT 16
116#define VCPU_ADDR(n) (KVM_VCPU_BASE+n*VCPU_SIZE) 106#define VTLB_SIZE (__IA64_UL_CONST(1) << VTLB_SHIFT)
107#define VTLB_NUM_ENTRIES (1UL << (VHPT_SHIFT-5))
108
109#define VPD_SHIFT 16
110#define VPD_SIZE (__IA64_UL_CONST(1) << VPD_SHIFT)
111
112#define VCPU_STRUCT_SHIFT 16
113#define VCPU_STRUCT_SIZE (__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT)
114
115#define KVM_STK_OFFSET VCPU_STRUCT_SIZE
116
117#define KVM_VM_STRUCT_SHIFT 19
118#define KVM_VM_STRUCT_SIZE (__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT)
119
120#define KVM_MEM_DIRY_LOG_SHIFT 19
121#define KVM_MEM_DIRTY_LOG_SIZE (__IA64_UL_CONST(1) << KVM_MEM_DIRY_LOG_SHIFT)
122
123#ifndef __ASSEMBLY__
124
125/*Define the max vcpus and memory for Guests.*/
126#define KVM_MAX_VCPUS (KVM_VM_DATA_SIZE - KVM_P2M_SIZE - KVM_VM_STRUCT_SIZE -\
127 KVM_MEM_DIRTY_LOG_SIZE) / sizeof(struct kvm_vcpu_data)
128#define KVM_MAX_MEM_SIZE (KVM_P2M_SIZE >> 3 << PAGE_SHIFT)
129
130#define VMM_LOG_LEN 256
131
132#include <linux/types.h>
133#include <linux/mm.h>
134#include <linux/kvm.h>
135#include <linux/kvm_para.h>
136#include <linux/kvm_types.h>
137
138#include <asm/pal.h>
139#include <asm/sal.h>
140#include <asm/page.h>
141
142struct kvm_vcpu_data {
143 char vcpu_vhpt[VHPT_SIZE];
144 char vcpu_vtlb[VTLB_SIZE];
145 char vcpu_vpd[VPD_SIZE];
146 char vcpu_struct[VCPU_STRUCT_SIZE];
147};
148
149struct kvm_vm_data {
150 char kvm_p2m[KVM_P2M_SIZE];
151 char kvm_vm_struct[KVM_VM_STRUCT_SIZE];
152 char kvm_mem_dirty_log[KVM_MEM_DIRTY_LOG_SIZE];
153 struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS];
154};
155
156#define VCPU_BASE(n) KVM_VM_DATA_BASE + \
157 offsetof(struct kvm_vm_data, vcpu_data[n])
158#define VM_BASE KVM_VM_DATA_BASE + \
159 offsetof(struct kvm_vm_data, kvm_vm_struct)
160#define KVM_MEM_DIRTY_LOG_BASE KVM_VM_DATA_BASE + \
161 offsetof(struct kvm_vm_data, kvm_mem_dirty_log)
162
163#define VHPT_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vhpt))
164#define VTLB_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vtlb))
165#define VPD_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vpd))
166#define VCPU_STRUCT_BASE(n) (VCPU_BASE(n) + \
167 offsetof(struct kvm_vcpu_data, vcpu_struct))
117 168
118/*IO section definitions*/ 169/*IO section definitions*/
119#define IOREQ_READ 1 170#define IOREQ_READ 1
@@ -389,6 +440,7 @@ struct kvm_vcpu_arch {
389 440
390 unsigned long opcode; 441 unsigned long opcode;
391 unsigned long cause; 442 unsigned long cause;
443 char log_buf[VMM_LOG_LEN];
392 union context host; 444 union context host;
393 union context guest; 445 union context guest;
394}; 446};
@@ -403,20 +455,19 @@ struct kvm_sal_data {
403}; 455};
404 456
405struct kvm_arch { 457struct kvm_arch {
458 spinlock_t dirty_log_lock;
459
406 unsigned long vm_base; 460 unsigned long vm_base;
407 unsigned long metaphysical_rr0; 461 unsigned long metaphysical_rr0;
408 unsigned long metaphysical_rr4; 462 unsigned long metaphysical_rr4;
409 unsigned long vmm_init_rr; 463 unsigned long vmm_init_rr;
410 unsigned long vhpt_base; 464
411 unsigned long vtlb_base;
412 unsigned long vpd_base;
413 spinlock_t dirty_log_lock;
414 struct kvm_ioapic *vioapic; 465 struct kvm_ioapic *vioapic;
415 struct kvm_vm_stat stat; 466 struct kvm_vm_stat stat;
416 struct kvm_sal_data rdv_sal_data; 467 struct kvm_sal_data rdv_sal_data;
417 468
418 struct list_head assigned_dev_head; 469 struct list_head assigned_dev_head;
419 struct dmar_domain *intel_iommu_domain; 470 struct iommu_domain *iommu_domain;
420 struct hlist_head irq_ack_notifier_list; 471 struct hlist_head irq_ack_notifier_list;
421 472
422 unsigned long irq_sources_bitmap; 473 unsigned long irq_sources_bitmap;
@@ -512,7 +563,7 @@ struct kvm_pt_regs {
512 563
513static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v) 564static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v)
514{ 565{
515 return (struct kvm_pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1; 566 return (struct kvm_pt_regs *) ((unsigned long) v + KVM_STK_OFFSET) - 1;
516} 567}
517 568
518typedef int kvm_vmm_entry(void); 569typedef int kvm_vmm_entry(void);
@@ -531,5 +582,6 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
531void kvm_sal_emul(struct kvm_vcpu *vcpu); 582void kvm_sal_emul(struct kvm_vcpu *vcpu);
532 583
533static inline void kvm_inject_nmi(struct kvm_vcpu *vcpu) {} 584static inline void kvm_inject_nmi(struct kvm_vcpu *vcpu) {}
585#endif /* __ASSEMBLY__*/
534 586
535#endif 587#endif
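
The layout comment and struct definitions above replace the old hand-maintained offset macros with one fixed plan: everything a VM needs now lives inside a single 64 MiB block, and the vcpu limit is simply whatever fits after the p2m table, the kvm struct and the dirty log are carved out. A minimal user-space sketch of that arithmetic, using only the constants introduced in this hunk (the MiB/KiB helpers and the printout are illustrative, not part of the patch):

/* sketch: evaluate the new KVM_MAX_VCPUS expression with concrete numbers */
#include <stdio.h>

#define MiB(x) ((unsigned long)(x) << 20)
#define KiB(x) ((unsigned long)(x) << 10)

int main(void)
{
	unsigned long vm_data_size   = MiB(64);   /* 1UL << KVM_VM_DATA_SHIFT (26) */
	unsigned long p2m_size       = MiB(24);   /* KVM_P2M_SIZE */
	unsigned long vm_struct_size = KiB(512);  /* 1UL << KVM_VM_STRUCT_SHIFT (19) */
	unsigned long dirty_log_size = KiB(512);  /* 1UL << KVM_MEM_DIRY_LOG_SHIFT (19) */
	/* struct kvm_vcpu_data = vhpt + vtlb + vpd + vcpu_struct, 64 KiB each */
	unsigned long vcpu_data_size = 4 * KiB(64);

	unsigned long max_vcpus = (vm_data_size - p2m_size -
				   vm_struct_size - dirty_log_size) / vcpu_data_size;

	printf("KVM_MAX_VCPUS = %lu\n", max_vcpus);  /* 156 with these values */
	return 0;
}

With these sizes the hard-coded limit of 4 vcpus removed at the top of this hunk becomes 156 vcpu slots in the same per-VM data area.
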
diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c
index 9d7e1c66faf4..5b0e830c6f33 100644
--- a/arch/ia64/kernel/init_task.c
+++ b/arch/ia64/kernel/init_task.c
@@ -17,7 +17,6 @@
17#include <asm/uaccess.h> 17#include <asm/uaccess.h>
18#include <asm/pgtable.h> 18#include <asm/pgtable.h>
19 19
20static struct fs_struct init_fs = INIT_FS;
21static struct signal_struct init_signals = INIT_SIGNALS(init_signals); 20static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
22static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 21static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
23struct mm_struct init_mm = INIT_MM(init_mm); 22struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 65c10a42c88f..f0ebb342409d 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -93,13 +93,14 @@ void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next)
93 now = ia64_get_itc(); 93 now = ia64_get_itc();
94 94
95 delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp)); 95 delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp));
96 account_system_time(prev, 0, delta_stime); 96 if (idle_task(smp_processor_id()) != prev)
97 account_system_time_scaled(prev, delta_stime); 97 account_system_time(prev, 0, delta_stime, delta_stime);
98 else
99 account_idle_time(delta_stime);
98 100
99 if (pi->ac_utime) { 101 if (pi->ac_utime) {
100 delta_utime = cycle_to_cputime(pi->ac_utime); 102 delta_utime = cycle_to_cputime(pi->ac_utime);
101 account_user_time(prev, delta_utime); 103 account_user_time(prev, delta_utime, delta_utime);
102 account_user_time_scaled(prev, delta_utime);
103 } 104 }
104 105
105 pi->ac_stamp = ni->ac_stamp = now; 106 pi->ac_stamp = ni->ac_stamp = now;
@@ -122,8 +123,10 @@ void account_system_vtime(struct task_struct *tsk)
122 now = ia64_get_itc(); 123 now = ia64_get_itc();
123 124
124 delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp)); 125 delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
125 account_system_time(tsk, 0, delta_stime); 126 if (irq_count() || idle_task(smp_processor_id()) != tsk)
126 account_system_time_scaled(tsk, delta_stime); 127 account_system_time(tsk, 0, delta_stime, delta_stime);
128 else
129 account_idle_time(delta_stime);
127 ti->ac_stime = 0; 130 ti->ac_stime = 0;
128 131
129 ti->ac_stamp = now; 132 ti->ac_stamp = now;
@@ -143,8 +146,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
143 146
144 if (ti->ac_utime) { 147 if (ti->ac_utime) {
145 delta_utime = cycle_to_cputime(ti->ac_utime); 148 delta_utime = cycle_to_cputime(ti->ac_utime);
146 account_user_time(p, delta_utime); 149 account_user_time(p, delta_utime, delta_utime);
147 account_user_time_scaled(p, delta_utime);
148 ti->ac_utime = 0; 150 ti->ac_utime = 0;
149 } 151 }
150} 152}
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index 92cef66ca268..0bb99b732908 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -51,8 +51,8 @@ EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
51common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ 51common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
52 coalesced_mmio.o irq_comm.o) 52 coalesced_mmio.o irq_comm.o)
53 53
54ifeq ($(CONFIG_DMAR),y) 54ifeq ($(CONFIG_IOMMU_API),y)
55common-objs += $(addprefix ../../../virt/kvm/, vtd.o) 55common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
56endif 56endif
57 57
58kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o 58kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
@@ -60,7 +60,7 @@ obj-$(CONFIG_KVM) += kvm.o
60 60
61CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127 61CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127
62kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \ 62kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \
63 vtlb.o process.o 63 vtlb.o process.o kvm_lib.o
64#Add link memcpy and memset to avoid possible structure assignment error 64#Add link memcpy and memset to avoid possible structure assignment error
65kvm-intel-objs += memcpy.o memset.o 65kvm-intel-objs += memcpy.o memset.o
66obj-$(CONFIG_KVM_INTEL) += kvm-intel.o 66obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c
index 4e3dc13a619c..0c3564a7a033 100644
--- a/arch/ia64/kvm/asm-offsets.c
+++ b/arch/ia64/kvm/asm-offsets.c
@@ -24,19 +24,10 @@
24 24
25#include <linux/autoconf.h> 25#include <linux/autoconf.h>
26#include <linux/kvm_host.h> 26#include <linux/kvm_host.h>
27#include <linux/kbuild.h>
27 28
28#include "vcpu.h" 29#include "vcpu.h"
29 30
30#define task_struct kvm_vcpu
31
32#define DEFINE(sym, val) \
33 asm volatile("\n->" #sym " (%0) " #val : : "i" (val))
34
35#define BLANK() asm volatile("\n->" : :)
36
37#define OFFSET(_sym, _str, _mem) \
38 DEFINE(_sym, offsetof(_str, _mem));
39
40void foo(void) 31void foo(void)
41{ 32{
42 DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu)); 33 DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu));
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index af1464f7a6ad..4e586f6110aa 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -31,6 +31,7 @@
31#include <linux/bitops.h> 31#include <linux/bitops.h>
32#include <linux/hrtimer.h> 32#include <linux/hrtimer.h>
33#include <linux/uaccess.h> 33#include <linux/uaccess.h>
34#include <linux/iommu.h>
34#include <linux/intel-iommu.h> 35#include <linux/intel-iommu.h>
35 36
36#include <asm/pgtable.h> 37#include <asm/pgtable.h>
@@ -180,7 +181,6 @@ int kvm_dev_ioctl_check_extension(long ext)
180 181
181 switch (ext) { 182 switch (ext) {
182 case KVM_CAP_IRQCHIP: 183 case KVM_CAP_IRQCHIP:
183 case KVM_CAP_USER_MEMORY:
184 case KVM_CAP_MP_STATE: 184 case KVM_CAP_MP_STATE:
185 185
186 r = 1; 186 r = 1;
@@ -189,7 +189,7 @@ int kvm_dev_ioctl_check_extension(long ext)
189 r = KVM_COALESCED_MMIO_PAGE_OFFSET; 189 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
190 break; 190 break;
191 case KVM_CAP_IOMMU: 191 case KVM_CAP_IOMMU:
192 r = intel_iommu_found(); 192 r = iommu_found();
193 break; 193 break;
194 default: 194 default:
195 r = 0; 195 r = 0;
@@ -439,7 +439,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
439 expires = div64_u64(itc_diff, cyc_per_usec); 439 expires = div64_u64(itc_diff, cyc_per_usec);
440 kt = ktime_set(0, 1000 * expires); 440 kt = ktime_set(0, 1000 * expires);
441 441
442 down_read(&vcpu->kvm->slots_lock);
443 vcpu->arch.ht_active = 1; 442 vcpu->arch.ht_active = 1;
444 hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS); 443 hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
445 444
@@ -452,7 +451,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
452 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) 451 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
453 vcpu->arch.mp_state = 452 vcpu->arch.mp_state =
454 KVM_MP_STATE_RUNNABLE; 453 KVM_MP_STATE_RUNNABLE;
455 up_read(&vcpu->kvm->slots_lock);
456 454
457 if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) 455 if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
458 return -EINTR; 456 return -EINTR;
@@ -476,6 +474,13 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu,
476 return 1; 474 return 1;
477} 475}
478 476
477static int handle_vcpu_debug(struct kvm_vcpu *vcpu,
478 struct kvm_run *kvm_run)
479{
480 printk("VMM: %s", vcpu->arch.log_buf);
481 return 1;
482}
483
479static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu, 484static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
480 struct kvm_run *kvm_run) = { 485 struct kvm_run *kvm_run) = {
481 [EXIT_REASON_VM_PANIC] = handle_vm_error, 486 [EXIT_REASON_VM_PANIC] = handle_vm_error,
@@ -487,6 +492,7 @@ static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
487 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, 492 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
488 [EXIT_REASON_IPI] = handle_ipi, 493 [EXIT_REASON_IPI] = handle_ipi,
489 [EXIT_REASON_PTC_G] = handle_global_purge, 494 [EXIT_REASON_PTC_G] = handle_global_purge,
495 [EXIT_REASON_DEBUG] = handle_vcpu_debug,
490 496
491}; 497};
492 498
@@ -698,27 +704,24 @@ out:
698 return r; 704 return r;
699} 705}
700 706
701/*
702 * Allocate 16M memory for every vm to hold its specific data.
703 * Its memory map is defined in kvm_host.h.
704 */
705static struct kvm *kvm_alloc_kvm(void) 707static struct kvm *kvm_alloc_kvm(void)
706{ 708{
707 709
708 struct kvm *kvm; 710 struct kvm *kvm;
709 uint64_t vm_base; 711 uint64_t vm_base;
710 712
713 BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE);
714
711 vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE)); 715 vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
712 716
713 if (!vm_base) 717 if (!vm_base)
714 return ERR_PTR(-ENOMEM); 718 return ERR_PTR(-ENOMEM);
715 printk(KERN_DEBUG"kvm: VM data's base Address:0x%lx\n", vm_base);
716 719
717 /* Zero all pages before use! */
718 memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); 720 memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
719 721 kvm = (struct kvm *)(vm_base +
720 kvm = (struct kvm *)(vm_base + KVM_VM_OFS); 722 offsetof(struct kvm_vm_data, kvm_vm_struct));
721 kvm->arch.vm_base = vm_base; 723 kvm->arch.vm_base = vm_base;
724 printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base);
722 725
723 return kvm; 726 return kvm;
724} 727}
@@ -760,21 +763,12 @@ static void kvm_build_io_pmt(struct kvm *kvm)
760 763
761static void kvm_init_vm(struct kvm *kvm) 764static void kvm_init_vm(struct kvm *kvm)
762{ 765{
763 long vm_base;
764
765 BUG_ON(!kvm); 766 BUG_ON(!kvm);
766 767
767 kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0; 768 kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
768 kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4; 769 kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
769 kvm->arch.vmm_init_rr = VMM_INIT_RR; 770 kvm->arch.vmm_init_rr = VMM_INIT_RR;
770 771
771 vm_base = kvm->arch.vm_base;
772 if (vm_base) {
773 kvm->arch.vhpt_base = vm_base + KVM_VHPT_OFS;
774 kvm->arch.vtlb_base = vm_base + KVM_VTLB_OFS;
775 kvm->arch.vpd_base = vm_base + KVM_VPD_OFS;
776 }
777
778 /* 772 /*
779 *Fill P2M entries for MMIO/IO ranges 773 *Fill P2M entries for MMIO/IO ranges
780 */ 774 */
@@ -838,9 +832,8 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
838 832
839int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 833int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
840{ 834{
841 int i;
842 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); 835 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
843 int r; 836 int i;
844 837
845 vcpu_load(vcpu); 838 vcpu_load(vcpu);
846 839
@@ -857,18 +850,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
857 850
858 vpd->vpr = regs->vpd.vpr; 851 vpd->vpr = regs->vpd.vpr;
859 852
860 r = -EFAULT; 853 memcpy(&vcpu->arch.guest, &regs->saved_guest, sizeof(union context));
861 r = copy_from_user(&vcpu->arch.guest, regs->saved_guest,
862 sizeof(union context));
863 if (r)
864 goto out;
865 r = copy_from_user(vcpu + 1, regs->saved_stack +
866 sizeof(struct kvm_vcpu),
867 IA64_STK_OFFSET - sizeof(struct kvm_vcpu));
868 if (r)
869 goto out;
870 vcpu->arch.exit_data =
871 ((struct kvm_vcpu *)(regs->saved_stack))->arch.exit_data;
872 854
873 RESTORE_REGS(mp_state); 855 RESTORE_REGS(mp_state);
874 RESTORE_REGS(vmm_rr); 856 RESTORE_REGS(vmm_rr);
@@ -902,9 +884,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
902 set_bit(KVM_REQ_RESUME, &vcpu->requests); 884 set_bit(KVM_REQ_RESUME, &vcpu->requests);
903 885
904 vcpu_put(vcpu); 886 vcpu_put(vcpu);
905 r = 0; 887
906out: 888 return 0;
907 return r;
908} 889}
909 890
910long kvm_arch_vm_ioctl(struct file *filp, 891long kvm_arch_vm_ioctl(struct file *filp,
@@ -1166,10 +1147,11 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1166 /*Set entry address for first run.*/ 1147 /*Set entry address for first run.*/
1167 regs->cr_iip = PALE_RESET_ENTRY; 1148 regs->cr_iip = PALE_RESET_ENTRY;
1168 1149
1169 /*Initilize itc offset for vcpus*/ 1150 /*Initialize itc offset for vcpus*/
1170 itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC); 1151 itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC);
1171 for (i = 0; i < MAX_VCPU_NUM; i++) { 1152 for (i = 0; i < KVM_MAX_VCPUS; i++) {
1172 v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i); 1153 v = (struct kvm_vcpu *)((char *)vcpu +
1154 sizeof(struct kvm_vcpu_data) * i);
1173 v->arch.itc_offset = itc_offset; 1155 v->arch.itc_offset = itc_offset;
1174 v->arch.last_itc = 0; 1156 v->arch.last_itc = 0;
1175 } 1157 }
@@ -1183,7 +1165,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1183 vcpu->arch.apic->vcpu = vcpu; 1165 vcpu->arch.apic->vcpu = vcpu;
1184 1166
1185 p_ctx->gr[1] = 0; 1167 p_ctx->gr[1] = 0;
1186 p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + IA64_STK_OFFSET); 1168 p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET);
1187 p_ctx->gr[13] = (unsigned long)vmm_vcpu; 1169 p_ctx->gr[13] = (unsigned long)vmm_vcpu;
1188 p_ctx->psr = 0x1008522000UL; 1170 p_ctx->psr = 0x1008522000UL;
1189 p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/ 1171 p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/
@@ -1218,12 +1200,12 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1218 vcpu->arch.hlt_timer.function = hlt_timer_fn; 1200 vcpu->arch.hlt_timer.function = hlt_timer_fn;
1219 1201
1220 vcpu->arch.last_run_cpu = -1; 1202 vcpu->arch.last_run_cpu = -1;
1221 vcpu->arch.vpd = (struct vpd *)VPD_ADDR(vcpu->vcpu_id); 1203 vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id);
1222 vcpu->arch.vsa_base = kvm_vsa_base; 1204 vcpu->arch.vsa_base = kvm_vsa_base;
1223 vcpu->arch.__gp = kvm_vmm_gp; 1205 vcpu->arch.__gp = kvm_vmm_gp;
1224 vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock); 1206 vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock);
1225 vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_ADDR(vcpu->vcpu_id); 1207 vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id);
1226 vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_ADDR(vcpu->vcpu_id); 1208 vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id);
1227 init_ptce_info(vcpu); 1209 init_ptce_info(vcpu);
1228 1210
1229 r = 0; 1211 r = 0;
@@ -1273,12 +1255,22 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1273 int r; 1255 int r;
1274 int cpu; 1256 int cpu;
1275 1257
1258 BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2);
1259
1260 r = -EINVAL;
1261 if (id >= KVM_MAX_VCPUS) {
1262 printk(KERN_ERR"kvm: Can't configure vcpus > %ld",
1263 KVM_MAX_VCPUS);
1264 goto fail;
1265 }
1266
1276 r = -ENOMEM; 1267 r = -ENOMEM;
1277 if (!vm_base) { 1268 if (!vm_base) {
1278 printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id); 1269 printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id);
1279 goto fail; 1270 goto fail;
1280 } 1271 }
1281 vcpu = (struct kvm_vcpu *)(vm_base + KVM_VCPU_OFS + VCPU_SIZE * id); 1272 vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data,
1273 vcpu_data[id].vcpu_struct));
1282 vcpu->kvm = kvm; 1274 vcpu->kvm = kvm;
1283 1275
1284 cpu = get_cpu(); 1276 cpu = get_cpu();
@@ -1374,9 +1366,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1374 1366
1375int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 1367int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1376{ 1368{
1377 int i;
1378 int r;
1379 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); 1369 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
1370 int i;
1371
1380 vcpu_load(vcpu); 1372 vcpu_load(vcpu);
1381 1373
1382 for (i = 0; i < 16; i++) { 1374 for (i = 0; i < 16; i++) {
@@ -1391,14 +1383,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1391 regs->vpd.vpsr = vpd->vpsr; 1383 regs->vpd.vpsr = vpd->vpsr;
1392 regs->vpd.vpr = vpd->vpr; 1384 regs->vpd.vpr = vpd->vpr;
1393 1385
1394 r = -EFAULT; 1386 memcpy(&regs->saved_guest, &vcpu->arch.guest, sizeof(union context));
1395 r = copy_to_user(regs->saved_guest, &vcpu->arch.guest, 1387
1396 sizeof(union context));
1397 if (r)
1398 goto out;
1399 r = copy_to_user(regs->saved_stack, (void *)vcpu, IA64_STK_OFFSET);
1400 if (r)
1401 goto out;
1402 SAVE_REGS(mp_state); 1388 SAVE_REGS(mp_state);
1403 SAVE_REGS(vmm_rr); 1389 SAVE_REGS(vmm_rr);
1404 memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS); 1390 memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS);
@@ -1426,10 +1412,9 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1426 SAVE_REGS(metaphysical_saved_rr4); 1412 SAVE_REGS(metaphysical_saved_rr4);
1427 SAVE_REGS(fp_psr); 1413 SAVE_REGS(fp_psr);
1428 SAVE_REGS(saved_gp); 1414 SAVE_REGS(saved_gp);
1415
1429 vcpu_put(vcpu); 1416 vcpu_put(vcpu);
1430 r = 0; 1417 return 0;
1431out:
1432 return r;
1433} 1418}
1434 1419
1435void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 1420void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
@@ -1457,6 +1442,9 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
1457 struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot]; 1442 struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
1458 unsigned long base_gfn = memslot->base_gfn; 1443 unsigned long base_gfn = memslot->base_gfn;
1459 1444
1445 if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
1446 return -ENOMEM;
1447
1460 for (i = 0; i < npages; i++) { 1448 for (i = 0; i < npages; i++) {
1461 pfn = gfn_to_pfn(kvm, base_gfn + i); 1449 pfn = gfn_to_pfn(kvm, base_gfn + i);
1462 if (!kvm_is_mmio_pfn(pfn)) { 1450 if (!kvm_is_mmio_pfn(pfn)) {
@@ -1631,8 +1619,8 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
1631 struct kvm_memory_slot *memslot; 1619 struct kvm_memory_slot *memslot;
1632 int r, i; 1620 int r, i;
1633 long n, base; 1621 long n, base;
1634 unsigned long *dirty_bitmap = (unsigned long *)((void *)kvm - KVM_VM_OFS 1622 unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base +
1635 + KVM_MEM_DIRTY_LOG_OFS); 1623 offsetof(struct kvm_vm_data, kvm_mem_dirty_log));
1636 1624
1637 r = -EINVAL; 1625 r = -EINVAL;
1638 if (log->slot >= KVM_MEMORY_SLOTS) 1626 if (log->slot >= KVM_MEMORY_SLOTS)
diff --git a/arch/ia64/kvm/kvm_lib.c b/arch/ia64/kvm/kvm_lib.c
new file mode 100644
index 000000000000..a85cb611ecd7
--- /dev/null
+++ b/arch/ia64/kvm/kvm_lib.c
@@ -0,0 +1,15 @@
1/*
2 * kvm_lib.c: Compile some libraries for kvm-intel module.
3 *
4 * Just include kernel's library, and disable symbols export.
5 * Copyright (C) 2008, Intel Corporation.
6 * Xiantao Zhang (xiantao.zhang@intel.com)
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 *
12 */
13#undef CONFIG_MODULES
14#include "../../../lib/vsprintf.c"
15#include "../../../lib/ctype.c"
diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h
index 2cc41d17cf99..b2bcaa2787aa 100644
--- a/arch/ia64/kvm/kvm_minstate.h
+++ b/arch/ia64/kvm/kvm_minstate.h
@@ -24,6 +24,8 @@
24#include <asm/asmmacro.h> 24#include <asm/asmmacro.h>
25#include <asm/types.h> 25#include <asm/types.h>
26#include <asm/kregs.h> 26#include <asm/kregs.h>
27#include <asm/kvm_host.h>
28
27#include "asm-offsets.h" 29#include "asm-offsets.h"
28 30
29#define KVM_MINSTATE_START_SAVE_MIN \ 31#define KVM_MINSTATE_START_SAVE_MIN \
@@ -33,7 +35,7 @@
33 addl r22 = VMM_RBS_OFFSET,r1; /* compute base of RBS */ \ 35 addl r22 = VMM_RBS_OFFSET,r1; /* compute base of RBS */ \
34 ;; \ 36 ;; \
35 lfetch.fault.excl.nt1 [r22]; \ 37 lfetch.fault.excl.nt1 [r22]; \
36 addl r1 = IA64_STK_OFFSET-VMM_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ 38 addl r1 = KVM_STK_OFFSET-VMM_PT_REGS_SIZE, r1; \
37 mov r23 = ar.bspstore; /* save ar.bspstore */ \ 39 mov r23 = ar.bspstore; /* save ar.bspstore */ \
38 ;; \ 40 ;; \
39 mov ar.bspstore = r22; /* switch to kernel RBS */\ 41 mov ar.bspstore = r22; /* switch to kernel RBS */\
diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h
index e585c4607344..dd979e00b574 100644
--- a/arch/ia64/kvm/misc.h
+++ b/arch/ia64/kvm/misc.h
@@ -27,7 +27,8 @@
27 */ 27 */
28static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm) 28static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm)
29{ 29{
30 return (uint64_t *)(kvm->arch.vm_base + KVM_P2M_OFS); 30 return (uint64_t *)(kvm->arch.vm_base +
31 offsetof(struct kvm_vm_data, kvm_p2m));
31} 32}
32 33
33static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn, 34static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn,
diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c
index 7f1a858bc69f..21f63fffc379 100644
--- a/arch/ia64/kvm/mmio.c
+++ b/arch/ia64/kvm/mmio.c
@@ -66,31 +66,25 @@ void lsapic_write(struct kvm_vcpu *v, unsigned long addr,
66 66
67 switch (addr) { 67 switch (addr) {
68 case PIB_OFST_INTA: 68 case PIB_OFST_INTA:
69 /*panic_domain(NULL, "Undefined write on PIB INTA\n");*/ 69 panic_vm(v, "Undefined write on PIB INTA\n");
70 panic_vm(v);
71 break; 70 break;
72 case PIB_OFST_XTP: 71 case PIB_OFST_XTP:
73 if (length == 1) { 72 if (length == 1) {
74 vlsapic_write_xtp(v, val); 73 vlsapic_write_xtp(v, val);
75 } else { 74 } else {
76 /*panic_domain(NULL, 75 panic_vm(v, "Undefined write on PIB XTP\n");
77 "Undefined write on PIB XTP\n");*/
78 panic_vm(v);
79 } 76 }
80 break; 77 break;
81 default: 78 default:
82 if (PIB_LOW_HALF(addr)) { 79 if (PIB_LOW_HALF(addr)) {
83 /*lower half */ 80 /*Lower half */
84 if (length != 8) 81 if (length != 8)
85 /*panic_domain(NULL, 82 panic_vm(v, "Can't LHF write with size %ld!\n",
86 "Can't LHF write with size %ld!\n", 83 length);
87 length);*/
88 panic_vm(v);
89 else 84 else
90 vlsapic_write_ipi(v, addr, val); 85 vlsapic_write_ipi(v, addr, val);
91 } else { /* upper half 86 } else { /*Upper half */
92 printk("IPI-UHF write %lx\n",addr);*/ 87 panic_vm(v, "IPI-UHF write %lx\n", addr);
93 panic_vm(v);
94 } 88 }
95 break; 89 break;
96 } 90 }
@@ -108,22 +102,18 @@ unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr,
108 if (length == 1) /* 1 byte load */ 102 if (length == 1) /* 1 byte load */
109 ; /* There is no i8259, there is no INTA access*/ 103 ; /* There is no i8259, there is no INTA access*/
110 else 104 else
111 /*panic_domain(NULL,"Undefined read on PIB INTA\n"); */ 105 panic_vm(v, "Undefined read on PIB INTA\n");
112 panic_vm(v);
113 106
114 break; 107 break;
115 case PIB_OFST_XTP: 108 case PIB_OFST_XTP:
116 if (length == 1) { 109 if (length == 1) {
117 result = VLSAPIC_XTP(v); 110 result = VLSAPIC_XTP(v);
118 /* printk("read xtp %lx\n", result); */
119 } else { 111 } else {
120 /*panic_domain(NULL, 112 panic_vm(v, "Undefined read on PIB XTP\n");
121 "Undefined read on PIB XTP\n");*/
122 panic_vm(v);
123 } 113 }
124 break; 114 break;
125 default: 115 default:
126 panic_vm(v); 116 panic_vm(v, "Undefined addr access for lsapic!\n");
127 break; 117 break;
128 } 118 }
129 return result; 119 return result;
@@ -162,7 +152,7 @@ static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest,
162 /* it's necessary to ensure zero extending */ 152 /* it's necessary to ensure zero extending */
163 *dest = p->u.ioreq.data & (~0UL >> (64-(s*8))); 153 *dest = p->u.ioreq.data & (~0UL >> (64-(s*8)));
164 } else 154 } else
165 panic_vm(vcpu); 155 panic_vm(vcpu, "Unhandled mmio access returned!\n");
166out: 156out:
167 local_irq_restore(psr); 157 local_irq_restore(psr);
168 return ; 158 return ;
@@ -324,7 +314,9 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
324 return; 314 return;
325 } else { 315 } else {
326 inst_type = -1; 316 inst_type = -1;
327 panic_vm(vcpu); 317 panic_vm(vcpu, "Unsupported MMIO access instruction! \
318 Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
319 bundle.i64[0], bundle.i64[1]);
328 } 320 }
329 321
330 size = 1 << size; 322 size = 1 << size;
@@ -335,7 +327,7 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
335 if (inst_type == SL_INTEGER) 327 if (inst_type == SL_INTEGER)
336 vcpu_set_gr(vcpu, inst.M1.r1, data, 0); 328 vcpu_set_gr(vcpu, inst.M1.r1, data, 0);
337 else 329 else
338 panic_vm(vcpu); 330 panic_vm(vcpu, "Unsupported instruction type!\n");
339 331
340 } 332 }
341 vcpu_increment_iip(vcpu); 333 vcpu_increment_iip(vcpu);
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
index 800817307b7b..552d07724207 100644
--- a/arch/ia64/kvm/process.c
+++ b/arch/ia64/kvm/process.c
@@ -527,7 +527,8 @@ void reflect_interruption(u64 ifa, u64 isr, u64 iim,
527 vector = vec2off[vec]; 527 vector = vec2off[vec];
528 528
529 if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) { 529 if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) {
530 panic_vm(vcpu); 530 panic_vm(vcpu, "Interruption with vector :0x%lx occurs "
531 "with psr.ic = 0\n", vector);
531 return; 532 return;
532 } 533 }
533 534
@@ -586,7 +587,7 @@ static void set_pal_call_result(struct kvm_vcpu *vcpu)
586 vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0); 587 vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0);
587 vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0); 588 vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0);
588 } else 589 } else
589 panic_vm(vcpu); 590 panic_vm(vcpu, "Mis-set for exit reason!\n");
590} 591}
591 592
592static void set_sal_call_data(struct kvm_vcpu *vcpu) 593static void set_sal_call_data(struct kvm_vcpu *vcpu)
@@ -614,7 +615,7 @@ static void set_sal_call_result(struct kvm_vcpu *vcpu)
614 vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0); 615 vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0);
615 vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0); 616 vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0);
616 } else 617 } else
617 panic_vm(vcpu); 618 panic_vm(vcpu, "Mis-set for exit reason!\n");
618} 619}
619 620
620void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs, 621void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
@@ -680,7 +681,7 @@ static void generate_exirq(struct kvm_vcpu *vcpu)
680 vpsr = VCPU(vcpu, vpsr); 681 vpsr = VCPU(vcpu, vpsr);
681 isr = vpsr & IA64_PSR_RI; 682 isr = vpsr & IA64_PSR_RI;
682 if (!(vpsr & IA64_PSR_IC)) 683 if (!(vpsr & IA64_PSR_IC))
683 panic_vm(vcpu); 684 panic_vm(vcpu, "Trying to inject one IRQ with psr.ic=0\n");
684 reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */ 685 reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
685} 686}
686 687
@@ -941,8 +942,20 @@ static void vcpu_do_resume(struct kvm_vcpu *vcpu)
941 ia64_set_pta(vcpu->arch.vhpt.pta.val); 942 ia64_set_pta(vcpu->arch.vhpt.pta.val);
942} 943}
943 944
945static void vmm_sanity_check(struct kvm_vcpu *vcpu)
946{
947 struct exit_ctl_data *p = &vcpu->arch.exit_data;
948
949 if (!vmm_sanity && p->exit_reason != EXIT_REASON_DEBUG) {
950 panic_vm(vcpu, "Failed to do vmm sanity check,"
951 "it maybe caused by crashed vmm!!\n\n");
952 }
953}
954
944static void kvm_do_resume_op(struct kvm_vcpu *vcpu) 955static void kvm_do_resume_op(struct kvm_vcpu *vcpu)
945{ 956{
957 vmm_sanity_check(vcpu); /*Guarantee vcpu runing on healthy vmm!*/
958
946 if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) { 959 if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) {
947 vcpu_do_resume(vcpu); 960 vcpu_do_resume(vcpu);
948 return; 961 return;
@@ -968,3 +981,11 @@ void vmm_transition(struct kvm_vcpu *vcpu)
968 1, 0, 0, 0, 0, 0); 981 1, 0, 0, 0, 0, 0);
969 kvm_do_resume_op(vcpu); 982 kvm_do_resume_op(vcpu);
970} 983}
984
985void vmm_panic_handler(u64 vec)
986{
987 struct kvm_vcpu *vcpu = current_vcpu;
988 vmm_sanity = 0;
989 panic_vm(vcpu, "Unexpected interruption occurs in VMM, vector:0x%lx\n",
990 vec2off[vec]);
991}
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
index e44027ce5667..ecd526b55323 100644
--- a/arch/ia64/kvm/vcpu.c
+++ b/arch/ia64/kvm/vcpu.c
@@ -816,8 +816,9 @@ static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
816 unsigned long vitv = VCPU(vcpu, itv); 816 unsigned long vitv = VCPU(vcpu, itv);
817 817
818 if (vcpu->vcpu_id == 0) { 818 if (vcpu->vcpu_id == 0) {
819 for (i = 0; i < MAX_VCPU_NUM; i++) { 819 for (i = 0; i < KVM_MAX_VCPUS; i++) {
820 v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i); 820 v = (struct kvm_vcpu *)((char *)vcpu +
821 sizeof(struct kvm_vcpu_data) * i);
821 VMX(v, itc_offset) = itc_offset; 822 VMX(v, itc_offset) = itc_offset;
822 VMX(v, last_itc) = 0; 823 VMX(v, last_itc) = 0;
823 } 824 }
@@ -1650,7 +1651,8 @@ void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
1650 * Otherwise panic 1651 * Otherwise panic
1651 */ 1652 */
1652 if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM)) 1653 if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
1653 panic_vm(vcpu); 1654 panic_vm(vcpu, "Only support guests with vpsr.pk =0 \
1655 & vpsr.is=0\n");
1654 1656
1655 /* 1657 /*
1656 * For those IA64_PSR bits: id/da/dd/ss/ed/ia 1658 * For those IA64_PSR bits: id/da/dd/ss/ed/ia
@@ -2103,7 +2105,7 @@ void kvm_init_all_rr(struct kvm_vcpu *vcpu)
2103 2105
2104 if (is_physical_mode(vcpu)) { 2106 if (is_physical_mode(vcpu)) {
2105 if (vcpu->arch.mode_flags & GUEST_PHY_EMUL) 2107 if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
2106 panic_vm(vcpu); 2108 panic_vm(vcpu, "Machine Status conflicts!\n");
2107 2109
2108 ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0); 2110 ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0);
2109 ia64_dv_serialize_data(); 2111 ia64_dv_serialize_data();
@@ -2152,10 +2154,70 @@ int vmm_entry(void)
2152 return 0; 2154 return 0;
2153} 2155}
2154 2156
2155void panic_vm(struct kvm_vcpu *v) 2157static void kvm_show_registers(struct kvm_pt_regs *regs)
2156{ 2158{
2159 unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
2160
2161 struct kvm_vcpu *vcpu = current_vcpu;
2162 if (vcpu != NULL)
2163 printk("vcpu 0x%p vcpu %d\n",
2164 vcpu, vcpu->vcpu_id);
2165
2166 printk("psr : %016lx ifs : %016lx ip : [<%016lx>]\n",
2167 regs->cr_ipsr, regs->cr_ifs, ip);
2168
2169 printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
2170 regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
2171 printk("rnat: %016lx bspstore: %016lx pr : %016lx\n",
2172 regs->ar_rnat, regs->ar_bspstore, regs->pr);
2173 printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
2174 regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
2175 printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd);
2176 printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0,
2177 regs->b6, regs->b7);
2178 printk("f6 : %05lx%016lx f7 : %05lx%016lx\n",
2179 regs->f6.u.bits[1], regs->f6.u.bits[0],
2180 regs->f7.u.bits[1], regs->f7.u.bits[0]);
2181 printk("f8 : %05lx%016lx f9 : %05lx%016lx\n",
2182 regs->f8.u.bits[1], regs->f8.u.bits[0],
2183 regs->f9.u.bits[1], regs->f9.u.bits[0]);
2184 printk("f10 : %05lx%016lx f11 : %05lx%016lx\n",
2185 regs->f10.u.bits[1], regs->f10.u.bits[0],
2186 regs->f11.u.bits[1], regs->f11.u.bits[0]);
2187
2188 printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1,
2189 regs->r2, regs->r3);
2190 printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8,
2191 regs->r9, regs->r10);
2192 printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11,
2193 regs->r12, regs->r13);
2194 printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14,
2195 regs->r15, regs->r16);
2196 printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17,
2197 regs->r18, regs->r19);
2198 printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20,
2199 regs->r21, regs->r22);
2200 printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23,
2201 regs->r24, regs->r25);
2202 printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26,
2203 regs->r27, regs->r28);
2204 printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29,
2205 regs->r30, regs->r31);
2206
2207}
2208
2209void panic_vm(struct kvm_vcpu *v, const char *fmt, ...)
2210{
2211 va_list args;
2212 char buf[256];
2213
2214 struct kvm_pt_regs *regs = vcpu_regs(v);
2157 struct exit_ctl_data *p = &v->arch.exit_data; 2215 struct exit_ctl_data *p = &v->arch.exit_data;
2158 2216 va_start(args, fmt);
2217 vsnprintf(buf, sizeof(buf), fmt, args);
2218 va_end(args);
2219 printk(buf);
2220 kvm_show_registers(regs);
2159 p->exit_reason = EXIT_REASON_VM_PANIC; 2221 p->exit_reason = EXIT_REASON_VM_PANIC;
2160 vmm_transition(v); 2222 vmm_transition(v);
2161 /*Never to return*/ 2223 /*Never to return*/
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
index e9b2a4e121c0..b2f12a562bdf 100644
--- a/arch/ia64/kvm/vcpu.h
+++ b/arch/ia64/kvm/vcpu.h
@@ -737,9 +737,12 @@ void kvm_init_vtlb(struct kvm_vcpu *v);
737void kvm_init_vhpt(struct kvm_vcpu *v); 737void kvm_init_vhpt(struct kvm_vcpu *v);
738void thash_init(struct thash_cb *hcb, u64 sz); 738void thash_init(struct thash_cb *hcb, u64 sz);
739 739
740void panic_vm(struct kvm_vcpu *v); 740void panic_vm(struct kvm_vcpu *v, const char *fmt, ...);
741 741
742extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3, 742extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
743 u64 arg4, u64 arg5, u64 arg6, u64 arg7); 743 u64 arg4, u64 arg5, u64 arg6, u64 arg7);
744
745extern long vmm_sanity;
746
744#endif 747#endif
745#endif /* __VCPU_H__ */ 748#endif /* __VCPU_H__ */
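Editor's note: because the prototype in vcpu.h now carries a format string, every caller has to be updated in the same patch. If one also wanted the compiler to check the arguments, the usual approach is a printf format attribute; the patch does not add it, so the declaration below is only a sketch of that option.

/* Sketch only: the format attribute is NOT part of this patch, it merely
 * illustrates how mismatched panic_vm() arguments could be caught at
 * compile time. */
struct kvm_vcpu;        /* opaque here; defined in the KVM headers */

void panic_vm(struct kvm_vcpu *v, const char *fmt, ...)
        __attribute__((format(printf, 2, 3)));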
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c
index 2275bf4e681a..9eee5c04bacc 100644
--- a/arch/ia64/kvm/vmm.c
+++ b/arch/ia64/kvm/vmm.c
@@ -20,6 +20,7 @@
20 */ 20 */
21 21
22 22
23#include<linux/kernel.h>
23#include<linux/module.h> 24#include<linux/module.h>
24#include<asm/fpswa.h> 25#include<asm/fpswa.h>
25 26
@@ -31,6 +32,8 @@ MODULE_LICENSE("GPL");
31extern char kvm_ia64_ivt; 32extern char kvm_ia64_ivt;
32extern fpswa_interface_t *vmm_fpswa_interface; 33extern fpswa_interface_t *vmm_fpswa_interface;
33 34
35long vmm_sanity = 1;
36
34struct kvm_vmm_info vmm_info = { 37struct kvm_vmm_info vmm_info = {
35 .module = THIS_MODULE, 38 .module = THIS_MODULE,
36 .vmm_entry = vmm_entry, 39 .vmm_entry = vmm_entry,
@@ -62,5 +65,31 @@ void vmm_spin_unlock(spinlock_t *lock)
62{ 65{
63 _vmm_raw_spin_unlock(lock); 66 _vmm_raw_spin_unlock(lock);
64} 67}
68
69static void vcpu_debug_exit(struct kvm_vcpu *vcpu)
70{
71 struct exit_ctl_data *p = &vcpu->arch.exit_data;
72 long psr;
73
74 local_irq_save(psr);
75 p->exit_reason = EXIT_REASON_DEBUG;
76 vmm_transition(vcpu);
77 local_irq_restore(psr);
78}
79
80asmlinkage int printk(const char *fmt, ...)
81{
82 struct kvm_vcpu *vcpu = current_vcpu;
83 va_list args;
84 int r;
85
86 memset(vcpu->arch.log_buf, 0, VMM_LOG_LEN);
87 va_start(args, fmt);
88 r = vsnprintf(vcpu->arch.log_buf, VMM_LOG_LEN, fmt, args);
89 va_end(args);
90 vcpu_debug_exit(vcpu);
91 return r;
92}
93
65module_init(kvm_vmm_init) 94module_init(kvm_vmm_init)
66module_exit(kvm_vmm_exit) 95module_exit(kvm_vmm_exit)
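Editor's note: the VMM copy of the module runs in its own protected address space and cannot reach the host kernel's printk(), so the override above formats into the per-vcpu log_buf and then forces an EXIT_REASON_DEBUG transition so the host side can emit the text. The sketch below shows the same "format into a fixed buffer, then hand off" pattern in ordinary C; LOG_LEN, flush_to_host and vmm_printk_demo are illustrative names, not the kernel's.

#include <stdarg.h>
#include <stdio.h>
#include <string.h>

#define LOG_LEN 256                     /* stand-in for VMM_LOG_LEN */

static char log_buf[LOG_LEN];

/* Stand-in for the EXIT_REASON_DEBUG transition that hands the buffer
 * to the host side for printing. */
static void flush_to_host(void)
{
        fputs(log_buf, stdout);
}

/* Same shape as the overriding printk(): format into a private buffer,
 * then ask the consumer to emit it. */
static int vmm_printk_demo(const char *fmt, ...)
{
        va_list args;
        int r;

        memset(log_buf, 0, LOG_LEN);
        va_start(args, fmt);
        r = vsnprintf(log_buf, LOG_LEN, fmt, args);
        va_end(args);

        flush_to_host();
        return r;
}

int main(void)
{
        vmm_printk_demo("vmm sanity flag = %ld\n", 1L);
        return 0;
}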
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
index c1d7251a1480..3ef1a017a318 100644
--- a/arch/ia64/kvm/vmm_ivt.S
+++ b/arch/ia64/kvm/vmm_ivt.S
@@ -1,5 +1,5 @@
1/* 1/*
2 * /ia64/kvm_ivt.S 2 * arch/ia64/kvm/vmm_ivt.S
3 * 3 *
4 * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co 4 * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
5 * Stephane Eranian <eranian@hpl.hp.com> 5 * Stephane Eranian <eranian@hpl.hp.com>
@@ -70,32 +70,39 @@
70# define PSR_DEFAULT_BITS 0 70# define PSR_DEFAULT_BITS 0
71#endif 71#endif
72 72
73
74#define KVM_FAULT(n) \ 73#define KVM_FAULT(n) \
75 kvm_fault_##n:; \ 74 kvm_fault_##n:; \
76 mov r19=n;; \ 75 mov r19=n;; \
77 br.sptk.many kvm_fault_##n; \ 76 br.sptk.many kvm_vmm_panic; \
78 ;; \ 77 ;; \
79
80 78
81#define KVM_REFLECT(n) \ 79#define KVM_REFLECT(n) \
82 mov r31=pr; \ 80 mov r31=pr; \
83 mov r19=n; /* prepare to save predicates */ \ 81 mov r19=n; /* prepare to save predicates */ \
84 mov r29=cr.ipsr; \ 82 mov r29=cr.ipsr; \
85 ;; \ 83 ;; \
86 tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \ 84 tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \
87(p7)br.sptk.many kvm_dispatch_reflection; \ 85(p7) br.sptk.many kvm_dispatch_reflection; \
88 br.sptk.many kvm_panic; \ 86 br.sptk.many kvm_vmm_panic; \
89 87
90 88GLOBAL_ENTRY(kvm_vmm_panic)
91GLOBAL_ENTRY(kvm_panic) 89 KVM_SAVE_MIN_WITH_COVER_R19
92 br.sptk.many kvm_panic 90 alloc r14=ar.pfs,0,0,1,0
93 ;; 91 mov out0=r15
94END(kvm_panic) 92 adds r3=8,r2 // set up second base pointer
95 93 ;;
96 94 ssm psr.ic
97 95 ;;
98 96 srlz.i // guarantee that interruption collection is on
97 ;;
98 //(p15) ssm psr.i // restore psr.i
99 addl r14=@gprel(ia64_leave_hypervisor),gp
100 ;;
101 KVM_SAVE_REST
102 mov rp=r14
103 ;;
104 br.call.sptk.many b6=vmm_panic_handler;
105END(kvm_vmm_panic)
99 106
100 .section .text.ivt,"ax" 107 .section .text.ivt,"ax"
101 108
@@ -105,308 +112,307 @@ kvm_ia64_ivt:
105/////////////////////////////////////////////////////////////// 112///////////////////////////////////////////////////////////////
106// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) 113// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
107ENTRY(kvm_vhpt_miss) 114ENTRY(kvm_vhpt_miss)
108 KVM_FAULT(0) 115 KVM_FAULT(0)
109END(kvm_vhpt_miss) 116END(kvm_vhpt_miss)
110 117
111
112 .org kvm_ia64_ivt+0x400 118 .org kvm_ia64_ivt+0x400
113//////////////////////////////////////////////////////////////// 119////////////////////////////////////////////////////////////////
114// 0x0400 Entry 1 (size 64 bundles) ITLB (21) 120// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
115ENTRY(kvm_itlb_miss) 121ENTRY(kvm_itlb_miss)
116 mov r31 = pr 122 mov r31 = pr
117 mov r29=cr.ipsr; 123 mov r29=cr.ipsr;
118 ;; 124 ;;
119 tbit.z p6,p7=r29,IA64_PSR_VM_BIT; 125 tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
120 (p6) br.sptk kvm_alt_itlb_miss 126(p6) br.sptk kvm_alt_itlb_miss
121 mov r19 = 1 127 mov r19 = 1
122 br.sptk kvm_itlb_miss_dispatch 128 br.sptk kvm_itlb_miss_dispatch
123 KVM_FAULT(1); 129 KVM_FAULT(1);
124END(kvm_itlb_miss) 130END(kvm_itlb_miss)
125 131
126 .org kvm_ia64_ivt+0x0800 132 .org kvm_ia64_ivt+0x0800
127////////////////////////////////////////////////////////////////// 133//////////////////////////////////////////////////////////////////
128// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) 134// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
129ENTRY(kvm_dtlb_miss) 135ENTRY(kvm_dtlb_miss)
130 mov r31 = pr 136 mov r31 = pr
131 mov r29=cr.ipsr; 137 mov r29=cr.ipsr;
132 ;; 138 ;;
133 tbit.z p6,p7=r29,IA64_PSR_VM_BIT; 139 tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
134(p6)br.sptk kvm_alt_dtlb_miss 140(p6) br.sptk kvm_alt_dtlb_miss
135 br.sptk kvm_dtlb_miss_dispatch 141 br.sptk kvm_dtlb_miss_dispatch
136END(kvm_dtlb_miss) 142END(kvm_dtlb_miss)
137 143
138 .org kvm_ia64_ivt+0x0c00 144 .org kvm_ia64_ivt+0x0c00
139//////////////////////////////////////////////////////////////////// 145////////////////////////////////////////////////////////////////////
140// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) 146// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
141ENTRY(kvm_alt_itlb_miss) 147ENTRY(kvm_alt_itlb_miss)
142 mov r16=cr.ifa // get address that caused the TLB miss 148 mov r16=cr.ifa // get address that caused the TLB miss
143 ;; 149 ;;
144 movl r17=PAGE_KERNEL 150 movl r17=PAGE_KERNEL
145 mov r24=cr.ipsr 151 mov r24=cr.ipsr
146 movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) 152 movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
147 ;; 153 ;;
148 and r19=r19,r16 // clear ed, reserved bits, and PTE control bits 154 and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
149 ;; 155 ;;
150 or r19=r17,r19 // insert PTE control bits into r19 156 or r19=r17,r19 // insert PTE control bits into r19
151 ;; 157 ;;
152 movl r20=IA64_GRANULE_SHIFT<<2 158 movl r20=IA64_GRANULE_SHIFT<<2
153 ;; 159 ;;
154 mov cr.itir=r20 160 mov cr.itir=r20
155 ;; 161 ;;
156 itc.i r19 // insert the TLB entry 162 itc.i r19 // insert the TLB entry
157 mov pr=r31,-1 163 mov pr=r31,-1
158 rfi 164 rfi
159END(kvm_alt_itlb_miss) 165END(kvm_alt_itlb_miss)
160 166
161 .org kvm_ia64_ivt+0x1000 167 .org kvm_ia64_ivt+0x1000
162///////////////////////////////////////////////////////////////////// 168/////////////////////////////////////////////////////////////////////
163// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) 169// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
164ENTRY(kvm_alt_dtlb_miss) 170ENTRY(kvm_alt_dtlb_miss)
165 mov r16=cr.ifa // get address that caused the TLB miss 171 mov r16=cr.ifa // get address that caused the TLB miss
166 ;; 172 ;;
167 movl r17=PAGE_KERNEL 173 movl r17=PAGE_KERNEL
168 movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) 174 movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
169 mov r24=cr.ipsr 175 mov r24=cr.ipsr
170 ;; 176 ;;
171 and r19=r19,r16 // clear ed, reserved bits, and PTE control bits 177 and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
172 ;; 178 ;;
173 or r19=r19,r17 // insert PTE control bits into r19 179 or r19=r19,r17 // insert PTE control bits into r19
174 ;; 180 ;;
175 movl r20=IA64_GRANULE_SHIFT<<2 181 movl r20=IA64_GRANULE_SHIFT<<2
176 ;; 182 ;;
177 mov cr.itir=r20 183 mov cr.itir=r20
178 ;; 184 ;;
179 itc.d r19 // insert the TLB entry 185 itc.d r19 // insert the TLB entry
180 mov pr=r31,-1 186 mov pr=r31,-1
181 rfi 187 rfi
182END(kvm_alt_dtlb_miss) 188END(kvm_alt_dtlb_miss)
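Editor's note: kvm_alt_itlb_miss and kvm_alt_dtlb_miss build the translation inline: mask the faulting address down to its page frame within the physical address range, OR in the PAGE_KERNEL attribute bits, program the granule size through cr.itir and insert the entry with itc.i/itc.d. The PTE arithmetic corresponds roughly to the C below; the constants are placeholders, not the kernel's real PAGE_KERNEL or IA64_MAX_PHYS_BITS values.

#include <stdio.h>

/* Placeholder constants for the sketch only. */
#define SKETCH_PAGE_KERNEL      0x761ULL
#define SKETCH_MAX_PHYS_BITS    50

/* Mirror of the alt-miss PTE construction: keep the page-frame bits of the
 * faulting address, then OR in the kernel attribute bits. */
static unsigned long long build_ident_pte(unsigned long long fault_addr)
{
        unsigned long long mask =
                ((1ULL << SKETCH_MAX_PHYS_BITS) - 1) & ~0xfffULL;

        return (fault_addr & mask) | SKETCH_PAGE_KERNEL;
}

int main(void)
{
        printf("pte: %#llx\n", build_ident_pte(0x12345678000ULL));
        return 0;
}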
183 189
184 .org kvm_ia64_ivt+0x1400 190 .org kvm_ia64_ivt+0x1400
185////////////////////////////////////////////////////////////////////// 191//////////////////////////////////////////////////////////////////////
186// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) 192// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
187ENTRY(kvm_nested_dtlb_miss) 193ENTRY(kvm_nested_dtlb_miss)
188 KVM_FAULT(5) 194 KVM_FAULT(5)
189END(kvm_nested_dtlb_miss) 195END(kvm_nested_dtlb_miss)
190 196
191 .org kvm_ia64_ivt+0x1800 197 .org kvm_ia64_ivt+0x1800
192///////////////////////////////////////////////////////////////////// 198/////////////////////////////////////////////////////////////////////
193// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) 199// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
194ENTRY(kvm_ikey_miss) 200ENTRY(kvm_ikey_miss)
195 KVM_REFLECT(6) 201 KVM_REFLECT(6)
196END(kvm_ikey_miss) 202END(kvm_ikey_miss)
197 203
198 .org kvm_ia64_ivt+0x1c00 204 .org kvm_ia64_ivt+0x1c00
199///////////////////////////////////////////////////////////////////// 205/////////////////////////////////////////////////////////////////////
200// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) 206// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
201ENTRY(kvm_dkey_miss) 207ENTRY(kvm_dkey_miss)
202 KVM_REFLECT(7) 208 KVM_REFLECT(7)
203END(kvm_dkey_miss) 209END(kvm_dkey_miss)
204 210
205 .org kvm_ia64_ivt+0x2000 211 .org kvm_ia64_ivt+0x2000
206//////////////////////////////////////////////////////////////////// 212////////////////////////////////////////////////////////////////////
207// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) 213// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
208ENTRY(kvm_dirty_bit) 214ENTRY(kvm_dirty_bit)
209 KVM_REFLECT(8) 215 KVM_REFLECT(8)
210END(kvm_dirty_bit) 216END(kvm_dirty_bit)
211 217
212 .org kvm_ia64_ivt+0x2400 218 .org kvm_ia64_ivt+0x2400
213//////////////////////////////////////////////////////////////////// 219////////////////////////////////////////////////////////////////////
214// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) 220// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
215ENTRY(kvm_iaccess_bit) 221ENTRY(kvm_iaccess_bit)
216 KVM_REFLECT(9) 222 KVM_REFLECT(9)
217END(kvm_iaccess_bit) 223END(kvm_iaccess_bit)
218 224
219 .org kvm_ia64_ivt+0x2800 225 .org kvm_ia64_ivt+0x2800
220/////////////////////////////////////////////////////////////////// 226///////////////////////////////////////////////////////////////////
221// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) 227// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
222ENTRY(kvm_daccess_bit) 228ENTRY(kvm_daccess_bit)
223 KVM_REFLECT(10) 229 KVM_REFLECT(10)
224END(kvm_daccess_bit) 230END(kvm_daccess_bit)
225 231
226 .org kvm_ia64_ivt+0x2c00 232 .org kvm_ia64_ivt+0x2c00
227///////////////////////////////////////////////////////////////// 233/////////////////////////////////////////////////////////////////
228// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) 234// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
229ENTRY(kvm_break_fault) 235ENTRY(kvm_break_fault)
230 mov r31=pr 236 mov r31=pr
231 mov r19=11 237 mov r19=11
232 mov r29=cr.ipsr 238 mov r29=cr.ipsr
233 ;; 239 ;;
234 KVM_SAVE_MIN_WITH_COVER_R19 240 KVM_SAVE_MIN_WITH_COVER_R19
235 ;; 241 ;;
236 alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!) 242 alloc r14=ar.pfs,0,0,4,0 //(must be first in insn group!)
237 mov out0=cr.ifa 243 mov out0=cr.ifa
238 mov out2=cr.isr // FIXME: pity to make this slow access twice 244 mov out2=cr.isr // FIXME: pity to make this slow access twice
239 mov out3=cr.iim // FIXME: pity to make this slow access twice 245 mov out3=cr.iim // FIXME: pity to make this slow access twice
240 adds r3=8,r2 // set up second base pointer 246 adds r3=8,r2 // set up second base pointer
241 ;; 247 ;;
242 ssm psr.ic 248 ssm psr.ic
243 ;; 249 ;;
244 srlz.i // guarantee that interruption collection is on 250 srlz.i // guarantee that interruption collection is on
245 ;; 251 ;;
246 //(p15)ssm psr.i // restore psr.i 252 //(p15)ssm psr.i // restore psr.i
247 addl r14=@gprel(ia64_leave_hypervisor),gp 253 addl r14=@gprel(ia64_leave_hypervisor),gp
248 ;; 254 ;;
249 KVM_SAVE_REST 255 KVM_SAVE_REST
250 mov rp=r14 256 mov rp=r14
251 ;; 257 ;;
252 adds out1=16,sp 258 adds out1=16,sp
253 br.call.sptk.many b6=kvm_ia64_handle_break 259 br.call.sptk.many b6=kvm_ia64_handle_break
254 ;; 260 ;;
255END(kvm_break_fault) 261END(kvm_break_fault)
256 262
257 .org kvm_ia64_ivt+0x3000 263 .org kvm_ia64_ivt+0x3000
258///////////////////////////////////////////////////////////////// 264/////////////////////////////////////////////////////////////////
259// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) 265// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
260ENTRY(kvm_interrupt) 266ENTRY(kvm_interrupt)
261 mov r31=pr // prepare to save predicates 267 mov r31=pr // prepare to save predicates
262 mov r19=12 268 mov r19=12
263 mov r29=cr.ipsr 269 mov r29=cr.ipsr
264 ;; 270 ;;
265 tbit.z p6,p7=r29,IA64_PSR_VM_BIT 271 tbit.z p6,p7=r29,IA64_PSR_VM_BIT
266 tbit.z p0,p15=r29,IA64_PSR_I_BIT 272 tbit.z p0,p15=r29,IA64_PSR_I_BIT
267 ;; 273 ;;
268(p7) br.sptk kvm_dispatch_interrupt 274(p7) br.sptk kvm_dispatch_interrupt
269 ;; 275 ;;
270 mov r27=ar.rsc /* M */ 276 mov r27=ar.rsc /* M */
271 mov r20=r1 /* A */ 277 mov r20=r1 /* A */
272 mov r25=ar.unat /* M */ 278 mov r25=ar.unat /* M */
273 mov r26=ar.pfs /* I */ 279 mov r26=ar.pfs /* I */
274 mov r28=cr.iip /* M */ 280 mov r28=cr.iip /* M */
275 cover /* B (or nothing) */ 281 cover /* B (or nothing) */
276 ;; 282 ;;
277 mov r1=sp 283 mov r1=sp
278 ;; 284 ;;
279 invala /* M */ 285 invala /* M */
280 mov r30=cr.ifs 286 mov r30=cr.ifs
281 ;; 287 ;;
282 addl r1=-VMM_PT_REGS_SIZE,r1 288 addl r1=-VMM_PT_REGS_SIZE,r1
283 ;; 289 ;;
284 adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */ 290 adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */
285 adds r16=PT(CR_IPSR),r1 291 adds r16=PT(CR_IPSR),r1
286 ;; 292 ;;
287 lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES 293 lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES
288 st8 [r16]=r29 /* save cr.ipsr */ 294 st8 [r16]=r29 /* save cr.ipsr */
289 ;; 295 ;;
290 lfetch.fault.excl.nt1 [r17] 296 lfetch.fault.excl.nt1 [r17]
291 mov r29=b0 297 mov r29=b0
292 ;; 298 ;;
293 adds r16=PT(R8),r1 /* initialize first base pointer */ 299 adds r16=PT(R8),r1 /* initialize first base pointer */
294 adds r17=PT(R9),r1 /* initialize second base pointer */ 300 adds r17=PT(R9),r1 /* initialize second base pointer */
295 mov r18=r0 /* make sure r18 isn't NaT */ 301 mov r18=r0 /* make sure r18 isn't NaT */
296 ;; 302 ;;
297.mem.offset 0,0; st8.spill [r16]=r8,16 303.mem.offset 0,0; st8.spill [r16]=r8,16
298.mem.offset 8,0; st8.spill [r17]=r9,16 304.mem.offset 8,0; st8.spill [r17]=r9,16
299 ;; 305 ;;
300.mem.offset 0,0; st8.spill [r16]=r10,24 306.mem.offset 0,0; st8.spill [r16]=r10,24
301.mem.offset 8,0; st8.spill [r17]=r11,24 307.mem.offset 8,0; st8.spill [r17]=r11,24
302 ;; 308 ;;
303 st8 [r16]=r28,16 /* save cr.iip */ 309 st8 [r16]=r28,16 /* save cr.iip */
304 st8 [r17]=r30,16 /* save cr.ifs */ 310 st8 [r17]=r30,16 /* save cr.ifs */
305 mov r8=ar.fpsr /* M */ 311 mov r8=ar.fpsr /* M */
306 mov r9=ar.csd 312 mov r9=ar.csd
307 mov r10=ar.ssd 313 mov r10=ar.ssd
308 movl r11=FPSR_DEFAULT /* L-unit */ 314 movl r11=FPSR_DEFAULT /* L-unit */
309 ;; 315 ;;
310 st8 [r16]=r25,16 /* save ar.unat */ 316 st8 [r16]=r25,16 /* save ar.unat */
311 st8 [r17]=r26,16 /* save ar.pfs */ 317 st8 [r17]=r26,16 /* save ar.pfs */
312 shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */ 318 shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */
313 ;; 319 ;;
314 st8 [r16]=r27,16 /* save ar.rsc */ 320 st8 [r16]=r27,16 /* save ar.rsc */
315 adds r17=16,r17 /* skip over ar_rnat field */ 321 adds r17=16,r17 /* skip over ar_rnat field */
316 ;; 322 ;;
317 st8 [r17]=r31,16 /* save predicates */ 323 st8 [r17]=r31,16 /* save predicates */
318 adds r16=16,r16 /* skip over ar_bspstore field */ 324 adds r16=16,r16 /* skip over ar_bspstore field */
319 ;; 325 ;;
320 st8 [r16]=r29,16 /* save b0 */ 326 st8 [r16]=r29,16 /* save b0 */
321 st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */ 327 st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */
322 ;; 328 ;;
323.mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */ 329.mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */
324.mem.offset 8,0; st8.spill [r17]=r12,16 330.mem.offset 8,0; st8.spill [r17]=r12,16
325 adds r12=-16,r1 331 adds r12=-16,r1
326 /* switch to kernel memory stack (with 16 bytes of scratch) */ 332 /* switch to kernel memory stack (with 16 bytes of scratch) */
327 ;; 333 ;;
328.mem.offset 0,0; st8.spill [r16]=r13,16 334.mem.offset 0,0; st8.spill [r16]=r13,16
329.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */ 335.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */
330 ;; 336 ;;
331.mem.offset 0,0; st8.spill [r16]=r15,16 337.mem.offset 0,0; st8.spill [r16]=r15,16
332.mem.offset 8,0; st8.spill [r17]=r14,16 338.mem.offset 8,0; st8.spill [r17]=r14,16
333 dep r14=-1,r0,60,4 339 dep r14=-1,r0,60,4
334 ;; 340 ;;
335.mem.offset 0,0; st8.spill [r16]=r2,16 341.mem.offset 0,0; st8.spill [r16]=r2,16
336.mem.offset 8,0; st8.spill [r17]=r3,16 342.mem.offset 8,0; st8.spill [r17]=r3,16
337 adds r2=VMM_PT_REGS_R16_OFFSET,r1 343 adds r2=VMM_PT_REGS_R16_OFFSET,r1
338 adds r14 = VMM_VCPU_GP_OFFSET,r13 344 adds r14 = VMM_VCPU_GP_OFFSET,r13
339 ;; 345 ;;
340 mov r8=ar.ccv 346 mov r8=ar.ccv
341 ld8 r14 = [r14] 347 ld8 r14 = [r14]
342 ;; 348 ;;
343 mov r1=r14 /* establish kernel global pointer */ 349 mov r1=r14 /* establish kernel global pointer */
344 ;; \ 350 ;; \
345 bsw.1 351 bsw.1
346 ;; 352 ;;
347 alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group 353 alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
348 mov out0=r13 354 mov out0=r13
349 ;; 355 ;;
350 ssm psr.ic 356 ssm psr.ic
351 ;; 357 ;;
352 srlz.i 358 srlz.i
353 ;; 359 ;;
354 //(p15) ssm psr.i 360 //(p15) ssm psr.i
355 adds r3=8,r2 // set up second base pointer for SAVE_REST 361 adds r3=8,r2 // set up second base pointer for SAVE_REST
356 srlz.i // ensure everybody knows psr.ic is back on 362 srlz.i // ensure everybody knows psr.ic is back on
357 ;; 363 ;;
358.mem.offset 0,0; st8.spill [r2]=r16,16 364.mem.offset 0,0; st8.spill [r2]=r16,16
359.mem.offset 8,0; st8.spill [r3]=r17,16 365.mem.offset 8,0; st8.spill [r3]=r17,16
360 ;; 366 ;;
361.mem.offset 0,0; st8.spill [r2]=r18,16 367.mem.offset 0,0; st8.spill [r2]=r18,16
362.mem.offset 8,0; st8.spill [r3]=r19,16 368.mem.offset 8,0; st8.spill [r3]=r19,16
363 ;; 369 ;;
364.mem.offset 0,0; st8.spill [r2]=r20,16 370.mem.offset 0,0; st8.spill [r2]=r20,16
365.mem.offset 8,0; st8.spill [r3]=r21,16 371.mem.offset 8,0; st8.spill [r3]=r21,16
366 mov r18=b6 372 mov r18=b6
367 ;; 373 ;;
368.mem.offset 0,0; st8.spill [r2]=r22,16 374.mem.offset 0,0; st8.spill [r2]=r22,16
369.mem.offset 8,0; st8.spill [r3]=r23,16 375.mem.offset 8,0; st8.spill [r3]=r23,16
370 mov r19=b7 376 mov r19=b7
371 ;; 377 ;;
372.mem.offset 0,0; st8.spill [r2]=r24,16 378.mem.offset 0,0; st8.spill [r2]=r24,16
373.mem.offset 8,0; st8.spill [r3]=r25,16 379.mem.offset 8,0; st8.spill [r3]=r25,16
374 ;; 380 ;;
375.mem.offset 0,0; st8.spill [r2]=r26,16 381.mem.offset 0,0; st8.spill [r2]=r26,16
376.mem.offset 8,0; st8.spill [r3]=r27,16 382.mem.offset 8,0; st8.spill [r3]=r27,16
377 ;; 383 ;;
378.mem.offset 0,0; st8.spill [r2]=r28,16 384.mem.offset 0,0; st8.spill [r2]=r28,16
379.mem.offset 8,0; st8.spill [r3]=r29,16 385.mem.offset 8,0; st8.spill [r3]=r29,16
380 ;; 386 ;;
381.mem.offset 0,0; st8.spill [r2]=r30,16 387.mem.offset 0,0; st8.spill [r2]=r30,16
382.mem.offset 8,0; st8.spill [r3]=r31,32 388.mem.offset 8,0; st8.spill [r3]=r31,32
383 ;; 389 ;;
384 mov ar.fpsr=r11 /* M-unit */ 390 mov ar.fpsr=r11 /* M-unit */
385 st8 [r2]=r8,8 /* ar.ccv */ 391 st8 [r2]=r8,8 /* ar.ccv */
386 adds r24=PT(B6)-PT(F7),r3 392 adds r24=PT(B6)-PT(F7),r3
387 ;; 393 ;;
388 stf.spill [r2]=f6,32 394 stf.spill [r2]=f6,32
389 stf.spill [r3]=f7,32 395 stf.spill [r3]=f7,32
390 ;; 396 ;;
391 stf.spill [r2]=f8,32 397 stf.spill [r2]=f8,32
392 stf.spill [r3]=f9,32 398 stf.spill [r3]=f9,32
393 ;; 399 ;;
394 stf.spill [r2]=f10 400 stf.spill [r2]=f10
395 stf.spill [r3]=f11 401 stf.spill [r3]=f11
396 adds r25=PT(B7)-PT(F11),r3 402 adds r25=PT(B7)-PT(F11),r3
397 ;; 403 ;;
398 st8 [r24]=r18,16 /* b6 */ 404 st8 [r24]=r18,16 /* b6 */
399 st8 [r25]=r19,16 /* b7 */ 405 st8 [r25]=r19,16 /* b7 */
400 ;; 406 ;;
401 st8 [r24]=r9 /* ar.csd */ 407 st8 [r24]=r9 /* ar.csd */
402 st8 [r25]=r10 /* ar.ssd */ 408 st8 [r25]=r10 /* ar.ssd */
403 ;; 409 ;;
404 srlz.d // make sure we see the effect of cr.ivr 410 srlz.d // make sure we see the effect of cr.ivr
405 addl r14=@gprel(ia64_leave_nested),gp 411 addl r14=@gprel(ia64_leave_nested),gp
406 ;; 412 ;;
407 mov rp=r14 413 mov rp=r14
408 br.call.sptk.many b6=kvm_ia64_handle_irq 414 br.call.sptk.many b6=kvm_ia64_handle_irq
409 ;; 415 ;;
410END(kvm_interrupt) 416END(kvm_interrupt)
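Editor's note: the first thing kvm_interrupt does is test IA64_PSR_VM_BIT in the interrupted psr: if the external interrupt arrived while guest code was running it branches to kvm_dispatch_interrupt, otherwise it handles the interrupt in place and returns through ia64_leave_nested. A tiny C sketch of that routing decision; the bit position is an assumption for the sketch and it presumes a 64-bit long.

#include <stdbool.h>
#include <stdio.h>

/* Assumed bit position for psr.vm, used only for this sketch. */
#define SKETCH_PSR_VM_BIT 46UL

/* Was guest code running when the interrupt fired? */
static bool interrupted_guest(unsigned long ipsr)
{
        return (ipsr >> SKETCH_PSR_VM_BIT) & 1UL;
}

int main(void)
{
        unsigned long ipsr = 1UL << SKETCH_PSR_VM_BIT;

        if (interrupted_guest(ipsr))
                puts("-> kvm_dispatch_interrupt (guest context)");
        else
                puts("-> handle in place, return via ia64_leave_nested");
        return 0;
}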
411 417
412 .global kvm_dispatch_vexirq 418 .global kvm_dispatch_vexirq
@@ -414,387 +420,385 @@ END(kvm_interrupt)
414////////////////////////////////////////////////////////////////////// 420//////////////////////////////////////////////////////////////////////
415// 0x3400 Entry 13 (size 64 bundles) Reserved 421// 0x3400 Entry 13 (size 64 bundles) Reserved
416ENTRY(kvm_virtual_exirq) 422ENTRY(kvm_virtual_exirq)
417 mov r31=pr 423 mov r31=pr
418 mov r19=13 424 mov r19=13
419 mov r30 =r0 425 mov r30 =r0
420 ;; 426 ;;
421kvm_dispatch_vexirq: 427kvm_dispatch_vexirq:
422 cmp.eq p6,p0 = 1,r30 428 cmp.eq p6,p0 = 1,r30
423 ;; 429 ;;
424(p6)add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21 430(p6) add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21
425 ;; 431 ;;
426(p6)ld8 r1 = [r29] 432(p6) ld8 r1 = [r29]
427 ;; 433 ;;
428 KVM_SAVE_MIN_WITH_COVER_R19 434 KVM_SAVE_MIN_WITH_COVER_R19
429 alloc r14=ar.pfs,0,0,1,0 435 alloc r14=ar.pfs,0,0,1,0
430 mov out0=r13 436 mov out0=r13
431 437
432 ssm psr.ic 438 ssm psr.ic
433 ;; 439 ;;
434 srlz.i // guarantee that interruption collection is on 440 srlz.i // guarantee that interruption collection is on
435 ;; 441 ;;
436 //(p15) ssm psr.i // restore psr.i 442 //(p15) ssm psr.i // restore psr.i
437 adds r3=8,r2 // set up second base pointer 443 adds r3=8,r2 // set up second base pointer
438 ;; 444 ;;
439 KVM_SAVE_REST 445 KVM_SAVE_REST
440 addl r14=@gprel(ia64_leave_hypervisor),gp 446 addl r14=@gprel(ia64_leave_hypervisor),gp
441 ;; 447 ;;
442 mov rp=r14 448 mov rp=r14
443 br.call.sptk.many b6=kvm_vexirq 449 br.call.sptk.many b6=kvm_vexirq
444END(kvm_virtual_exirq) 450END(kvm_virtual_exirq)
445 451
446 .org kvm_ia64_ivt+0x3800 452 .org kvm_ia64_ivt+0x3800
447///////////////////////////////////////////////////////////////////// 453/////////////////////////////////////////////////////////////////////
448// 0x3800 Entry 14 (size 64 bundles) Reserved 454// 0x3800 Entry 14 (size 64 bundles) Reserved
449 KVM_FAULT(14) 455 KVM_FAULT(14)
450 // this code segment is from 2.6.16.13 456 // this code segment is from 2.6.16.13
451
452 457
453 .org kvm_ia64_ivt+0x3c00 458 .org kvm_ia64_ivt+0x3c00
454/////////////////////////////////////////////////////////////////////// 459///////////////////////////////////////////////////////////////////////
455// 0x3c00 Entry 15 (size 64 bundles) Reserved 460// 0x3c00 Entry 15 (size 64 bundles) Reserved
456 KVM_FAULT(15) 461 KVM_FAULT(15)
457
458 462
459 .org kvm_ia64_ivt+0x4000 463 .org kvm_ia64_ivt+0x4000
460/////////////////////////////////////////////////////////////////////// 464///////////////////////////////////////////////////////////////////////
461// 0x4000 Entry 16 (size 64 bundles) Reserved 465// 0x4000 Entry 16 (size 64 bundles) Reserved
462 KVM_FAULT(16) 466 KVM_FAULT(16)
463 467
464 .org kvm_ia64_ivt+0x4400 468 .org kvm_ia64_ivt+0x4400
465////////////////////////////////////////////////////////////////////// 469//////////////////////////////////////////////////////////////////////
466// 0x4400 Entry 17 (size 64 bundles) Reserved 470// 0x4400 Entry 17 (size 64 bundles) Reserved
467 KVM_FAULT(17) 471 KVM_FAULT(17)
468 472
469 .org kvm_ia64_ivt+0x4800 473 .org kvm_ia64_ivt+0x4800
470////////////////////////////////////////////////////////////////////// 474//////////////////////////////////////////////////////////////////////
471// 0x4800 Entry 18 (size 64 bundles) Reserved 475// 0x4800 Entry 18 (size 64 bundles) Reserved
472 KVM_FAULT(18) 476 KVM_FAULT(18)
473 477
474 .org kvm_ia64_ivt+0x4c00 478 .org kvm_ia64_ivt+0x4c00
475////////////////////////////////////////////////////////////////////// 479//////////////////////////////////////////////////////////////////////
476// 0x4c00 Entry 19 (size 64 bundles) Reserved 480// 0x4c00 Entry 19 (size 64 bundles) Reserved
477 KVM_FAULT(19) 481 KVM_FAULT(19)
478 482
479 .org kvm_ia64_ivt+0x5000 483 .org kvm_ia64_ivt+0x5000
480////////////////////////////////////////////////////////////////////// 484//////////////////////////////////////////////////////////////////////
481// 0x5000 Entry 20 (size 16 bundles) Page Not Present 485// 0x5000 Entry 20 (size 16 bundles) Page Not Present
482ENTRY(kvm_page_not_present) 486ENTRY(kvm_page_not_present)
483 KVM_REFLECT(20) 487 KVM_REFLECT(20)
484END(kvm_page_not_present) 488END(kvm_page_not_present)
485 489
486 .org kvm_ia64_ivt+0x5100 490 .org kvm_ia64_ivt+0x5100
487/////////////////////////////////////////////////////////////////////// 491///////////////////////////////////////////////////////////////////////
488// 0x5100 Entry 21 (size 16 bundles) Key Permission vector 492// 0x5100 Entry 21 (size 16 bundles) Key Permission vector
489ENTRY(kvm_key_permission) 493ENTRY(kvm_key_permission)
490 KVM_REFLECT(21) 494 KVM_REFLECT(21)
491END(kvm_key_permission) 495END(kvm_key_permission)
492 496
493 .org kvm_ia64_ivt+0x5200 497 .org kvm_ia64_ivt+0x5200
494////////////////////////////////////////////////////////////////////// 498//////////////////////////////////////////////////////////////////////
495// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) 499// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
496ENTRY(kvm_iaccess_rights) 500ENTRY(kvm_iaccess_rights)
497 KVM_REFLECT(22) 501 KVM_REFLECT(22)
498END(kvm_iaccess_rights) 502END(kvm_iaccess_rights)
499 503
500 .org kvm_ia64_ivt+0x5300 504 .org kvm_ia64_ivt+0x5300
501////////////////////////////////////////////////////////////////////// 505//////////////////////////////////////////////////////////////////////
502// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) 506// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
503ENTRY(kvm_daccess_rights) 507ENTRY(kvm_daccess_rights)
504 KVM_REFLECT(23) 508 KVM_REFLECT(23)
505END(kvm_daccess_rights) 509END(kvm_daccess_rights)
506 510
507 .org kvm_ia64_ivt+0x5400 511 .org kvm_ia64_ivt+0x5400
508///////////////////////////////////////////////////////////////////// 512/////////////////////////////////////////////////////////////////////
509// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) 513// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
510ENTRY(kvm_general_exception) 514ENTRY(kvm_general_exception)
511 KVM_REFLECT(24) 515 KVM_REFLECT(24)
512 KVM_FAULT(24) 516 KVM_FAULT(24)
513END(kvm_general_exception) 517END(kvm_general_exception)
514 518
515 .org kvm_ia64_ivt+0x5500 519 .org kvm_ia64_ivt+0x5500
516////////////////////////////////////////////////////////////////////// 520//////////////////////////////////////////////////////////////////////
517// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) 521// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
518ENTRY(kvm_disabled_fp_reg) 522ENTRY(kvm_disabled_fp_reg)
519 KVM_REFLECT(25) 523 KVM_REFLECT(25)
520END(kvm_disabled_fp_reg) 524END(kvm_disabled_fp_reg)
521 525
522 .org kvm_ia64_ivt+0x5600 526 .org kvm_ia64_ivt+0x5600
523//////////////////////////////////////////////////////////////////// 527////////////////////////////////////////////////////////////////////
524// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) 528// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
525ENTRY(kvm_nat_consumption) 529ENTRY(kvm_nat_consumption)
526 KVM_REFLECT(26) 530 KVM_REFLECT(26)
527END(kvm_nat_consumption) 531END(kvm_nat_consumption)
528 532
529 .org kvm_ia64_ivt+0x5700 533 .org kvm_ia64_ivt+0x5700
530///////////////////////////////////////////////////////////////////// 534/////////////////////////////////////////////////////////////////////
531// 0x5700 Entry 27 (size 16 bundles) Speculation (40) 535// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
532ENTRY(kvm_speculation_vector) 536ENTRY(kvm_speculation_vector)
533 KVM_REFLECT(27) 537 KVM_REFLECT(27)
534END(kvm_speculation_vector) 538END(kvm_speculation_vector)
535 539
536 .org kvm_ia64_ivt+0x5800 540 .org kvm_ia64_ivt+0x5800
537///////////////////////////////////////////////////////////////////// 541/////////////////////////////////////////////////////////////////////
538// 0x5800 Entry 28 (size 16 bundles) Reserved 542// 0x5800 Entry 28 (size 16 bundles) Reserved
539 KVM_FAULT(28) 543 KVM_FAULT(28)
540 544
541 .org kvm_ia64_ivt+0x5900 545 .org kvm_ia64_ivt+0x5900
542/////////////////////////////////////////////////////////////////// 546///////////////////////////////////////////////////////////////////
543// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) 547// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
544ENTRY(kvm_debug_vector) 548ENTRY(kvm_debug_vector)
545 KVM_FAULT(29) 549 KVM_FAULT(29)
546END(kvm_debug_vector) 550END(kvm_debug_vector)
547 551
548 .org kvm_ia64_ivt+0x5a00 552 .org kvm_ia64_ivt+0x5a00
549/////////////////////////////////////////////////////////////// 553///////////////////////////////////////////////////////////////
550// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) 554// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
551ENTRY(kvm_unaligned_access) 555ENTRY(kvm_unaligned_access)
552 KVM_REFLECT(30) 556 KVM_REFLECT(30)
553END(kvm_unaligned_access) 557END(kvm_unaligned_access)
554 558
555 .org kvm_ia64_ivt+0x5b00 559 .org kvm_ia64_ivt+0x5b00
556////////////////////////////////////////////////////////////////////// 560//////////////////////////////////////////////////////////////////////
557// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) 561// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
558ENTRY(kvm_unsupported_data_reference) 562ENTRY(kvm_unsupported_data_reference)
559 KVM_REFLECT(31) 563 KVM_REFLECT(31)
560END(kvm_unsupported_data_reference) 564END(kvm_unsupported_data_reference)
561 565
562 .org kvm_ia64_ivt+0x5c00 566 .org kvm_ia64_ivt+0x5c00
563//////////////////////////////////////////////////////////////////// 567////////////////////////////////////////////////////////////////////
564// 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65) 568// 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65)
565ENTRY(kvm_floating_point_fault) 569ENTRY(kvm_floating_point_fault)
566 KVM_REFLECT(32) 570 KVM_REFLECT(32)
567END(kvm_floating_point_fault) 571END(kvm_floating_point_fault)
568 572
569 .org kvm_ia64_ivt+0x5d00 573 .org kvm_ia64_ivt+0x5d00
570///////////////////////////////////////////////////////////////////// 574/////////////////////////////////////////////////////////////////////
571// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) 575// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
572ENTRY(kvm_floating_point_trap) 576ENTRY(kvm_floating_point_trap)
573 KVM_REFLECT(33) 577 KVM_REFLECT(33)
574END(kvm_floating_point_trap) 578END(kvm_floating_point_trap)
575 579
576 .org kvm_ia64_ivt+0x5e00 580 .org kvm_ia64_ivt+0x5e00
577////////////////////////////////////////////////////////////////////// 581//////////////////////////////////////////////////////////////////////
578// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) 582// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
579ENTRY(kvm_lower_privilege_trap) 583ENTRY(kvm_lower_privilege_trap)
580 KVM_REFLECT(34) 584 KVM_REFLECT(34)
581END(kvm_lower_privilege_trap) 585END(kvm_lower_privilege_trap)
582 586
583 .org kvm_ia64_ivt+0x5f00 587 .org kvm_ia64_ivt+0x5f00
584////////////////////////////////////////////////////////////////////// 588//////////////////////////////////////////////////////////////////////
585// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) 589// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
586ENTRY(kvm_taken_branch_trap) 590ENTRY(kvm_taken_branch_trap)
587 KVM_REFLECT(35) 591 KVM_REFLECT(35)
588END(kvm_taken_branch_trap) 592END(kvm_taken_branch_trap)
589 593
590 .org kvm_ia64_ivt+0x6000 594 .org kvm_ia64_ivt+0x6000
591//////////////////////////////////////////////////////////////////// 595////////////////////////////////////////////////////////////////////
592// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) 596// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
593ENTRY(kvm_single_step_trap) 597ENTRY(kvm_single_step_trap)
594 KVM_REFLECT(36) 598 KVM_REFLECT(36)
595END(kvm_single_step_trap) 599END(kvm_single_step_trap)
596 .global kvm_virtualization_fault_back 600 .global kvm_virtualization_fault_back
597 .org kvm_ia64_ivt+0x6100 601 .org kvm_ia64_ivt+0x6100
598///////////////////////////////////////////////////////////////////// 602/////////////////////////////////////////////////////////////////////
599// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault 603// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault
600ENTRY(kvm_virtualization_fault) 604ENTRY(kvm_virtualization_fault)
601 mov r31=pr 605 mov r31=pr
602 adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 606 adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
603 ;; 607 ;;
604 st8 [r16] = r1 608 st8 [r16] = r1
605 adds r17 = VMM_VCPU_GP_OFFSET, r21 609 adds r17 = VMM_VCPU_GP_OFFSET, r21
606 ;; 610 ;;
607 ld8 r1 = [r17] 611 ld8 r1 = [r17]
608 cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24 612 cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24
609 cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24 613 cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24
610 cmp.eq p8,p0=EVENT_MOV_TO_RR,r24 614 cmp.eq p8,p0=EVENT_MOV_TO_RR,r24
611 cmp.eq p9,p0=EVENT_RSM,r24 615 cmp.eq p9,p0=EVENT_RSM,r24
612 cmp.eq p10,p0=EVENT_SSM,r24 616 cmp.eq p10,p0=EVENT_SSM,r24
613 cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24 617 cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24
614 cmp.eq p12,p0=EVENT_THASH,r24 618 cmp.eq p12,p0=EVENT_THASH,r24
615 (p6) br.dptk.many kvm_asm_mov_from_ar 619(p6) br.dptk.many kvm_asm_mov_from_ar
616 (p7) br.dptk.many kvm_asm_mov_from_rr 620(p7) br.dptk.many kvm_asm_mov_from_rr
617 (p8) br.dptk.many kvm_asm_mov_to_rr 621(p8) br.dptk.many kvm_asm_mov_to_rr
618 (p9) br.dptk.many kvm_asm_rsm 622(p9) br.dptk.many kvm_asm_rsm
619 (p10) br.dptk.many kvm_asm_ssm 623(p10) br.dptk.many kvm_asm_ssm
620 (p11) br.dptk.many kvm_asm_mov_to_psr 624(p11) br.dptk.many kvm_asm_mov_to_psr
621 (p12) br.dptk.many kvm_asm_thash 625(p12) br.dptk.many kvm_asm_thash
622 ;; 626 ;;
623kvm_virtualization_fault_back: 627kvm_virtualization_fault_back:
624 adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 628 adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
625 ;; 629 ;;
626 ld8 r1 = [r16] 630 ld8 r1 = [r16]
627 ;; 631 ;;
628 mov r19=37 632 mov r19=37
629 adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 633 adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
630 adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 634 adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
631 ;; 635 ;;
632 st8 [r16] = r24 636 st8 [r16] = r24
633 st8 [r17] = r25 637 st8 [r17] = r25
634 ;; 638 ;;
635 cmp.ne p6,p0=EVENT_RFI, r24 639 cmp.ne p6,p0=EVENT_RFI, r24
636 (p6) br.sptk kvm_dispatch_virtualization_fault 640(p6) br.sptk kvm_dispatch_virtualization_fault
637 ;; 641 ;;
638 adds r18=VMM_VPD_BASE_OFFSET,r21 642 adds r18=VMM_VPD_BASE_OFFSET,r21
639 ;; 643 ;;
640 ld8 r18=[r18] 644 ld8 r18=[r18]
641 ;; 645 ;;
642 adds r18=VMM_VPD_VIFS_OFFSET,r18 646 adds r18=VMM_VPD_VIFS_OFFSET,r18
643 ;; 647 ;;
644 ld8 r18=[r18] 648 ld8 r18=[r18]
645 ;; 649 ;;
646 tbit.z p6,p0=r18,63 650 tbit.z p6,p0=r18,63
647 (p6) br.sptk kvm_dispatch_virtualization_fault 651(p6) br.sptk kvm_dispatch_virtualization_fault
648 ;; 652 ;;
649 //if vifs.v=1 desert current register frame 653//if vifs.v=1 desert current register frame
650 alloc r18=ar.pfs,0,0,0,0 654 alloc r18=ar.pfs,0,0,0,0
651 br.sptk kvm_dispatch_virtualization_fault 655 br.sptk kvm_dispatch_virtualization_fault
652END(kvm_virtualization_fault) 656END(kvm_virtualization_fault)
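Editor's note: the ladder of cmp.eq/predicated branches above is effectively a switch on the cause code the virtualization fault left in r24: a handful of frequent operations are fast-pathed to dedicated assembly stubs, and everything else falls back to kvm_dispatch_virtualization_fault (with extra register-frame handling for RFI when vifs.v is set). A compact C rendering of that dispatch; the EVENT_* names come from the source, but their numeric values and the handler functions here are placeholders.

#include <stdio.h>

/* EVENT_* names are from the source; the values here are placeholders. */
enum vmm_event {
        EVENT_MOV_FROM_AR = 1,
        EVENT_MOV_FROM_RR,
        EVENT_MOV_TO_RR,
        EVENT_RSM,
        EVENT_SSM,
        EVENT_MOV_TO_PSR,
        EVENT_THASH,
        EVENT_RFI,
        EVENT_OTHER,
};

static void fast_path(enum vmm_event e)
{
        printf("fast-path stub for event %d\n", e);
}

static void slow_path(enum vmm_event e)
{
        printf("kvm_dispatch_virtualization_fault for event %d\n", e);
}

static void dispatch_virtualization_fault(enum vmm_event cause)
{
        switch (cause) {
        case EVENT_MOV_FROM_AR:
        case EVENT_MOV_FROM_RR:
        case EVENT_MOV_TO_RR:
        case EVENT_RSM:
        case EVENT_SSM:
        case EVENT_MOV_TO_PSR:
        case EVENT_THASH:
                fast_path(cause);       /* dedicated asm stubs */
                break;
        default:
                slow_path(cause);       /* full emulation path */
                break;
        }
}

int main(void)
{
        dispatch_virtualization_fault(EVENT_THASH);
        dispatch_virtualization_fault(EVENT_RFI);
        return 0;
}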
653 657
654 .org kvm_ia64_ivt+0x6200 658 .org kvm_ia64_ivt+0x6200
655////////////////////////////////////////////////////////////// 659//////////////////////////////////////////////////////////////
656// 0x6200 Entry 38 (size 16 bundles) Reserved 660// 0x6200 Entry 38 (size 16 bundles) Reserved
657 KVM_FAULT(38) 661 KVM_FAULT(38)
658 662
659 .org kvm_ia64_ivt+0x6300 663 .org kvm_ia64_ivt+0x6300
660///////////////////////////////////////////////////////////////// 664/////////////////////////////////////////////////////////////////
661// 0x6300 Entry 39 (size 16 bundles) Reserved 665// 0x6300 Entry 39 (size 16 bundles) Reserved
662 KVM_FAULT(39) 666 KVM_FAULT(39)
663 667
664 .org kvm_ia64_ivt+0x6400 668 .org kvm_ia64_ivt+0x6400
665///////////////////////////////////////////////////////////////// 669/////////////////////////////////////////////////////////////////
666// 0x6400 Entry 40 (size 16 bundles) Reserved 670// 0x6400 Entry 40 (size 16 bundles) Reserved
667 KVM_FAULT(40) 671 KVM_FAULT(40)
668 672
669 .org kvm_ia64_ivt+0x6500 673 .org kvm_ia64_ivt+0x6500
670////////////////////////////////////////////////////////////////// 674//////////////////////////////////////////////////////////////////
671// 0x6500 Entry 41 (size 16 bundles) Reserved 675// 0x6500 Entry 41 (size 16 bundles) Reserved
672 KVM_FAULT(41) 676 KVM_FAULT(41)
673 677
674 .org kvm_ia64_ivt+0x6600 678 .org kvm_ia64_ivt+0x6600
675////////////////////////////////////////////////////////////////// 679//////////////////////////////////////////////////////////////////
676// 0x6600 Entry 42 (size 16 bundles) Reserved 680// 0x6600 Entry 42 (size 16 bundles) Reserved
677 KVM_FAULT(42) 681 KVM_FAULT(42)
678 682
679 .org kvm_ia64_ivt+0x6700 683 .org kvm_ia64_ivt+0x6700
680////////////////////////////////////////////////////////////////// 684//////////////////////////////////////////////////////////////////
681// 0x6700 Entry 43 (size 16 bundles) Reserved 685// 0x6700 Entry 43 (size 16 bundles) Reserved
682 KVM_FAULT(43) 686 KVM_FAULT(43)
683 687
684 .org kvm_ia64_ivt+0x6800 688 .org kvm_ia64_ivt+0x6800
685////////////////////////////////////////////////////////////////// 689//////////////////////////////////////////////////////////////////
686// 0x6800 Entry 44 (size 16 bundles) Reserved 690// 0x6800 Entry 44 (size 16 bundles) Reserved
687 KVM_FAULT(44) 691 KVM_FAULT(44)
688 692
689 .org kvm_ia64_ivt+0x6900 693 .org kvm_ia64_ivt+0x6900
690/////////////////////////////////////////////////////////////////// 694///////////////////////////////////////////////////////////////////
 691// 0x6900 Entry 45 (size 16 bundles) IA-32 Exception 695// 0x6900 Entry 45 (size 16 bundles) IA-32 Exception
 692//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) 696//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
693ENTRY(kvm_ia32_exception) 697ENTRY(kvm_ia32_exception)
694 KVM_FAULT(45) 698 KVM_FAULT(45)
695END(kvm_ia32_exception) 699END(kvm_ia32_exception)
696 700
697 .org kvm_ia64_ivt+0x6a00 701 .org kvm_ia64_ivt+0x6a00
698//////////////////////////////////////////////////////////////////// 702////////////////////////////////////////////////////////////////////
699// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) 703// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71)
700ENTRY(kvm_ia32_intercept) 704ENTRY(kvm_ia32_intercept)
701 KVM_FAULT(47) 705 KVM_FAULT(47)
702END(kvm_ia32_intercept) 706END(kvm_ia32_intercept)
703 707
704 .org kvm_ia64_ivt+0x6c00 708 .org kvm_ia64_ivt+0x6c00
705///////////////////////////////////////////////////////////////////// 709/////////////////////////////////////////////////////////////////////
706// 0x6c00 Entry 48 (size 16 bundles) Reserved 710// 0x6c00 Entry 48 (size 16 bundles) Reserved
707 KVM_FAULT(48) 711 KVM_FAULT(48)
708 712
709 .org kvm_ia64_ivt+0x6d00 713 .org kvm_ia64_ivt+0x6d00
710////////////////////////////////////////////////////////////////////// 714//////////////////////////////////////////////////////////////////////
711// 0x6d00 Entry 49 (size 16 bundles) Reserved 715// 0x6d00 Entry 49 (size 16 bundles) Reserved
712 KVM_FAULT(49) 716 KVM_FAULT(49)
713 717
714 .org kvm_ia64_ivt+0x6e00 718 .org kvm_ia64_ivt+0x6e00
715////////////////////////////////////////////////////////////////////// 719//////////////////////////////////////////////////////////////////////
716// 0x6e00 Entry 50 (size 16 bundles) Reserved 720// 0x6e00 Entry 50 (size 16 bundles) Reserved
717 KVM_FAULT(50) 721 KVM_FAULT(50)
718 722
719 .org kvm_ia64_ivt+0x6f00 723 .org kvm_ia64_ivt+0x6f00
720///////////////////////////////////////////////////////////////////// 724/////////////////////////////////////////////////////////////////////
721// 0x6f00 Entry 51 (size 16 bundles) Reserved 725// 0x6f00 Entry 51 (size 16 bundles) Reserved
722 KVM_FAULT(52) 726 KVM_FAULT(52)
723 727
724 .org kvm_ia64_ivt+0x7100 728 .org kvm_ia64_ivt+0x7100
725//////////////////////////////////////////////////////////////////// 729////////////////////////////////////////////////////////////////////
726// 0x7100 Entry 53 (size 16 bundles) Reserved 730// 0x7100 Entry 53 (size 16 bundles) Reserved
727 KVM_FAULT(53) 731 KVM_FAULT(53)
728 732
729 .org kvm_ia64_ivt+0x7200 733 .org kvm_ia64_ivt+0x7200
730///////////////////////////////////////////////////////////////////// 734/////////////////////////////////////////////////////////////////////
731// 0x7200 Entry 54 (size 16 bundles) Reserved 735// 0x7200 Entry 54 (size 16 bundles) Reserved
732 KVM_FAULT(54) 736 KVM_FAULT(54)
733 737
734 .org kvm_ia64_ivt+0x7300 738 .org kvm_ia64_ivt+0x7300
735//////////////////////////////////////////////////////////////////// 739////////////////////////////////////////////////////////////////////
736// 0x7300 Entry 55 (size 16 bundles) Reserved 740// 0x7300 Entry 55 (size 16 bundles) Reserved
737 KVM_FAULT(55) 741 KVM_FAULT(55)
738 742
739 .org kvm_ia64_ivt+0x7400 743 .org kvm_ia64_ivt+0x7400
740//////////////////////////////////////////////////////////////////// 744////////////////////////////////////////////////////////////////////
741// 0x7400 Entry 56 (size 16 bundles) Reserved 745// 0x7400 Entry 56 (size 16 bundles) Reserved
742 KVM_FAULT(56) 746 KVM_FAULT(56)
743 747
744 .org kvm_ia64_ivt+0x7500 748 .org kvm_ia64_ivt+0x7500
745///////////////////////////////////////////////////////////////////// 749/////////////////////////////////////////////////////////////////////
746// 0x7500 Entry 57 (size 16 bundles) Reserved 750// 0x7500 Entry 57 (size 16 bundles) Reserved
747 KVM_FAULT(57) 751 KVM_FAULT(57)
748 752
749 .org kvm_ia64_ivt+0x7600 753 .org kvm_ia64_ivt+0x7600
750///////////////////////////////////////////////////////////////////// 754/////////////////////////////////////////////////////////////////////
751// 0x7600 Entry 58 (size 16 bundles) Reserved 755// 0x7600 Entry 58 (size 16 bundles) Reserved
752 KVM_FAULT(58) 756 KVM_FAULT(58)
753 757
754 .org kvm_ia64_ivt+0x7700 758 .org kvm_ia64_ivt+0x7700
755//////////////////////////////////////////////////////////////////// 759////////////////////////////////////////////////////////////////////
756// 0x7700 Entry 59 (size 16 bundles) Reserved 760// 0x7700 Entry 59 (size 16 bundles) Reserved
757 KVM_FAULT(59) 761 KVM_FAULT(59)
758 762
759 .org kvm_ia64_ivt+0x7800 763 .org kvm_ia64_ivt+0x7800
760//////////////////////////////////////////////////////////////////// 764////////////////////////////////////////////////////////////////////
761// 0x7800 Entry 60 (size 16 bundles) Reserved 765// 0x7800 Entry 60 (size 16 bundles) Reserved
762 KVM_FAULT(60) 766 KVM_FAULT(60)
763 767
764 .org kvm_ia64_ivt+0x7900 768 .org kvm_ia64_ivt+0x7900
765///////////////////////////////////////////////////////////////////// 769/////////////////////////////////////////////////////////////////////
766// 0x7900 Entry 61 (size 16 bundles) Reserved 770// 0x7900 Entry 61 (size 16 bundles) Reserved
767 KVM_FAULT(61) 771 KVM_FAULT(61)
768 772
769 .org kvm_ia64_ivt+0x7a00 773 .org kvm_ia64_ivt+0x7a00
770///////////////////////////////////////////////////////////////////// 774/////////////////////////////////////////////////////////////////////
771// 0x7a00 Entry 62 (size 16 bundles) Reserved 775// 0x7a00 Entry 62 (size 16 bundles) Reserved
772 KVM_FAULT(62) 776 KVM_FAULT(62)
773 777
774 .org kvm_ia64_ivt+0x7b00 778 .org kvm_ia64_ivt+0x7b00
775///////////////////////////////////////////////////////////////////// 779/////////////////////////////////////////////////////////////////////
776// 0x7b00 Entry 63 (size 16 bundles) Reserved 780// 0x7b00 Entry 63 (size 16 bundles) Reserved
777 KVM_FAULT(63) 781 KVM_FAULT(63)
778 782
779 .org kvm_ia64_ivt+0x7c00 783 .org kvm_ia64_ivt+0x7c00
780//////////////////////////////////////////////////////////////////// 784////////////////////////////////////////////////////////////////////
781// 0x7c00 Entry 64 (size 16 bundles) Reserved 785// 0x7c00 Entry 64 (size 16 bundles) Reserved
782 KVM_FAULT(64) 786 KVM_FAULT(64)
783 787
784 .org kvm_ia64_ivt+0x7d00 788 .org kvm_ia64_ivt+0x7d00
785///////////////////////////////////////////////////////////////////// 789/////////////////////////////////////////////////////////////////////
786// 0x7d00 Entry 65 (size 16 bundles) Reserved 790// 0x7d00 Entry 65 (size 16 bundles) Reserved
787 KVM_FAULT(65) 791 KVM_FAULT(65)
788 792
789 .org kvm_ia64_ivt+0x7e00 793 .org kvm_ia64_ivt+0x7e00
790///////////////////////////////////////////////////////////////////// 794/////////////////////////////////////////////////////////////////////
791// 0x7e00 Entry 66 (size 16 bundles) Reserved 795// 0x7e00 Entry 66 (size 16 bundles) Reserved
792 KVM_FAULT(66) 796 KVM_FAULT(66)
793 797
794 .org kvm_ia64_ivt+0x7f00 798 .org kvm_ia64_ivt+0x7f00
795//////////////////////////////////////////////////////////////////// 799////////////////////////////////////////////////////////////////////
796// 0x7f00 Entry 67 (size 16 bundles) Reserved 800// 0x7f00 Entry 67 (size 16 bundles) Reserved
797 KVM_FAULT(67) 801 KVM_FAULT(67)
798 802
799 .org kvm_ia64_ivt+0x8000 803 .org kvm_ia64_ivt+0x8000
800// There is no particular reason for this code to be here, other than that 804// There is no particular reason for this code to be here, other than that
@@ -804,132 +808,128 @@ END(kvm_ia32_intercept)
804 808
805 809
806ENTRY(kvm_dtlb_miss_dispatch) 810ENTRY(kvm_dtlb_miss_dispatch)
807 mov r19 = 2 811 mov r19 = 2
808 KVM_SAVE_MIN_WITH_COVER_R19 812 KVM_SAVE_MIN_WITH_COVER_R19
809 alloc r14=ar.pfs,0,0,3,0 813 alloc r14=ar.pfs,0,0,3,0
810 mov out0=cr.ifa 814 mov out0=cr.ifa
811 mov out1=r15 815 mov out1=r15
812 adds r3=8,r2 // set up second base pointer 816 adds r3=8,r2 // set up second base pointer
813 ;; 817 ;;
814 ssm psr.ic 818 ssm psr.ic
815 ;; 819 ;;
816 srlz.i // guarantee that interruption collection is on 820 srlz.i // guarantee that interruption collection is on
817 ;; 821 ;;
818 //(p15) ssm psr.i // restore psr.i 822 //(p15) ssm psr.i // restore psr.i
819 addl r14=@gprel(ia64_leave_hypervisor_prepare),gp 823 addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
820 ;; 824 ;;
821 KVM_SAVE_REST 825 KVM_SAVE_REST
822 KVM_SAVE_EXTRA 826 KVM_SAVE_EXTRA
823 mov rp=r14 827 mov rp=r14
824 ;; 828 ;;
825 adds out2=16,r12 829 adds out2=16,r12
826 br.call.sptk.many b6=kvm_page_fault 830 br.call.sptk.many b6=kvm_page_fault
827END(kvm_dtlb_miss_dispatch) 831END(kvm_dtlb_miss_dispatch)
828 832
829ENTRY(kvm_itlb_miss_dispatch) 833ENTRY(kvm_itlb_miss_dispatch)
830 834
831 KVM_SAVE_MIN_WITH_COVER_R19 835 KVM_SAVE_MIN_WITH_COVER_R19
832 alloc r14=ar.pfs,0,0,3,0 836 alloc r14=ar.pfs,0,0,3,0
833 mov out0=cr.ifa 837 mov out0=cr.ifa
834 mov out1=r15 838 mov out1=r15
835 adds r3=8,r2 // set up second base pointer 839 adds r3=8,r2 // set up second base pointer
836 ;; 840 ;;
837 ssm psr.ic 841 ssm psr.ic
838 ;; 842 ;;
839 srlz.i // guarantee that interruption collection is on 843 srlz.i // guarantee that interruption collection is on
840 ;; 844 ;;
841 //(p15) ssm psr.i // restore psr.i 845 //(p15) ssm psr.i // restore psr.i
842 addl r14=@gprel(ia64_leave_hypervisor),gp 846 addl r14=@gprel(ia64_leave_hypervisor),gp
843 ;; 847 ;;
844 KVM_SAVE_REST 848 KVM_SAVE_REST
845 mov rp=r14 849 mov rp=r14
846 ;; 850 ;;
847 adds out2=16,r12 851 adds out2=16,r12
848 br.call.sptk.many b6=kvm_page_fault 852 br.call.sptk.many b6=kvm_page_fault
849END(kvm_itlb_miss_dispatch) 853END(kvm_itlb_miss_dispatch)
850 854
851ENTRY(kvm_dispatch_reflection) 855ENTRY(kvm_dispatch_reflection)
852 /* 856/*
853 * Input: 857 * Input:
854 * psr.ic: off 858 * psr.ic: off
855 * r19: intr type (offset into ivt, see ia64_int.h) 859 * r19: intr type (offset into ivt, see ia64_int.h)
856 * r31: contains saved predicates (pr) 860 * r31: contains saved predicates (pr)
857 */ 861 */
858 KVM_SAVE_MIN_WITH_COVER_R19 862 KVM_SAVE_MIN_WITH_COVER_R19
859 alloc r14=ar.pfs,0,0,5,0 863 alloc r14=ar.pfs,0,0,5,0
860 mov out0=cr.ifa 864 mov out0=cr.ifa
861 mov out1=cr.isr 865 mov out1=cr.isr
862 mov out2=cr.iim 866 mov out2=cr.iim
863 mov out3=r15 867 mov out3=r15
864 adds r3=8,r2 // set up second base pointer 868 adds r3=8,r2 // set up second base pointer
865 ;; 869 ;;
866 ssm psr.ic 870 ssm psr.ic
867 ;; 871 ;;
868 srlz.i // guarantee that interruption collection is on 872 srlz.i // guarantee that interruption collection is on
869 ;; 873 ;;
870 //(p15) ssm psr.i // restore psr.i 874 //(p15) ssm psr.i // restore psr.i
871 addl r14=@gprel(ia64_leave_hypervisor),gp 875 addl r14=@gprel(ia64_leave_hypervisor),gp
872 ;; 876 ;;
873 KVM_SAVE_REST 877 KVM_SAVE_REST
874 mov rp=r14 878 mov rp=r14
875 ;; 879 ;;
876 adds out4=16,r12 880 adds out4=16,r12
877 br.call.sptk.many b6=reflect_interruption 881 br.call.sptk.many b6=reflect_interruption
878END(kvm_dispatch_reflection) 882END(kvm_dispatch_reflection)
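Editor's note: kvm_dispatch_reflection marshals the fault details (cr.ifa, cr.isr, cr.iim, the value in r15, and a pointer to the saved register frame) into out0..out4 before calling reflect_interruption. The call being set up looks roughly like the sketch below; the parameter list is inferred from the argument set-up, so treat it as an assumption rather than the kernel's exact prototype.

#include <stdio.h>

/* Shape of the call set up above, inferred from the out0..out4 assignments;
 * the real prototype lives in the KVM ia64 sources and uses kernel types. */
struct kvm_pt_regs;             /* opaque stand-in */

static void reflect_interruption_demo(unsigned long ifa, unsigned long isr,
                                      unsigned long iim, unsigned long vec,
                                      struct kvm_pt_regs *regs)
{
        (void)regs;
        printf("reflect: vec=%#lx ifa=%#lx isr=%#lx iim=%#lx\n",
               vec, ifa, isr, iim);
}

int main(void)
{
        /* out0=cr.ifa, out1=cr.isr, out2=cr.iim, out3=r15,
         * out4=&pt_regs on the memory stack. */
        reflect_interruption_demo(0x1000, 0, 0, 0x5100,
                                  (struct kvm_pt_regs *)0);
        return 0;
}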
879 883
880ENTRY(kvm_dispatch_virtualization_fault) 884ENTRY(kvm_dispatch_virtualization_fault)
881 adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 885 adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
882 adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 886 adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
883 ;; 887 ;;
884 st8 [r16] = r24 888 st8 [r16] = r24
885 st8 [r17] = r25 889 st8 [r17] = r25
886 ;; 890 ;;
887 KVM_SAVE_MIN_WITH_COVER_R19 891 KVM_SAVE_MIN_WITH_COVER_R19
888 ;; 892 ;;
889 alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) 893 alloc r14=ar.pfs,0,0,2,0 // (must be first in insn group!)
890 mov out0=r13 //vcpu 894 mov out0=r13 //vcpu
891 adds r3=8,r2 // set up second base pointer 895 adds r3=8,r2 // set up second base pointer
892 ;; 896 ;;
893 ssm psr.ic 897 ssm psr.ic
894 ;; 898 ;;
895 srlz.i // guarantee that interruption collection is on 899 srlz.i // guarantee that interruption collection is on
896 ;; 900 ;;
897 //(p15) ssm psr.i // restore psr.i 901 //(p15) ssm psr.i // restore psr.i
898 addl r14=@gprel(ia64_leave_hypervisor_prepare),gp 902 addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
899 ;; 903 ;;
900 KVM_SAVE_REST 904 KVM_SAVE_REST
901 KVM_SAVE_EXTRA 905 KVM_SAVE_EXTRA
902 mov rp=r14 906 mov rp=r14
903 ;; 907 ;;
904 adds out1=16,sp //regs 908 adds out1=16,sp //regs
905 br.call.sptk.many b6=kvm_emulate 909 br.call.sptk.many b6=kvm_emulate
906END(kvm_dispatch_virtualization_fault) 910END(kvm_dispatch_virtualization_fault)
907 911
908 912
909ENTRY(kvm_dispatch_interrupt) 913ENTRY(kvm_dispatch_interrupt)
910 KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3 914 KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3
911 ;; 915 ;;
912 alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group 916 alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
913 //mov out0=cr.ivr // pass cr.ivr as first arg 917 adds r3=8,r2 // set up second base pointer for SAVE_REST
914 adds r3=8,r2 // set up second base pointer for SAVE_REST 918 ;;
915 ;; 919 ssm psr.ic
916 ssm psr.ic 920 ;;
917 ;; 921 srlz.i
918 srlz.i 922 ;;
919 ;; 923 //(p15) ssm psr.i
920 //(p15) ssm psr.i 924 addl r14=@gprel(ia64_leave_hypervisor),gp
921 addl r14=@gprel(ia64_leave_hypervisor),gp 925 ;;
922 ;; 926 KVM_SAVE_REST
923 KVM_SAVE_REST 927 mov rp=r14
924 mov rp=r14 928 ;;
925 ;; 929 mov out0=r13 // pass pointer to pt_regs as second arg
926 mov out0=r13 // pass pointer to pt_regs as second arg 930 br.call.sptk.many b6=kvm_ia64_handle_irq
927 br.call.sptk.many b6=kvm_ia64_handle_irq
928END(kvm_dispatch_interrupt) 931END(kvm_dispatch_interrupt)
929 932
930
931
932
933GLOBAL_ENTRY(ia64_leave_nested) 933GLOBAL_ENTRY(ia64_leave_nested)
934 rsm psr.i 934 rsm psr.i
935 ;; 935 ;;
@@ -1008,7 +1008,7 @@ GLOBAL_ENTRY(ia64_leave_nested)
1008 ;; 1008 ;;
1009 ldf.fill f11=[r2] 1009 ldf.fill f11=[r2]
1010// mov r18=r13 1010// mov r18=r13
1011// mov r21=r13 1011// mov r21=r13
1012 adds r16=PT(CR_IPSR)+16,r12 1012 adds r16=PT(CR_IPSR)+16,r12
1013 adds r17=PT(CR_IIP)+16,r12 1013 adds r17=PT(CR_IIP)+16,r12
1014 ;; 1014 ;;
@@ -1058,138 +1058,135 @@ GLOBAL_ENTRY(ia64_leave_nested)
1058 rfi 1058 rfi
1059END(ia64_leave_nested) 1059END(ia64_leave_nested)
1060 1060
1061
1062
1063GLOBAL_ENTRY(ia64_leave_hypervisor_prepare) 1061GLOBAL_ENTRY(ia64_leave_hypervisor_prepare)
1064 /* 1062/*
1065 * work.need_resched etc. mustn't get changed 1063 * work.need_resched etc. mustn't get changed
1066 *by this CPU before it returns to 1064 *by this CPU before it returns to
1067 ;; 1065 * user- or fsys-mode, hence we disable interrupts early on:
1068 * user- or fsys-mode, hence we disable interrupts early on: 1066 */
1069 */ 1067 adds r2 = PT(R4)+16,r12
1070 adds r2 = PT(R4)+16,r12 1068 adds r3 = PT(R5)+16,r12
1071 adds r3 = PT(R5)+16,r12 1069 adds r8 = PT(EML_UNAT)+16,r12
1072 adds r8 = PT(EML_UNAT)+16,r12 1070 ;;
1073 ;; 1071 ld8 r8 = [r8]
1074 ld8 r8 = [r8] 1072 ;;
1075 ;; 1073 mov ar.unat=r8
1076 mov ar.unat=r8 1074 ;;
1077 ;; 1075 ld8.fill r4=[r2],16 //load r4
1078 ld8.fill r4=[r2],16 //load r4 1076 ld8.fill r5=[r3],16 //load r5
1079 ld8.fill r5=[r3],16 //load r5 1077 ;;
1080 ;; 1078 ld8.fill r6=[r2] //load r6
1081 ld8.fill r6=[r2] //load r6 1079 ld8.fill r7=[r3] //load r7
1082 ld8.fill r7=[r3] //load r7 1080 ;;
1083 ;;
1084END(ia64_leave_hypervisor_prepare) 1081END(ia64_leave_hypervisor_prepare)
1085//fall through 1082//fall through
1086GLOBAL_ENTRY(ia64_leave_hypervisor) 1083GLOBAL_ENTRY(ia64_leave_hypervisor)
1087 rsm psr.i 1084 rsm psr.i
1088 ;; 1085 ;;
1089 br.call.sptk.many b0=leave_hypervisor_tail 1086 br.call.sptk.many b0=leave_hypervisor_tail
1090 ;; 1087 ;;
1091 adds r20=PT(PR)+16,r12 1088 adds r20=PT(PR)+16,r12
1092 adds r8=PT(EML_UNAT)+16,r12 1089 adds r8=PT(EML_UNAT)+16,r12
1093 ;; 1090 ;;
1094 ld8 r8=[r8] 1091 ld8 r8=[r8]
1095 ;; 1092 ;;
1096 mov ar.unat=r8 1093 mov ar.unat=r8
1097 ;; 1094 ;;
1098 lfetch [r20],PT(CR_IPSR)-PT(PR) 1095 lfetch [r20],PT(CR_IPSR)-PT(PR)
1099 adds r2 = PT(B6)+16,r12 1096 adds r2 = PT(B6)+16,r12
1100 adds r3 = PT(B7)+16,r12 1097 adds r3 = PT(B7)+16,r12
1101 ;; 1098 ;;
1102 lfetch [r20] 1099 lfetch [r20]
1103 ;; 1100 ;;
1104 ld8 r24=[r2],16 /* B6 */ 1101 ld8 r24=[r2],16 /* B6 */
1105 ld8 r25=[r3],16 /* B7 */ 1102 ld8 r25=[r3],16 /* B7 */
1106 ;; 1103 ;;
1107 ld8 r26=[r2],16 /* ar_csd */ 1104 ld8 r26=[r2],16 /* ar_csd */
1108 ld8 r27=[r3],16 /* ar_ssd */ 1105 ld8 r27=[r3],16 /* ar_ssd */
1109 mov b6 = r24 1106 mov b6 = r24
1110 ;; 1107 ;;
1111 ld8.fill r8=[r2],16 1108 ld8.fill r8=[r2],16
1112 ld8.fill r9=[r3],16 1109 ld8.fill r9=[r3],16
1113 mov b7 = r25 1110 mov b7 = r25
1114 ;; 1111 ;;
1115 mov ar.csd = r26 1112 mov ar.csd = r26
1116 mov ar.ssd = r27 1113 mov ar.ssd = r27
1117 ;; 1114 ;;
1118 ld8.fill r10=[r2],PT(R15)-PT(R10) 1115 ld8.fill r10=[r2],PT(R15)-PT(R10)
1119 ld8.fill r11=[r3],PT(R14)-PT(R11) 1116 ld8.fill r11=[r3],PT(R14)-PT(R11)
1120 ;; 1117 ;;
1121 ld8.fill r15=[r2],PT(R16)-PT(R15) 1118 ld8.fill r15=[r2],PT(R16)-PT(R15)
1122 ld8.fill r14=[r3],PT(R17)-PT(R14) 1119 ld8.fill r14=[r3],PT(R17)-PT(R14)
1123 ;; 1120 ;;
1124 ld8.fill r16=[r2],16 1121 ld8.fill r16=[r2],16
1125 ld8.fill r17=[r3],16 1122 ld8.fill r17=[r3],16
1126 ;; 1123 ;;
1127 ld8.fill r18=[r2],16 1124 ld8.fill r18=[r2],16
1128 ld8.fill r19=[r3],16 1125 ld8.fill r19=[r3],16
1129 ;; 1126 ;;
1130 ld8.fill r20=[r2],16 1127 ld8.fill r20=[r2],16
1131 ld8.fill r21=[r3],16 1128 ld8.fill r21=[r3],16
1132 ;; 1129 ;;
1133 ld8.fill r22=[r2],16 1130 ld8.fill r22=[r2],16
1134 ld8.fill r23=[r3],16 1131 ld8.fill r23=[r3],16
1135 ;; 1132 ;;
1136 ld8.fill r24=[r2],16 1133 ld8.fill r24=[r2],16
1137 ld8.fill r25=[r3],16 1134 ld8.fill r25=[r3],16
1138 ;; 1135 ;;
1139 ld8.fill r26=[r2],16 1136 ld8.fill r26=[r2],16
1140 ld8.fill r27=[r3],16 1137 ld8.fill r27=[r3],16
1141 ;; 1138 ;;
1142 ld8.fill r28=[r2],16 1139 ld8.fill r28=[r2],16
1143 ld8.fill r29=[r3],16 1140 ld8.fill r29=[r3],16
1144 ;; 1141 ;;
1145 ld8.fill r30=[r2],PT(F6)-PT(R30) 1142 ld8.fill r30=[r2],PT(F6)-PT(R30)
1146 ld8.fill r31=[r3],PT(F7)-PT(R31) 1143 ld8.fill r31=[r3],PT(F7)-PT(R31)
1147 ;; 1144 ;;
1148 rsm psr.i | psr.ic 1145 rsm psr.i | psr.ic
1149 // initiate turning off of interrupt and interruption collection 1146 // initiate turning off of interrupt and interruption collection
1150 invala // invalidate ALAT 1147 invala // invalidate ALAT
1151 ;; 1148 ;;
1152 srlz.i // ensure interruption collection is off 1149 srlz.i // ensure interruption collection is off
1153 ;; 1150 ;;
1154 bsw.0 1151 bsw.0
1155 ;; 1152 ;;
1156 adds r16 = PT(CR_IPSR)+16,r12 1153 adds r16 = PT(CR_IPSR)+16,r12
1157 adds r17 = PT(CR_IIP)+16,r12 1154 adds r17 = PT(CR_IIP)+16,r12
1158 mov r21=r13 // get current 1155 mov r21=r13 // get current
1159 ;; 1156 ;;
1160 ld8 r31=[r16],16 // load cr.ipsr 1157 ld8 r31=[r16],16 // load cr.ipsr
1161 ld8 r30=[r17],16 // load cr.iip 1158 ld8 r30=[r17],16 // load cr.iip
1162 ;; 1159 ;;
1163 ld8 r29=[r16],16 // load cr.ifs 1160 ld8 r29=[r16],16 // load cr.ifs
1164 ld8 r28=[r17],16 // load ar.unat 1161 ld8 r28=[r17],16 // load ar.unat
1165 ;; 1162 ;;
1166 ld8 r27=[r16],16 // load ar.pfs 1163 ld8 r27=[r16],16 // load ar.pfs
1167 ld8 r26=[r17],16 // load ar.rsc 1164 ld8 r26=[r17],16 // load ar.rsc
1168 ;; 1165 ;;
1169 ld8 r25=[r16],16 // load ar.rnat 1166 ld8 r25=[r16],16 // load ar.rnat
1170 ld8 r24=[r17],16 // load ar.bspstore 1167 ld8 r24=[r17],16 // load ar.bspstore
1171 ;; 1168 ;;
1172 ld8 r23=[r16],16 // load predicates 1169 ld8 r23=[r16],16 // load predicates
1173 ld8 r22=[r17],16 // load b0 1170 ld8 r22=[r17],16 // load b0
1174 ;; 1171 ;;
1175 ld8 r20=[r16],16 // load ar.rsc value for "loadrs" 1172 ld8 r20=[r16],16 // load ar.rsc value for "loadrs"
1176 ld8.fill r1=[r17],16 //load r1 1173 ld8.fill r1=[r17],16 //load r1
1177 ;; 1174 ;;
1178 ld8.fill r12=[r16],16 //load r12 1175 ld8.fill r12=[r16],16 //load r12
1179 ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13 1176 ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13
1180 ;; 1177 ;;
1181 ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr 1178 ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr
1182 ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2 1179 ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2
1183 ;; 1180 ;;
1184 ld8.fill r3=[r16] //load r3 1181 ld8.fill r3=[r16] //load r3
1185 ld8 r18=[r17] //load ar_ccv 1182 ld8 r18=[r17] //load ar_ccv
1186 ;; 1183 ;;
1187 mov ar.fpsr=r19 1184 mov ar.fpsr=r19
1188 mov ar.ccv=r18 1185 mov ar.ccv=r18
1189 shr.u r18=r20,16 1186 shr.u r18=r20,16
1190 ;; 1187 ;;
1191kvm_rbs_switch: 1188kvm_rbs_switch:
1192 mov r19=96 1189 mov r19=96
1193 1190
1194kvm_dont_preserve_current_frame: 1191kvm_dont_preserve_current_frame:
1195/* 1192/*
@@ -1201,76 +1198,76 @@ kvm_dont_preserve_current_frame:
1201# define pReturn p7 1198# define pReturn p7
1202# define Nregs 14 1199# define Nregs 14
1203 1200
1204 alloc loc0=ar.pfs,2,Nregs-2,2,0 1201 alloc loc0=ar.pfs,2,Nregs-2,2,0
1205 shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) 1202 shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8))
1206 sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize 1203 sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize
1207 ;; 1204 ;;
1208 mov ar.rsc=r20 // load ar.rsc to be used for "loadrs" 1205 mov ar.rsc=r20 // load ar.rsc to be used for "loadrs"
1209 shladd in0=loc1,3,r19 1206 shladd in0=loc1,3,r19
1210 mov in1=0 1207 mov in1=0
1211 ;; 1208 ;;
1212 TEXT_ALIGN(32) 1209 TEXT_ALIGN(32)
1213kvm_rse_clear_invalid: 1210kvm_rse_clear_invalid:
1214 alloc loc0=ar.pfs,2,Nregs-2,2,0 1211 alloc loc0=ar.pfs,2,Nregs-2,2,0
1215 cmp.lt pRecurse,p0=Nregs*8,in0 1212 cmp.lt pRecurse,p0=Nregs*8,in0
1216 // if more than Nregs regs left to clear, (re)curse 1213 // if more than Nregs regs left to clear, (re)curse
1217 add out0=-Nregs*8,in0 1214 add out0=-Nregs*8,in0
1218 add out1=1,in1 // increment recursion count 1215 add out1=1,in1 // increment recursion count
1219 mov loc1=0 1216 mov loc1=0
1220 mov loc2=0 1217 mov loc2=0
1221 ;; 1218 ;;
1222 mov loc3=0 1219 mov loc3=0
1223 mov loc4=0 1220 mov loc4=0
1224 mov loc5=0 1221 mov loc5=0
1225 mov loc6=0 1222 mov loc6=0
1226 mov loc7=0 1223 mov loc7=0
1227(pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid 1224(pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid
1228 ;; 1225 ;;
1229 mov loc8=0 1226 mov loc8=0
1230 mov loc9=0 1227 mov loc9=0
1231 cmp.ne pReturn,p0=r0,in1 1228 cmp.ne pReturn,p0=r0,in1
1232 // if recursion count != 0, we need to do a br.ret 1229 // if recursion count != 0, we need to do a br.ret
1233 mov loc10=0 1230 mov loc10=0
1234 mov loc11=0 1231 mov loc11=0
1235(pReturn) br.ret.dptk.many b0 1232(pReturn) br.ret.dptk.many b0
1236 1233
1237# undef pRecurse 1234# undef pRecurse
1238# undef pReturn 1235# undef pReturn
1239 1236
1240// loadrs has already been shifted 1237// loadrs has already been shifted
1241 alloc r16=ar.pfs,0,0,0,0 // drop current register frame 1238 alloc r16=ar.pfs,0,0,0,0 // drop current register frame
1242 ;; 1239 ;;
1243 loadrs 1240 loadrs
1244 ;; 1241 ;;
1245 mov ar.bspstore=r24 1242 mov ar.bspstore=r24
1246 ;; 1243 ;;
1247 mov ar.unat=r28 1244 mov ar.unat=r28
1248 mov ar.rnat=r25 1245 mov ar.rnat=r25
1249 mov ar.rsc=r26 1246 mov ar.rsc=r26
1250 ;; 1247 ;;
1251 mov cr.ipsr=r31 1248 mov cr.ipsr=r31
1252 mov cr.iip=r30 1249 mov cr.iip=r30
1253 mov cr.ifs=r29 1250 mov cr.ifs=r29
1254 mov ar.pfs=r27 1251 mov ar.pfs=r27
1255 adds r18=VMM_VPD_BASE_OFFSET,r21 1252 adds r18=VMM_VPD_BASE_OFFSET,r21
1256 ;; 1253 ;;
1257 ld8 r18=[r18] //vpd 1254 ld8 r18=[r18] //vpd
1258 adds r17=VMM_VCPU_ISR_OFFSET,r21 1255 adds r17=VMM_VCPU_ISR_OFFSET,r21
1259 ;; 1256 ;;
1260 ld8 r17=[r17] 1257 ld8 r17=[r17]
1261 adds r19=VMM_VPD_VPSR_OFFSET,r18 1258 adds r19=VMM_VPD_VPSR_OFFSET,r18
1262 ;; 1259 ;;
1263 ld8 r19=[r19] //vpsr 1260 ld8 r19=[r19] //vpsr
1264 mov r25=r18 1261 mov r25=r18
1265 adds r16= VMM_VCPU_GP_OFFSET,r21 1262 adds r16= VMM_VCPU_GP_OFFSET,r21
1266 ;; 1263 ;;
1267 ld8 r16= [r16] // Put gp in r24 1264 ld8 r16= [r16] // Put gp in r24
1268 movl r24=@gprel(ia64_vmm_entry) // calculate return address 1265 movl r24=@gprel(ia64_vmm_entry) // calculate return address
1269 ;; 1266 ;;
1270 add r24=r24,r16 1267 add r24=r24,r16
1271 ;; 1268 ;;
1272 br.sptk.many kvm_vps_sync_write // call the service 1269 br.sptk.many kvm_vps_sync_write // call the service
1273 ;; 1270 ;;
1274END(ia64_leave_hypervisor) 1271END(ia64_leave_hypervisor)
1275// fall through 1272// fall through
1276GLOBAL_ENTRY(ia64_vmm_entry) 1273GLOBAL_ENTRY(ia64_vmm_entry)
@@ -1283,16 +1280,14 @@ GLOBAL_ENTRY(ia64_vmm_entry)
1283 * r22:b0 1280 * r22:b0
1284 * r23:predicate 1281 * r23:predicate
1285 */ 1282 */
1286 mov r24=r22 1283 mov r24=r22
1287 mov r25=r18 1284 mov r25=r18
1288 tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic 1285 tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic
1289 (p1) br.cond.sptk.few kvm_vps_resume_normal 1286(p1) br.cond.sptk.few kvm_vps_resume_normal
1290 (p2) br.cond.sptk.many kvm_vps_resume_handler 1287(p2) br.cond.sptk.many kvm_vps_resume_handler
1291 ;; 1288 ;;
1292END(ia64_vmm_entry) 1289END(ia64_vmm_entry)
1293 1290
1294
1295
1296/* 1291/*
1297 * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, 1292 * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2,
1298 * u64 arg3, u64 arg4, u64 arg5, 1293 * u64 arg3, u64 arg4, u64 arg5,
@@ -1310,88 +1305,88 @@ psrsave = loc2
1310entry = loc3 1305entry = loc3
1311hostret = r24 1306hostret = r24
1312 1307
1313 alloc pfssave=ar.pfs,4,4,0,0 1308 alloc pfssave=ar.pfs,4,4,0,0
1314 mov rpsave=rp 1309 mov rpsave=rp
1315 adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13 1310 adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13
1316 ;; 1311 ;;
1317 ld8 entry=[entry] 1312 ld8 entry=[entry]
13181: mov hostret=ip 13131: mov hostret=ip
1319 mov r25=in1 // copy arguments 1314 mov r25=in1 // copy arguments
1320 mov r26=in2 1315 mov r26=in2
1321 mov r27=in3 1316 mov r27=in3
1322 mov psrsave=psr 1317 mov psrsave=psr
1323 ;; 1318 ;;
1324 tbit.nz p6,p0=psrsave,14 // IA64_PSR_I 1319 tbit.nz p6,p0=psrsave,14 // IA64_PSR_I
1325 tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC 1320 tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC
1326 ;; 1321 ;;
1327 add hostret=2f-1b,hostret // calculate return address 1322 add hostret=2f-1b,hostret // calculate return address
1328 add entry=entry,in0 1323 add entry=entry,in0
1329 ;; 1324 ;;
1330 rsm psr.i | psr.ic 1325 rsm psr.i | psr.ic
1331 ;; 1326 ;;
1332 srlz.i 1327 srlz.i
1333 mov b6=entry 1328 mov b6=entry
1334 br.cond.sptk b6 // call the service 1329 br.cond.sptk b6 // call the service
13352: 13302:
1336 // Architectural sequence for enabling interrupts if necessary 1331// Architectural sequence for enabling interrupts if necessary
1337(p7) ssm psr.ic 1332(p7) ssm psr.ic
1338 ;; 1333 ;;
1339(p7) srlz.i 1334(p7) srlz.i
1340 ;; 1335 ;;
1341//(p6) ssm psr.i 1336//(p6) ssm psr.i
1342 ;; 1337 ;;
1343 mov rp=rpsave 1338 mov rp=rpsave
1344 mov ar.pfs=pfssave 1339 mov ar.pfs=pfssave
1345 mov r8=r31 1340 mov r8=r31
1346 ;; 1341 ;;
1347 srlz.d 1342 srlz.d
1348 br.ret.sptk rp 1343 br.ret.sptk rp
1349 1344
1350END(ia64_call_vsa) 1345END(ia64_call_vsa)
1351 1346
1352#define INIT_BSPSTORE ((4<<30)-(12<<20)-0x100) 1347#define INIT_BSPSTORE ((4<<30)-(12<<20)-0x100)
1353 1348
1354GLOBAL_ENTRY(vmm_reset_entry) 1349GLOBAL_ENTRY(vmm_reset_entry)
1355 //set up ipsr, iip, vpd.vpsr, dcr 1350 //set up ipsr, iip, vpd.vpsr, dcr
1356 // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1 1351 // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1
1357 // For DCR: all bits 0 1352 // For DCR: all bits 0
1358 bsw.0 1353 bsw.0
1359 ;; 1354 ;;
1360 mov r21 =r13 1355 mov r21 =r13
1361 adds r14=-VMM_PT_REGS_SIZE, r12 1356 adds r14=-VMM_PT_REGS_SIZE, r12
1362 ;; 1357 ;;
1363 movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1 1358 movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1
1364 movl r10=0x8000000000000000 1359 movl r10=0x8000000000000000
1365 adds r16=PT(CR_IIP), r14 1360 adds r16=PT(CR_IIP), r14
1366 adds r20=PT(R1), r14 1361 adds r20=PT(R1), r14
1367 ;; 1362 ;;
1368 rsm psr.ic | psr.i 1363 rsm psr.ic | psr.i
1369 ;; 1364 ;;
1370 srlz.i 1365 srlz.i
1371 ;; 1366 ;;
1372 mov ar.rsc = 0 1367 mov ar.rsc = 0
1373 ;; 1368 ;;
1374 flushrs 1369 flushrs
1375 ;; 1370 ;;
1376 mov ar.bspstore = 0 1371 mov ar.bspstore = 0
1377 // clear BSPSTORE 1372 // clear BSPSTORE
1378 ;; 1373 ;;
1379 mov cr.ipsr=r6 1374 mov cr.ipsr=r6
1380 mov cr.ifs=r10 1375 mov cr.ifs=r10
1381 ld8 r4 = [r16] // Set init iip for first run. 1376 ld8 r4 = [r16] // Set init iip for first run.
1382 ld8 r1 = [r20] 1377 ld8 r1 = [r20]
1383 ;; 1378 ;;
1384 mov cr.iip=r4 1379 mov cr.iip=r4
1385 adds r16=VMM_VPD_BASE_OFFSET,r13 1380 adds r16=VMM_VPD_BASE_OFFSET,r13
1386 ;; 1381 ;;
1387 ld8 r18=[r16] 1382 ld8 r18=[r16]
1388 ;; 1383 ;;
1389 adds r19=VMM_VPD_VPSR_OFFSET,r18 1384 adds r19=VMM_VPD_VPSR_OFFSET,r18
1390 ;; 1385 ;;
1391 ld8 r19=[r19] 1386 ld8 r19=[r19]
1392 mov r17=r0 1387 mov r17=r0
1393 mov r22=r0 1388 mov r22=r0
1394 mov r23=r0 1389 mov r23=r0
1395 br.cond.sptk ia64_vmm_entry 1390 br.cond.sptk ia64_vmm_entry
1396 br.ret.sptk b0 1391 br.ret.sptk b0
1397END(vmm_reset_entry) 1392END(vmm_reset_entry)
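The movl r6=0x501008826000 in vmm_reset_entry encodes exactly the IPSR bits named in its comment (it/dt/rt, i/ic, si, vm/bn). A small userspace C sketch, using the standard Itanium PSR bit positions rather than anything defined in this patch, reproduces the constant:

	#include <stdio.h>

	#define PSR_BIT(n) (1ULL << (n))

	int main(void)
	{
		/* ic=13, i=14, dt=17, si=23, rt=27, it=36, bn=44, vm=46 */
		unsigned long long ipsr =
			PSR_BIT(13) | PSR_BIT(14) |
			PSR_BIT(17) | PSR_BIT(23) |
			PSR_BIT(27) | PSR_BIT(36) |
			PSR_BIT(44) | PSR_BIT(46);

		printf("0x%llx\n", ipsr);	/* prints 0x501008826000 */
		return 0;
	}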
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
index e22b93361e08..6b6307a3bd55 100644
--- a/arch/ia64/kvm/vtlb.c
+++ b/arch/ia64/kvm/vtlb.c
@@ -183,8 +183,8 @@ void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps)
183 u64 i, dirty_pages = 1; 183 u64 i, dirty_pages = 1;
184 u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT; 184 u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT;
185 spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa); 185 spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa);
186 void *dirty_bitmap = (void *)v - (KVM_VCPU_OFS + v->vcpu_id * VCPU_SIZE) 186 void *dirty_bitmap = (void *)KVM_MEM_DIRTY_LOG_BASE;
187 + KVM_MEM_DIRTY_LOG_OFS; 187
188 dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT; 188 dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT;
189 189
190 vmm_spin_lock(lock); 190 vmm_spin_lock(lock);
diff --git a/arch/m32r/kernel/init_task.c b/arch/m32r/kernel/init_task.c
index 0d658dbb6766..016885c6f260 100644
--- a/arch/m32r/kernel/init_task.c
+++ b/arch/m32r/kernel/init_task.c
@@ -11,7 +11,6 @@
11#include <asm/uaccess.h> 11#include <asm/uaccess.h>
12#include <asm/pgtable.h> 12#include <asm/pgtable.h>
13 13
14static struct fs_struct init_fs = INIT_FS;
15static struct signal_struct init_signals = INIT_SIGNALS(init_signals); 14static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
16static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 15static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
17struct mm_struct init_mm = INIT_MM(init_mm); 16struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 836fb66f080d..c825bde17cb3 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -280,7 +280,6 @@ config M68060
280 280
281config MMU_MOTOROLA 281config MMU_MOTOROLA
282 bool 282 bool
283 depends on MMU && !MMU_SUN3
284 283
285config MMU_SUN3 284config MMU_SUN3
286 bool 285 bool
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index 3042c2bc8c58..632ce016014d 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -40,7 +40,6 @@
40 * alignment requirements and potentially different initial 40 * alignment requirements and potentially different initial
41 * setup. 41 * setup.
42 */ 42 */
43static struct fs_struct init_fs = INIT_FS;
44static struct signal_struct init_signals = INIT_SIGNALS(init_signals); 43static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
45static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 44static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
46struct mm_struct init_mm = INIT_MM(init_mm); 45struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/m68knommu/kernel/init_task.c b/arch/m68knommu/kernel/init_task.c
index 344c01aede08..fe282de1d596 100644
--- a/arch/m68knommu/kernel/init_task.c
+++ b/arch/m68knommu/kernel/init_task.c
@@ -12,7 +12,6 @@
12#include <asm/uaccess.h> 12#include <asm/uaccess.h>
13#include <asm/pgtable.h> 13#include <asm/pgtable.h>
14 14
15static struct fs_struct init_fs = INIT_FS;
16static struct signal_struct init_signals = INIT_SIGNALS(init_signals); 15static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
17static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 16static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
18struct mm_struct init_mm = INIT_MM(init_mm); 17struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/mips/kernel/init_task.c b/arch/mips/kernel/init_task.c
index d72487ad7c15..149cd914526e 100644
--- a/arch/mips/kernel/init_task.c
+++ b/arch/mips/kernel/init_task.c
@@ -9,7 +9,6 @@
9#include <asm/uaccess.h> 9#include <asm/uaccess.h>
10#include <asm/pgtable.h> 10#include <asm/pgtable.h>
11 11
12static struct fs_struct init_fs = INIT_FS;
13static struct signal_struct init_signals = INIT_SIGNALS(init_signals); 12static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
14static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 13static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
15struct mm_struct init_mm = INIT_MM(init_mm); 14struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/mn10300/kernel/init_task.c b/arch/mn10300/kernel/init_task.c
index af16f6e5c918..5ac3566f8c98 100644
--- a/arch/mn10300/kernel/init_task.c
+++ b/arch/mn10300/kernel/init_task.c
@@ -18,7 +18,6 @@
18#include <asm/uaccess.h> 18#include <asm/uaccess.h>
19#include <asm/pgtable.h> 19#include <asm/pgtable.h>
20 20
21static struct fs_struct init_fs = INIT_FS;
22static struct signal_struct init_signals = INIT_SIGNALS(init_signals); 21static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
23static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 22static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
24struct mm_struct init_mm = INIT_MM(init_mm); 23struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/parisc/kernel/init_task.c b/arch/parisc/kernel/init_task.c
index f5941c086551..1e25a45d64c1 100644
--- a/arch/parisc/kernel/init_task.c
+++ b/arch/parisc/kernel/init_task.c
@@ -34,7 +34,6 @@
34#include <asm/pgtable.h> 34#include <asm/pgtable.h>
35#include <asm/pgalloc.h> 35#include <asm/pgalloc.h>
36 36
37static struct fs_struct init_fs = INIT_FS;
38static struct signal_struct init_signals = INIT_SIGNALS(init_signals); 37static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
39static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 38static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
40struct mm_struct init_mm = INIT_MM(init_mm); 39struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h
new file mode 100644
index 000000000000..9b198d1b3b2b
--- /dev/null
+++ b/arch/powerpc/include/asm/disassemble.h
@@ -0,0 +1,80 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#ifndef __ASM_PPC_DISASSEMBLE_H__
21#define __ASM_PPC_DISASSEMBLE_H__
22
23#include <linux/types.h>
24
25static inline unsigned int get_op(u32 inst)
26{
27 return inst >> 26;
28}
29
30static inline unsigned int get_xop(u32 inst)
31{
32 return (inst >> 1) & 0x3ff;
33}
34
35static inline unsigned int get_sprn(u32 inst)
36{
37 return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
38}
39
40static inline unsigned int get_dcrn(u32 inst)
41{
42 return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
43}
44
45static inline unsigned int get_rt(u32 inst)
46{
47 return (inst >> 21) & 0x1f;
48}
49
50static inline unsigned int get_rs(u32 inst)
51{
52 return (inst >> 21) & 0x1f;
53}
54
55static inline unsigned int get_ra(u32 inst)
56{
57 return (inst >> 16) & 0x1f;
58}
59
60static inline unsigned int get_rb(u32 inst)
61{
62 return (inst >> 11) & 0x1f;
63}
64
65static inline unsigned int get_rc(u32 inst)
66{
67 return inst & 0x1;
68}
69
70static inline unsigned int get_ws(u32 inst)
71{
72 return (inst >> 11) & 0x1f;
73}
74
75static inline unsigned int get_d(u32 inst)
76{
77 return inst & 0xffff;
78}
79
80#endif /* __ASM_PPC_DISASSEMBLE_H__ */
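The helpers in disassemble.h only shift and mask the 32-bit instruction word, so they can be exercised outside the kernel. A minimal sketch follows; the instruction constant 0x7c6000a6 (mfmsr r3) is used purely for illustration:

	#include <assert.h>
	#include <stdint.h>

	typedef uint32_t u32;

	static unsigned int get_op(u32 inst)  { return inst >> 26; }
	static unsigned int get_xop(u32 inst) { return (inst >> 1) & 0x3ff; }
	static unsigned int get_rt(u32 inst)  { return (inst >> 21) & 0x1f; }

	int main(void)
	{
		u32 inst = 0x7c6000a6;		/* mfmsr r3 */

		assert(get_op(inst) == 31);	/* primary opcode */
		assert(get_xop(inst) == 83);	/* matches XOP_MFMSR in 44x_emulate.c */
		assert(get_rt(inst) == 3);	/* destination GPR */
		return 0;
	}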
diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h
new file mode 100644
index 000000000000..f49031b632ca
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_44x.h
@@ -0,0 +1,61 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#ifndef __ASM_44X_H__
21#define __ASM_44X_H__
22
23#include <linux/kvm_host.h>
24
25#define PPC44x_TLB_SIZE 64
26
27/* If the guest is expecting it, this can be as large as we like; we'd just
28 * need to find some way of advertising it. */
29#define KVM44x_GUEST_TLB_SIZE 64
30
31struct kvmppc_44x_shadow_ref {
32 struct page *page;
33 u16 gtlb_index;
34 u8 writeable;
35 u8 tid;
36};
37
38struct kvmppc_vcpu_44x {
39 /* Unmodified copy of the guest's TLB. */
40 struct kvmppc_44x_tlbe guest_tlb[KVM44x_GUEST_TLB_SIZE];
41
42 /* References to guest pages in the hardware TLB. */
43 struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE];
44
45 /* State of the shadow TLB at guest context switch time. */
46 struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE];
47 u8 shadow_tlb_mod[PPC44x_TLB_SIZE];
48
49 struct kvm_vcpu vcpu;
50};
51
52static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu)
53{
54 return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu);
55}
56
57void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid);
58void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu);
59void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu);
60
61#endif /* __ASM_44X_H__ */
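to_44x() is the usual container_of() round trip: generic KVM code passes around the embedded struct kvm_vcpu, and the 44x code recovers the enclosing struct kvmppc_vcpu_44x from it. A standalone sketch of the same idiom, with stand-in structs rather than the real layouts:

	#include <stddef.h>
	#include <stdio.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct kvm_vcpu_stub { int id; };
	struct vcpu_44x_stub {
		int guest_tlb[64];		/* core-private state */
		struct kvm_vcpu_stub vcpu;	/* embedded generic part */
	};

	int main(void)
	{
		struct vcpu_44x_stub v = { .vcpu = { .id = 7 } };
		struct kvm_vcpu_stub *vcpu = &v.vcpu;	/* what generic code holds */

		struct vcpu_44x_stub *back =
			container_of(vcpu, struct vcpu_44x_stub, vcpu);
		printf("%d\n", back == &v ? back->vcpu.id : -1);	/* prints 7 */
		return 0;
	}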
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 34b52b7180cd..c1e436fe7738 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -64,27 +64,58 @@ struct kvm_vcpu_stat {
64 u32 halt_wakeup; 64 u32 halt_wakeup;
65}; 65};
66 66
67struct tlbe { 67struct kvmppc_44x_tlbe {
68 u32 tid; /* Only the low 8 bits are used. */ 68 u32 tid; /* Only the low 8 bits are used. */
69 u32 word0; 69 u32 word0;
70 u32 word1; 70 u32 word1;
71 u32 word2; 71 u32 word2;
72}; 72};
73 73
74struct kvm_arch { 74enum kvm_exit_types {
75 MMIO_EXITS,
76 DCR_EXITS,
77 SIGNAL_EXITS,
78 ITLB_REAL_MISS_EXITS,
79 ITLB_VIRT_MISS_EXITS,
80 DTLB_REAL_MISS_EXITS,
81 DTLB_VIRT_MISS_EXITS,
82 SYSCALL_EXITS,
83 ISI_EXITS,
84 DSI_EXITS,
85 EMULATED_INST_EXITS,
86 EMULATED_MTMSRWE_EXITS,
87 EMULATED_WRTEE_EXITS,
88 EMULATED_MTSPR_EXITS,
89 EMULATED_MFSPR_EXITS,
90 EMULATED_MTMSR_EXITS,
91 EMULATED_MFMSR_EXITS,
92 EMULATED_TLBSX_EXITS,
93 EMULATED_TLBWE_EXITS,
94 EMULATED_RFI_EXITS,
95 DEC_EXITS,
96 EXT_INTR_EXITS,
97 HALT_WAKEUP,
98 USR_PR_INST,
99 FP_UNAVAIL,
100 DEBUG_EXITS,
101 TIMEINGUEST,
102 __NUMBER_OF_KVM_EXIT_TYPES
75}; 103};
76 104
77struct kvm_vcpu_arch { 105/* allow access to big endian 32bit upper/lower parts and 64bit var */
78 /* Unmodified copy of the guest's TLB. */ 106struct kvmppc_exit_timing {
79 struct tlbe guest_tlb[PPC44x_TLB_SIZE]; 107 union {
80 /* TLB that's actually used when the guest is running. */ 108 u64 tv64;
81 struct tlbe shadow_tlb[PPC44x_TLB_SIZE]; 109 struct {
82 /* Pages which are referenced in the shadow TLB. */ 110 u32 tbu, tbl;
83 struct page *shadow_pages[PPC44x_TLB_SIZE]; 111 } tv32;
112 };
113};
84 114
85 /* Track which TLB entries we've modified in the current exit. */ 115struct kvm_arch {
86 u8 shadow_tlb_mod[PPC44x_TLB_SIZE]; 116};
87 117
118struct kvm_vcpu_arch {
88 u32 host_stack; 119 u32 host_stack;
89 u32 host_pid; 120 u32 host_pid;
90 u32 host_dbcr0; 121 u32 host_dbcr0;
@@ -94,32 +125,32 @@ struct kvm_vcpu_arch {
94 u32 host_msr; 125 u32 host_msr;
95 126
96 u64 fpr[32]; 127 u64 fpr[32];
97 u32 gpr[32]; 128 ulong gpr[32];
98 129
99 u32 pc; 130 ulong pc;
100 u32 cr; 131 u32 cr;
101 u32 ctr; 132 ulong ctr;
102 u32 lr; 133 ulong lr;
103 u32 xer; 134 ulong xer;
104 135
105 u32 msr; 136 ulong msr;
106 u32 mmucr; 137 u32 mmucr;
107 u32 sprg0; 138 ulong sprg0;
108 u32 sprg1; 139 ulong sprg1;
109 u32 sprg2; 140 ulong sprg2;
110 u32 sprg3; 141 ulong sprg3;
111 u32 sprg4; 142 ulong sprg4;
112 u32 sprg5; 143 ulong sprg5;
113 u32 sprg6; 144 ulong sprg6;
114 u32 sprg7; 145 ulong sprg7;
115 u32 srr0; 146 ulong srr0;
116 u32 srr1; 147 ulong srr1;
117 u32 csrr0; 148 ulong csrr0;
118 u32 csrr1; 149 ulong csrr1;
119 u32 dsrr0; 150 ulong dsrr0;
120 u32 dsrr1; 151 ulong dsrr1;
121 u32 dear; 152 ulong dear;
122 u32 esr; 153 ulong esr;
123 u32 dec; 154 u32 dec;
124 u32 decar; 155 u32 decar;
125 u32 tbl; 156 u32 tbl;
@@ -127,7 +158,7 @@ struct kvm_vcpu_arch {
127 u32 tcr; 158 u32 tcr;
128 u32 tsr; 159 u32 tsr;
129 u32 ivor[16]; 160 u32 ivor[16];
130 u32 ivpr; 161 ulong ivpr;
131 u32 pir; 162 u32 pir;
132 163
133 u32 shadow_pid; 164 u32 shadow_pid;
@@ -140,9 +171,22 @@ struct kvm_vcpu_arch {
140 u32 dbcr0; 171 u32 dbcr0;
141 u32 dbcr1; 172 u32 dbcr1;
142 173
174#ifdef CONFIG_KVM_EXIT_TIMING
175 struct kvmppc_exit_timing timing_exit;
176 struct kvmppc_exit_timing timing_last_enter;
177 u32 last_exit_type;
178 u32 timing_count_type[__NUMBER_OF_KVM_EXIT_TYPES];
179 u64 timing_sum_duration[__NUMBER_OF_KVM_EXIT_TYPES];
180 u64 timing_sum_quad_duration[__NUMBER_OF_KVM_EXIT_TYPES];
181 u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES];
182 u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES];
183 u64 timing_last_exit;
184 struct dentry *debugfs_exit_timing;
185#endif
186
143 u32 last_inst; 187 u32 last_inst;
144 u32 fault_dear; 188 ulong fault_dear;
145 u32 fault_esr; 189 ulong fault_esr;
146 gpa_t paddr_accessed; 190 gpa_t paddr_accessed;
147 191
148 u8 io_gpr; /* GPR used as IO source/target */ 192 u8 io_gpr; /* GPR used as IO source/target */
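The kvmppc_exit_timing union added above relies on PowerPC being big-endian: tv32.tbu/tv32.tbl line up with the upper and lower halves of tv64, so assembly can store the two timebase words while C reads one 64-bit value. A userspace sketch of the same layout (the endianness of the build machine decides whether the halves line up the same way):

	#include <stdint.h>
	#include <stdio.h>

	struct exit_timing {
		union {
			uint64_t tv64;
			struct { uint32_t tbu, tbl; } tv32;
		};
	};

	int main(void)
	{
		struct exit_timing t;

		t.tv32.tbu = 0x00000001;	/* upper timebase word */
		t.tv32.tbl = 0x00000002;	/* lower timebase word */

		/* 0x100000002 on a big-endian host; the halves swap on
		 * little-endian, which is why the kernel comment restricts
		 * this layout to big endian. */
		printf("0x%llx\n", (unsigned long long)t.tv64);
		return 0;
	}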
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index bb62ad876de3..36d2a50a8487 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -29,11 +29,6 @@
29#include <linux/kvm_types.h> 29#include <linux/kvm_types.h>
30#include <linux/kvm_host.h> 30#include <linux/kvm_host.h>
31 31
32struct kvm_tlb {
33 struct tlbe guest_tlb[PPC44x_TLB_SIZE];
34 struct tlbe shadow_tlb[PPC44x_TLB_SIZE];
35};
36
37enum emulation_result { 32enum emulation_result {
38 EMULATE_DONE, /* no further processing */ 33 EMULATE_DONE, /* no further processing */
39 EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ 34 EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */
@@ -41,9 +36,6 @@ enum emulation_result {
41 EMULATE_FAIL, /* can't emulate this instruction */ 36 EMULATE_FAIL, /* can't emulate this instruction */
42}; 37};
43 38
44extern const unsigned char exception_priority[];
45extern const unsigned char priority_exception[];
46
47extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); 39extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
48extern char kvmppc_handlers_start[]; 40extern char kvmppc_handlers_start[];
49extern unsigned long kvmppc_handler_len; 41extern unsigned long kvmppc_handler_len;
@@ -58,51 +50,44 @@ extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
58extern int kvmppc_emulate_instruction(struct kvm_run *run, 50extern int kvmppc_emulate_instruction(struct kvm_run *run,
59 struct kvm_vcpu *vcpu); 51 struct kvm_vcpu *vcpu);
60extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); 52extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
53extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
61 54
62extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, 55extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
63 u64 asid, u32 flags); 56 u64 asid, u32 flags, u32 max_bytes,
64extern void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, 57 unsigned int gtlb_idx);
65 gva_t eend, u32 asid);
66extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); 58extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
67extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); 59extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
68 60
69/* XXX Book E specific */ 61/* Core-specific hooks */
70extern void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i); 62
71 63extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm,
72extern void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu); 64 unsigned int id);
73 65extern void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu);
74static inline void kvmppc_queue_exception(struct kvm_vcpu *vcpu, int exception) 66extern int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu);
75{ 67extern int kvmppc_core_check_processor_compat(void);
76 unsigned int priority = exception_priority[exception]; 68extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
77 set_bit(priority, &vcpu->arch.pending_exceptions); 69 struct kvm_translation *tr);
78} 70
79 71extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
80static inline void kvmppc_clear_exception(struct kvm_vcpu *vcpu, int exception) 72extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
81{ 73
82 unsigned int priority = exception_priority[exception]; 74extern void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu);
83 clear_bit(priority, &vcpu->arch.pending_exceptions); 75extern void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu);
84} 76
85 77extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu);
86/* Helper function for "full" MSR writes. No need to call this if only EE is 78extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
87 * changing. */ 79extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu);
88static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) 80extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
89{ 81extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
90 if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR)) 82 struct kvm_interrupt *irq);
91 kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR); 83
92 84extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
93 vcpu->arch.msr = new_msr; 85 unsigned int op, int *advance);
94 86extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs);
95 if (vcpu->arch.msr & MSR_WE) 87extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
96 kvm_vcpu_block(vcpu); 88
97} 89extern int kvmppc_booke_init(void);
98 90extern void kvmppc_booke_exit(void);
99static inline void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid)
100{
101 if (vcpu->arch.pid != new_pid) {
102 vcpu->arch.pid = new_pid;
103 vcpu->arch.swap_pid = 1;
104 }
105}
106 91
107extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); 92extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
108 93
diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h
index 8a97cfb08b7e..27cc6fdcd3b7 100644
--- a/arch/powerpc/include/asm/mmu-44x.h
+++ b/arch/powerpc/include/asm/mmu-44x.h
@@ -56,6 +56,7 @@
56#ifndef __ASSEMBLY__ 56#ifndef __ASSEMBLY__
57 57
58extern unsigned int tlb_44x_hwater; 58extern unsigned int tlb_44x_hwater;
59extern unsigned int tlb_44x_index;
59 60
60typedef struct { 61typedef struct {
61 unsigned int id; 62 unsigned int id;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 661d07d2146b..9937fe44555f 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -23,9 +23,6 @@
23#include <linux/mm.h> 23#include <linux/mm.h>
24#include <linux/suspend.h> 24#include <linux/suspend.h>
25#include <linux/hrtimer.h> 25#include <linux/hrtimer.h>
26#ifdef CONFIG_KVM
27#include <linux/kvm_host.h>
28#endif
29#ifdef CONFIG_PPC64 26#ifdef CONFIG_PPC64
30#include <linux/time.h> 27#include <linux/time.h>
31#include <linux/hardirq.h> 28#include <linux/hardirq.h>
@@ -51,6 +48,9 @@
51#ifdef CONFIG_PPC_ISERIES 48#ifdef CONFIG_PPC_ISERIES
52#include <asm/iseries/alpaca.h> 49#include <asm/iseries/alpaca.h>
53#endif 50#endif
51#ifdef CONFIG_KVM
52#include <asm/kvm_44x.h>
53#endif
54 54
55#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) 55#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
56#include "head_booke.h" 56#include "head_booke.h"
@@ -357,12 +357,10 @@ int main(void)
357 DEFINE(PTE_SIZE, sizeof(pte_t)); 357 DEFINE(PTE_SIZE, sizeof(pte_t));
358 358
359#ifdef CONFIG_KVM 359#ifdef CONFIG_KVM
360 DEFINE(TLBE_BYTES, sizeof(struct tlbe)); 360 DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe));
361 361
362 DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); 362 DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
363 DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); 363 DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
364 DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb));
365 DEFINE(VCPU_SHADOW_MOD, offsetof(struct kvm_vcpu, arch.shadow_tlb_mod));
366 DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); 364 DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
367 DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); 365 DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
368 DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); 366 DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
@@ -385,5 +383,16 @@ int main(void)
385 DEFINE(PTE_T_LOG2, PTE_T_LOG2); 383 DEFINE(PTE_T_LOG2, PTE_T_LOG2);
386#endif 384#endif
387 385
386#ifdef CONFIG_KVM_EXIT_TIMING
387 DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu,
388 arch.timing_exit.tv32.tbu));
389 DEFINE(VCPU_TIMING_EXIT_TBL, offsetof(struct kvm_vcpu,
390 arch.timing_exit.tv32.tbl));
391 DEFINE(VCPU_TIMING_LAST_ENTER_TBU, offsetof(struct kvm_vcpu,
392 arch.timing_last_enter.tv32.tbu));
393 DEFINE(VCPU_TIMING_LAST_ENTER_TBL, offsetof(struct kvm_vcpu,
394 arch.timing_last_enter.tv32.tbl));
395#endif
396
388 return 0; 397 return 0;
389} 398}
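The new DEFINE() lines in asm-offsets.c are just offsetof() values exported to assembly: booke_interrupts.S can then store the timebase words straight into arch.timing_exit without knowing the C layout. A stand-in sketch of the mechanism, using fake structs rather than the real kvm_vcpu:

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	struct timing { union { uint64_t tv64; struct { uint32_t tbu, tbl; } tv32; }; };
	struct fake_arch { uint32_t host_stack, host_pid; struct timing timing_exit; };
	struct fake_vcpu { long generic[4]; struct fake_arch arch; };

	int main(void)
	{
		/* asm-offsets.c emits each of these as an assembler constant,
		 * e.g. "#define VCPU_TIMING_EXIT_TBU <N>". */
		printf("VCPU_TIMING_EXIT_TBU = %zu\n",
		       offsetof(struct fake_vcpu, arch.timing_exit.tv32.tbu));
		printf("VCPU_TIMING_EXIT_TBL = %zu\n",
		       offsetof(struct fake_vcpu, arch.timing_exit.tv32.tbl));
		return 0;
	}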
diff --git a/arch/powerpc/kernel/init_task.c b/arch/powerpc/kernel/init_task.c
index 4c85b8d56478..688b329800bd 100644
--- a/arch/powerpc/kernel/init_task.c
+++ b/arch/powerpc/kernel/init_task.c
@@ -7,7 +7,6 @@
7#include <linux/mqueue.h> 7#include <linux/mqueue.h>
8#include <asm/uaccess.h> 8#include <asm/uaccess.h>
9 9
10static struct fs_struct init_fs = INIT_FS;
11static struct signal_struct init_signals = INIT_SIGNALS(init_signals); 10static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
12static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 11static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
13struct mm_struct init_mm = INIT_MM(init_mm); 12struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 51b201ddf9a1..fb7049c054c0 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -33,6 +33,7 @@
33#include <linux/mqueue.h> 33#include <linux/mqueue.h>
34#include <linux/hardirq.h> 34#include <linux/hardirq.h>
35#include <linux/utsname.h> 35#include <linux/utsname.h>
36#include <linux/kernel_stat.h>
36 37
37#include <asm/pgtable.h> 38#include <asm/pgtable.h>
38#include <asm/uaccess.h> 39#include <asm/uaccess.h>
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 99f1ddd68582..c9564031a2a9 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -256,8 +256,10 @@ void account_system_vtime(struct task_struct *tsk)
256 delta += sys_time; 256 delta += sys_time;
257 get_paca()->system_time = 0; 257 get_paca()->system_time = 0;
258 } 258 }
259 account_system_time(tsk, 0, delta); 259 if (in_irq() || idle_task(smp_processor_id()) != tsk)
260 account_system_time_scaled(tsk, deltascaled); 260 account_system_time(tsk, 0, delta, deltascaled);
261 else
262 account_idle_time(delta);
261 per_cpu(cputime_last_delta, smp_processor_id()) = delta; 263 per_cpu(cputime_last_delta, smp_processor_id()) = delta;
262 per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled; 264 per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled;
263 local_irq_restore(flags); 265 local_irq_restore(flags);
@@ -275,10 +277,8 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
275 277
276 utime = get_paca()->user_time; 278 utime = get_paca()->user_time;
277 get_paca()->user_time = 0; 279 get_paca()->user_time = 0;
278 account_user_time(tsk, utime);
279
280 utimescaled = cputime_to_scaled(utime); 280 utimescaled = cputime_to_scaled(utime);
281 account_user_time_scaled(tsk, utimescaled); 281 account_user_time(tsk, utime, utimescaled);
282} 282}
283 283
284/* 284/*
@@ -338,8 +338,12 @@ void calculate_steal_time(void)
338 tb = mftb(); 338 tb = mftb();
339 purr = mfspr(SPRN_PURR); 339 purr = mfspr(SPRN_PURR);
340 stolen = (tb - pme->tb) - (purr - pme->purr); 340 stolen = (tb - pme->tb) - (purr - pme->purr);
341 if (stolen > 0) 341 if (stolen > 0) {
342 account_steal_time(current, stolen); 342 if (idle_task(smp_processor_id()) != current)
343 account_steal_time(stolen);
344 else
345 account_idle_time(stolen);
346 }
343 pme->tb = tb; 347 pme->tb = tb;
344 pme->purr = purr; 348 pme->purr = purr;
345} 349}
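Both hunks in time.c make the same distinction: time observed while running the idle task outside interrupt context is now booked as idle time instead of system or steal time. The decision reduces to a small predicate, sketched here with stubs standing in for the kernel's in_irq()/idle_task()/account_*() interfaces:

	#include <stdbool.h>
	#include <stdio.h>

	static void account_system(unsigned long d) { printf("system += %lu\n", d); }
	static void account_idle(unsigned long d)   { printf("idle   += %lu\n", d); }

	static void account_delta(bool in_irq, bool tsk_is_idle_task, unsigned long delta)
	{
		if (in_irq || !tsk_is_idle_task)
			account_system(delta);
		else
			account_idle(delta);
	}

	int main(void)
	{
		account_delta(false, true, 100);	/* idle loop: idle   += 100 */
		account_delta(true,  true, 25);		/* irq on idle cpu: system += 25 */
		return 0;
	}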
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
new file mode 100644
index 000000000000..a66bec57265a
--- /dev/null
+++ b/arch/powerpc/kvm/44x.c
@@ -0,0 +1,228 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#include <linux/kvm_host.h>
21#include <linux/err.h>
22
23#include <asm/reg.h>
24#include <asm/cputable.h>
25#include <asm/tlbflush.h>
26#include <asm/kvm_44x.h>
27#include <asm/kvm_ppc.h>
28
29#include "44x_tlb.h"
30
31/* Note: clearing MSR[DE] just means that the debug interrupt will not be
32 * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits.
33 * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt
34 * will be delivered as an "imprecise debug event" (which is indicated by
35 * DBSR[IDE]).
36 */
37static void kvm44x_disable_debug_interrupts(void)
38{
39 mtmsr(mfmsr() & ~MSR_DE);
40}
41
42void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
43{
44 kvm44x_disable_debug_interrupts();
45
46 mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]);
47 mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]);
48 mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]);
49 mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]);
50 mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1);
51 mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2);
52 mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0);
53 mtmsr(vcpu->arch.host_msr);
54}
55
56void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
57{
58 struct kvm_guest_debug *dbg = &vcpu->guest_debug;
59 u32 dbcr0 = 0;
60
61 vcpu->arch.host_msr = mfmsr();
62 kvm44x_disable_debug_interrupts();
63
64 /* Save host debug register state. */
65 vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1);
66 vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2);
67 vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3);
68 vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4);
69 vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0);
70 vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1);
71 vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2);
72
73 /* set registers up for guest */
74
75 if (dbg->bp[0]) {
76 mtspr(SPRN_IAC1, dbg->bp[0]);
77 dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
78 }
79 if (dbg->bp[1]) {
80 mtspr(SPRN_IAC2, dbg->bp[1]);
81 dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
82 }
83 if (dbg->bp[2]) {
84 mtspr(SPRN_IAC3, dbg->bp[2]);
85 dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
86 }
87 if (dbg->bp[3]) {
88 mtspr(SPRN_IAC4, dbg->bp[3]);
89 dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
90 }
91
92 mtspr(SPRN_DBCR0, dbcr0);
93 mtspr(SPRN_DBCR1, 0);
94 mtspr(SPRN_DBCR2, 0);
95}
96
97void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
98{
99 kvmppc_44x_tlb_load(vcpu);
100}
101
102void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
103{
104 kvmppc_44x_tlb_put(vcpu);
105}
106
107int kvmppc_core_check_processor_compat(void)
108{
109 int r;
110
111 if (strcmp(cur_cpu_spec->platform, "ppc440") == 0)
112 r = 0;
113 else
114 r = -ENOTSUPP;
115
116 return r;
117}
118
119int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
120{
121 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
122 struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[0];
123 int i;
124
125 tlbe->tid = 0;
126 tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
127 tlbe->word1 = 0;
128 tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR;
129
130 tlbe++;
131 tlbe->tid = 0;
132 tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID;
133 tlbe->word1 = 0xef600000;
134 tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR
135 | PPC44x_TLB_I | PPC44x_TLB_G;
136
137 /* Since the guest can directly access the timebase, it must know the
138 * real timebase frequency. Accordingly, it must see the state of
139 * CCR1[TCS]. */
140 vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
141
142 for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++)
143 vcpu_44x->shadow_refs[i].gtlb_index = -1;
144
145 return 0;
146}
147
148/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
149int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
150 struct kvm_translation *tr)
151{
152 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
153 struct kvmppc_44x_tlbe *gtlbe;
154 int index;
155 gva_t eaddr;
156 u8 pid;
157 u8 as;
158
159 eaddr = tr->linear_address;
160 pid = (tr->linear_address >> 32) & 0xff;
161 as = (tr->linear_address >> 40) & 0x1;
162
163 index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as);
164 if (index == -1) {
165 tr->valid = 0;
166 return 0;
167 }
168
169 gtlbe = &vcpu_44x->guest_tlb[index];
170
171 tr->physical_address = tlb_xlate(gtlbe, eaddr);
172 /* XXX what does "writeable" and "usermode" even mean? */
173 tr->valid = 1;
174
175 return 0;
176}
177
178struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
179{
180 struct kvmppc_vcpu_44x *vcpu_44x;
181 struct kvm_vcpu *vcpu;
182 int err;
183
184 vcpu_44x = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
185 if (!vcpu_44x) {
186 err = -ENOMEM;
187 goto out;
188 }
189
190 vcpu = &vcpu_44x->vcpu;
191 err = kvm_vcpu_init(vcpu, kvm, id);
192 if (err)
193 goto free_vcpu;
194
195 return vcpu;
196
197free_vcpu:
198 kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
199out:
200 return ERR_PTR(err);
201}
202
203void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
204{
205 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
206
207 kvm_vcpu_uninit(vcpu);
208 kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
209}
210
211static int kvmppc_44x_init(void)
212{
213 int r;
214
215 r = kvmppc_booke_init();
216 if (r)
217 return r;
218
219 return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), THIS_MODULE);
220}
221
222static void kvmppc_44x_exit(void)
223{
224 kvmppc_booke_exit();
225}
226
227module_init(kvmppc_44x_init);
228module_exit(kvmppc_44x_exit);
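kvmppc_core_vcpu_translate() above relies on the convention its comment mentions: userspace packs AS, PID and the effective address into the 64-bit linear_address field. A sketch of that packing and of the unpack the function performs; the field layout is taken from the shifts in the code above, nothing beyond that is assumed:

	#include <stdint.h>
	#include <stdio.h>

	static uint64_t pack_linear_address(uint32_t eaddr, uint8_t pid, uint8_t as)
	{
		return (uint64_t)eaddr | ((uint64_t)pid << 32) | ((uint64_t)(as & 1) << 40);
	}

	int main(void)
	{
		uint64_t la = pack_linear_address(0xef600300, 0x12, 1);

		/* The same extraction kvmppc_core_vcpu_translate() performs. */
		uint32_t eaddr = (uint32_t)la;
		unsigned pid   = (la >> 32) & 0xff;
		unsigned as    = (la >> 40) & 0x1;

		printf("eaddr=0x%08x pid=0x%02x as=%u\n", (unsigned)eaddr, pid, as);
		return 0;
	}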
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
new file mode 100644
index 000000000000..82489a743a6f
--- /dev/null
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -0,0 +1,371 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#include <asm/kvm_ppc.h>
21#include <asm/dcr.h>
22#include <asm/dcr-regs.h>
23#include <asm/disassemble.h>
24#include <asm/kvm_44x.h>
25#include "timing.h"
26
27#include "booke.h"
28#include "44x_tlb.h"
29
30#define OP_RFI 19
31
32#define XOP_RFI 50
33#define XOP_MFMSR 83
34#define XOP_WRTEE 131
35#define XOP_MTMSR 146
36#define XOP_WRTEEI 163
37#define XOP_MFDCR 323
38#define XOP_MTDCR 451
39#define XOP_TLBSX 914
40#define XOP_ICCCI 966
41#define XOP_TLBWE 978
42
43static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
44{
45 vcpu->arch.pc = vcpu->arch.srr0;
46 kvmppc_set_msr(vcpu, vcpu->arch.srr1);
47}
48
49int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
50 unsigned int inst, int *advance)
51{
52 int emulated = EMULATE_DONE;
53 int dcrn;
54 int ra;
55 int rb;
56 int rc;
57 int rs;
58 int rt;
59 int ws;
60
61 switch (get_op(inst)) {
62 case OP_RFI:
63 switch (get_xop(inst)) {
64 case XOP_RFI:
65 kvmppc_emul_rfi(vcpu);
66 kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS);
67 *advance = 0;
68 break;
69
70 default:
71 emulated = EMULATE_FAIL;
72 break;
73 }
74 break;
75
76 case 31:
77 switch (get_xop(inst)) {
78
79 case XOP_MFMSR:
80 rt = get_rt(inst);
81 vcpu->arch.gpr[rt] = vcpu->arch.msr;
82 kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
83 break;
84
85 case XOP_MTMSR:
86 rs = get_rs(inst);
87 kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
88 kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
89 break;
90
91 case XOP_WRTEE:
92 rs = get_rs(inst);
93 vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
94 | (vcpu->arch.gpr[rs] & MSR_EE);
95 kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
96 break;
97
98 case XOP_WRTEEI:
99 vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
100 | (inst & MSR_EE);
101 kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
102 break;
103
104 case XOP_MFDCR:
105 dcrn = get_dcrn(inst);
106 rt = get_rt(inst);
107
108 /* The guest may access CPR0 registers to determine the timebase
109 * frequency, and it must know the real host frequency because it
110 * can directly access the timebase registers.
111 *
112 * It would be possible to emulate those accesses in userspace,
113 * but userspace can really only figure out the end frequency.
114 * We could decompose that into the factors that compute it, but
115 * that's tricky math, and it's easier to just report the real
116 * CPR0 values.
117 */
118 switch (dcrn) {
119 case DCRN_CPR0_CONFIG_ADDR:
120 vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
121 break;
122 case DCRN_CPR0_CONFIG_DATA:
123 local_irq_disable();
124 mtdcr(DCRN_CPR0_CONFIG_ADDR,
125 vcpu->arch.cpr0_cfgaddr);
126 vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
127 local_irq_enable();
128 break;
129 default:
130 run->dcr.dcrn = dcrn;
131 run->dcr.data = 0;
132 run->dcr.is_write = 0;
133 vcpu->arch.io_gpr = rt;
134 vcpu->arch.dcr_needed = 1;
135 kvmppc_account_exit(vcpu, DCR_EXITS);
136 emulated = EMULATE_DO_DCR;
137 }
138
139 break;
140
141 case XOP_MTDCR:
142 dcrn = get_dcrn(inst);
143 rs = get_rs(inst);
144
145 /* emulate some access in kernel */
146 switch (dcrn) {
147 case DCRN_CPR0_CONFIG_ADDR:
148 vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
149 break;
150 default:
151 run->dcr.dcrn = dcrn;
152 run->dcr.data = vcpu->arch.gpr[rs];
153 run->dcr.is_write = 1;
154 vcpu->arch.dcr_needed = 1;
155 kvmppc_account_exit(vcpu, DCR_EXITS);
156 emulated = EMULATE_DO_DCR;
157 }
158
159 break;
160
161 case XOP_TLBWE:
162 ra = get_ra(inst);
163 rs = get_rs(inst);
164 ws = get_ws(inst);
165 emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws);
166 break;
167
168 case XOP_TLBSX:
169 rt = get_rt(inst);
170 ra = get_ra(inst);
171 rb = get_rb(inst);
172 rc = get_rc(inst);
173 emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc);
174 break;
175
176 case XOP_ICCCI:
177 break;
178
179 default:
180 emulated = EMULATE_FAIL;
181 }
182
183 break;
184
185 default:
186 emulated = EMULATE_FAIL;
187 }
188
189 return emulated;
190}
191
192int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
193{
194 switch (sprn) {
195 case SPRN_MMUCR:
196 vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
197 case SPRN_PID:
198 kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
199 case SPRN_CCR0:
200 vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
201 case SPRN_CCR1:
202 vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
203 case SPRN_DEAR:
204 vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
205 case SPRN_ESR:
206 vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
207 case SPRN_DBCR0:
208 vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
209 case SPRN_DBCR1:
210 vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
211 case SPRN_TSR:
212 vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
213 case SPRN_TCR:
214 vcpu->arch.tcr = vcpu->arch.gpr[rs];
215 kvmppc_emulate_dec(vcpu);
216 break;
217
218 /* Note: SPRG4-7 are user-readable. These values are
219 * loaded into the real SPRGs when resuming the
220 * guest. */
221 case SPRN_SPRG4:
222 vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
223 case SPRN_SPRG5:
224 vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
225 case SPRN_SPRG6:
226 vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
227 case SPRN_SPRG7:
228 vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
229
230 case SPRN_IVPR:
231 vcpu->arch.ivpr = vcpu->arch.gpr[rs];
232 break;
233 case SPRN_IVOR0:
234 vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs];
235 break;
236 case SPRN_IVOR1:
237 vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs];
238 break;
239 case SPRN_IVOR2:
240 vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs];
241 break;
242 case SPRN_IVOR3:
243 vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs];
244 break;
245 case SPRN_IVOR4:
246 vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs];
247 break;
248 case SPRN_IVOR5:
249 vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs];
250 break;
251 case SPRN_IVOR6:
252 vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs];
253 break;
254 case SPRN_IVOR7:
255 vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs];
256 break;
257 case SPRN_IVOR8:
258 vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs];
259 break;
260 case SPRN_IVOR9:
261 vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs];
262 break;
263 case SPRN_IVOR10:
264 vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs];
265 break;
266 case SPRN_IVOR11:
267 vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs];
268 break;
269 case SPRN_IVOR12:
270 vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs];
271 break;
272 case SPRN_IVOR13:
273 vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs];
274 break;
275 case SPRN_IVOR14:
276 vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs];
277 break;
278 case SPRN_IVOR15:
279 vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs];
280 break;
281
282 default:
283 return EMULATE_FAIL;
284 }
285
286 kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
287 return EMULATE_DONE;
288}
289
290int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
291{
292 switch (sprn) {
293 /* 440 */
294 case SPRN_MMUCR:
295 vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
296 case SPRN_CCR0:
297 vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
298 case SPRN_CCR1:
299 vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
300
301 /* Book E */
302 case SPRN_PID:
303 vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
304 case SPRN_IVPR:
305 vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
306 case SPRN_DEAR:
307 vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
308 case SPRN_ESR:
309 vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
310 case SPRN_DBCR0:
311 vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
312 case SPRN_DBCR1:
313 vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
314
315 case SPRN_IVOR0:
316 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
317 break;
318 case SPRN_IVOR1:
319 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
320 break;
321 case SPRN_IVOR2:
322 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
323 break;
324 case SPRN_IVOR3:
325 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
326 break;
327 case SPRN_IVOR4:
328 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
329 break;
330 case SPRN_IVOR5:
331 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
332 break;
333 case SPRN_IVOR6:
334 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
335 break;
336 case SPRN_IVOR7:
337 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
338 break;
339 case SPRN_IVOR8:
340 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
341 break;
342 case SPRN_IVOR9:
343 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
344 break;
345 case SPRN_IVOR10:
346 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
347 break;
348 case SPRN_IVOR11:
349 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
350 break;
351 case SPRN_IVOR12:
352 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
353 break;
354 case SPRN_IVOR13:
355 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
356 break;
357 case SPRN_IVOR14:
358 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
359 break;
360 case SPRN_IVOR15:
361 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
362 break;
363
364 default:
365 return EMULATE_FAIL;
366 }
367
368 kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
369 return EMULATE_DONE;
370}
371
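The wrtee/wrteei emulation above updates exactly one guest MSR bit: MSR[EE] is copied from the source register (or the instruction word) while every other bit is preserved. A sketch of that masking step, using the architectural MSR_EE value 0x8000 rather than a kernel header:

	#include <stdio.h>

	#define MSR_EE 0x8000UL		/* external interrupt enable */

	static unsigned long emul_wrtee(unsigned long msr, unsigned long rs_val)
	{
		return (msr & ~MSR_EE) | (rs_val & MSR_EE);
	}

	int main(void)
	{
		unsigned long guest_msr = 0x00021000UL;		/* EE currently clear */

		guest_msr = emul_wrtee(guest_msr, ~0UL);	/* source has EE set */
		printf("0x%lx\n", guest_msr);			/* prints 0x29000 */
		return 0;
	}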
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index ad72c6f9811f..9a34b8edb9e2 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -22,20 +22,103 @@
22#include <linux/kvm.h> 22#include <linux/kvm.h>
23#include <linux/kvm_host.h> 23#include <linux/kvm_host.h>
24#include <linux/highmem.h> 24#include <linux/highmem.h>
25
26#include <asm/tlbflush.h>
25#include <asm/mmu-44x.h> 27#include <asm/mmu-44x.h>
26#include <asm/kvm_ppc.h> 28#include <asm/kvm_ppc.h>
29#include <asm/kvm_44x.h>
30#include "timing.h"
27 31
28#include "44x_tlb.h" 32#include "44x_tlb.h"
29 33
34#ifndef PPC44x_TLBE_SIZE
35#define PPC44x_TLBE_SIZE PPC44x_TLB_4K
36#endif
37
38#define PAGE_SIZE_4K (1<<12)
39#define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1))
40
41#define PPC44x_TLB_UATTR_MASK \
42 (PPC44x_TLB_U0|PPC44x_TLB_U1|PPC44x_TLB_U2|PPC44x_TLB_U3)
30#define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW) 43#define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW)
31#define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW) 44#define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW)
32 45
33static unsigned int kvmppc_tlb_44x_pos; 46#ifdef DEBUG
47void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
48{
49 struct kvmppc_44x_tlbe *tlbe;
50 int i;
51
52 printk("vcpu %d TLB dump:\n", vcpu->vcpu_id);
53 printk("| %2s | %3s | %8s | %8s | %8s |\n",
54 "nr", "tid", "word0", "word1", "word2");
55
56 for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) {
57 tlbe = &vcpu_44x->guest_tlb[i];
58 if (tlbe->word0 & PPC44x_TLB_VALID)
59 printk(" G%2d | %02X | %08X | %08X | %08X |\n",
60 i, tlbe->tid, tlbe->word0, tlbe->word1,
61 tlbe->word2);
62 }
63}
64#endif
65
66static inline void kvmppc_44x_tlbie(unsigned int index)
67{
68 /* 0 <= index < 64, so the V bit is clear and we can use the index as
69 * word0. */
70 asm volatile(
71 "tlbwe %[index], %[index], 0\n"
72 :
73 : [index] "r"(index)
74 );
75}
76
77static inline void kvmppc_44x_tlbre(unsigned int index,
78 struct kvmppc_44x_tlbe *tlbe)
79{
80 asm volatile(
81 "tlbre %[word0], %[index], 0\n"
82 "mfspr %[tid], %[sprn_mmucr]\n"
83 "andi. %[tid], %[tid], 0xff\n"
84 "tlbre %[word1], %[index], 1\n"
85 "tlbre %[word2], %[index], 2\n"
86 : [word0] "=r"(tlbe->word0),
87 [word1] "=r"(tlbe->word1),
88 [word2] "=r"(tlbe->word2),
89 [tid] "=r"(tlbe->tid)
90 : [index] "r"(index),
91 [sprn_mmucr] "i"(SPRN_MMUCR)
92 : "cc"
93 );
94}
95
96static inline void kvmppc_44x_tlbwe(unsigned int index,
97 struct kvmppc_44x_tlbe *stlbe)
98{
99 unsigned long tmp;
100
101 asm volatile(
102 "mfspr %[tmp], %[sprn_mmucr]\n"
103 "rlwimi %[tmp], %[tid], 0, 0xff\n"
104 "mtspr %[sprn_mmucr], %[tmp]\n"
105 "tlbwe %[word0], %[index], 0\n"
106 "tlbwe %[word1], %[index], 1\n"
107 "tlbwe %[word2], %[index], 2\n"
108 : [tmp] "=&r"(tmp)
109 : [word0] "r"(stlbe->word0),
110 [word1] "r"(stlbe->word1),
111 [word2] "r"(stlbe->word2),
112 [tid] "r"(stlbe->tid),
113 [index] "r"(index),
114 [sprn_mmucr] "i"(SPRN_MMUCR)
115 );
116}
34 117
35static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode) 118static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
36{ 119{
37 /* Mask off reserved bits. */ 120 /* We only care about the guest's permission and user bits. */
38 attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_ATTR_MASK; 121 attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_UATTR_MASK;
39 122
40 if (!usermode) { 123 if (!usermode) {
41 /* Guest is in supervisor mode, so we need to translate guest 124 /* Guest is in supervisor mode, so we need to translate guest
@@ -47,18 +130,60 @@ static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
47 /* Make sure host can always access this memory. */ 130 /* Make sure host can always access this memory. */
48 attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW; 131 attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW;
49 132
133 /* WIMGE = 0b00100 */
134 attrib |= PPC44x_TLB_M;
135
50 return attrib; 136 return attrib;
51} 137}
52 138
139/* Load shadow TLB back into hardware. */
140void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu)
141{
142 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
143 int i;
144
145 for (i = 0; i <= tlb_44x_hwater; i++) {
146 struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i];
147
148 if (get_tlb_v(stlbe) && get_tlb_ts(stlbe))
149 kvmppc_44x_tlbwe(i, stlbe);
150 }
151}
152
153static void kvmppc_44x_tlbe_set_modified(struct kvmppc_vcpu_44x *vcpu_44x,
154 unsigned int i)
155{
156 vcpu_44x->shadow_tlb_mod[i] = 1;
157}
158
159/* Save hardware TLB to the vcpu, and invalidate all guest mappings. */
160void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu)
161{
162 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
163 int i;
164
165 for (i = 0; i <= tlb_44x_hwater; i++) {
166 struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i];
167
168 if (vcpu_44x->shadow_tlb_mod[i])
169 kvmppc_44x_tlbre(i, stlbe);
170
171 if (get_tlb_v(stlbe) && get_tlb_ts(stlbe))
172 kvmppc_44x_tlbie(i);
173 }
174}
175
176
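
For reference, a small stand-alone C model of the shadow_tlb_mod[] bookkeeping used by kvmppc_44x_tlb_load()/kvmppc_44x_tlb_put() above. The names and the 64-entry size are illustrative, this is not the kernel code, and it leaves out the TS=1 invalidation step; the point is only that slots written since the last sync are the only ones read back, so a vcpu context switch touches as few hardware entries as possible.

#include <stdio.h>

#define NENTRIES 64

struct entry { unsigned int word0, word1, word2; };

static struct entry hw[NENTRIES];          /* stands in for the hardware TLB */
static struct entry shadow[NENTRIES];      /* software copy kept with the vcpu */
static unsigned char shadow_mod[NENTRIES]; /* "written since last sync" flags */

static void shadow_write(unsigned int i, struct entry e)
{
	hw[i] = e;              /* kvmppc_44x_tlbwe() in the real code */
	shadow_mod[i] = 1;      /* kvmppc_44x_tlbe_set_modified() */
}

static void shadow_put(void)    /* vcpu is being descheduled */
{
	unsigned int i;

	for (i = 0; i < NENTRIES; i++)
		if (shadow_mod[i])
			shadow[i] = hw[i];  /* kvmppc_44x_tlbre() only where needed */
}

int main(void)
{
	struct entry e = { 0x200, 0x1000, 0x3f };

	shadow_write(5, e);
	shadow_put();
	printf("slot 5 saved: word0=%#x\n", shadow[5].word0);
	return 0;
}
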
53/* Search the guest TLB for a matching entry. */ 177/* Search the guest TLB for a matching entry. */
54int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, 178int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
55 unsigned int as) 179 unsigned int as)
56{ 180{
181 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
57 int i; 182 int i;
58 183
59 /* XXX Replace loop with fancy data structures. */ 184 /* XXX Replace loop with fancy data structures. */
60 for (i = 0; i < PPC44x_TLB_SIZE; i++) { 185 for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) {
61 struct tlbe *tlbe = &vcpu->arch.guest_tlb[i]; 186 struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[i];
62 unsigned int tid; 187 unsigned int tid;
63 188
64 if (eaddr < get_tlb_eaddr(tlbe)) 189 if (eaddr < get_tlb_eaddr(tlbe))
@@ -83,78 +208,89 @@ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
83 return -1; 208 return -1;
84} 209}
85 210
86struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr) 211int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
87{ 212{
88 unsigned int as = !!(vcpu->arch.msr & MSR_IS); 213 unsigned int as = !!(vcpu->arch.msr & MSR_IS);
89 unsigned int index;
90 214
91 index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); 215 return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
92 if (index == -1)
93 return NULL;
94 return &vcpu->arch.guest_tlb[index];
95} 216}
96 217
97struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr) 218int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
98{ 219{
99 unsigned int as = !!(vcpu->arch.msr & MSR_DS); 220 unsigned int as = !!(vcpu->arch.msr & MSR_DS);
100 unsigned int index;
101 221
102 index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); 222 return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
103 if (index == -1)
104 return NULL;
105 return &vcpu->arch.guest_tlb[index];
106} 223}
107 224
108static int kvmppc_44x_tlbe_is_writable(struct tlbe *tlbe) 225static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
226 unsigned int stlb_index)
109{ 227{
110 return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW); 228 struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[stlb_index];
111}
112 229
113static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu, 230 if (!ref->page)
114 unsigned int index) 231 return;
115{
116 struct tlbe *stlbe = &vcpu->arch.shadow_tlb[index];
117 struct page *page = vcpu->arch.shadow_pages[index];
118 232
119 if (get_tlb_v(stlbe)) { 233 /* Discard from the TLB. */
120 if (kvmppc_44x_tlbe_is_writable(stlbe)) 234 /* Note: we could actually invalidate a host mapping, if the host overwrote
121 kvm_release_page_dirty(page); 235 * this TLB entry since we inserted a guest mapping. */
122 else 236 kvmppc_44x_tlbie(stlb_index);
123 kvm_release_page_clean(page); 237
124 } 238 /* Now release the page. */
239 if (ref->writeable)
240 kvm_release_page_dirty(ref->page);
241 else
242 kvm_release_page_clean(ref->page);
243
244 ref->page = NULL;
245
246 /* XXX set tlb_44x_index to stlb_index? */
247
248 KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler);
125} 249}
126 250
127void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu) 251void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu)
128{ 252{
253 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
129 int i; 254 int i;
130 255
131 for (i = 0; i <= tlb_44x_hwater; i++) 256 for (i = 0; i <= tlb_44x_hwater; i++)
132 kvmppc_44x_shadow_release(vcpu, i); 257 kvmppc_44x_shadow_release(vcpu_44x, i);
133}
134
135void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i)
136{
137 vcpu->arch.shadow_tlb_mod[i] = 1;
138} 258}
139 259
140/* Caller must ensure that the specified guest TLB entry is safe to insert into 260/**
141 * the shadow TLB. */ 261 * kvmppc_mmu_map -- create a host mapping for guest memory
142void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, 262 *
143 u32 flags) 263 * If the guest wanted a larger page than the host supports, only the first
264 * host page is mapped here and the rest are demand faulted.
265 *
266 * If the guest wanted a smaller page than the host page size, we map only the
267 * guest-size page (i.e. not a full host page mapping).
268 *
269 * Caller must ensure that the specified guest TLB entry is safe to insert into
270 * the shadow TLB.
271 */
272void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid,
273 u32 flags, u32 max_bytes, unsigned int gtlb_index)
144{ 274{
275 struct kvmppc_44x_tlbe stlbe;
276 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
277 struct kvmppc_44x_shadow_ref *ref;
145 struct page *new_page; 278 struct page *new_page;
146 struct tlbe *stlbe;
147 hpa_t hpaddr; 279 hpa_t hpaddr;
280 gfn_t gfn;
148 unsigned int victim; 281 unsigned int victim;
149 282
150 /* Future optimization: don't overwrite the TLB entry containing the 283 /* Select TLB entry to clobber. Indirectly guard against races with the TLB
151 * current PC (or stack?). */ 284 * miss handler by disabling interrupts. */
152 victim = kvmppc_tlb_44x_pos++; 285 local_irq_disable();
153 if (kvmppc_tlb_44x_pos > tlb_44x_hwater) 286 victim = ++tlb_44x_index;
154 kvmppc_tlb_44x_pos = 0; 287 if (victim > tlb_44x_hwater)
155 stlbe = &vcpu->arch.shadow_tlb[victim]; 288 victim = 0;
289 tlb_44x_index = victim;
290 local_irq_enable();
156 291
157 /* Get reference to new page. */ 292 /* Get reference to new page. */
293 gfn = gpaddr >> PAGE_SHIFT;
158 new_page = gfn_to_page(vcpu->kvm, gfn); 294 new_page = gfn_to_page(vcpu->kvm, gfn);
159 if (is_error_page(new_page)) { 295 if (is_error_page(new_page)) {
160 printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn); 296 printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
@@ -163,10 +299,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
163 } 299 }
164 hpaddr = page_to_phys(new_page); 300 hpaddr = page_to_phys(new_page);
165 301
166 /* Drop reference to old page. */ 302 /* Invalidate any previous shadow mappings. */
167 kvmppc_44x_shadow_release(vcpu, victim); 303 kvmppc_44x_shadow_release(vcpu_44x, victim);
168
169 vcpu->arch.shadow_pages[victim] = new_page;
170 304
171 /* XXX Make sure (va, size) doesn't overlap any other 305 /* XXX Make sure (va, size) doesn't overlap any other
172 * entries. 440x6 user manual says the result would be 306 * entries. 440x6 user manual says the result would be
@@ -174,78 +308,193 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
174 308
175 /* XXX what about AS? */ 309 /* XXX what about AS? */
176 310
177 stlbe->tid = !(asid & 0xff);
178
179 /* Force TS=1 for all guest mappings. */ 311 /* Force TS=1 for all guest mappings. */
180 /* For now we hardcode 4KB mappings, but it will be important to 312 stlbe.word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS;
181 * use host large pages in the future. */ 313
182 stlbe->word0 = (gvaddr & PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS 314 if (max_bytes >= PAGE_SIZE) {
183 | PPC44x_TLB_4K; 315 /* Guest mapping is larger than or equal to host page size. We can use
184 stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); 316 * a "native" host mapping. */
185 stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags, 317 stlbe.word0 |= (gvaddr & PAGE_MASK) | PPC44x_TLBE_SIZE;
186 vcpu->arch.msr & MSR_PR); 318 } else {
187 kvmppc_tlbe_set_modified(vcpu, victim); 319 /* Guest mapping is smaller than host page size. We must restrict the
320 * size of the mapping to be at most the smaller of the two, but for
321 * simplicity we fall back to a 4K mapping (this is probably what the
322 * guest is using anyways). */
323 stlbe.word0 |= (gvaddr & PAGE_MASK_4K) | PPC44x_TLB_4K;
324
325 /* 'hpaddr' is a host page, which is larger than the mapping we're
326 * inserting here. To compensate, we must add the in-page offset to the
327 * sub-page. */
328 hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K);
329 }
188 330
189 KVMTRACE_5D(STLB_WRITE, vcpu, victim, 331 stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
190 stlbe->tid, stlbe->word0, stlbe->word1, stlbe->word2, 332 stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags,
191 handler); 333 vcpu->arch.msr & MSR_PR);
334 stlbe.tid = !(asid & 0xff);
335
336 /* Keep track of the reference so we can properly release it later. */
337 ref = &vcpu_44x->shadow_refs[victim];
338 ref->page = new_page;
339 ref->gtlb_index = gtlb_index;
340 ref->writeable = !!(stlbe.word2 & PPC44x_TLB_UW);
341 ref->tid = stlbe.tid;
342
343 /* Insert shadow mapping into hardware TLB. */
344 kvmppc_44x_tlbe_set_modified(vcpu_44x, victim);
345 kvmppc_44x_tlbwe(victim, &stlbe);
346 KVMTRACE_5D(STLB_WRITE, vcpu, victim, stlbe.tid, stlbe.word0, stlbe.word1,
347 stlbe.word2, handler);
192} 348}
193 349
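
The max_bytes branch above is easiest to see with concrete numbers. Below is a stand-alone sketch (assumed values only, not kernel code) of the "hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K)" step, assuming a 64K host page: the XOR leaves exactly the bits that select a 4K slice inside the host page, and it is zero when the host itself uses 4K pages, in which case no adjustment happens.

#include <stdio.h>
#include <stdint.h>

#define HOST_PAGE_SHIFT 16                        /* assume a 64K host page */
#define HOST_PAGE_MASK  (~((1UL << HOST_PAGE_SHIFT) - 1))
#define PAGE_MASK_4K    (~((1UL << 12) - 1))

int main(void)
{
	uint64_t gpaddr = 0x12345678;             /* guest physical address (made up) */
	uint64_t host_page_base = 0xABCD0000;     /* phys base of the host page backing gfn */
	uint64_t hpaddr = host_page_base;

	/* The guest wanted at most a 4K page, but the host page is 64K:
	 * add the 4K-granular offset inside the host page. */
	hpaddr |= gpaddr & (HOST_PAGE_MASK ^ PAGE_MASK_4K);

	printf("sub-page offset bits: 0x%llx\n",
	       (unsigned long long)(gpaddr & (HOST_PAGE_MASK ^ PAGE_MASK_4K)));
	printf("shadow mapping base:  0x%llx\n", (unsigned long long)hpaddr);
	return 0;
}
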
194void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, 350/* For a particular guest TLB entry, invalidate the corresponding host TLB
195 gva_t eend, u32 asid) 351 * mappings and release the host pages. */
352static void kvmppc_44x_invalidate(struct kvm_vcpu *vcpu,
353 unsigned int gtlb_index)
196{ 354{
197 unsigned int pid = !(asid & 0xff); 355 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
198 int i; 356 int i;
199 357
200 /* XXX Replace loop with fancy data structures. */ 358 for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) {
201 for (i = 0; i <= tlb_44x_hwater; i++) { 359 struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i];
202 struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i]; 360 if (ref->gtlb_index == gtlb_index)
203 unsigned int tid; 361 kvmppc_44x_shadow_release(vcpu_44x, i);
362 }
363}
204 364
205 if (!get_tlb_v(stlbe)) 365void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
206 continue; 366{
367 vcpu->arch.shadow_pid = !usermode;
368}
207 369
208 if (eend < get_tlb_eaddr(stlbe)) 370void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid)
209 continue; 371{
372 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
373 int i;
210 374
211 if (eaddr > get_tlb_end(stlbe)) 375 if (unlikely(vcpu->arch.pid == new_pid))
212 continue; 376 return;
213 377
214 tid = get_tlb_tid(stlbe); 378 vcpu->arch.pid = new_pid;
215 if (tid && (tid != pid))
216 continue;
217 379
218 kvmppc_44x_shadow_release(vcpu, i); 380 /* Guest userspace runs with TID=0 mappings and PID=0, to make sure it
219 stlbe->word0 = 0; 381 * can't access guest kernel mappings (TID=1). When we switch to a new
220 kvmppc_tlbe_set_modified(vcpu, i); 382 * guest PID, which will also use host PID=0, we must discard the old guest
221 KVMTRACE_5D(STLB_INVAL, vcpu, i, 383 * userspace mappings. */
222 stlbe->tid, stlbe->word0, stlbe->word1, 384 for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) {
223 stlbe->word2, handler); 385 struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i];
386
387 if (ref->tid == 0)
388 kvmppc_44x_shadow_release(vcpu_44x, i);
224 } 389 }
225} 390}
226 391
227/* Invalidate all mappings on the privilege switch after PID has been changed. 392static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
228 * The guest always runs with PID=1, so we must clear the entire TLB when 393 const struct kvmppc_44x_tlbe *tlbe)
229 * switching address spaces. */
230void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
231{ 394{
232 int i; 395 gpa_t gpa;
233 396
234 if (vcpu->arch.swap_pid) { 397 if (!get_tlb_v(tlbe))
235 /* XXX Replace loop with fancy data structures. */ 398 return 0;
236 for (i = 0; i <= tlb_44x_hwater; i++) { 399
237 struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i]; 400 /* Does it match current guest AS? */
238 401 /* XXX what about IS != DS? */
239 /* Future optimization: clear only userspace mappings. */ 402 if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
240 kvmppc_44x_shadow_release(vcpu, i); 403 return 0;
241 stlbe->word0 = 0; 404
242 kvmppc_tlbe_set_modified(vcpu, i); 405 gpa = get_tlb_raddr(tlbe);
243 KVMTRACE_5D(STLB_INVAL, vcpu, i, 406 if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
244 stlbe->tid, stlbe->word0, stlbe->word1, 407 /* Mapping is not for RAM. */
245 stlbe->word2, handler); 408 return 0;
246 } 409
247 vcpu->arch.swap_pid = 0; 410 return 1;
411}
412
413int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
414{
415 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
416 struct kvmppc_44x_tlbe *tlbe;
417 unsigned int gtlb_index;
418
419 gtlb_index = vcpu->arch.gpr[ra];
420 if (gtlb_index > KVM44x_GUEST_TLB_SIZE) {
421 printk("%s: index %d\n", __func__, gtlb_index);
422 kvmppc_dump_vcpu(vcpu);
423 return EMULATE_FAIL;
248 } 424 }
249 425
250 vcpu->arch.shadow_pid = !usermode; 426 tlbe = &vcpu_44x->guest_tlb[gtlb_index];
427
428 /* Invalidate shadow mappings for the about-to-be-clobbered TLB entry. */
429 if (tlbe->word0 & PPC44x_TLB_VALID)
430 kvmppc_44x_invalidate(vcpu, gtlb_index);
431
432 switch (ws) {
433 case PPC44x_TLB_PAGEID:
434 tlbe->tid = get_mmucr_stid(vcpu);
435 tlbe->word0 = vcpu->arch.gpr[rs];
436 break;
437
438 case PPC44x_TLB_XLAT:
439 tlbe->word1 = vcpu->arch.gpr[rs];
440 break;
441
442 case PPC44x_TLB_ATTRIB:
443 tlbe->word2 = vcpu->arch.gpr[rs];
444 break;
445
446 default:
447 return EMULATE_FAIL;
448 }
449
450 if (tlbe_is_host_safe(vcpu, tlbe)) {
451 u64 asid;
452 gva_t eaddr;
453 gpa_t gpaddr;
454 u32 flags;
455 u32 bytes;
456
457 eaddr = get_tlb_eaddr(tlbe);
458 gpaddr = get_tlb_raddr(tlbe);
459
460 /* Use the advertised page size to mask effective and real addrs. */
461 bytes = get_tlb_bytes(tlbe);
462 eaddr &= ~(bytes - 1);
463 gpaddr &= ~(bytes - 1);
464
465 asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
466 flags = tlbe->word2 & 0xffff;
467
468 kvmppc_mmu_map(vcpu, eaddr, gpaddr, asid, flags, bytes, gtlb_index);
469 }
470
471 KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0,
472 tlbe->word1, tlbe->word2, handler);
473
474 kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
475 return EMULATE_DONE;
476}
477
478int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc)
479{
480 u32 ea;
481 int gtlb_index;
482 unsigned int as = get_mmucr_sts(vcpu);
483 unsigned int pid = get_mmucr_stid(vcpu);
484
485 ea = vcpu->arch.gpr[rb];
486 if (ra)
487 ea += vcpu->arch.gpr[ra];
488
489 gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
490 if (rc) {
491 if (gtlb_index < 0)
492 vcpu->arch.cr &= ~0x20000000;
493 else
494 vcpu->arch.cr |= 0x20000000;
495 }
496 vcpu->arch.gpr[rt] = gtlb_index;
497
498 kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
499 return EMULATE_DONE;
251} 500}
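
In the tlbsx emulation above, the constant 0x20000000 is the EQ bit of condition-register field 0, which the record form of tlbsx uses to report whether a matching entry was found. A stand-alone sketch of that bit manipulation (names are illustrative, not from the patch):

#include <stdio.h>
#include <stdint.h>

#define CR0_LT 0x80000000u
#define CR0_GT 0x40000000u
#define CR0_EQ 0x20000000u   /* "entry found" for tlbsx. */
#define CR0_SO 0x10000000u

static uint32_t set_tlbsx_result(uint32_t cr, int gtlb_index)
{
	if (gtlb_index < 0)
		cr &= ~CR0_EQ;   /* no matching guest TLB entry */
	else
		cr |= CR0_EQ;    /* match found */
	return cr;
}

int main(void)
{
	printf("found:     cr=%08x\n", set_tlbsx_result(0, 5));
	printf("not found: cr=%08x\n", set_tlbsx_result(CR0_EQ, -1));
	return 0;
}
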
diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h
index 2ccd46b6f6b7..772191f29e62 100644
--- a/arch/powerpc/kvm/44x_tlb.h
+++ b/arch/powerpc/kvm/44x_tlb.h
@@ -25,48 +25,52 @@
25 25
26extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, 26extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr,
27 unsigned int pid, unsigned int as); 27 unsigned int pid, unsigned int as);
28extern struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr); 28extern int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
29extern struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr); 29extern int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
30
31extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb,
32 u8 rc);
33extern int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws);
30 34
31/* TLB helper functions */ 35/* TLB helper functions */
32static inline unsigned int get_tlb_size(const struct tlbe *tlbe) 36static inline unsigned int get_tlb_size(const struct kvmppc_44x_tlbe *tlbe)
33{ 37{
34 return (tlbe->word0 >> 4) & 0xf; 38 return (tlbe->word0 >> 4) & 0xf;
35} 39}
36 40
37static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe) 41static inline gva_t get_tlb_eaddr(const struct kvmppc_44x_tlbe *tlbe)
38{ 42{
39 return tlbe->word0 & 0xfffffc00; 43 return tlbe->word0 & 0xfffffc00;
40} 44}
41 45
42static inline gva_t get_tlb_bytes(const struct tlbe *tlbe) 46static inline gva_t get_tlb_bytes(const struct kvmppc_44x_tlbe *tlbe)
43{ 47{
44 unsigned int pgsize = get_tlb_size(tlbe); 48 unsigned int pgsize = get_tlb_size(tlbe);
45 return 1 << 10 << (pgsize << 1); 49 return 1 << 10 << (pgsize << 1);
46} 50}
47 51
48static inline gva_t get_tlb_end(const struct tlbe *tlbe) 52static inline gva_t get_tlb_end(const struct kvmppc_44x_tlbe *tlbe)
49{ 53{
50 return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1; 54 return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1;
51} 55}
52 56
53static inline u64 get_tlb_raddr(const struct tlbe *tlbe) 57static inline u64 get_tlb_raddr(const struct kvmppc_44x_tlbe *tlbe)
54{ 58{
55 u64 word1 = tlbe->word1; 59 u64 word1 = tlbe->word1;
56 return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00); 60 return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00);
57} 61}
58 62
59static inline unsigned int get_tlb_tid(const struct tlbe *tlbe) 63static inline unsigned int get_tlb_tid(const struct kvmppc_44x_tlbe *tlbe)
60{ 64{
61 return tlbe->tid & 0xff; 65 return tlbe->tid & 0xff;
62} 66}
63 67
64static inline unsigned int get_tlb_ts(const struct tlbe *tlbe) 68static inline unsigned int get_tlb_ts(const struct kvmppc_44x_tlbe *tlbe)
65{ 69{
66 return (tlbe->word0 >> 8) & 0x1; 70 return (tlbe->word0 >> 8) & 0x1;
67} 71}
68 72
69static inline unsigned int get_tlb_v(const struct tlbe *tlbe) 73static inline unsigned int get_tlb_v(const struct kvmppc_44x_tlbe *tlbe)
70{ 74{
71 return (tlbe->word0 >> 9) & 0x1; 75 return (tlbe->word0 >> 9) & 0x1;
72} 76}
@@ -81,7 +85,7 @@ static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu)
81 return (vcpu->arch.mmucr >> 16) & 0x1; 85 return (vcpu->arch.mmucr >> 16) & 0x1;
82} 86}
83 87
84static inline gpa_t tlb_xlate(struct tlbe *tlbe, gva_t eaddr) 88static inline gpa_t tlb_xlate(struct kvmppc_44x_tlbe *tlbe, gva_t eaddr)
85{ 89{
86 unsigned int pgmask = get_tlb_bytes(tlbe) - 1; 90 unsigned int pgmask = get_tlb_bytes(tlbe) - 1;
87 91
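
The get_tlb_bytes() helper above encodes the 44x page-size field as bytes = 1 << 10 << (SIZE << 1), i.e. 1KB * 4^SIZE, so SIZE=1 works out to 4KB and SIZE=7 to 16MB (the formula covers every 4-bit encoding; the hardware only defines a subset of them). A trivial stand-alone check of the formula, for illustration only:

#include <stdio.h>

int main(void)
{
	unsigned int size;

	for (size = 0; size <= 9; size++)
		printf("SIZE=%u -> %u KB\n", size,
		       (1u << 10 << (size << 1)) >> 10);
	return 0;
}
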
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 53aaa66b25e5..6dbdc4817d80 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -15,27 +15,33 @@ menuconfig VIRTUALIZATION
15if VIRTUALIZATION 15if VIRTUALIZATION
16 16
17config KVM 17config KVM
18 bool "Kernel-based Virtual Machine (KVM) support" 18 bool
19 depends on 44x && EXPERIMENTAL
20 select PREEMPT_NOTIFIERS 19 select PREEMPT_NOTIFIERS
21 select ANON_INODES 20 select ANON_INODES
22 # We can only run on Book E hosts so far 21
23 select KVM_BOOKE_HOST 22config KVM_440
23 bool "KVM support for PowerPC 440 processors"
24 depends on EXPERIMENTAL && 44x
25 select KVM
24 ---help--- 26 ---help---
25 Support hosting virtualized guest machines. You will also 27 Support running unmodified 440 guest kernels in virtual machines on
26 need to select one or more of the processor modules below. 28 440 host processors.
27 29
28 This module provides access to the hardware capabilities through 30 This module provides access to the hardware capabilities through
29 a character device node named /dev/kvm. 31 a character device node named /dev/kvm.
30 32
31 If unsure, say N. 33 If unsure, say N.
32 34
33config KVM_BOOKE_HOST 35config KVM_EXIT_TIMING
34 bool "KVM host support for Book E PowerPC processors" 36 bool "Detailed exit timing"
35 depends on KVM && 44x 37 depends on KVM
36 ---help--- 38 ---help---
37 Provides host support for KVM on Book E PowerPC processors. Currently 39 Calculate elapsed time for every exit/enter cycle. A per-vcpu
38 this works on 440 processors only. 40 report is available in debugfs kvm/vm#_vcpu#_timing.
 41	  The overhead is relatively small; however, it is not recommended for
42 production environments.
43
44 If unsure, say N.
39 45
40config KVM_TRACE 46config KVM_TRACE
41 bool "KVM trace support" 47 bool "KVM trace support"
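
With the options restructured as above, a hypothetical configuration for a 440 board that wants the new exit-timing statistics could contain the fragment below (a sketch, not taken from the patch). The per-vcpu report can then be read from debugfs under kvm/vm#_vcpu#_timing, as the KVM_EXIT_TIMING help text describes, assuming debugfs is mounted.

# illustrative .config fragment
CONFIG_VIRTUALIZATION=y
CONFIG_KVM_440=y
CONFIG_KVM_EXIT_TIMING=y
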
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 2a5d4397ac4b..df7ba59e6d53 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -8,10 +8,16 @@ common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
8 8
9common-objs-$(CONFIG_KVM_TRACE) += $(addprefix ../../../virt/kvm/, kvm_trace.o) 9common-objs-$(CONFIG_KVM_TRACE) += $(addprefix ../../../virt/kvm/, kvm_trace.o)
10 10
11kvm-objs := $(common-objs-y) powerpc.o emulate.o booke_guest.o 11kvm-objs := $(common-objs-y) powerpc.o emulate.o
12obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
12obj-$(CONFIG_KVM) += kvm.o 13obj-$(CONFIG_KVM) += kvm.o
13 14
14AFLAGS_booke_interrupts.o := -I$(obj) 15AFLAGS_booke_interrupts.o := -I$(obj)
15 16
16kvm-booke-host-objs := booke_host.o booke_interrupts.o 44x_tlb.o 17kvm-440-objs := \
17obj-$(CONFIG_KVM_BOOKE_HOST) += kvm-booke-host.o 18 booke.o \
19 booke_interrupts.o \
20 44x.o \
21 44x_tlb.o \
22 44x_emulate.o
23obj-$(CONFIG_KVM_440) += kvm-440.o
diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke.c
index 7b2591e26bae..35485dd6927e 100644
--- a/arch/powerpc/kvm/booke_guest.c
+++ b/arch/powerpc/kvm/booke.c
@@ -24,21 +24,26 @@
24#include <linux/module.h> 24#include <linux/module.h>
25#include <linux/vmalloc.h> 25#include <linux/vmalloc.h>
26#include <linux/fs.h> 26#include <linux/fs.h>
27
27#include <asm/cputable.h> 28#include <asm/cputable.h>
28#include <asm/uaccess.h> 29#include <asm/uaccess.h>
29#include <asm/kvm_ppc.h> 30#include <asm/kvm_ppc.h>
31#include "timing.h"
32#include <asm/cacheflush.h>
33#include <asm/kvm_44x.h>
30 34
35#include "booke.h"
31#include "44x_tlb.h" 36#include "44x_tlb.h"
32 37
38unsigned long kvmppc_booke_handlers;
39
33#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM 40#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
34#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU 41#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
35 42
36struct kvm_stats_debugfs_item debugfs_entries[] = { 43struct kvm_stats_debugfs_item debugfs_entries[] = {
37 { "exits", VCPU_STAT(sum_exits) },
38 { "mmio", VCPU_STAT(mmio_exits) }, 44 { "mmio", VCPU_STAT(mmio_exits) },
39 { "dcr", VCPU_STAT(dcr_exits) }, 45 { "dcr", VCPU_STAT(dcr_exits) },
40 { "sig", VCPU_STAT(signal_exits) }, 46 { "sig", VCPU_STAT(signal_exits) },
41 { "light", VCPU_STAT(light_exits) },
42 { "itlb_r", VCPU_STAT(itlb_real_miss_exits) }, 47 { "itlb_r", VCPU_STAT(itlb_real_miss_exits) },
43 { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) }, 48 { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) },
44 { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) }, 49 { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) },
@@ -53,103 +58,19 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
53 { NULL } 58 { NULL }
54}; 59};
55 60
56static const u32 interrupt_msr_mask[16] = {
57 [BOOKE_INTERRUPT_CRITICAL] = MSR_ME,
58 [BOOKE_INTERRUPT_MACHINE_CHECK] = 0,
59 [BOOKE_INTERRUPT_DATA_STORAGE] = MSR_CE|MSR_ME|MSR_DE,
60 [BOOKE_INTERRUPT_INST_STORAGE] = MSR_CE|MSR_ME|MSR_DE,
61 [BOOKE_INTERRUPT_EXTERNAL] = MSR_CE|MSR_ME|MSR_DE,
62 [BOOKE_INTERRUPT_ALIGNMENT] = MSR_CE|MSR_ME|MSR_DE,
63 [BOOKE_INTERRUPT_PROGRAM] = MSR_CE|MSR_ME|MSR_DE,
64 [BOOKE_INTERRUPT_FP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE,
65 [BOOKE_INTERRUPT_SYSCALL] = MSR_CE|MSR_ME|MSR_DE,
66 [BOOKE_INTERRUPT_AP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE,
67 [BOOKE_INTERRUPT_DECREMENTER] = MSR_CE|MSR_ME|MSR_DE,
68 [BOOKE_INTERRUPT_FIT] = MSR_CE|MSR_ME|MSR_DE,
69 [BOOKE_INTERRUPT_WATCHDOG] = MSR_ME,
70 [BOOKE_INTERRUPT_DTLB_MISS] = MSR_CE|MSR_ME|MSR_DE,
71 [BOOKE_INTERRUPT_ITLB_MISS] = MSR_CE|MSR_ME|MSR_DE,
72 [BOOKE_INTERRUPT_DEBUG] = MSR_ME,
73};
74
75const unsigned char exception_priority[] = {
76 [BOOKE_INTERRUPT_DATA_STORAGE] = 0,
77 [BOOKE_INTERRUPT_INST_STORAGE] = 1,
78 [BOOKE_INTERRUPT_ALIGNMENT] = 2,
79 [BOOKE_INTERRUPT_PROGRAM] = 3,
80 [BOOKE_INTERRUPT_FP_UNAVAIL] = 4,
81 [BOOKE_INTERRUPT_SYSCALL] = 5,
82 [BOOKE_INTERRUPT_AP_UNAVAIL] = 6,
83 [BOOKE_INTERRUPT_DTLB_MISS] = 7,
84 [BOOKE_INTERRUPT_ITLB_MISS] = 8,
85 [BOOKE_INTERRUPT_MACHINE_CHECK] = 9,
86 [BOOKE_INTERRUPT_DEBUG] = 10,
87 [BOOKE_INTERRUPT_CRITICAL] = 11,
88 [BOOKE_INTERRUPT_WATCHDOG] = 12,
89 [BOOKE_INTERRUPT_EXTERNAL] = 13,
90 [BOOKE_INTERRUPT_FIT] = 14,
91 [BOOKE_INTERRUPT_DECREMENTER] = 15,
92};
93
94const unsigned char priority_exception[] = {
95 BOOKE_INTERRUPT_DATA_STORAGE,
96 BOOKE_INTERRUPT_INST_STORAGE,
97 BOOKE_INTERRUPT_ALIGNMENT,
98 BOOKE_INTERRUPT_PROGRAM,
99 BOOKE_INTERRUPT_FP_UNAVAIL,
100 BOOKE_INTERRUPT_SYSCALL,
101 BOOKE_INTERRUPT_AP_UNAVAIL,
102 BOOKE_INTERRUPT_DTLB_MISS,
103 BOOKE_INTERRUPT_ITLB_MISS,
104 BOOKE_INTERRUPT_MACHINE_CHECK,
105 BOOKE_INTERRUPT_DEBUG,
106 BOOKE_INTERRUPT_CRITICAL,
107 BOOKE_INTERRUPT_WATCHDOG,
108 BOOKE_INTERRUPT_EXTERNAL,
109 BOOKE_INTERRUPT_FIT,
110 BOOKE_INTERRUPT_DECREMENTER,
111};
112
113
114void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
115{
116 struct tlbe *tlbe;
117 int i;
118
119 printk("vcpu %d TLB dump:\n", vcpu->vcpu_id);
120 printk("| %2s | %3s | %8s | %8s | %8s |\n",
121 "nr", "tid", "word0", "word1", "word2");
122
123 for (i = 0; i < PPC44x_TLB_SIZE; i++) {
124 tlbe = &vcpu->arch.guest_tlb[i];
125 if (tlbe->word0 & PPC44x_TLB_VALID)
126 printk(" G%2d | %02X | %08X | %08X | %08X |\n",
127 i, tlbe->tid, tlbe->word0, tlbe->word1,
128 tlbe->word2);
129 }
130
131 for (i = 0; i < PPC44x_TLB_SIZE; i++) {
132 tlbe = &vcpu->arch.shadow_tlb[i];
133 if (tlbe->word0 & PPC44x_TLB_VALID)
134 printk(" S%2d | %02X | %08X | %08X | %08X |\n",
135 i, tlbe->tid, tlbe->word0, tlbe->word1,
136 tlbe->word2);
137 }
138}
139
140/* TODO: use vcpu_printf() */ 61/* TODO: use vcpu_printf() */
141void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu) 62void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
142{ 63{
143 int i; 64 int i;
144 65
145 printk("pc: %08x msr: %08x\n", vcpu->arch.pc, vcpu->arch.msr); 66 printk("pc: %08lx msr: %08lx\n", vcpu->arch.pc, vcpu->arch.msr);
146 printk("lr: %08x ctr: %08x\n", vcpu->arch.lr, vcpu->arch.ctr); 67 printk("lr: %08lx ctr: %08lx\n", vcpu->arch.lr, vcpu->arch.ctr);
147 printk("srr0: %08x srr1: %08x\n", vcpu->arch.srr0, vcpu->arch.srr1); 68 printk("srr0: %08lx srr1: %08lx\n", vcpu->arch.srr0, vcpu->arch.srr1);
148 69
149 printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions); 70 printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions);
150 71
151 for (i = 0; i < 32; i += 4) { 72 for (i = 0; i < 32; i += 4) {
152 printk("gpr%02d: %08x %08x %08x %08x\n", i, 73 printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i,
153 vcpu->arch.gpr[i], 74 vcpu->arch.gpr[i],
154 vcpu->arch.gpr[i+1], 75 vcpu->arch.gpr[i+1],
155 vcpu->arch.gpr[i+2], 76 vcpu->arch.gpr[i+2],
@@ -157,69 +78,96 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
157 } 78 }
158} 79}
159 80
160/* Check if we are ready to deliver the interrupt */ 81static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
161static int kvmppc_can_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt) 82 unsigned int priority)
162{ 83{
163 int r; 84 set_bit(priority, &vcpu->arch.pending_exceptions);
85}
164 86
165 switch (interrupt) { 87void kvmppc_core_queue_program(struct kvm_vcpu *vcpu)
166 case BOOKE_INTERRUPT_CRITICAL: 88{
167 r = vcpu->arch.msr & MSR_CE; 89 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
168 break; 90}
169 case BOOKE_INTERRUPT_MACHINE_CHECK: 91
170 r = vcpu->arch.msr & MSR_ME; 92void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
171 break; 93{
172 case BOOKE_INTERRUPT_EXTERNAL: 94 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER);
173 r = vcpu->arch.msr & MSR_EE; 95}
96
97int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
98{
99 return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
100}
101
102void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
103 struct kvm_interrupt *irq)
104{
105 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL);
106}
107
108/* Deliver the interrupt of the corresponding priority, if possible. */
109static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
110 unsigned int priority)
111{
112 int allowed = 0;
113 ulong msr_mask;
114
115 switch (priority) {
116 case BOOKE_IRQPRIO_PROGRAM:
117 case BOOKE_IRQPRIO_DTLB_MISS:
118 case BOOKE_IRQPRIO_ITLB_MISS:
119 case BOOKE_IRQPRIO_SYSCALL:
120 case BOOKE_IRQPRIO_DATA_STORAGE:
121 case BOOKE_IRQPRIO_INST_STORAGE:
122 case BOOKE_IRQPRIO_FP_UNAVAIL:
123 case BOOKE_IRQPRIO_AP_UNAVAIL:
124 case BOOKE_IRQPRIO_ALIGNMENT:
125 allowed = 1;
126 msr_mask = MSR_CE|MSR_ME|MSR_DE;
174 break; 127 break;
175 case BOOKE_INTERRUPT_DECREMENTER: 128 case BOOKE_IRQPRIO_CRITICAL:
176 r = vcpu->arch.msr & MSR_EE; 129 case BOOKE_IRQPRIO_WATCHDOG:
130 allowed = vcpu->arch.msr & MSR_CE;
131 msr_mask = MSR_ME;
177 break; 132 break;
178 case BOOKE_INTERRUPT_FIT: 133 case BOOKE_IRQPRIO_MACHINE_CHECK:
179 r = vcpu->arch.msr & MSR_EE; 134 allowed = vcpu->arch.msr & MSR_ME;
135 msr_mask = 0;
180 break; 136 break;
181 case BOOKE_INTERRUPT_WATCHDOG: 137 case BOOKE_IRQPRIO_EXTERNAL:
182 r = vcpu->arch.msr & MSR_CE; 138 case BOOKE_IRQPRIO_DECREMENTER:
139 case BOOKE_IRQPRIO_FIT:
140 allowed = vcpu->arch.msr & MSR_EE;
141 msr_mask = MSR_CE|MSR_ME|MSR_DE;
183 break; 142 break;
184 case BOOKE_INTERRUPT_DEBUG: 143 case BOOKE_IRQPRIO_DEBUG:
185 r = vcpu->arch.msr & MSR_DE; 144 allowed = vcpu->arch.msr & MSR_DE;
145 msr_mask = MSR_ME;
186 break; 146 break;
187 default:
188 r = 1;
189 } 147 }
190 148
191 return r; 149 if (allowed) {
192} 150 vcpu->arch.srr0 = vcpu->arch.pc;
151 vcpu->arch.srr1 = vcpu->arch.msr;
152 vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
153 kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask);
193 154
194static void kvmppc_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt) 155 clear_bit(priority, &vcpu->arch.pending_exceptions);
195{
196 switch (interrupt) {
197 case BOOKE_INTERRUPT_DECREMENTER:
198 vcpu->arch.tsr |= TSR_DIS;
199 break;
200 } 156 }
201 157
202 vcpu->arch.srr0 = vcpu->arch.pc; 158 return allowed;
203 vcpu->arch.srr1 = vcpu->arch.msr;
204 vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[interrupt];
205 kvmppc_set_msr(vcpu, vcpu->arch.msr & interrupt_msr_mask[interrupt]);
206} 159}
207 160
208/* Check pending exceptions and deliver one, if possible. */ 161/* Check pending exceptions and deliver one, if possible. */
209void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu) 162void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
210{ 163{
211 unsigned long *pending = &vcpu->arch.pending_exceptions; 164 unsigned long *pending = &vcpu->arch.pending_exceptions;
212 unsigned int exception;
213 unsigned int priority; 165 unsigned int priority;
214 166
215 priority = find_first_bit(pending, BITS_PER_BYTE * sizeof(*pending)); 167 priority = __ffs(*pending);
216 while (priority <= BOOKE_MAX_INTERRUPT) { 168 while (priority <= BOOKE_MAX_INTERRUPT) {
217 exception = priority_exception[priority]; 169 if (kvmppc_booke_irqprio_deliver(vcpu, priority))
218 if (kvmppc_can_deliver_interrupt(vcpu, exception)) {
219 kvmppc_clear_exception(vcpu, exception);
220 kvmppc_deliver_interrupt(vcpu, exception);
221 break; 170 break;
222 }
223 171
224 priority = find_next_bit(pending, 172 priority = find_next_bit(pending,
225 BITS_PER_BYTE * sizeof(*pending), 173 BITS_PER_BYTE * sizeof(*pending),
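
Because the BOOKE_IRQPRIO_* numbers are ordered so that a lower number means a higher delivery priority, the loop above only needs "find lowest set bit" on the pending bitmap plus a per-priority MSR gate. A stand-alone model of that pattern follows; it is simplified (only the decrementer is gated, ffs() stands in for __ffs()/find_next_bit(), and the values are illustrative), not the kernel code.

#include <stdio.h>
#include <strings.h>    /* ffs() */

#define IRQPRIO_PROGRAM      3
#define IRQPRIO_DECREMENTER 15
#define MAX_PRIO            15

#define MSR_EE 0x8000   /* external-interrupt enable */

static int deliver(unsigned int prio, unsigned int msr)
{
	/* The real code gates each priority on the MSR bit that enables that
	 * interrupt class; only the decrementer is modelled here. */
	if (prio == IRQPRIO_DECREMENTER && !(msr & MSR_EE))
		return 0;
	printf("delivering priority %u\n", prio);
	return 1;
}

int main(void)
{
	unsigned int pending = (1u << IRQPRIO_PROGRAM) | (1u << IRQPRIO_DECREMENTER);
	unsigned int msr = 0;   /* MSR[EE] clear: the decrementer must stay pending */
	int bit = ffs(pending);

	while (bit && bit - 1 <= MAX_PRIO) {
		unsigned int prio = bit - 1;

		if (deliver(prio, msr)) {
			pending &= ~(1u << prio);
			break;
		}
		/* not deliverable now; try the next set bit */
		bit = (pending >> bit) ? bit + ffs(pending >> bit) : 0;
	}
	printf("still pending: %#x\n", pending);
	return 0;
}
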
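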
@@ -238,6 +186,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
238 enum emulation_result er; 186 enum emulation_result er;
239 int r = RESUME_HOST; 187 int r = RESUME_HOST;
240 188
189 /* update before a new last_exit_type is rewritten */
190 kvmppc_update_timing_stats(vcpu);
191
241 local_irq_enable(); 192 local_irq_enable();
242 193
243 run->exit_reason = KVM_EXIT_UNKNOWN; 194 run->exit_reason = KVM_EXIT_UNKNOWN;
@@ -251,21 +202,19 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
251 break; 202 break;
252 203
253 case BOOKE_INTERRUPT_EXTERNAL: 204 case BOOKE_INTERRUPT_EXTERNAL:
205 kvmppc_account_exit(vcpu, EXT_INTR_EXITS);
206 if (need_resched())
207 cond_resched();
208 r = RESUME_GUEST;
209 break;
210
254 case BOOKE_INTERRUPT_DECREMENTER: 211 case BOOKE_INTERRUPT_DECREMENTER:
255 /* Since we switched IVPR back to the host's value, the host 212 /* Since we switched IVPR back to the host's value, the host
256 * handled this interrupt the moment we enabled interrupts. 213 * handled this interrupt the moment we enabled interrupts.
257 * Now we just offer it a chance to reschedule the guest. */ 214 * Now we just offer it a chance to reschedule the guest. */
258 215 kvmppc_account_exit(vcpu, DEC_EXITS);
259 /* XXX At this point the TLB still holds our shadow TLB, so if
260 * we do reschedule the host will fault over it. Perhaps we
261 * should politely restore the host's entries to minimize
262 * misses before ceding control. */
263 if (need_resched()) 216 if (need_resched())
264 cond_resched(); 217 cond_resched();
265 if (exit_nr == BOOKE_INTERRUPT_DECREMENTER)
266 vcpu->stat.dec_exits++;
267 else
268 vcpu->stat.ext_intr_exits++;
269 r = RESUME_GUEST; 218 r = RESUME_GUEST;
270 break; 219 break;
271 220
@@ -274,17 +223,19 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
274 /* Program traps generated by user-level software must be handled 223 /* Program traps generated by user-level software must be handled
275 * by the guest kernel. */ 224 * by the guest kernel. */
276 vcpu->arch.esr = vcpu->arch.fault_esr; 225 vcpu->arch.esr = vcpu->arch.fault_esr;
277 kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM); 226 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
278 r = RESUME_GUEST; 227 r = RESUME_GUEST;
228 kvmppc_account_exit(vcpu, USR_PR_INST);
279 break; 229 break;
280 } 230 }
281 231
282 er = kvmppc_emulate_instruction(run, vcpu); 232 er = kvmppc_emulate_instruction(run, vcpu);
283 switch (er) { 233 switch (er) {
284 case EMULATE_DONE: 234 case EMULATE_DONE:
235 /* don't overwrite subtypes, just account kvm_stats */
236 kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS);
285 /* Future optimization: only reload non-volatiles if 237 /* Future optimization: only reload non-volatiles if
286 * they were actually modified by emulation. */ 238 * they were actually modified by emulation. */
287 vcpu->stat.emulated_inst_exits++;
288 r = RESUME_GUEST_NV; 239 r = RESUME_GUEST_NV;
289 break; 240 break;
290 case EMULATE_DO_DCR: 241 case EMULATE_DO_DCR:
@@ -293,7 +244,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
293 break; 244 break;
294 case EMULATE_FAIL: 245 case EMULATE_FAIL:
295 /* XXX Deliver Program interrupt to guest. */ 246 /* XXX Deliver Program interrupt to guest. */
296 printk(KERN_CRIT "%s: emulation at %x failed (%08x)\n", 247 printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
297 __func__, vcpu->arch.pc, vcpu->arch.last_inst); 248 __func__, vcpu->arch.pc, vcpu->arch.last_inst);
298 /* For debugging, encode the failing instruction and 249 /* For debugging, encode the failing instruction and
299 * report it to userspace. */ 250 * report it to userspace. */
@@ -307,48 +258,53 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
307 break; 258 break;
308 259
309 case BOOKE_INTERRUPT_FP_UNAVAIL: 260 case BOOKE_INTERRUPT_FP_UNAVAIL:
310 kvmppc_queue_exception(vcpu, exit_nr); 261 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL);
262 kvmppc_account_exit(vcpu, FP_UNAVAIL);
311 r = RESUME_GUEST; 263 r = RESUME_GUEST;
312 break; 264 break;
313 265
314 case BOOKE_INTERRUPT_DATA_STORAGE: 266 case BOOKE_INTERRUPT_DATA_STORAGE:
315 vcpu->arch.dear = vcpu->arch.fault_dear; 267 vcpu->arch.dear = vcpu->arch.fault_dear;
316 vcpu->arch.esr = vcpu->arch.fault_esr; 268 vcpu->arch.esr = vcpu->arch.fault_esr;
317 kvmppc_queue_exception(vcpu, exit_nr); 269 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
318 vcpu->stat.dsi_exits++; 270 kvmppc_account_exit(vcpu, DSI_EXITS);
319 r = RESUME_GUEST; 271 r = RESUME_GUEST;
320 break; 272 break;
321 273
322 case BOOKE_INTERRUPT_INST_STORAGE: 274 case BOOKE_INTERRUPT_INST_STORAGE:
323 vcpu->arch.esr = vcpu->arch.fault_esr; 275 vcpu->arch.esr = vcpu->arch.fault_esr;
324 kvmppc_queue_exception(vcpu, exit_nr); 276 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
325 vcpu->stat.isi_exits++; 277 kvmppc_account_exit(vcpu, ISI_EXITS);
326 r = RESUME_GUEST; 278 r = RESUME_GUEST;
327 break; 279 break;
328 280
329 case BOOKE_INTERRUPT_SYSCALL: 281 case BOOKE_INTERRUPT_SYSCALL:
330 kvmppc_queue_exception(vcpu, exit_nr); 282 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL);
331 vcpu->stat.syscall_exits++; 283 kvmppc_account_exit(vcpu, SYSCALL_EXITS);
332 r = RESUME_GUEST; 284 r = RESUME_GUEST;
333 break; 285 break;
334 286
287 /* XXX move to a 440-specific file. */
335 case BOOKE_INTERRUPT_DTLB_MISS: { 288 case BOOKE_INTERRUPT_DTLB_MISS: {
336 struct tlbe *gtlbe; 289 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
290 struct kvmppc_44x_tlbe *gtlbe;
337 unsigned long eaddr = vcpu->arch.fault_dear; 291 unsigned long eaddr = vcpu->arch.fault_dear;
292 int gtlb_index;
338 gfn_t gfn; 293 gfn_t gfn;
339 294
340 /* Check the guest TLB. */ 295 /* Check the guest TLB. */
341 gtlbe = kvmppc_44x_dtlb_search(vcpu, eaddr); 296 gtlb_index = kvmppc_44x_dtlb_index(vcpu, eaddr);
342 if (!gtlbe) { 297 if (gtlb_index < 0) {
343 /* The guest didn't have a mapping for it. */ 298 /* The guest didn't have a mapping for it. */
344 kvmppc_queue_exception(vcpu, exit_nr); 299 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
345 vcpu->arch.dear = vcpu->arch.fault_dear; 300 vcpu->arch.dear = vcpu->arch.fault_dear;
346 vcpu->arch.esr = vcpu->arch.fault_esr; 301 vcpu->arch.esr = vcpu->arch.fault_esr;
347 vcpu->stat.dtlb_real_miss_exits++; 302 kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS);
348 r = RESUME_GUEST; 303 r = RESUME_GUEST;
349 break; 304 break;
350 } 305 }
351 306
307 gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
352 vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr); 308 vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr);
353 gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT; 309 gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT;
354 310
@@ -359,38 +315,45 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
359 * b) the guest used a large mapping which we're faking 315 * b) the guest used a large mapping which we're faking
360 * Either way, we need to satisfy the fault without 316 * Either way, we need to satisfy the fault without
361 * invoking the guest. */ 317 * invoking the guest. */
362 kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid, 318 kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid,
363 gtlbe->word2); 319 gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
364 vcpu->stat.dtlb_virt_miss_exits++; 320 kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS);
365 r = RESUME_GUEST; 321 r = RESUME_GUEST;
366 } else { 322 } else {
367 /* Guest has mapped and accessed a page which is not 323 /* Guest has mapped and accessed a page which is not
368 * actually RAM. */ 324 * actually RAM. */
369 r = kvmppc_emulate_mmio(run, vcpu); 325 r = kvmppc_emulate_mmio(run, vcpu);
326 kvmppc_account_exit(vcpu, MMIO_EXITS);
370 } 327 }
371 328
372 break; 329 break;
373 } 330 }
374 331
332 /* XXX move to a 440-specific file. */
375 case BOOKE_INTERRUPT_ITLB_MISS: { 333 case BOOKE_INTERRUPT_ITLB_MISS: {
376 struct tlbe *gtlbe; 334 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
335 struct kvmppc_44x_tlbe *gtlbe;
377 unsigned long eaddr = vcpu->arch.pc; 336 unsigned long eaddr = vcpu->arch.pc;
337 gpa_t gpaddr;
378 gfn_t gfn; 338 gfn_t gfn;
339 int gtlb_index;
379 340
380 r = RESUME_GUEST; 341 r = RESUME_GUEST;
381 342
382 /* Check the guest TLB. */ 343 /* Check the guest TLB. */
383 gtlbe = kvmppc_44x_itlb_search(vcpu, eaddr); 344 gtlb_index = kvmppc_44x_itlb_index(vcpu, eaddr);
384 if (!gtlbe) { 345 if (gtlb_index < 0) {
385 /* The guest didn't have a mapping for it. */ 346 /* The guest didn't have a mapping for it. */
386 kvmppc_queue_exception(vcpu, exit_nr); 347 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS);
387 vcpu->stat.itlb_real_miss_exits++; 348 kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS);
388 break; 349 break;
389 } 350 }
390 351
391 vcpu->stat.itlb_virt_miss_exits++; 352 kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS);
392 353
393 gfn = tlb_xlate(gtlbe, eaddr) >> PAGE_SHIFT; 354 gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
355 gpaddr = tlb_xlate(gtlbe, eaddr);
356 gfn = gpaddr >> PAGE_SHIFT;
394 357
395 if (kvm_is_visible_gfn(vcpu->kvm, gfn)) { 358 if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
396 /* The guest TLB had a mapping, but the shadow TLB 359 /* The guest TLB had a mapping, but the shadow TLB
@@ -399,12 +362,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
399 * b) the guest used a large mapping which we're faking 362 * b) the guest used a large mapping which we're faking
400 * Either way, we need to satisfy the fault without 363 * Either way, we need to satisfy the fault without
401 * invoking the guest. */ 364 * invoking the guest. */
402 kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid, 365 kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid,
403 gtlbe->word2); 366 gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
404 } else { 367 } else {
405 /* Guest mapped and leaped at non-RAM! */ 368 /* Guest mapped and leaped at non-RAM! */
406 kvmppc_queue_exception(vcpu, 369 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK);
407 BOOKE_INTERRUPT_MACHINE_CHECK);
408 } 370 }
409 371
410 break; 372 break;
@@ -421,6 +383,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
421 mtspr(SPRN_DBSR, dbsr); 383 mtspr(SPRN_DBSR, dbsr);
422 384
423 run->exit_reason = KVM_EXIT_DEBUG; 385 run->exit_reason = KVM_EXIT_DEBUG;
386 kvmppc_account_exit(vcpu, DEBUG_EXITS);
424 r = RESUME_HOST; 387 r = RESUME_HOST;
425 break; 388 break;
426 } 389 }
@@ -432,10 +395,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
432 395
433 local_irq_disable(); 396 local_irq_disable();
434 397
435 kvmppc_check_and_deliver_interrupts(vcpu); 398 kvmppc_core_deliver_interrupts(vcpu);
436 399
437 /* Do some exit accounting. */
438 vcpu->stat.sum_exits++;
439 if (!(r & RESUME_HOST)) { 400 if (!(r & RESUME_HOST)) {
440 /* To avoid clobbering exit_reason, only check for signals if 401 /* To avoid clobbering exit_reason, only check for signals if
441 * we aren't already exiting to userspace for some other 402 * we aren't already exiting to userspace for some other
@@ -443,22 +404,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
443 if (signal_pending(current)) { 404 if (signal_pending(current)) {
444 run->exit_reason = KVM_EXIT_INTR; 405 run->exit_reason = KVM_EXIT_INTR;
445 r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); 406 r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
446 407 kvmppc_account_exit(vcpu, SIGNAL_EXITS);
447 vcpu->stat.signal_exits++;
448 } else {
449 vcpu->stat.light_exits++;
450 }
451 } else {
452 switch (run->exit_reason) {
453 case KVM_EXIT_MMIO:
454 vcpu->stat.mmio_exits++;
455 break;
456 case KVM_EXIT_DCR:
457 vcpu->stat.dcr_exits++;
458 break;
459 case KVM_EXIT_INTR:
460 vcpu->stat.signal_exits++;
461 break;
462 } 408 }
463 } 409 }
464 410
@@ -468,20 +414,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
468/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ 414/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
469int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 415int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
470{ 416{
471 struct tlbe *tlbe = &vcpu->arch.guest_tlb[0];
472
473 tlbe->tid = 0;
474 tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
475 tlbe->word1 = 0;
476 tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR;
477
478 tlbe++;
479 tlbe->tid = 0;
480 tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID;
481 tlbe->word1 = 0xef600000;
482 tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR
483 | PPC44x_TLB_I | PPC44x_TLB_G;
484
485 vcpu->arch.pc = 0; 417 vcpu->arch.pc = 0;
486 vcpu->arch.msr = 0; 418 vcpu->arch.msr = 0;
487 vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */ 419 vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */
@@ -492,12 +424,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
492 * before it's programmed its own IVPR. */ 424 * before it's programmed its own IVPR. */
493 vcpu->arch.ivpr = 0x55550000; 425 vcpu->arch.ivpr = 0x55550000;
494 426
495 /* Since the guest can directly access the timebase, it must know the 427 kvmppc_init_timing_stats(vcpu);
496 * real timebase frequency. Accordingly, it must see the state of
497 * CCR1[TCS]. */
498 vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
499 428
500 return 0; 429 return kvmppc_core_vcpu_setup(vcpu);
501} 430}
502 431
503int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 432int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
@@ -536,7 +465,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
536 vcpu->arch.ctr = regs->ctr; 465 vcpu->arch.ctr = regs->ctr;
537 vcpu->arch.lr = regs->lr; 466 vcpu->arch.lr = regs->lr;
538 vcpu->arch.xer = regs->xer; 467 vcpu->arch.xer = regs->xer;
539 vcpu->arch.msr = regs->msr; 468 kvmppc_set_msr(vcpu, regs->msr);
540 vcpu->arch.srr0 = regs->srr0; 469 vcpu->arch.srr0 = regs->srr0;
541 vcpu->arch.srr1 = regs->srr1; 470 vcpu->arch.srr1 = regs->srr1;
542 vcpu->arch.sprg0 = regs->sprg0; 471 vcpu->arch.sprg0 = regs->sprg0;
@@ -575,31 +504,62 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
575 return -ENOTSUPP; 504 return -ENOTSUPP;
576} 505}
577 506
578/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
579int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 507int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
580 struct kvm_translation *tr) 508 struct kvm_translation *tr)
581{ 509{
582 struct tlbe *gtlbe; 510 return kvmppc_core_vcpu_translate(vcpu, tr);
583 int index; 511}
584 gva_t eaddr;
585 u8 pid;
586 u8 as;
587
588 eaddr = tr->linear_address;
589 pid = (tr->linear_address >> 32) & 0xff;
590 as = (tr->linear_address >> 40) & 0x1;
591
592 index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as);
593 if (index == -1) {
594 tr->valid = 0;
595 return 0;
596 }
597 512
598 gtlbe = &vcpu->arch.guest_tlb[index]; 513int kvmppc_booke_init(void)
514{
515 unsigned long ivor[16];
516 unsigned long max_ivor = 0;
517 int i;
599 518
600 tr->physical_address = tlb_xlate(gtlbe, eaddr); 519 /* We install our own exception handlers by hijacking IVPR. IVPR must
601 /* XXX what does "writeable" and "usermode" even mean? */ 520 * be 16-bit aligned, so we need a 64KB allocation. */
602 tr->valid = 1; 521 kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
522 VCPU_SIZE_ORDER);
523 if (!kvmppc_booke_handlers)
524 return -ENOMEM;
525
526 /* XXX make sure our handlers are smaller than Linux's */
527
528 /* Copy our interrupt handlers to match host IVORs. That way we don't
529 * have to swap the IVORs on every guest/host transition. */
530 ivor[0] = mfspr(SPRN_IVOR0);
531 ivor[1] = mfspr(SPRN_IVOR1);
532 ivor[2] = mfspr(SPRN_IVOR2);
533 ivor[3] = mfspr(SPRN_IVOR3);
534 ivor[4] = mfspr(SPRN_IVOR4);
535 ivor[5] = mfspr(SPRN_IVOR5);
536 ivor[6] = mfspr(SPRN_IVOR6);
537 ivor[7] = mfspr(SPRN_IVOR7);
538 ivor[8] = mfspr(SPRN_IVOR8);
539 ivor[9] = mfspr(SPRN_IVOR9);
540 ivor[10] = mfspr(SPRN_IVOR10);
541 ivor[11] = mfspr(SPRN_IVOR11);
542 ivor[12] = mfspr(SPRN_IVOR12);
543 ivor[13] = mfspr(SPRN_IVOR13);
544 ivor[14] = mfspr(SPRN_IVOR14);
545 ivor[15] = mfspr(SPRN_IVOR15);
546
547 for (i = 0; i < 16; i++) {
548 if (ivor[i] > max_ivor)
549 max_ivor = ivor[i];
550
551 memcpy((void *)kvmppc_booke_handlers + ivor[i],
552 kvmppc_handlers_start + i * kvmppc_handler_len,
553 kvmppc_handler_len);
554 }
555 flush_icache_range(kvmppc_booke_handlers,
556 kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
603 557
604 return 0; 558 return 0;
605} 559}
560
561void __exit kvmppc_booke_exit(void)
562{
563 free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
564 kvm_exit();
565}
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
new file mode 100644
index 000000000000..cf7c94ca24bf
--- /dev/null
+++ b/arch/powerpc/kvm/booke.h
@@ -0,0 +1,60 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#ifndef __KVM_BOOKE_H__
21#define __KVM_BOOKE_H__
22
23#include <linux/types.h>
24#include <linux/kvm_host.h>
25#include "timing.h"
26
 27/* interrupt priority ordering */
28#define BOOKE_IRQPRIO_DATA_STORAGE 0
29#define BOOKE_IRQPRIO_INST_STORAGE 1
30#define BOOKE_IRQPRIO_ALIGNMENT 2
31#define BOOKE_IRQPRIO_PROGRAM 3
32#define BOOKE_IRQPRIO_FP_UNAVAIL 4
33#define BOOKE_IRQPRIO_SYSCALL 5
34#define BOOKE_IRQPRIO_AP_UNAVAIL 6
35#define BOOKE_IRQPRIO_DTLB_MISS 7
36#define BOOKE_IRQPRIO_ITLB_MISS 8
37#define BOOKE_IRQPRIO_MACHINE_CHECK 9
38#define BOOKE_IRQPRIO_DEBUG 10
39#define BOOKE_IRQPRIO_CRITICAL 11
40#define BOOKE_IRQPRIO_WATCHDOG 12
41#define BOOKE_IRQPRIO_EXTERNAL 13
42#define BOOKE_IRQPRIO_FIT 14
43#define BOOKE_IRQPRIO_DECREMENTER 15
44
45/* Helper function for "full" MSR writes. No need to call this if only EE is
46 * changing. */
47static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
48{
49 if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR))
50 kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR);
51
52 vcpu->arch.msr = new_msr;
53
54 if (vcpu->arch.msr & MSR_WE) {
55 kvm_vcpu_block(vcpu);
56 kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
57 };
58}
59
60#endif /* __KVM_BOOKE_H__ */
diff --git a/arch/powerpc/kvm/booke_host.c b/arch/powerpc/kvm/booke_host.c
deleted file mode 100644
index b480341bc31e..000000000000
--- a/arch/powerpc/kvm/booke_host.c
+++ /dev/null
@@ -1,83 +0,0 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#include <linux/errno.h>
21#include <linux/kvm_host.h>
22#include <linux/module.h>
23#include <asm/cacheflush.h>
24#include <asm/kvm_ppc.h>
25
26unsigned long kvmppc_booke_handlers;
27
28static int kvmppc_booke_init(void)
29{
30 unsigned long ivor[16];
31 unsigned long max_ivor = 0;
32 int i;
33
34 /* We install our own exception handlers by hijacking IVPR. IVPR must
35 * be 16-bit aligned, so we need a 64KB allocation. */
36 kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
37 VCPU_SIZE_ORDER);
38 if (!kvmppc_booke_handlers)
39 return -ENOMEM;
40
41 /* XXX make sure our handlers are smaller than Linux's */
42
43 /* Copy our interrupt handlers to match host IVORs. That way we don't
44 * have to swap the IVORs on every guest/host transition. */
45 ivor[0] = mfspr(SPRN_IVOR0);
46 ivor[1] = mfspr(SPRN_IVOR1);
47 ivor[2] = mfspr(SPRN_IVOR2);
48 ivor[3] = mfspr(SPRN_IVOR3);
49 ivor[4] = mfspr(SPRN_IVOR4);
50 ivor[5] = mfspr(SPRN_IVOR5);
51 ivor[6] = mfspr(SPRN_IVOR6);
52 ivor[7] = mfspr(SPRN_IVOR7);
53 ivor[8] = mfspr(SPRN_IVOR8);
54 ivor[9] = mfspr(SPRN_IVOR9);
55 ivor[10] = mfspr(SPRN_IVOR10);
56 ivor[11] = mfspr(SPRN_IVOR11);
57 ivor[12] = mfspr(SPRN_IVOR12);
58 ivor[13] = mfspr(SPRN_IVOR13);
59 ivor[14] = mfspr(SPRN_IVOR14);
60 ivor[15] = mfspr(SPRN_IVOR15);
61
62 for (i = 0; i < 16; i++) {
63 if (ivor[i] > max_ivor)
64 max_ivor = ivor[i];
65
66 memcpy((void *)kvmppc_booke_handlers + ivor[i],
67 kvmppc_handlers_start + i * kvmppc_handler_len,
68 kvmppc_handler_len);
69 }
70 flush_icache_range(kvmppc_booke_handlers,
71 kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
72
73 return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
74}
75
76static void __exit kvmppc_booke_exit(void)
77{
78 free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
79 kvm_exit();
80}
81
82module_init(kvmppc_booke_init)
83module_exit(kvmppc_booke_exit)
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 95e165baf85f..084ebcd7dd83 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -107,6 +107,18 @@ _GLOBAL(kvmppc_resume_host)
107 li r6, 1 107 li r6, 1
108 slw r6, r6, r5 108 slw r6, r6, r5
109 109
110#ifdef CONFIG_KVM_EXIT_TIMING
111 /* save exit time */
1121:
113 mfspr r7, SPRN_TBRU
114 mfspr r8, SPRN_TBRL
115 mfspr r9, SPRN_TBRU
116 cmpw r9, r7
117 bne 1b
118 stw r8, VCPU_TIMING_EXIT_TBL(r4)
119 stw r9, VCPU_TIMING_EXIT_TBU(r4)
120#endif
121
110 /* Save the faulting instruction and all GPRs for emulation. */ 122 /* Save the faulting instruction and all GPRs for emulation. */
111 andi. r7, r6, NEED_INST_MASK 123 andi. r7, r6, NEED_INST_MASK
112 beq ..skip_inst_copy 124 beq ..skip_inst_copy
@@ -335,54 +347,6 @@ lightweight_exit:
335 lwz r3, VCPU_SHADOW_PID(r4) 347 lwz r3, VCPU_SHADOW_PID(r4)
336 mtspr SPRN_PID, r3 348 mtspr SPRN_PID, r3
337 349
338 /* Prevent all asynchronous TLB updates. */
339 mfmsr r5
340 lis r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@h
341 ori r6, r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l
342 andc r6, r5, r6
343 mtmsr r6
344
345 /* Load the guest mappings, leaving the host's "pinned" kernel mappings
346 * in place. */
347 mfspr r10, SPRN_MMUCR /* Save host MMUCR. */
348 li r5, PPC44x_TLB_SIZE
349 lis r5, tlb_44x_hwater@ha
350 lwz r5, tlb_44x_hwater@l(r5)
351 mtctr r5
352 addi r9, r4, VCPU_SHADOW_TLB
353 addi r5, r4, VCPU_SHADOW_MOD
354 li r3, 0
3551:
356 lbzx r7, r3, r5
357 cmpwi r7, 0
358 beq 3f
359
360 /* Load guest entry. */
361 mulli r11, r3, TLBE_BYTES
362 add r11, r11, r9
363 lwz r7, 0(r11)
364 mtspr SPRN_MMUCR, r7
365 lwz r7, 4(r11)
366 tlbwe r7, r3, PPC44x_TLB_PAGEID
367 lwz r7, 8(r11)
368 tlbwe r7, r3, PPC44x_TLB_XLAT
369 lwz r7, 12(r11)
370 tlbwe r7, r3, PPC44x_TLB_ATTRIB
3713:
372 addi r3, r3, 1 /* Increment index. */
373 bdnz 1b
374
375 mtspr SPRN_MMUCR, r10 /* Restore host MMUCR. */
376
377 /* Clear bitmap of modified TLB entries */
378 li r5, PPC44x_TLB_SIZE>>2
379 mtctr r5
380 addi r5, r4, VCPU_SHADOW_MOD - 4
381 li r6, 0
3821:
383 stwu r6, 4(r5)
384 bdnz 1b
385
386 iccci 0, 0 /* XXX hack */ 350 iccci 0, 0 /* XXX hack */
387 351
388 /* Load some guest volatiles. */ 352 /* Load some guest volatiles. */
@@ -423,6 +387,18 @@ lightweight_exit:
423 lwz r3, VCPU_SPRG7(r4) 387 lwz r3, VCPU_SPRG7(r4)
424 mtspr SPRN_SPRG7, r3 388 mtspr SPRN_SPRG7, r3
425 389
390#ifdef CONFIG_KVM_EXIT_TIMING
391 /* save enter time */
3921:
393 mfspr r6, SPRN_TBRU
394 mfspr r7, SPRN_TBRL
395 mfspr r8, SPRN_TBRU
396 cmpw r8, r6
397 bne 1b
398 stw r7, VCPU_TIMING_LAST_ENTER_TBL(r4)
399 stw r8, VCPU_TIMING_LAST_ENTER_TBU(r4)
400#endif
401
426 /* Finish loading guest volatiles and jump to guest. */ 402 /* Finish loading guest volatiles and jump to guest. */
427 lwz r3, VCPU_CTR(r4) 403 lwz r3, VCPU_CTR(r4)
428 mtctr r3 404 mtctr r3
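
The CONFIG_KVM_EXIT_TIMING stanzas added above capture the timebase with the usual TBU/TBL/TBU sequence: read the upper half, then the lower, then the upper again, and retry if the upper half changed, so a low-word wrap between the two reads cannot produce a torn 64-bit value. A C rendering of the same idiom, runnable anywhere because the mfspr accessors are replaced by a simulated counter:

#include <stdint.h>
#include <stdio.h>

/* Simulated 64-bit timebase so the sketch runs anywhere; the two
 * accessors stand in for mfspr SPRN_TBRU / SPRN_TBRL. */
static uint64_t fake_tb = 0xfffffffaull;        /* just before a low-word wrap */

static uint32_t read_tbu(void) { return (uint32_t)(fake_tb >> 32); }
static uint32_t read_tbl(void) { fake_tb += 7; return (uint32_t)fake_tb; }

/* The idiom from the assembly above: read TBU, then TBL, then TBU again,
 * and retry if the upper half changed (i.e. TBL wrapped mid-read). */
static uint64_t read_timebase64(void)
{
        uint32_t hi, lo, hi2;

        do {
                hi  = read_tbu();
                lo  = read_tbl();
                hi2 = read_tbu();
        } while (hi != hi2);

        return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
        printf("tb = 0x%016llx\n", (unsigned long long)read_timebase64());
        return 0;
}
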
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 0fce4fbdc20d..d1d38daa93fb 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -23,161 +23,14 @@
23#include <linux/string.h> 23#include <linux/string.h>
24#include <linux/kvm_host.h> 24#include <linux/kvm_host.h>
25 25
26#include <asm/dcr.h> 26#include <asm/reg.h>
27#include <asm/dcr-regs.h>
28#include <asm/time.h> 27#include <asm/time.h>
29#include <asm/byteorder.h> 28#include <asm/byteorder.h>
30#include <asm/kvm_ppc.h> 29#include <asm/kvm_ppc.h>
30#include <asm/disassemble.h>
31#include "timing.h"
31 32
32#include "44x_tlb.h" 33void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
33
34/* Instruction decoding */
35static inline unsigned int get_op(u32 inst)
36{
37 return inst >> 26;
38}
39
40static inline unsigned int get_xop(u32 inst)
41{
42 return (inst >> 1) & 0x3ff;
43}
44
45static inline unsigned int get_sprn(u32 inst)
46{
47 return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
48}
49
50static inline unsigned int get_dcrn(u32 inst)
51{
52 return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
53}
54
55static inline unsigned int get_rt(u32 inst)
56{
57 return (inst >> 21) & 0x1f;
58}
59
60static inline unsigned int get_rs(u32 inst)
61{
62 return (inst >> 21) & 0x1f;
63}
64
65static inline unsigned int get_ra(u32 inst)
66{
67 return (inst >> 16) & 0x1f;
68}
69
70static inline unsigned int get_rb(u32 inst)
71{
72 return (inst >> 11) & 0x1f;
73}
74
75static inline unsigned int get_rc(u32 inst)
76{
77 return inst & 0x1;
78}
79
80static inline unsigned int get_ws(u32 inst)
81{
82 return (inst >> 11) & 0x1f;
83}
84
85static inline unsigned int get_d(u32 inst)
86{
87 return inst & 0xffff;
88}
89
90static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
91 const struct tlbe *tlbe)
92{
93 gpa_t gpa;
94
95 if (!get_tlb_v(tlbe))
96 return 0;
97
98 /* Does it match current guest AS? */
99 /* XXX what about IS != DS? */
100 if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
101 return 0;
102
103 gpa = get_tlb_raddr(tlbe);
104 if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
105 /* Mapping is not for RAM. */
106 return 0;
107
108 return 1;
109}
110
111static int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u32 inst)
112{
113 u64 eaddr;
114 u64 raddr;
115 u64 asid;
116 u32 flags;
117 struct tlbe *tlbe;
118 unsigned int ra;
119 unsigned int rs;
120 unsigned int ws;
121 unsigned int index;
122
123 ra = get_ra(inst);
124 rs = get_rs(inst);
125 ws = get_ws(inst);
126
127 index = vcpu->arch.gpr[ra];
128 if (index > PPC44x_TLB_SIZE) {
129 printk("%s: index %d\n", __func__, index);
130 kvmppc_dump_vcpu(vcpu);
131 return EMULATE_FAIL;
132 }
133
134 tlbe = &vcpu->arch.guest_tlb[index];
135
136 /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
137 if (tlbe->word0 & PPC44x_TLB_VALID) {
138 eaddr = get_tlb_eaddr(tlbe);
139 asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
140 kvmppc_mmu_invalidate(vcpu, eaddr, get_tlb_end(tlbe), asid);
141 }
142
143 switch (ws) {
144 case PPC44x_TLB_PAGEID:
145 tlbe->tid = vcpu->arch.mmucr & 0xff;
146 tlbe->word0 = vcpu->arch.gpr[rs];
147 break;
148
149 case PPC44x_TLB_XLAT:
150 tlbe->word1 = vcpu->arch.gpr[rs];
151 break;
152
153 case PPC44x_TLB_ATTRIB:
154 tlbe->word2 = vcpu->arch.gpr[rs];
155 break;
156
157 default:
158 return EMULATE_FAIL;
159 }
160
161 if (tlbe_is_host_safe(vcpu, tlbe)) {
162 eaddr = get_tlb_eaddr(tlbe);
163 raddr = get_tlb_raddr(tlbe);
164 asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
165 flags = tlbe->word2 & 0xffff;
166
167 /* Create a 4KB mapping on the host. If the guest wanted a
168 * large page, only the first 4KB is mapped here and the rest
169 * are mapped on the fly. */
170 kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags);
171 }
172
173 KVMTRACE_5D(GTLB_WRITE, vcpu, index,
174 tlbe->tid, tlbe->word0, tlbe->word1, tlbe->word2,
175 handler);
176
177 return EMULATE_DONE;
178}
179
180static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
181{ 34{
182 if (vcpu->arch.tcr & TCR_DIE) { 35 if (vcpu->arch.tcr & TCR_DIE) {
183 /* The decrementer ticks at the same rate as the timebase, so 36 /* The decrementer ticks at the same rate as the timebase, so
@@ -193,12 +46,6 @@ static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
193 } 46 }
194} 47}
195 48
196static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
197{
198 vcpu->arch.pc = vcpu->arch.srr0;
199 kvmppc_set_msr(vcpu, vcpu->arch.srr1);
200}
201
202/* XXX to do: 49/* XXX to do:
203 * lhax 50 * lhax
204 * lhaux 51 * lhaux
@@ -213,40 +60,30 @@ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
213 * 60 *
214 * XXX is_bigendian should depend on MMU mapping or MSR[LE] 61 * XXX is_bigendian should depend on MMU mapping or MSR[LE]
215 */ 62 */
63/* XXX Should probably auto-generate instruction decoding for a particular core
64 * from opcode tables in the future. */
216int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 65int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
217{ 66{
218 u32 inst = vcpu->arch.last_inst; 67 u32 inst = vcpu->arch.last_inst;
219 u32 ea; 68 u32 ea;
220 int ra; 69 int ra;
221 int rb; 70 int rb;
222 int rc;
223 int rs; 71 int rs;
224 int rt; 72 int rt;
225 int sprn; 73 int sprn;
226 int dcrn;
227 enum emulation_result emulated = EMULATE_DONE; 74 enum emulation_result emulated = EMULATE_DONE;
228 int advance = 1; 75 int advance = 1;
229 76
 77 /* This default exit type may be overwritten below by a more specific subcategory. */
78 kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
79
230 switch (get_op(inst)) { 80 switch (get_op(inst)) {
231 case 3: /* trap */ 81 case 3: /* trap */
232 printk("trap!\n"); 82 vcpu->arch.esr |= ESR_PTR;
233 kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM); 83 kvmppc_core_queue_program(vcpu);
234 advance = 0; 84 advance = 0;
235 break; 85 break;
236 86
237 case 19:
238 switch (get_xop(inst)) {
239 case 50: /* rfi */
240 kvmppc_emul_rfi(vcpu);
241 advance = 0;
242 break;
243
244 default:
245 emulated = EMULATE_FAIL;
246 break;
247 }
248 break;
249
250 case 31: 87 case 31:
251 switch (get_xop(inst)) { 88 switch (get_xop(inst)) {
252 89
@@ -255,27 +92,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
255 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); 92 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
256 break; 93 break;
257 94
258 case 83: /* mfmsr */
259 rt = get_rt(inst);
260 vcpu->arch.gpr[rt] = vcpu->arch.msr;
261 break;
262
263 case 87: /* lbzx */ 95 case 87: /* lbzx */
264 rt = get_rt(inst); 96 rt = get_rt(inst);
265 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); 97 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
266 break; 98 break;
267 99
268 case 131: /* wrtee */
269 rs = get_rs(inst);
270 vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
271 | (vcpu->arch.gpr[rs] & MSR_EE);
272 break;
273
274 case 146: /* mtmsr */
275 rs = get_rs(inst);
276 kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
277 break;
278
279 case 151: /* stwx */ 100 case 151: /* stwx */
280 rs = get_rs(inst); 101 rs = get_rs(inst);
281 emulated = kvmppc_handle_store(run, vcpu, 102 emulated = kvmppc_handle_store(run, vcpu,
@@ -283,11 +104,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
283 4, 1); 104 4, 1);
284 break; 105 break;
285 106
286 case 163: /* wrteei */
287 vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
288 | (inst & MSR_EE);
289 break;
290
291 case 215: /* stbx */ 107 case 215: /* stbx */
292 rs = get_rs(inst); 108 rs = get_rs(inst);
293 emulated = kvmppc_handle_store(run, vcpu, 109 emulated = kvmppc_handle_store(run, vcpu,
@@ -328,42 +144,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
328 vcpu->arch.gpr[ra] = ea; 144 vcpu->arch.gpr[ra] = ea;
329 break; 145 break;
330 146
331 case 323: /* mfdcr */
332 dcrn = get_dcrn(inst);
333 rt = get_rt(inst);
334
335 /* The guest may access CPR0 registers to determine the timebase
336 * frequency, and it must know the real host frequency because it
337 * can directly access the timebase registers.
338 *
339 * It would be possible to emulate those accesses in userspace,
340 * but userspace can really only figure out the end frequency.
341 * We could decompose that into the factors that compute it, but
342 * that's tricky math, and it's easier to just report the real
343 * CPR0 values.
344 */
345 switch (dcrn) {
346 case DCRN_CPR0_CONFIG_ADDR:
347 vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
348 break;
349 case DCRN_CPR0_CONFIG_DATA:
350 local_irq_disable();
351 mtdcr(DCRN_CPR0_CONFIG_ADDR,
352 vcpu->arch.cpr0_cfgaddr);
353 vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
354 local_irq_enable();
355 break;
356 default:
357 run->dcr.dcrn = dcrn;
358 run->dcr.data = 0;
359 run->dcr.is_write = 0;
360 vcpu->arch.io_gpr = rt;
361 vcpu->arch.dcr_needed = 1;
362 emulated = EMULATE_DO_DCR;
363 }
364
365 break;
366
367 case 339: /* mfspr */ 147 case 339: /* mfspr */
368 sprn = get_sprn(inst); 148 sprn = get_sprn(inst);
369 rt = get_rt(inst); 149 rt = get_rt(inst);
@@ -373,26 +153,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
373 vcpu->arch.gpr[rt] = vcpu->arch.srr0; break; 153 vcpu->arch.gpr[rt] = vcpu->arch.srr0; break;
374 case SPRN_SRR1: 154 case SPRN_SRR1:
375 vcpu->arch.gpr[rt] = vcpu->arch.srr1; break; 155 vcpu->arch.gpr[rt] = vcpu->arch.srr1; break;
376 case SPRN_MMUCR:
377 vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
378 case SPRN_PID:
379 vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
380 case SPRN_IVPR:
381 vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
382 case SPRN_CCR0:
383 vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
384 case SPRN_CCR1:
385 vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
386 case SPRN_PVR: 156 case SPRN_PVR:
387 vcpu->arch.gpr[rt] = vcpu->arch.pvr; break; 157 vcpu->arch.gpr[rt] = vcpu->arch.pvr; break;
388 case SPRN_DEAR:
389 vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
390 case SPRN_ESR:
391 vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
392 case SPRN_DBCR0:
393 vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
394 case SPRN_DBCR1:
395 vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
396 158
397 /* Note: mftb and TBRL/TBWL are user-accessible, so 159 /* Note: mftb and TBRL/TBWL are user-accessible, so
398 * the guest can always access the real TB anyway. 160 * the guest can always access the real TB anyway.
@@ -413,42 +175,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
413 /* Note: SPRG4-7 are user-readable, so we don't get 175 /* Note: SPRG4-7 are user-readable, so we don't get
414 * a trap. */ 176 * a trap. */
415 177
416 case SPRN_IVOR0:
417 vcpu->arch.gpr[rt] = vcpu->arch.ivor[0]; break;
418 case SPRN_IVOR1:
419 vcpu->arch.gpr[rt] = vcpu->arch.ivor[1]; break;
420 case SPRN_IVOR2:
421 vcpu->arch.gpr[rt] = vcpu->arch.ivor[2]; break;
422 case SPRN_IVOR3:
423 vcpu->arch.gpr[rt] = vcpu->arch.ivor[3]; break;
424 case SPRN_IVOR4:
425 vcpu->arch.gpr[rt] = vcpu->arch.ivor[4]; break;
426 case SPRN_IVOR5:
427 vcpu->arch.gpr[rt] = vcpu->arch.ivor[5]; break;
428 case SPRN_IVOR6:
429 vcpu->arch.gpr[rt] = vcpu->arch.ivor[6]; break;
430 case SPRN_IVOR7:
431 vcpu->arch.gpr[rt] = vcpu->arch.ivor[7]; break;
432 case SPRN_IVOR8:
433 vcpu->arch.gpr[rt] = vcpu->arch.ivor[8]; break;
434 case SPRN_IVOR9:
435 vcpu->arch.gpr[rt] = vcpu->arch.ivor[9]; break;
436 case SPRN_IVOR10:
437 vcpu->arch.gpr[rt] = vcpu->arch.ivor[10]; break;
438 case SPRN_IVOR11:
439 vcpu->arch.gpr[rt] = vcpu->arch.ivor[11]; break;
440 case SPRN_IVOR12:
441 vcpu->arch.gpr[rt] = vcpu->arch.ivor[12]; break;
442 case SPRN_IVOR13:
443 vcpu->arch.gpr[rt] = vcpu->arch.ivor[13]; break;
444 case SPRN_IVOR14:
445 vcpu->arch.gpr[rt] = vcpu->arch.ivor[14]; break;
446 case SPRN_IVOR15:
447 vcpu->arch.gpr[rt] = vcpu->arch.ivor[15]; break;
448
449 default: 178 default:
450 printk("mfspr: unknown spr %x\n", sprn); 179 emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt);
451 vcpu->arch.gpr[rt] = 0; 180 if (emulated == EMULATE_FAIL) {
181 printk("mfspr: unknown spr %x\n", sprn);
182 vcpu->arch.gpr[rt] = 0;
183 }
452 break; 184 break;
453 } 185 }
454 break; 186 break;
@@ -478,25 +210,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
478 vcpu->arch.gpr[ra] = ea; 210 vcpu->arch.gpr[ra] = ea;
479 break; 211 break;
480 212
481 case 451: /* mtdcr */
482 dcrn = get_dcrn(inst);
483 rs = get_rs(inst);
484
485 /* emulate some access in kernel */
486 switch (dcrn) {
487 case DCRN_CPR0_CONFIG_ADDR:
488 vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
489 break;
490 default:
491 run->dcr.dcrn = dcrn;
492 run->dcr.data = vcpu->arch.gpr[rs];
493 run->dcr.is_write = 1;
494 vcpu->arch.dcr_needed = 1;
495 emulated = EMULATE_DO_DCR;
496 }
497
498 break;
499
500 case 467: /* mtspr */ 213 case 467: /* mtspr */
501 sprn = get_sprn(inst); 214 sprn = get_sprn(inst);
502 rs = get_rs(inst); 215 rs = get_rs(inst);
@@ -505,22 +218,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
505 vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break; 218 vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break;
506 case SPRN_SRR1: 219 case SPRN_SRR1:
507 vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break; 220 vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break;
508 case SPRN_MMUCR:
509 vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
510 case SPRN_PID:
511 kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
512 case SPRN_CCR0:
513 vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
514 case SPRN_CCR1:
515 vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
516 case SPRN_DEAR:
517 vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
518 case SPRN_ESR:
519 vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
520 case SPRN_DBCR0:
521 vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
522 case SPRN_DBCR1:
523 vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
524 221
525 /* XXX We need to context-switch the timebase for 222 /* XXX We need to context-switch the timebase for
526 * watchdog and FIT. */ 223 * watchdog and FIT. */
@@ -532,14 +229,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
532 kvmppc_emulate_dec(vcpu); 229 kvmppc_emulate_dec(vcpu);
533 break; 230 break;
534 231
535 case SPRN_TSR:
536 vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
537
538 case SPRN_TCR:
539 vcpu->arch.tcr = vcpu->arch.gpr[rs];
540 kvmppc_emulate_dec(vcpu);
541 break;
542
543 case SPRN_SPRG0: 232 case SPRN_SPRG0:
544 vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break; 233 vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break;
545 case SPRN_SPRG1: 234 case SPRN_SPRG1:
@@ -549,56 +238,10 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
549 case SPRN_SPRG3: 238 case SPRN_SPRG3:
550 vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break; 239 vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break;
551 240
552 /* Note: SPRG4-7 are user-readable. These values are
553 * loaded into the real SPRGs when resuming the
554 * guest. */
555 case SPRN_SPRG4:
556 vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
557 case SPRN_SPRG5:
558 vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
559 case SPRN_SPRG6:
560 vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
561 case SPRN_SPRG7:
562 vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
563
564 case SPRN_IVPR:
565 vcpu->arch.ivpr = vcpu->arch.gpr[rs]; break;
566 case SPRN_IVOR0:
567 vcpu->arch.ivor[0] = vcpu->arch.gpr[rs]; break;
568 case SPRN_IVOR1:
569 vcpu->arch.ivor[1] = vcpu->arch.gpr[rs]; break;
570 case SPRN_IVOR2:
571 vcpu->arch.ivor[2] = vcpu->arch.gpr[rs]; break;
572 case SPRN_IVOR3:
573 vcpu->arch.ivor[3] = vcpu->arch.gpr[rs]; break;
574 case SPRN_IVOR4:
575 vcpu->arch.ivor[4] = vcpu->arch.gpr[rs]; break;
576 case SPRN_IVOR5:
577 vcpu->arch.ivor[5] = vcpu->arch.gpr[rs]; break;
578 case SPRN_IVOR6:
579 vcpu->arch.ivor[6] = vcpu->arch.gpr[rs]; break;
580 case SPRN_IVOR7:
581 vcpu->arch.ivor[7] = vcpu->arch.gpr[rs]; break;
582 case SPRN_IVOR8:
583 vcpu->arch.ivor[8] = vcpu->arch.gpr[rs]; break;
584 case SPRN_IVOR9:
585 vcpu->arch.ivor[9] = vcpu->arch.gpr[rs]; break;
586 case SPRN_IVOR10:
587 vcpu->arch.ivor[10] = vcpu->arch.gpr[rs]; break;
588 case SPRN_IVOR11:
589 vcpu->arch.ivor[11] = vcpu->arch.gpr[rs]; break;
590 case SPRN_IVOR12:
591 vcpu->arch.ivor[12] = vcpu->arch.gpr[rs]; break;
592 case SPRN_IVOR13:
593 vcpu->arch.ivor[13] = vcpu->arch.gpr[rs]; break;
594 case SPRN_IVOR14:
595 vcpu->arch.ivor[14] = vcpu->arch.gpr[rs]; break;
596 case SPRN_IVOR15:
597 vcpu->arch.ivor[15] = vcpu->arch.gpr[rs]; break;
598
599 default: 241 default:
600 printk("mtspr: unknown spr %x\n", sprn); 242 emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs);
601 emulated = EMULATE_FAIL; 243 if (emulated == EMULATE_FAIL)
244 printk("mtspr: unknown spr %x\n", sprn);
602 break; 245 break;
603 } 246 }
604 break; 247 break;
@@ -629,36 +272,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
629 4, 0); 272 4, 0);
630 break; 273 break;
631 274
632 case 978: /* tlbwe */
633 emulated = kvmppc_emul_tlbwe(vcpu, inst);
634 break;
635
636 case 914: { /* tlbsx */
637 int index;
638 unsigned int as = get_mmucr_sts(vcpu);
639 unsigned int pid = get_mmucr_stid(vcpu);
640
641 rt = get_rt(inst);
642 ra = get_ra(inst);
643 rb = get_rb(inst);
644 rc = get_rc(inst);
645
646 ea = vcpu->arch.gpr[rb];
647 if (ra)
648 ea += vcpu->arch.gpr[ra];
649
650 index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
651 if (rc) {
652 if (index < 0)
653 vcpu->arch.cr &= ~0x20000000;
654 else
655 vcpu->arch.cr |= 0x20000000;
656 }
657 vcpu->arch.gpr[rt] = index;
658
659 }
660 break;
661
662 case 790: /* lhbrx */ 275 case 790: /* lhbrx */
663 rt = get_rt(inst); 276 rt = get_rt(inst);
664 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); 277 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
@@ -674,14 +287,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
674 2, 0); 287 2, 0);
675 break; 288 break;
676 289
677 case 966: /* iccci */
678 break;
679
680 default: 290 default:
681 printk("unknown: op %d xop %d\n", get_op(inst), 291 /* Attempt core-specific emulation below. */
682 get_xop(inst));
683 emulated = EMULATE_FAIL; 292 emulated = EMULATE_FAIL;
684 break;
685 } 293 }
686 break; 294 break;
687 295
@@ -764,12 +372,19 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
764 break; 372 break;
765 373
766 default: 374 default:
767 printk("unknown op %d\n", get_op(inst));
768 emulated = EMULATE_FAIL; 375 emulated = EMULATE_FAIL;
769 break;
770 } 376 }
771 377
772 KVMTRACE_3D(PPC_INSTR, vcpu, inst, vcpu->arch.pc, emulated, entryexit); 378 if (emulated == EMULATE_FAIL) {
379 emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance);
380 if (emulated == EMULATE_FAIL) {
381 advance = 0;
382 printk(KERN_ERR "Couldn't emulate instruction 0x%08x "
383 "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst));
384 }
385 }
386
387 KVMTRACE_3D(PPC_INSTR, vcpu, inst, (int)vcpu->arch.pc, emulated, entryexit);
773 388
774 if (advance) 389 if (advance)
775 vcpu->arch.pc += 4; /* Advance past emulated instruction. */ 390 vcpu->arch.pc += 4; /* Advance past emulated instruction. */
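
After this rewrite, emulate.c only decodes the instruction fields it needs (now via asm/disassemble.h) and handles the opcodes every supported core shares; anything it does not recognize is passed to kvmppc_core_emulate_op(), and unknown SPRs go to the core's mfspr/mtspr hooks. The sketch below mirrors that decode-then-fall-back shape in plain C; the handler bodies and the core callback are invented for illustration, and only the opcode bit positions follow the PowerPC encoding.

#include <stdint.h>
#include <stdio.h>

enum emu_result { EMULATE_DONE, EMULATE_FAIL };

static inline unsigned int get_op(uint32_t inst)  { return inst >> 26; }
static inline unsigned int get_xop(uint32_t inst) { return (inst >> 1) & 0x3ff; }

/* Stand-in for a core-specific hook: a 440-style core would handle
 * tlbwe/tlbsx/mfdcr and friends here. */
static enum emu_result core_emulate_op(uint32_t inst)
{
        printf("core hook: op %u xop %u\n", get_op(inst), get_xop(inst));
        return EMULATE_DONE;
}

static enum emu_result emulate(uint32_t inst)
{
        enum emu_result r = EMULATE_FAIL;

        switch (get_op(inst)) {
        case 31:
                switch (get_xop(inst)) {
                case 87:        /* lbzx: generic MMIO load */
                        r = EMULATE_DONE;
                        break;
                default:        /* unknown to the generic layer */
                        r = EMULATE_FAIL;
                }
                break;
        default:
                r = EMULATE_FAIL;
        }

        if (r == EMULATE_FAIL)  /* let the core try before giving up */
                r = core_emulate_op(inst);
        return r;
}

int main(void)
{
        emulate((31u << 26) | (978u << 1)); /* a tlbwe-shaped word */
        return 0;
}

The point of the ordering is that the generic layer never has to know which extended opcodes a particular core implements; it simply reports failure and lets the core have the last word.
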
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 8bef0efcdfe1..2822c8ccfaaf 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -28,9 +28,9 @@
28#include <asm/uaccess.h> 28#include <asm/uaccess.h>
29#include <asm/kvm_ppc.h> 29#include <asm/kvm_ppc.h>
30#include <asm/tlbflush.h> 30#include <asm/tlbflush.h>
31#include "timing.h"
31#include "../mm/mmu_decl.h" 32#include "../mm/mmu_decl.h"
32 33
33
34gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) 34gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
35{ 35{
36 return gfn; 36 return gfn;
@@ -99,14 +99,7 @@ void kvm_arch_hardware_unsetup(void)
99 99
100void kvm_arch_check_processor_compat(void *rtn) 100void kvm_arch_check_processor_compat(void *rtn)
101{ 101{
102 int r; 102 *(int *)rtn = kvmppc_core_check_processor_compat();
103
104 if (strcmp(cur_cpu_spec->platform, "ppc440") == 0)
105 r = 0;
106 else
107 r = -ENOTSUPP;
108
109 *(int *)rtn = r;
110} 103}
111 104
112struct kvm *kvm_arch_create_vm(void) 105struct kvm *kvm_arch_create_vm(void)
@@ -144,9 +137,6 @@ int kvm_dev_ioctl_check_extension(long ext)
144 int r; 137 int r;
145 138
146 switch (ext) { 139 switch (ext) {
147 case KVM_CAP_USER_MEMORY:
148 r = 1;
149 break;
150 case KVM_CAP_COALESCED_MMIO: 140 case KVM_CAP_COALESCED_MMIO:
151 r = KVM_COALESCED_MMIO_PAGE_OFFSET; 141 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
152 break; 142 break;
@@ -179,30 +169,15 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
179struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) 169struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
180{ 170{
181 struct kvm_vcpu *vcpu; 171 struct kvm_vcpu *vcpu;
182 int err; 172 vcpu = kvmppc_core_vcpu_create(kvm, id);
183 173 kvmppc_create_vcpu_debugfs(vcpu, id);
184 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
185 if (!vcpu) {
186 err = -ENOMEM;
187 goto out;
188 }
189
190 err = kvm_vcpu_init(vcpu, kvm, id);
191 if (err)
192 goto free_vcpu;
193
194 return vcpu; 174 return vcpu;
195
196free_vcpu:
197 kmem_cache_free(kvm_vcpu_cache, vcpu);
198out:
199 return ERR_PTR(err);
200} 175}
201 176
202void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) 177void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
203{ 178{
204 kvm_vcpu_uninit(vcpu); 179 kvmppc_remove_vcpu_debugfs(vcpu);
205 kmem_cache_free(kvm_vcpu_cache, vcpu); 180 kvmppc_core_vcpu_free(vcpu);
206} 181}
207 182
208void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 183void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -212,16 +187,14 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
212 187
213int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) 188int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
214{ 189{
215 unsigned int priority = exception_priority[BOOKE_INTERRUPT_DECREMENTER]; 190 return kvmppc_core_pending_dec(vcpu);
216
217 return test_bit(priority, &vcpu->arch.pending_exceptions);
218} 191}
219 192
220static void kvmppc_decrementer_func(unsigned long data) 193static void kvmppc_decrementer_func(unsigned long data)
221{ 194{
222 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; 195 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
223 196
224 kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER); 197 kvmppc_core_queue_dec(vcpu);
225 198
226 if (waitqueue_active(&vcpu->wq)) { 199 if (waitqueue_active(&vcpu->wq)) {
227 wake_up_interruptible(&vcpu->wq); 200 wake_up_interruptible(&vcpu->wq);
@@ -242,96 +215,25 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
242 kvmppc_core_destroy_mmu(vcpu); 215 kvmppc_core_destroy_mmu(vcpu);
243} 216}
244 217
245/* Note: clearing MSR[DE] just means that the debug interrupt will not be
246 * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits.
247 * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt
248 * will be delivered as an "imprecise debug event" (which is indicated by
249 * DBSR[IDE].
250 */
251static void kvmppc_disable_debug_interrupts(void)
252{
253 mtmsr(mfmsr() & ~MSR_DE);
254}
255
256static void kvmppc_restore_host_debug_state(struct kvm_vcpu *vcpu)
257{
258 kvmppc_disable_debug_interrupts();
259
260 mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]);
261 mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]);
262 mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]);
263 mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]);
264 mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1);
265 mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2);
266 mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0);
267 mtmsr(vcpu->arch.host_msr);
268}
269
270static void kvmppc_load_guest_debug_registers(struct kvm_vcpu *vcpu)
271{
272 struct kvm_guest_debug *dbg = &vcpu->guest_debug;
273 u32 dbcr0 = 0;
274
275 vcpu->arch.host_msr = mfmsr();
276 kvmppc_disable_debug_interrupts();
277
278 /* Save host debug register state. */
279 vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1);
280 vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2);
281 vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3);
282 vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4);
283 vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0);
284 vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1);
285 vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2);
286
287 /* set registers up for guest */
288
289 if (dbg->bp[0]) {
290 mtspr(SPRN_IAC1, dbg->bp[0]);
291 dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
292 }
293 if (dbg->bp[1]) {
294 mtspr(SPRN_IAC2, dbg->bp[1]);
295 dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
296 }
297 if (dbg->bp[2]) {
298 mtspr(SPRN_IAC3, dbg->bp[2]);
299 dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
300 }
301 if (dbg->bp[3]) {
302 mtspr(SPRN_IAC4, dbg->bp[3]);
303 dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
304 }
305
306 mtspr(SPRN_DBCR0, dbcr0);
307 mtspr(SPRN_DBCR1, 0);
308 mtspr(SPRN_DBCR2, 0);
309}
310
311void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 218void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
312{ 219{
313 int i;
314
315 if (vcpu->guest_debug.enabled) 220 if (vcpu->guest_debug.enabled)
316 kvmppc_load_guest_debug_registers(vcpu); 221 kvmppc_core_load_guest_debugstate(vcpu);
317 222
318 /* Mark every guest entry in the shadow TLB entry modified, so that they 223 kvmppc_core_vcpu_load(vcpu, cpu);
319 * will all be reloaded on the next vcpu run (instead of being
320 * demand-faulted). */
321 for (i = 0; i <= tlb_44x_hwater; i++)
322 kvmppc_tlbe_set_modified(vcpu, i);
323} 224}
324 225
325void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 226void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
326{ 227{
327 if (vcpu->guest_debug.enabled) 228 if (vcpu->guest_debug.enabled)
328 kvmppc_restore_host_debug_state(vcpu); 229 kvmppc_core_load_host_debugstate(vcpu);
329 230
330 /* Don't leave guest TLB entries resident when being de-scheduled. */ 231 /* Don't leave guest TLB entries resident when being de-scheduled. */
331 /* XXX It would be nice to differentiate between heavyweight exit and 232 /* XXX It would be nice to differentiate between heavyweight exit and
332 * sched_out here, since we could avoid the TLB flush for heavyweight 233 * sched_out here, since we could avoid the TLB flush for heavyweight
333 * exits. */ 234 * exits. */
334 _tlbil_all(); 235 _tlbil_all();
236 kvmppc_core_vcpu_put(vcpu);
335} 237}
336 238
337int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, 239int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
@@ -355,14 +257,14 @@ int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
355static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, 257static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
356 struct kvm_run *run) 258 struct kvm_run *run)
357{ 259{
358 u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; 260 ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
359 *gpr = run->dcr.data; 261 *gpr = run->dcr.data;
360} 262}
361 263
362static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, 264static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
363 struct kvm_run *run) 265 struct kvm_run *run)
364{ 266{
365 u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; 267 ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
366 268
367 if (run->mmio.len > sizeof(*gpr)) { 269 if (run->mmio.len > sizeof(*gpr)) {
368 printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); 270 printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len);
@@ -460,7 +362,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
460 vcpu->arch.dcr_needed = 0; 362 vcpu->arch.dcr_needed = 0;
461 } 363 }
462 364
463 kvmppc_check_and_deliver_interrupts(vcpu); 365 kvmppc_core_deliver_interrupts(vcpu);
464 366
465 local_irq_disable(); 367 local_irq_disable();
466 kvm_guest_enter(); 368 kvm_guest_enter();
@@ -478,7 +380,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
478 380
479int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) 381int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
480{ 382{
481 kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL); 383 kvmppc_core_queue_external(vcpu, irq);
482 384
483 if (waitqueue_active(&vcpu->wq)) { 385 if (waitqueue_active(&vcpu->wq)) {
484 wake_up_interruptible(&vcpu->wq); 386 wake_up_interruptible(&vcpu->wq);
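
Most of the changes to powerpc.c follow one pattern: core-specific logic (the 440 compatibility check, shadow-TLB handling, debug register juggling) moves behind kvmppc_core_*() entry points that each core provides in its own translation unit selected by Kconfig/Makefile. A toy model of that layering, with made-up names rather than the real kvmppc_core_* API:

#include <stdio.h>

struct vcpu { int pending_dec; };

/* Declarations the generic layer compiles against. */
int  core_check_processor_compat(void);
void core_queue_dec(struct vcpu *v);
int  core_pending_dec(struct vcpu *v);

/* Generic code: no 440-specific knowledge left here. */
static int cpu_has_pending_timer(struct vcpu *v)
{
        return core_pending_dec(v);
}

/* A "44x-like" core implementation, normally built from its own file. */
int  core_check_processor_compat(void) { return 0; }
void core_queue_dec(struct vcpu *v)    { v->pending_dec = 1; }
int  core_pending_dec(struct vcpu *v)  { return v->pending_dec; }

int main(void)
{
        struct vcpu v = { 0 };

        if (core_check_processor_compat() == 0) {
                core_queue_dec(&v);
                printf("pending timer: %d\n", cpu_has_pending_timer(&v));
        }
        return 0;
}
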
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
new file mode 100644
index 000000000000..47ee603f558e
--- /dev/null
+++ b/arch/powerpc/kvm/timing.c
@@ -0,0 +1,239 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
19 */
20
21#include <linux/kvm_host.h>
22#include <linux/fs.h>
23#include <linux/seq_file.h>
24#include <linux/debugfs.h>
25#include <linux/uaccess.h>
26
27#include <asm/time.h>
28#include <asm-generic/div64.h>
29
30#include "timing.h"
31
32void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu)
33{
34 int i;
35
36 /* pause guest execution to avoid concurrent updates */
37 local_irq_disable();
38 mutex_lock(&vcpu->mutex);
39
40 vcpu->arch.last_exit_type = 0xDEAD;
41 for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
42 vcpu->arch.timing_count_type[i] = 0;
43 vcpu->arch.timing_max_duration[i] = 0;
44 vcpu->arch.timing_min_duration[i] = 0xFFFFFFFF;
45 vcpu->arch.timing_sum_duration[i] = 0;
46 vcpu->arch.timing_sum_quad_duration[i] = 0;
47 }
48 vcpu->arch.timing_last_exit = 0;
49 vcpu->arch.timing_exit.tv64 = 0;
50 vcpu->arch.timing_last_enter.tv64 = 0;
51
52 mutex_unlock(&vcpu->mutex);
53 local_irq_enable();
54}
55
56static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
57{
58 u64 old;
59
60 do_div(duration, tb_ticks_per_usec);
61 if (unlikely(duration > 0xFFFFFFFF)) {
62 printk(KERN_ERR"%s - duration too big -> overflow"
63 " duration %lld type %d exit #%d\n",
64 __func__, duration, type,
65 vcpu->arch.timing_count_type[type]);
66 return;
67 }
68
69 vcpu->arch.timing_count_type[type]++;
70
71 /* sum */
72 old = vcpu->arch.timing_sum_duration[type];
73 vcpu->arch.timing_sum_duration[type] += duration;
74 if (unlikely(old > vcpu->arch.timing_sum_duration[type])) {
75 printk(KERN_ERR"%s - wrap adding sum of durations"
76 " old %lld new %lld type %d exit # of type %d\n",
77 __func__, old, vcpu->arch.timing_sum_duration[type],
78 type, vcpu->arch.timing_count_type[type]);
79 }
80
81 /* square sum */
82 old = vcpu->arch.timing_sum_quad_duration[type];
83 vcpu->arch.timing_sum_quad_duration[type] += (duration*duration);
84 if (unlikely(old > vcpu->arch.timing_sum_quad_duration[type])) {
85 printk(KERN_ERR"%s - wrap adding sum of squared durations"
86 " old %lld new %lld type %d exit # of type %d\n",
87 __func__, old,
88 vcpu->arch.timing_sum_quad_duration[type],
89 type, vcpu->arch.timing_count_type[type]);
90 }
91
92 /* set min/max */
93 if (unlikely(duration < vcpu->arch.timing_min_duration[type]))
94 vcpu->arch.timing_min_duration[type] = duration;
95 if (unlikely(duration > vcpu->arch.timing_max_duration[type]))
96 vcpu->arch.timing_max_duration[type] = duration;
97}
98
99void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu)
100{
101 u64 exit = vcpu->arch.timing_last_exit;
102 u64 enter = vcpu->arch.timing_last_enter.tv64;
103
104 /* Save the exit time; it is used on the next exit, once the re-enter time is known. */
105 vcpu->arch.timing_last_exit = vcpu->arch.timing_exit.tv64;
106
107 if (unlikely(vcpu->arch.last_exit_type == 0xDEAD || exit == 0))
108 return; /* skip incomplete cycle (e.g. after reset) */
109
110 /* update statistics for average and standard deviation */
111 add_exit_timing(vcpu, (enter - exit), vcpu->arch.last_exit_type);
112 /* enter -> timing_last_exit is time spent in guest - log this too */
113 add_exit_timing(vcpu, (vcpu->arch.timing_last_exit - enter),
114 TIMEINGUEST);
115}
116
117static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = {
118 [MMIO_EXITS] = "MMIO",
119 [DCR_EXITS] = "DCR",
120 [SIGNAL_EXITS] = "SIGNAL",
121 [ITLB_REAL_MISS_EXITS] = "ITLBREAL",
122 [ITLB_VIRT_MISS_EXITS] = "ITLBVIRT",
123 [DTLB_REAL_MISS_EXITS] = "DTLBREAL",
124 [DTLB_VIRT_MISS_EXITS] = "DTLBVIRT",
125 [SYSCALL_EXITS] = "SYSCALL",
126 [ISI_EXITS] = "ISI",
127 [DSI_EXITS] = "DSI",
128 [EMULATED_INST_EXITS] = "EMULINST",
129 [EMULATED_MTMSRWE_EXITS] = "EMUL_WAIT",
130 [EMULATED_WRTEE_EXITS] = "EMUL_WRTEE",
131 [EMULATED_MTSPR_EXITS] = "EMUL_MTSPR",
132 [EMULATED_MFSPR_EXITS] = "EMUL_MFSPR",
133 [EMULATED_MTMSR_EXITS] = "EMUL_MTMSR",
134 [EMULATED_MFMSR_EXITS] = "EMUL_MFMSR",
135 [EMULATED_TLBSX_EXITS] = "EMUL_TLBSX",
136 [EMULATED_TLBWE_EXITS] = "EMUL_TLBWE",
137 [EMULATED_RFI_EXITS] = "EMUL_RFI",
138 [DEC_EXITS] = "DEC",
139 [EXT_INTR_EXITS] = "EXTINT",
140 [HALT_WAKEUP] = "HALT",
141 [USR_PR_INST] = "USR_PR_INST",
142 [FP_UNAVAIL] = "FP_UNAVAIL",
143 [DEBUG_EXITS] = "DEBUG",
144 [TIMEINGUEST] = "TIMEINGUEST"
145};
146
147static int kvmppc_exit_timing_show(struct seq_file *m, void *private)
148{
149 struct kvm_vcpu *vcpu = m->private;
150 int i;
151
152 seq_printf(m, "%s", "type count min max sum sum_squared\n");
153
154 for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
155 seq_printf(m, "%12s %10d %10lld %10lld %20lld %20lld\n",
156 kvm_exit_names[i],
157 vcpu->arch.timing_count_type[i],
158 vcpu->arch.timing_min_duration[i],
159 vcpu->arch.timing_max_duration[i],
160 vcpu->arch.timing_sum_duration[i],
161 vcpu->arch.timing_sum_quad_duration[i]);
162 }
163 return 0;
164}
165
166/* Write 'c' to clear the timing statistics. */
167static ssize_t kvmppc_exit_timing_write(struct file *file,
168 const char __user *user_buf,
169 size_t count, loff_t *ppos)
170{
171 int err = -EINVAL;
172 char c;
173
174 if (count > 1) {
175 goto done;
176 }
177
178 if (get_user(c, user_buf)) {
179 err = -EFAULT;
180 goto done;
181 }
182
183 if (c == 'c') {
184 struct seq_file *seqf = (struct seq_file *)file->private_data;
185 struct kvm_vcpu *vcpu = seqf->private;
186 /* Writing does not affect the buffers previously generated by
187 * show. The seq_file is locked here so that re-initialization cannot
188 * race with a concurrent show call. */
189 mutex_lock(&seqf->lock);
190 kvmppc_init_timing_stats(vcpu);
191 mutex_unlock(&seqf->lock);
192 err = count;
193 }
194
195done:
196 return err;
197}
198
199static int kvmppc_exit_timing_open(struct inode *inode, struct file *file)
200{
201 return single_open(file, kvmppc_exit_timing_show, inode->i_private);
202}
203
204static struct file_operations kvmppc_exit_timing_fops = {
205 .owner = THIS_MODULE,
206 .open = kvmppc_exit_timing_open,
207 .read = seq_read,
208 .write = kvmppc_exit_timing_write,
209 .llseek = seq_lseek,
210 .release = single_release,
211};
212
213void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id)
214{
215 static char dbg_fname[50];
216 struct dentry *debugfs_file;
217
218 snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing",
219 current->pid, id);
220 debugfs_file = debugfs_create_file(dbg_fname, 0666,
221 kvm_debugfs_dir, vcpu,
222 &kvmppc_exit_timing_fops);
223
224 if (!debugfs_file) {
225 printk(KERN_ERR"%s: error creating debugfs file %s\n",
226 __func__, dbg_fname);
227 return;
228 }
229
230 vcpu->arch.debugfs_exit_timing = debugfs_file;
231}
232
233void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu)
234{
235 if (vcpu->arch.debugfs_exit_timing) {
236 debugfs_remove(vcpu->arch.debugfs_exit_timing);
237 vcpu->arch.debugfs_exit_timing = NULL;
238 }
239}
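
The debugfs file written above exposes, per exit type, the count together with min, max, sum and sum of squared durations, which is exactly enough to derive the mean and standard deviation afterwards (variance = sum_sq/count - mean^2). A small user-space post-processing sketch with invented numbers (link with -lm):

#include <math.h>
#include <stdio.h>

static void summarize(const char *name, unsigned long long count,
                      unsigned long long sum, unsigned long long sum_sq)
{
        double mean, var;

        if (!count)
                return;
        mean = (double)sum / (double)count;
        var  = (double)sum_sq / (double)count - mean * mean;
        printf("%-12s mean %.1f us  stddev %.1f us\n",
               name, mean, var > 0.0 ? sqrt(var) : 0.0);
}

int main(void)
{
        /* numbers invented for illustration */
        summarize("MMIO", 120, 8400, 640000);
        return 0;
}
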
diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
new file mode 100644
index 000000000000..bb13b1f3cd5a
--- /dev/null
+++ b/arch/powerpc/kvm/timing.h
@@ -0,0 +1,102 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
18 */
19
20#ifndef __POWERPC_KVM_EXITTIMING_H__
21#define __POWERPC_KVM_EXITTIMING_H__
22
23#include <linux/kvm_host.h>
24#include <asm/kvm_host.h>
25
26#ifdef CONFIG_KVM_EXIT_TIMING
27void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu);
28void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu);
29void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id);
30void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu);
31
32static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type)
33{
34 vcpu->arch.last_exit_type = type;
35}
36
37#else
38/* If exit timing is not configured, there is no need to build the C file. */
39static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {}
40static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {}
41static inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
42 unsigned int id) {}
43static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
44static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {}
45#endif /* CONFIG_KVM_EXIT_TIMING */
46
47/* account the exit in kvm_stats */
48static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type)
49{
50 /* type has to be known at build time for optimization */
51 BUILD_BUG_ON(__builtin_constant_p(type));
52 switch (type) {
53 case EXT_INTR_EXITS:
54 vcpu->stat.ext_intr_exits++;
55 break;
56 case DEC_EXITS:
57 vcpu->stat.dec_exits++;
58 break;
59 case EMULATED_INST_EXITS:
60 vcpu->stat.emulated_inst_exits++;
61 break;
62 case DCR_EXITS:
63 vcpu->stat.dcr_exits++;
64 break;
65 case DSI_EXITS:
66 vcpu->stat.dsi_exits++;
67 break;
68 case ISI_EXITS:
69 vcpu->stat.isi_exits++;
70 break;
71 case SYSCALL_EXITS:
72 vcpu->stat.syscall_exits++;
73 break;
74 case DTLB_REAL_MISS_EXITS:
75 vcpu->stat.dtlb_real_miss_exits++;
76 break;
77 case DTLB_VIRT_MISS_EXITS:
78 vcpu->stat.dtlb_virt_miss_exits++;
79 break;
80 case MMIO_EXITS:
81 vcpu->stat.mmio_exits++;
82 break;
83 case ITLB_REAL_MISS_EXITS:
84 vcpu->stat.itlb_real_miss_exits++;
85 break;
86 case ITLB_VIRT_MISS_EXITS:
87 vcpu->stat.itlb_virt_miss_exits++;
88 break;
89 case SIGNAL_EXITS:
90 vcpu->stat.signal_exits++;
91 break;
92 }
93}
94
95/* wrapper to set exit time and account for it in kvm_stats */
96static inline void kvmppc_account_exit(struct kvm_vcpu *vcpu, int type)
97{
98 kvmppc_set_exit_type(vcpu, type);
99 kvmppc_account_exit_stat(vcpu, type);
100}
101
102#endif /* __POWERPC_KVM_EXITTIMING_H__ */
diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c
index 2949126d28d1..6b793aeda72e 100644
--- a/arch/powerpc/oprofile/cell/spu_task_sync.c
+++ b/arch/powerpc/oprofile/cell/spu_task_sync.c
@@ -297,7 +297,7 @@ static inline unsigned long fast_get_dcookie(struct path *path)
297{ 297{
298 unsigned long cookie; 298 unsigned long cookie;
299 299
300 if (path->dentry->d_cookie) 300 if (path->dentry->d_flags & DCACHE_COOKIE)
301 return (unsigned long)path->dentry; 301 return (unsigned long)path->dentry;
302 get_dcookie(path, &cookie); 302 get_dcookie(path, &cookie);
303 return cookie; 303 return cookie;
diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h
index e5a6a9ba3adf..d60a2eefb17b 100644
--- a/arch/s390/include/asm/cpu.h
+++ b/arch/s390/include/asm/cpu.h
@@ -14,7 +14,6 @@
14 14
15struct s390_idle_data { 15struct s390_idle_data {
16 spinlock_t lock; 16 spinlock_t lock;
17 unsigned int in_idle;
18 unsigned long long idle_count; 17 unsigned long long idle_count;
19 unsigned long long idle_enter; 18 unsigned long long idle_enter;
20 unsigned long long idle_time; 19 unsigned long long idle_time;
@@ -22,12 +21,12 @@ struct s390_idle_data {
22 21
23DECLARE_PER_CPU(struct s390_idle_data, s390_idle); 22DECLARE_PER_CPU(struct s390_idle_data, s390_idle);
24 23
25void s390_idle_leave(void); 24void vtime_start_cpu(void);
26 25
27static inline void s390_idle_check(void) 26static inline void s390_idle_check(void)
28{ 27{
29 if ((&__get_cpu_var(s390_idle))->in_idle) 28 if ((&__get_cpu_var(s390_idle))->idle_enter != 0ULL)
30 s390_idle_leave(); 29 vtime_start_cpu();
31} 30}
32 31
33#endif /* _ASM_S390_CPU_H_ */ 32#endif /* _ASM_S390_CPU_H_ */
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index 133ce054fc89..521726430afa 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -11,7 +11,7 @@
11 11
12#include <asm/div64.h> 12#include <asm/div64.h>
13 13
14/* We want to use micro-second resolution. */ 14/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
15 15
16typedef unsigned long long cputime_t; 16typedef unsigned long long cputime_t;
17typedef unsigned long long cputime64_t; 17typedef unsigned long long cputime64_t;
@@ -53,9 +53,9 @@ __div(unsigned long long n, unsigned int base)
53#define cputime_ge(__a, __b) ((__a) >= (__b)) 53#define cputime_ge(__a, __b) ((__a) >= (__b))
54#define cputime_lt(__a, __b) ((__a) < (__b)) 54#define cputime_lt(__a, __b) ((__a) < (__b))
55#define cputime_le(__a, __b) ((__a) <= (__b)) 55#define cputime_le(__a, __b) ((__a) <= (__b))
56#define cputime_to_jiffies(__ct) (__div((__ct), 1000000 / HZ)) 56#define cputime_to_jiffies(__ct) (__div((__ct), 4096000000ULL / HZ))
57#define cputime_to_scaled(__ct) (__ct) 57#define cputime_to_scaled(__ct) (__ct)
58#define jiffies_to_cputime(__hz) ((cputime_t)(__hz) * (1000000 / HZ)) 58#define jiffies_to_cputime(__hz) ((cputime_t)(__hz) * (4096000000ULL / HZ))
59 59
60#define cputime64_zero (0ULL) 60#define cputime64_zero (0ULL)
61#define cputime64_add(__a, __b) ((__a) + (__b)) 61#define cputime64_add(__a, __b) ((__a) + (__b))
@@ -64,7 +64,7 @@ __div(unsigned long long n, unsigned int base)
64static inline u64 64static inline u64
65cputime64_to_jiffies64(cputime64_t cputime) 65cputime64_to_jiffies64(cputime64_t cputime)
66{ 66{
67 do_div(cputime, 1000000 / HZ); 67 do_div(cputime, 4096000000ULL / HZ);
68 return cputime; 68 return cputime;
69} 69}
70 70
@@ -74,13 +74,13 @@ cputime64_to_jiffies64(cputime64_t cputime)
74static inline unsigned int 74static inline unsigned int
75cputime_to_msecs(const cputime_t cputime) 75cputime_to_msecs(const cputime_t cputime)
76{ 76{
77 return __div(cputime, 1000); 77 return __div(cputime, 4096000);
78} 78}
79 79
80static inline cputime_t 80static inline cputime_t
81msecs_to_cputime(const unsigned int m) 81msecs_to_cputime(const unsigned int m)
82{ 82{
83 return (cputime_t) m * 1000; 83 return (cputime_t) m * 4096000;
84} 84}
85 85
86/* 86/*
@@ -89,13 +89,13 @@ msecs_to_cputime(const unsigned int m)
89static inline unsigned int 89static inline unsigned int
90cputime_to_secs(const cputime_t cputime) 90cputime_to_secs(const cputime_t cputime)
91{ 91{
92 return __div(cputime, 1000000); 92 return __div(cputime, 2048000000) >> 1;
93} 93}
94 94
95static inline cputime_t 95static inline cputime_t
96secs_to_cputime(const unsigned int s) 96secs_to_cputime(const unsigned int s)
97{ 97{
98 return (cputime_t) s * 1000000; 98 return (cputime_t) s * 4096000000ULL;
99} 99}
100 100
101/* 101/*
@@ -104,7 +104,7 @@ secs_to_cputime(const unsigned int s)
104static inline cputime_t 104static inline cputime_t
105timespec_to_cputime(const struct timespec *value) 105timespec_to_cputime(const struct timespec *value)
106{ 106{
107 return value->tv_nsec / 1000 + (u64) value->tv_sec * 1000000; 107 return value->tv_nsec * 4096 / 1000 + (u64) value->tv_sec * 4096000000ULL;
108} 108}
109 109
110static inline void 110static inline void
@@ -114,12 +114,12 @@ cputime_to_timespec(const cputime_t cputime, struct timespec *value)
114 register_pair rp; 114 register_pair rp;
115 115
116 rp.pair = cputime >> 1; 116 rp.pair = cputime >> 1;
117 asm ("dr %0,%1" : "+d" (rp) : "d" (1000000 >> 1)); 117 asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
118 value->tv_nsec = rp.subreg.even * 1000; 118 value->tv_nsec = rp.subreg.even * 1000 / 4096;
119 value->tv_sec = rp.subreg.odd; 119 value->tv_sec = rp.subreg.odd;
120#else 120#else
121 value->tv_nsec = (cputime % 1000000) * 1000; 121 value->tv_nsec = (cputime % 4096000000ULL) * 1000 / 4096;
122 value->tv_sec = cputime / 1000000; 122 value->tv_sec = cputime / 4096000000ULL;
123#endif 123#endif
124} 124}
125 125
@@ -131,7 +131,7 @@ cputime_to_timespec(const cputime_t cputime, struct timespec *value)
131static inline cputime_t 131static inline cputime_t
132timeval_to_cputime(const struct timeval *value) 132timeval_to_cputime(const struct timeval *value)
133{ 133{
134 return value->tv_usec + (u64) value->tv_sec * 1000000; 134 return value->tv_usec * 4096 + (u64) value->tv_sec * 4096000000ULL;
135} 135}
136 136
137static inline void 137static inline void
@@ -141,12 +141,12 @@ cputime_to_timeval(const cputime_t cputime, struct timeval *value)
141 register_pair rp; 141 register_pair rp;
142 142
143 rp.pair = cputime >> 1; 143 rp.pair = cputime >> 1;
144 asm ("dr %0,%1" : "+d" (rp) : "d" (1000000 >> 1)); 144 asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
145 value->tv_usec = rp.subreg.even; 145 value->tv_usec = rp.subreg.even / 4096;
146 value->tv_sec = rp.subreg.odd; 146 value->tv_sec = rp.subreg.odd;
147#else 147#else
148 value->tv_usec = cputime % 1000000; 148 value->tv_usec = cputime % 4096000000ULL;
149 value->tv_sec = cputime / 1000000; 149 value->tv_sec = cputime / 4096000000ULL;
150#endif 150#endif
151} 151}
152 152
@@ -156,13 +156,13 @@ cputime_to_timeval(const cputime_t cputime, struct timeval *value)
156static inline clock_t 156static inline clock_t
157cputime_to_clock_t(cputime_t cputime) 157cputime_to_clock_t(cputime_t cputime)
158{ 158{
159 return __div(cputime, 1000000 / USER_HZ); 159 return __div(cputime, 4096000000ULL / USER_HZ);
160} 160}
161 161
162static inline cputime_t 162static inline cputime_t
163clock_t_to_cputime(unsigned long x) 163clock_t_to_cputime(unsigned long x)
164{ 164{
165 return (cputime_t) x * (1000000 / USER_HZ); 165 return (cputime_t) x * (4096000000ULL / USER_HZ);
166} 166}
167 167
168/* 168/*
@@ -171,7 +171,7 @@ clock_t_to_cputime(unsigned long x)
171static inline clock_t 171static inline clock_t
172cputime64_to_clock_t(cputime64_t cputime) 172cputime64_to_clock_t(cputime64_t cputime)
173{ 173{
174 return __div(cputime, 1000000 / USER_HZ); 174 return __div(cputime, 4096000000ULL / USER_HZ);
175} 175}
176 176
177#endif /* _S390_CPUTIME_H */ 177#endif /* _S390_CPUTIME_H */
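
With this change the s390 cputime_t counts CPU-timer units of 2**-12 microseconds, which is why every constant above is a multiple of 4096: 4096 units per microsecond, 4096000 per millisecond, 4096000000 per second. cputime_to_secs() divides by 2048000000 and shifts right once, which is equivalent to dividing by 4096000000 while keeping the divisor below 2**31, presumably for the benefit of the 31-bit __div() helper. A quick self-check in plain C:

#include <assert.h>
#include <stdio.h>

int main(void)
{
        unsigned long long one_second = 4096000000ULL;  /* 1 s in timer units */
        unsigned long long t = 3 * one_second + 4096000ULL * 250;      /* 3.25 s */

        assert(t / 4096000ULL == 3250);                         /* milliseconds */
        assert((t / 2048000000ULL) >> 1 == t / one_second);     /* whole seconds */

        printf("%llu units = %llu ms = %llu s\n",
               t, t / 4096000ULL, (t / 2048000000ULL) >> 1);
        return 0;
}
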
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 0bc51d52a899..ffdef5fe8587 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -67,11 +67,11 @@
67#define __LC_SYNC_ENTER_TIMER 0x248 67#define __LC_SYNC_ENTER_TIMER 0x248
68#define __LC_ASYNC_ENTER_TIMER 0x250 68#define __LC_ASYNC_ENTER_TIMER 0x250
69#define __LC_EXIT_TIMER 0x258 69#define __LC_EXIT_TIMER 0x258
70#define __LC_LAST_UPDATE_TIMER 0x260 70#define __LC_USER_TIMER 0x260
71#define __LC_USER_TIMER 0x268 71#define __LC_SYSTEM_TIMER 0x268
72#define __LC_SYSTEM_TIMER 0x270 72#define __LC_STEAL_TIMER 0x270
73#define __LC_LAST_UPDATE_CLOCK 0x278 73#define __LC_LAST_UPDATE_TIMER 0x278
74#define __LC_STEAL_CLOCK 0x280 74#define __LC_LAST_UPDATE_CLOCK 0x280
75#define __LC_RETURN_MCCK_PSW 0x288 75#define __LC_RETURN_MCCK_PSW 0x288
76#define __LC_KERNEL_STACK 0xC40 76#define __LC_KERNEL_STACK 0xC40
77#define __LC_THREAD_INFO 0xC44 77#define __LC_THREAD_INFO 0xC44
@@ -89,11 +89,11 @@
89#define __LC_SYNC_ENTER_TIMER 0x250 89#define __LC_SYNC_ENTER_TIMER 0x250
90#define __LC_ASYNC_ENTER_TIMER 0x258 90#define __LC_ASYNC_ENTER_TIMER 0x258
91#define __LC_EXIT_TIMER 0x260 91#define __LC_EXIT_TIMER 0x260
92#define __LC_LAST_UPDATE_TIMER 0x268 92#define __LC_USER_TIMER 0x268
93#define __LC_USER_TIMER 0x270 93#define __LC_SYSTEM_TIMER 0x270
94#define __LC_SYSTEM_TIMER 0x278 94#define __LC_STEAL_TIMER 0x278
95#define __LC_LAST_UPDATE_CLOCK 0x280 95#define __LC_LAST_UPDATE_TIMER 0x280
96#define __LC_STEAL_CLOCK 0x288 96#define __LC_LAST_UPDATE_CLOCK 0x288
97#define __LC_RETURN_MCCK_PSW 0x290 97#define __LC_RETURN_MCCK_PSW 0x290
98#define __LC_KERNEL_STACK 0xD40 98#define __LC_KERNEL_STACK 0xD40
99#define __LC_THREAD_INFO 0xD48 99#define __LC_THREAD_INFO 0xD48
@@ -106,8 +106,10 @@
106#define __LC_IPLDEV 0xDB8 106#define __LC_IPLDEV 0xDB8
107#define __LC_CURRENT 0xDD8 107#define __LC_CURRENT 0xDD8
108#define __LC_INT_CLOCK 0xDE8 108#define __LC_INT_CLOCK 0xDE8
109#define __LC_VDSO_PER_CPU 0xE38
109#endif /* __s390x__ */ 110#endif /* __s390x__ */
110 111
112#define __LC_PASTE 0xE40
111 113
112#define __LC_PANIC_MAGIC 0xE00 114#define __LC_PANIC_MAGIC 0xE00
113#ifndef __s390x__ 115#ifndef __s390x__
@@ -252,11 +254,11 @@ struct _lowcore
252 __u64 sync_enter_timer; /* 0x248 */ 254 __u64 sync_enter_timer; /* 0x248 */
253 __u64 async_enter_timer; /* 0x250 */ 255 __u64 async_enter_timer; /* 0x250 */
254 __u64 exit_timer; /* 0x258 */ 256 __u64 exit_timer; /* 0x258 */
255 __u64 last_update_timer; /* 0x260 */ 257 __u64 user_timer; /* 0x260 */
256 __u64 user_timer; /* 0x268 */ 258 __u64 system_timer; /* 0x268 */
257 __u64 system_timer; /* 0x270 */ 259 __u64 steal_timer; /* 0x270 */
258 __u64 last_update_clock; /* 0x278 */ 260 __u64 last_update_timer; /* 0x278 */
259 __u64 steal_clock; /* 0x280 */ 261 __u64 last_update_clock; /* 0x280 */
260 psw_t return_mcck_psw; /* 0x288 */ 262 psw_t return_mcck_psw; /* 0x288 */
261 __u8 pad8[0xc00-0x290]; /* 0x290 */ 263 __u8 pad8[0xc00-0x290]; /* 0x290 */
262 264
@@ -343,11 +345,11 @@ struct _lowcore
343 __u64 sync_enter_timer; /* 0x250 */ 345 __u64 sync_enter_timer; /* 0x250 */
344 __u64 async_enter_timer; /* 0x258 */ 346 __u64 async_enter_timer; /* 0x258 */
345 __u64 exit_timer; /* 0x260 */ 347 __u64 exit_timer; /* 0x260 */
346 __u64 last_update_timer; /* 0x268 */ 348 __u64 user_timer; /* 0x268 */
347 __u64 user_timer; /* 0x270 */ 349 __u64 system_timer; /* 0x270 */
348 __u64 system_timer; /* 0x278 */ 350 __u64 steal_timer; /* 0x278 */
349 __u64 last_update_clock; /* 0x280 */ 351 __u64 last_update_timer; /* 0x280 */
350 __u64 steal_clock; /* 0x288 */ 352 __u64 last_update_clock; /* 0x288 */
351 psw_t return_mcck_psw; /* 0x290 */ 353 psw_t return_mcck_psw; /* 0x290 */
352 __u8 pad8[0xc00-0x2a0]; /* 0x2a0 */ 354 __u8 pad8[0xc00-0x2a0]; /* 0x2a0 */
353 /* System info area */ 355 /* System info area */
@@ -381,7 +383,12 @@ struct _lowcore
381 /* whether the kernel died with panic() or not */ 383 /* whether the kernel died with panic() or not */
382 __u32 panic_magic; /* 0xe00 */ 384 __u32 panic_magic; /* 0xe00 */
383 385
384 __u8 pad13[0x11b8-0xe04]; /* 0xe04 */ 386 /* Per cpu primary space access list */
387 __u8 pad_0xe04[0xe3c-0xe04]; /* 0xe04 */
388 __u32 vdso_per_cpu_data; /* 0xe3c */
389 __u32 paste[16]; /* 0xe40 */
390
391 __u8 pad13[0x11b8-0xe80]; /* 0xe80 */
385 392
386 /* 64 bit extparam used for pfault, diag 250 etc */ 393 /* 64 bit extparam used for pfault, diag 250 etc */
387 __u64 ext_params2; /* 0x11B8 */ 394 __u64 ext_params2; /* 0x11B8 */
diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h
index 024ef42ed6d7..3a8b26eb1f2e 100644
--- a/arch/s390/include/asm/system.h
+++ b/arch/s390/include/asm/system.h
@@ -99,7 +99,7 @@ static inline void restore_access_regs(unsigned int *acrs)
99 prev = __switch_to(prev,next); \ 99 prev = __switch_to(prev,next); \
100} while (0) 100} while (0)
101 101
102extern void account_vtime(struct task_struct *); 102extern void account_vtime(struct task_struct *, struct task_struct *);
103extern void account_tick_vtime(struct task_struct *); 103extern void account_tick_vtime(struct task_struct *);
104extern void account_system_vtime(struct task_struct *); 104extern void account_system_vtime(struct task_struct *);
105 105
@@ -121,7 +121,7 @@ static inline void cmma_init(void) { }
121 121
122#define finish_arch_switch(prev) do { \ 122#define finish_arch_switch(prev) do { \
123 set_fs(current->thread.mm_segment); \ 123 set_fs(current->thread.mm_segment); \
124 account_vtime(prev); \ 124 account_vtime(prev, current); \
125} while (0) 125} while (0)
126 126
127#define nop() asm volatile("nop") 127#define nop() asm volatile("nop")
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index c1eaf9604da7..c544aa524535 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -47,6 +47,8 @@ struct thread_info {
47 unsigned int cpu; /* current CPU */ 47 unsigned int cpu; /* current CPU */
48 int preempt_count; /* 0 => preemptable, <0 => BUG */ 48 int preempt_count; /* 0 => preemptable, <0 => BUG */
49 struct restart_block restart_block; 49 struct restart_block restart_block;
50 __u64 user_timer;
51 __u64 system_timer;
50}; 52};
51 53
52/* 54/*
diff --git a/arch/s390/include/asm/timer.h b/arch/s390/include/asm/timer.h
index 61705d60f995..e4bcab739c19 100644
--- a/arch/s390/include/asm/timer.h
+++ b/arch/s390/include/asm/timer.h
@@ -23,20 +23,18 @@ struct vtimer_list {
23 __u64 expires; 23 __u64 expires;
24 __u64 interval; 24 __u64 interval;
25 25
26 spinlock_t lock;
27 unsigned long magic;
28
29 void (*function)(unsigned long); 26 void (*function)(unsigned long);
30 unsigned long data; 27 unsigned long data;
31}; 28};
32 29
33/* the offset value will wrap after ca. 71 years */ 30/* the vtimer value will wrap after ca. 71 years */
34struct vtimer_queue { 31struct vtimer_queue {
35 struct list_head list; 32 struct list_head list;
36 spinlock_t lock; 33 spinlock_t lock;
37 __u64 to_expire; /* current event expire time */ 34 __u64 timer; /* last programmed timer */
38 __u64 offset; /* list offset to zero */ 35 __u64 elapsed; /* elapsed time of timer expire values */
39 __u64 idle; /* temp var for idle */ 36 __u64 idle; /* temp var for idle */
37 int do_spt; /* =1: reprogram cpu timer in idle */
40}; 38};
41 39
42extern void init_virt_timer(struct vtimer_list *timer); 40extern void init_virt_timer(struct vtimer_list *timer);
@@ -48,8 +46,8 @@ extern int del_virt_timer(struct vtimer_list *timer);
48extern void init_cpu_vtimer(void); 46extern void init_cpu_vtimer(void);
49extern void vtime_init(void); 47extern void vtime_init(void);
50 48
51extern void vtime_start_cpu_timer(void); 49extern void vtime_stop_cpu(void);
52extern void vtime_stop_cpu_timer(void); 50extern void vtime_start_leave(void);
53 51
54#endif /* __KERNEL__ */ 52#endif /* __KERNEL__ */
55 53
diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
index a44f4fe16a35..7bdd7c8ebc91 100644
--- a/arch/s390/include/asm/vdso.h
+++ b/arch/s390/include/asm/vdso.h
@@ -12,9 +12,9 @@
12#ifndef __ASSEMBLY__ 12#ifndef __ASSEMBLY__
13 13
14/* 14/*
15 * Note about this structure: 15 * Note about the vdso_data and vdso_per_cpu_data structures:
16 * 16 *
17 * NEVER USE THIS IN USERSPACE CODE DIRECTLY. The layout of this 17 * NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the
18 * structure is supposed to be known only to the function in the vdso 18 * structure is supposed to be known only to the function in the vdso
19 * itself and may change without notice. 19 * itself and may change without notice.
20 */ 20 */
@@ -28,10 +28,21 @@ struct vdso_data {
28 __u64 wtom_clock_nsec; /* 0x28 */ 28 __u64 wtom_clock_nsec; /* 0x28 */
29 __u32 tz_minuteswest; /* Minutes west of Greenwich 0x30 */ 29 __u32 tz_minuteswest; /* Minutes west of Greenwich 0x30 */
30 __u32 tz_dsttime; /* Type of dst correction 0x34 */ 30 __u32 tz_dsttime; /* Type of dst correction 0x34 */
31 __u32 ectg_available;
32};
33
34struct vdso_per_cpu_data {
35 __u64 ectg_timer_base;
36 __u64 ectg_user_time;
31}; 37};
32 38
33extern struct vdso_data *vdso_data; 39extern struct vdso_data *vdso_data;
34 40
41#ifdef CONFIG_64BIT
42int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore);
43void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore);
44#endif
45
35#endif /* __ASSEMBLY__ */ 46#endif /* __ASSEMBLY__ */
36 47
37#endif /* __KERNEL__ */ 48#endif /* __KERNEL__ */
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index e641f60bac99..67a60016babb 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -48,6 +48,11 @@ int main(void)
48 DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec)); 48 DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec));
49 DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); 49 DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
50 DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest)); 50 DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest));
51 DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available));
52 DEFINE(__VDSO_ECTG_BASE,
53 offsetof(struct vdso_per_cpu_data, ectg_timer_base));
54 DEFINE(__VDSO_ECTG_USER,
55 offsetof(struct vdso_per_cpu_data, ectg_user_time));
51 /* constants used by the vdso */ 56 /* constants used by the vdso */
52 DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); 57 DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
53 DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); 58 DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 55de521aef77..1268aa2991bf 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -583,8 +583,8 @@ kernel_per:
583 583
584 .globl io_int_handler 584 .globl io_int_handler
585io_int_handler: 585io_int_handler:
586 stpt __LC_ASYNC_ENTER_TIMER
587 stck __LC_INT_CLOCK 586 stck __LC_INT_CLOCK
587 stpt __LC_ASYNC_ENTER_TIMER
588 SAVE_ALL_BASE __LC_SAVE_AREA+16 588 SAVE_ALL_BASE __LC_SAVE_AREA+16
589 SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+16 589 SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+16
590 CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+16 590 CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+16
@@ -723,8 +723,8 @@ io_notify_resume:
723 723
724 .globl ext_int_handler 724 .globl ext_int_handler
725ext_int_handler: 725ext_int_handler:
726 stpt __LC_ASYNC_ENTER_TIMER
727 stck __LC_INT_CLOCK 726 stck __LC_INT_CLOCK
727 stpt __LC_ASYNC_ENTER_TIMER
728 SAVE_ALL_BASE __LC_SAVE_AREA+16 728 SAVE_ALL_BASE __LC_SAVE_AREA+16
729 SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16 729 SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16
730 CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16 730 CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16
@@ -750,6 +750,7 @@ __critical_end:
750 750
751 .globl mcck_int_handler 751 .globl mcck_int_handler
752mcck_int_handler: 752mcck_int_handler:
753 stck __LC_INT_CLOCK
753 spt __LC_CPU_TIMER_SAVE_AREA # revalidate cpu timer 754 spt __LC_CPU_TIMER_SAVE_AREA # revalidate cpu timer
754 lm %r0,%r15,__LC_GPREGS_SAVE_AREA # revalidate gprs 755 lm %r0,%r15,__LC_GPREGS_SAVE_AREA # revalidate gprs
755 SAVE_ALL_BASE __LC_SAVE_AREA+32 756 SAVE_ALL_BASE __LC_SAVE_AREA+32
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 16bb4fd1a403..c6fbde13971a 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -177,8 +177,11 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
177 .if !\sync 177 .if !\sync
178 ni \psworg+1,0xfd # clear wait state bit 178 ni \psworg+1,0xfd # clear wait state bit
179 .endif 179 .endif
180 lmg %r0,%r15,SP_R0(%r15) # load gprs 0-15 of user 180 lg %r14,__LC_VDSO_PER_CPU
181 lmg %r0,%r13,SP_R0(%r15) # load gprs 0-13 of user
181 stpt __LC_EXIT_TIMER 182 stpt __LC_EXIT_TIMER
183 mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
 184 lmg %r14,%r15,SP_R14(%r15) # load gprs 14-15 of user
182 lpswe \psworg # back to caller 185 lpswe \psworg # back to caller
183 .endm 186 .endm
184 187
@@ -559,8 +562,8 @@ kernel_per:
559 */ 562 */
560 .globl io_int_handler 563 .globl io_int_handler
561io_int_handler: 564io_int_handler:
562 stpt __LC_ASYNC_ENTER_TIMER
563 stck __LC_INT_CLOCK 565 stck __LC_INT_CLOCK
566 stpt __LC_ASYNC_ENTER_TIMER
564 SAVE_ALL_BASE __LC_SAVE_AREA+32 567 SAVE_ALL_BASE __LC_SAVE_AREA+32
565 SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+32 568 SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+32
566 CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+32 569 CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+32
@@ -721,8 +724,8 @@ io_notify_resume:
721 */ 724 */
722 .globl ext_int_handler 725 .globl ext_int_handler
723ext_int_handler: 726ext_int_handler:
724 stpt __LC_ASYNC_ENTER_TIMER
725 stck __LC_INT_CLOCK 727 stck __LC_INT_CLOCK
728 stpt __LC_ASYNC_ENTER_TIMER
726 SAVE_ALL_BASE __LC_SAVE_AREA+32 729 SAVE_ALL_BASE __LC_SAVE_AREA+32
727 SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32 730 SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32
728 CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32 731 CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32
@@ -746,6 +749,7 @@ __critical_end:
746 */ 749 */
747 .globl mcck_int_handler 750 .globl mcck_int_handler
748mcck_int_handler: 751mcck_int_handler:
752 stck __LC_INT_CLOCK
749 la %r1,4095 # revalidate r1 753 la %r1,4095 # revalidate r1
750 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer 754 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer
751 lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs 755 lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs
@@ -979,23 +983,23 @@ cleanup_sysc_return:
979 983
980cleanup_sysc_leave: 984cleanup_sysc_leave:
981 clc 8(8,%r12),BASED(cleanup_sysc_leave_insn) 985 clc 8(8,%r12),BASED(cleanup_sysc_leave_insn)
982 je 2f 986 je 3f
983 mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
984 clc 8(8,%r12),BASED(cleanup_sysc_leave_insn+8) 987 clc 8(8,%r12),BASED(cleanup_sysc_leave_insn+8)
985 je 2f 988 jhe 0f
986 mvc __LC_RETURN_PSW(16),SP_PSW(%r15) 989 mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
9900: mvc __LC_RETURN_PSW(16),SP_PSW(%r15)
987 cghi %r12,__LC_MCK_OLD_PSW 991 cghi %r12,__LC_MCK_OLD_PSW
988 jne 0f 992 jne 1f
989 mvc __LC_SAVE_AREA+64(32),SP_R12(%r15) 993 mvc __LC_SAVE_AREA+64(32),SP_R12(%r15)
990 j 1f 994 j 2f
9910: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) 9951: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15)
9921: lmg %r0,%r11,SP_R0(%r15) 9962: lmg %r0,%r11,SP_R0(%r15)
993 lg %r15,SP_R15(%r15) 997 lg %r15,SP_R15(%r15)
9942: la %r12,__LC_RETURN_PSW 9983: la %r12,__LC_RETURN_PSW
995 br %r14 999 br %r14
996cleanup_sysc_leave_insn: 1000cleanup_sysc_leave_insn:
997 .quad sysc_done - 4 1001 .quad sysc_done - 4
998 .quad sysc_done - 8 1002 .quad sysc_done - 16
999 1003
1000cleanup_io_return: 1004cleanup_io_return:
1001 mvc __LC_RETURN_PSW(8),0(%r12) 1005 mvc __LC_RETURN_PSW(8),0(%r12)
@@ -1005,23 +1009,23 @@ cleanup_io_return:
1005 1009
1006cleanup_io_leave: 1010cleanup_io_leave:
1007 clc 8(8,%r12),BASED(cleanup_io_leave_insn) 1011 clc 8(8,%r12),BASED(cleanup_io_leave_insn)
1008 je 2f 1012 je 3f
1009 mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
1010 clc 8(8,%r12),BASED(cleanup_io_leave_insn+8) 1013 clc 8(8,%r12),BASED(cleanup_io_leave_insn+8)
1011 je 2f 1014 jhe 0f
1012 mvc __LC_RETURN_PSW(16),SP_PSW(%r15) 1015 mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
10160: mvc __LC_RETURN_PSW(16),SP_PSW(%r15)
1013 cghi %r12,__LC_MCK_OLD_PSW 1017 cghi %r12,__LC_MCK_OLD_PSW
1014 jne 0f 1018 jne 1f
1015 mvc __LC_SAVE_AREA+64(32),SP_R12(%r15) 1019 mvc __LC_SAVE_AREA+64(32),SP_R12(%r15)
1016 j 1f 1020 j 2f
10170: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) 10211: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15)
10181: lmg %r0,%r11,SP_R0(%r15) 10222: lmg %r0,%r11,SP_R0(%r15)
1019 lg %r15,SP_R15(%r15) 1023 lg %r15,SP_R15(%r15)
10202: la %r12,__LC_RETURN_PSW 10243: la %r12,__LC_RETURN_PSW
1021 br %r14 1025 br %r14
1022cleanup_io_leave_insn: 1026cleanup_io_leave_insn:
1023 .quad io_done - 4 1027 .quad io_done - 4
1024 .quad io_done - 8 1028 .quad io_done - 16
1025 1029
1026/* 1030/*
1027 * Integer constants 1031 * Integer constants
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 3ccd36b24b8f..f9f70aa15244 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -87,6 +87,8 @@ startup_continue:
87 lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area 87 lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area
88 # move IPL device to lowcore 88 # move IPL device to lowcore
89 mvc __LC_IPLDEV(4),IPL_DEVICE+4-PARMAREA(%r12) 89 mvc __LC_IPLDEV(4),IPL_DEVICE+4-PARMAREA(%r12)
90 lghi %r0,__LC_PASTE
91 stg %r0,__LC_VDSO_PER_CPU
90# 92#
91# Setup stack 93# Setup stack
92# 94#
diff --git a/arch/s390/kernel/init_task.c b/arch/s390/kernel/init_task.c
index e80716843619..7db95c0b8693 100644
--- a/arch/s390/kernel/init_task.c
+++ b/arch/s390/kernel/init_task.c
@@ -16,7 +16,6 @@
16#include <asm/uaccess.h> 16#include <asm/uaccess.h>
17#include <asm/pgtable.h> 17#include <asm/pgtable.h>
18 18
19static struct fs_struct init_fs = INIT_FS;
20static struct signal_struct init_signals = INIT_SIGNALS(init_signals); 19static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
21static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 20static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
22struct mm_struct init_mm = INIT_MM(init_mm); 21struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 04f8c67a6101..b6110bdf8dc2 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -38,6 +38,7 @@
38#include <linux/utsname.h> 38#include <linux/utsname.h>
39#include <linux/tick.h> 39#include <linux/tick.h>
40#include <linux/elfcore.h> 40#include <linux/elfcore.h>
41#include <linux/kernel_stat.h>
41#include <asm/uaccess.h> 42#include <asm/uaccess.h>
42#include <asm/pgtable.h> 43#include <asm/pgtable.h>
43#include <asm/system.h> 44#include <asm/system.h>
@@ -45,7 +46,6 @@
45#include <asm/processor.h> 46#include <asm/processor.h>
46#include <asm/irq.h> 47#include <asm/irq.h>
47#include <asm/timer.h> 48#include <asm/timer.h>
48#include <asm/cpu.h>
49#include "entry.h" 49#include "entry.h"
50 50
51asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); 51asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
@@ -75,36 +75,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
75 return sf->gprs[8]; 75 return sf->gprs[8];
76} 76}
77 77
78DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = {
79 .lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock)
80};
81
82static int s390_idle_enter(void)
83{
84 struct s390_idle_data *idle;
85
86 idle = &__get_cpu_var(s390_idle);
87 spin_lock(&idle->lock);
88 idle->idle_count++;
89 idle->in_idle = 1;
90 idle->idle_enter = get_clock();
91 spin_unlock(&idle->lock);
92 vtime_stop_cpu_timer();
93 return NOTIFY_OK;
94}
95
96void s390_idle_leave(void)
97{
98 struct s390_idle_data *idle;
99
100 vtime_start_cpu_timer();
101 idle = &__get_cpu_var(s390_idle);
102 spin_lock(&idle->lock);
103 idle->idle_time += get_clock() - idle->idle_enter;
104 idle->in_idle = 0;
105 spin_unlock(&idle->lock);
106}
107
108extern void s390_handle_mcck(void); 78extern void s390_handle_mcck(void);
109/* 79/*
110 * The idle loop on a S390... 80 * The idle loop on a S390...
@@ -117,10 +87,6 @@ static void default_idle(void)
117 local_irq_enable(); 87 local_irq_enable();
118 return; 88 return;
119 } 89 }
120 if (s390_idle_enter() == NOTIFY_BAD) {
121 local_irq_enable();
122 return;
123 }
124#ifdef CONFIG_HOTPLUG_CPU 90#ifdef CONFIG_HOTPLUG_CPU
125 if (cpu_is_offline(smp_processor_id())) { 91 if (cpu_is_offline(smp_processor_id())) {
126 preempt_enable_no_resched(); 92 preempt_enable_no_resched();
@@ -130,7 +96,6 @@ static void default_idle(void)
130 local_mcck_disable(); 96 local_mcck_disable();
131 if (test_thread_flag(TIF_MCCK_PENDING)) { 97 if (test_thread_flag(TIF_MCCK_PENDING)) {
132 local_mcck_enable(); 98 local_mcck_enable();
133 s390_idle_leave();
134 local_irq_enable(); 99 local_irq_enable();
135 s390_handle_mcck(); 100 s390_handle_mcck();
136 return; 101 return;
@@ -138,9 +103,9 @@ static void default_idle(void)
138 trace_hardirqs_on(); 103 trace_hardirqs_on();
139 /* Don't trace preempt off for idle. */ 104 /* Don't trace preempt off for idle. */
140 stop_critical_timings(); 105 stop_critical_timings();
141 /* Wait for external, I/O or machine check interrupt. */ 106 /* Stop virtual timer and halt the cpu. */
142 __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | 107 vtime_stop_cpu();
143 PSW_MASK_IO | PSW_MASK_EXT); 108 /* Reenable preemption tracer. */
144 start_critical_timings(); 109 start_critical_timings();
145} 110}
146 111
diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c
index e019b419efc6..a0d2d55d7fb3 100644
--- a/arch/s390/kernel/s390_ext.c
+++ b/arch/s390/kernel/s390_ext.c
@@ -119,8 +119,8 @@ void do_extint(struct pt_regs *regs, unsigned short code)
119 struct pt_regs *old_regs; 119 struct pt_regs *old_regs;
120 120
121 old_regs = set_irq_regs(regs); 121 old_regs = set_irq_regs(regs);
122 irq_enter();
123 s390_idle_check(); 122 s390_idle_check();
123 irq_enter();
124 if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) 124 if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
125 /* Serve timer interrupts first. */ 125 /* Serve timer interrupts first. */
126 clock_comparator_work(); 126 clock_comparator_work();
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index b7a1efd5522c..d825f4950e4e 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -427,6 +427,8 @@ setup_lowcore(void)
427 /* enable extended save area */ 427 /* enable extended save area */
428 __ctl_set_bit(14, 29); 428 __ctl_set_bit(14, 29);
429 } 429 }
430#else
431 lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0];
430#endif 432#endif
431 set_prefix((u32)(unsigned long) lc); 433 set_prefix((u32)(unsigned long) lc);
432} 434}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 3ed5c7a83c6c..9c0ccb532a45 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -47,6 +47,7 @@
47#include <asm/lowcore.h> 47#include <asm/lowcore.h>
48#include <asm/sclp.h> 48#include <asm/sclp.h>
49#include <asm/cpu.h> 49#include <asm/cpu.h>
50#include <asm/vdso.h>
50#include "entry.h" 51#include "entry.h"
51 52
52/* 53/*
@@ -500,6 +501,9 @@ static int __cpuinit smp_alloc_lowcore(int cpu)
500 goto out; 501 goto out;
501 lowcore->extended_save_area_addr = (u32) save_area; 502 lowcore->extended_save_area_addr = (u32) save_area;
502 } 503 }
504#else
505 if (vdso_alloc_per_cpu(cpu, lowcore))
506 goto out;
503#endif 507#endif
504 lowcore_ptr[cpu] = lowcore; 508 lowcore_ptr[cpu] = lowcore;
505 return 0; 509 return 0;
@@ -522,6 +526,8 @@ static void smp_free_lowcore(int cpu)
522#ifndef CONFIG_64BIT 526#ifndef CONFIG_64BIT
523 if (MACHINE_HAS_IEEE) 527 if (MACHINE_HAS_IEEE)
524 free_page((unsigned long) lowcore->extended_save_area_addr); 528 free_page((unsigned long) lowcore->extended_save_area_addr);
529#else
530 vdso_free_per_cpu(cpu, lowcore);
525#endif 531#endif
526 free_page(lowcore->panic_stack - PAGE_SIZE); 532 free_page(lowcore->panic_stack - PAGE_SIZE);
527 free_pages(lowcore->async_stack - ASYNC_SIZE, ASYNC_ORDER); 533 free_pages(lowcore->async_stack - ASYNC_SIZE, ASYNC_ORDER);
@@ -664,6 +670,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
664 lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, lc_order); 670 lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, lc_order);
665 panic_stack = __get_free_page(GFP_KERNEL); 671 panic_stack = __get_free_page(GFP_KERNEL);
666 async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); 672 async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
673 BUG_ON(!lowcore || !panic_stack || !async_stack);
667#ifndef CONFIG_64BIT 674#ifndef CONFIG_64BIT
668 if (MACHINE_HAS_IEEE) 675 if (MACHINE_HAS_IEEE)
669 save_area = get_zeroed_page(GFP_KERNEL); 676 save_area = get_zeroed_page(GFP_KERNEL);
@@ -677,6 +684,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
677#ifndef CONFIG_64BIT 684#ifndef CONFIG_64BIT
678 if (MACHINE_HAS_IEEE) 685 if (MACHINE_HAS_IEEE)
679 lowcore->extended_save_area_addr = (u32) save_area; 686 lowcore->extended_save_area_addr = (u32) save_area;
687#else
688 BUG_ON(vdso_alloc_per_cpu(smp_processor_id(), lowcore));
680#endif 689#endif
681 set_prefix((u32)(unsigned long) lowcore); 690 set_prefix((u32)(unsigned long) lowcore);
682 local_mcck_enable(); 691 local_mcck_enable();
@@ -845,9 +854,11 @@ static ssize_t show_idle_count(struct sys_device *dev,
845 unsigned long long idle_count; 854 unsigned long long idle_count;
846 855
847 idle = &per_cpu(s390_idle, dev->id); 856 idle = &per_cpu(s390_idle, dev->id);
848 spin_lock_irq(&idle->lock); 857 spin_lock(&idle->lock);
849 idle_count = idle->idle_count; 858 idle_count = idle->idle_count;
850 spin_unlock_irq(&idle->lock); 859 if (idle->idle_enter)
860 idle_count++;
861 spin_unlock(&idle->lock);
851 return sprintf(buf, "%llu\n", idle_count); 862 return sprintf(buf, "%llu\n", idle_count);
852} 863}
853static SYSDEV_ATTR(idle_count, 0444, show_idle_count, NULL); 864static SYSDEV_ATTR(idle_count, 0444, show_idle_count, NULL);
@@ -856,18 +867,17 @@ static ssize_t show_idle_time(struct sys_device *dev,
856 struct sysdev_attribute *attr, char *buf) 867 struct sysdev_attribute *attr, char *buf)
857{ 868{
858 struct s390_idle_data *idle; 869 struct s390_idle_data *idle;
859 unsigned long long new_time; 870 unsigned long long now, idle_time, idle_enter;
860 871
861 idle = &per_cpu(s390_idle, dev->id); 872 idle = &per_cpu(s390_idle, dev->id);
862 spin_lock_irq(&idle->lock); 873 spin_lock(&idle->lock);
863 if (idle->in_idle) { 874 now = get_clock();
864 new_time = get_clock(); 875 idle_time = idle->idle_time;
865 idle->idle_time += new_time - idle->idle_enter; 876 idle_enter = idle->idle_enter;
866 idle->idle_enter = new_time; 877 if (idle_enter != 0ULL && idle_enter < now)
867 } 878 idle_time += now - idle_enter;
868 new_time = idle->idle_time; 879 spin_unlock(&idle->lock);
869 spin_unlock_irq(&idle->lock); 880 return sprintf(buf, "%llu\n", idle_time >> 12);
870 return sprintf(buf, "%llu\n", new_time >> 12);
871} 881}
872static SYSDEV_ATTR(idle_time_us, 0444, show_idle_time, NULL); 882static SYSDEV_ATTR(idle_time_us, 0444, show_idle_time, NULL);
873 883
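The sysfs readers above no longer modify the per-cpu idle state: show_idle_time
samples idle_enter under the lock and, if the cpu is idle at that moment, adds
the in-flight interval on the fly; the final >> 12 turns TOD clock units
(bit 51 = 1 microsecond) into microseconds. A hedged user-space sketch of that
read-only sampling:

#include <stdint.h>

struct idle_sample {
	uint64_t idle_time;   /* accumulated idle time, TOD clock units */
	uint64_t idle_enter;  /* 0 if the cpu is running, else TOD at idle entry */
};

static uint64_t idle_time_us(const struct idle_sample *s, uint64_t now_tod)
{
	uint64_t t = s->idle_time;

	/* Count the idle period that is still in progress, if any. */
	if (s->idle_enter != 0 && s->idle_enter < now_tod)
		t += now_tod - s->idle_enter;

	return t >> 12;  /* TOD units -> microseconds (bit 51 = 1us) */
}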
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 10a6ccef4412..25a6a82f1c02 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -31,9 +31,6 @@
31#include <asm/sections.h> 31#include <asm/sections.h>
32#include <asm/vdso.h> 32#include <asm/vdso.h>
33 33
34/* Max supported size for symbol names */
35#define MAX_SYMNAME 64
36
37#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) 34#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
38extern char vdso32_start, vdso32_end; 35extern char vdso32_start, vdso32_end;
39static void *vdso32_kbase = &vdso32_start; 36static void *vdso32_kbase = &vdso32_start;
@@ -71,6 +68,119 @@ static union {
71struct vdso_data *vdso_data = &vdso_data_store.data; 68struct vdso_data *vdso_data = &vdso_data_store.data;
72 69
73/* 70/*
71 * Setup vdso data page.
72 */
73static void vdso_init_data(struct vdso_data *vd)
74{
75 unsigned int facility_list;
76
77 facility_list = stfl();
78 vd->ectg_available = switch_amode && (facility_list & 1);
79}
80
81#ifdef CONFIG_64BIT
82/*
83 * Setup per cpu vdso data page.
84 */
85static void vdso_init_per_cpu_data(int cpu, struct vdso_per_cpu_data *vpcd)
86{
87}
88
89/*
90 * Allocate/free per cpu vdso data.
91 */
92#ifdef CONFIG_64BIT
93#define SEGMENT_ORDER 2
94#else
95#define SEGMENT_ORDER 1
96#endif
97
98int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore)
99{
100 unsigned long segment_table, page_table, page_frame;
101 u32 *psal, *aste;
102 int i;
103
104 lowcore->vdso_per_cpu_data = __LC_PASTE;
105
106 if (!switch_amode || !vdso_enabled)
107 return 0;
108
109 segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER);
110 page_table = get_zeroed_page(GFP_KERNEL | GFP_DMA);
111 page_frame = get_zeroed_page(GFP_KERNEL);
112 if (!segment_table || !page_table || !page_frame)
113 goto out;
114
115 clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY,
116 PAGE_SIZE << SEGMENT_ORDER);
117 clear_table((unsigned long *) page_table, _PAGE_TYPE_EMPTY,
118 256*sizeof(unsigned long));
119
120 *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table;
121 *(unsigned long *) page_table = _PAGE_RO + page_frame;
122
123 psal = (u32 *) (page_table + 256*sizeof(unsigned long));
124 aste = psal + 32;
125
126 for (i = 4; i < 32; i += 4)
127 psal[i] = 0x80000000;
128
129 lowcore->paste[4] = (u32)(addr_t) psal;
130 psal[0] = 0x20000000;
131 psal[2] = (u32)(addr_t) aste;
132 *(unsigned long *) (aste + 2) = segment_table +
133 _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT;
134 aste[4] = (u32)(addr_t) psal;
135 lowcore->vdso_per_cpu_data = page_frame;
136
137 vdso_init_per_cpu_data(cpu, (struct vdso_per_cpu_data *) page_frame);
138 return 0;
139
140out:
141 free_page(page_frame);
142 free_page(page_table);
143 free_pages(segment_table, SEGMENT_ORDER);
144 return -ENOMEM;
145}
146
147#ifdef CONFIG_HOTPLUG_CPU
148void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore)
149{
150 unsigned long segment_table, page_table, page_frame;
151 u32 *psal, *aste;
152
153 if (!switch_amode || !vdso_enabled)
154 return;
155
156 psal = (u32 *)(addr_t) lowcore->paste[4];
157 aste = (u32 *)(addr_t) psal[2];
158 segment_table = *(unsigned long *)(aste + 2) & PAGE_MASK;
159 page_table = *(unsigned long *) segment_table;
160 page_frame = *(unsigned long *) page_table;
161
162 free_page(page_frame);
163 free_page(page_table);
164 free_pages(segment_table, SEGMENT_ORDER);
165}
166#endif /* CONFIG_HOTPLUG_CPU */
167
168static void __vdso_init_cr5(void *dummy)
169{
170 unsigned long cr5;
171
172 cr5 = offsetof(struct _lowcore, paste);
173 __ctl_load(cr5, 5, 5);
174}
175
176static void vdso_init_cr5(void)
177{
178 if (switch_amode && vdso_enabled)
179 on_each_cpu(__vdso_init_cr5, NULL, 1);
180}
181#endif /* CONFIG_64BIT */
182
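vdso_alloc_per_cpu builds a tiny private address space (one segment-table
entry pointing to one page table pointing to one read-only page) and reverts
everything if any allocation fails. A hedged, user-space sketch of that
allocate-all-or-roll-back pattern; it relies on free(NULL) being a no-op the
same way the kernel path relies on free_page(0) being one:

#include <stdlib.h>

struct three_bufs { void *a, *b, *c; };

static int alloc_three(struct three_bufs *t, size_t sz)
{
	t->a = malloc(sz);
	t->b = malloc(sz);
	t->c = malloc(sz);
	if (!t->a || !t->b || !t->c)
		goto out;
	return 0;
out:
	/* Roll back whatever succeeded; free(NULL) is harmless. */
	free(t->a);
	free(t->b);
	free(t->c);
	return -1;
}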
183/*
74 * This is called from binfmt_elf, we create the special vma for the 184 * This is called from binfmt_elf, we create the special vma for the
75 * vDSO and insert it into the mm struct tree 185 * vDSO and insert it into the mm struct tree
76 */ 186 */
@@ -172,6 +282,9 @@ static int __init vdso_init(void)
172{ 282{
173 int i; 283 int i;
174 284
285 if (!vdso_enabled)
286 return 0;
287 vdso_init_data(vdso_data);
175#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) 288#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
176 /* Calculate the size of the 32 bit vDSO */ 289 /* Calculate the size of the 32 bit vDSO */
177 vdso32_pages = ((&vdso32_end - &vdso32_start 290 vdso32_pages = ((&vdso32_end - &vdso32_start
@@ -208,6 +321,10 @@ static int __init vdso_init(void)
208 } 321 }
209 vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data); 322 vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data);
210 vdso64_pagelist[vdso64_pages] = NULL; 323 vdso64_pagelist[vdso64_pages] = NULL;
324#ifndef CONFIG_SMP
 325 BUG_ON(vdso_alloc_per_cpu(0, &S390_lowcore));
326#endif
327 vdso_init_cr5();
211#endif /* CONFIG_64BIT */ 328#endif /* CONFIG_64BIT */
212 329
213 get_page(virt_to_page(vdso_data)); 330 get_page(virt_to_page(vdso_data));
diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S
index 488e31a3c0e7..9ce8caafdb4e 100644
--- a/arch/s390/kernel/vdso64/clock_getres.S
+++ b/arch/s390/kernel/vdso64/clock_getres.S
@@ -22,7 +22,12 @@ __kernel_clock_getres:
22 cghi %r2,CLOCK_REALTIME 22 cghi %r2,CLOCK_REALTIME
23 je 0f 23 je 0f
24 cghi %r2,CLOCK_MONOTONIC 24 cghi %r2,CLOCK_MONOTONIC
25 je 0f
26 cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */
25 jne 2f 27 jne 2f
28 larl %r5,_vdso_data
29 icm %r0,15,__LC_ECTG_OK(%r5)
30 jz 2f
260: ltgr %r3,%r3 310: ltgr %r3,%r3
27 jz 1f /* res == NULL */ 32 jz 1f /* res == NULL */
28 larl %r1,3f 33 larl %r1,3f
diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S
index 738a410b7eb2..79dbfee831ec 100644
--- a/arch/s390/kernel/vdso64/clock_gettime.S
+++ b/arch/s390/kernel/vdso64/clock_gettime.S
@@ -22,8 +22,10 @@ __kernel_clock_gettime:
22 larl %r5,_vdso_data 22 larl %r5,_vdso_data
23 cghi %r2,CLOCK_REALTIME 23 cghi %r2,CLOCK_REALTIME
24 je 4f 24 je 4f
25 cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */
26 je 9f
25 cghi %r2,CLOCK_MONOTONIC 27 cghi %r2,CLOCK_MONOTONIC
26 jne 9f 28 jne 12f
27 29
28 /* CLOCK_MONOTONIC */ 30 /* CLOCK_MONOTONIC */
29 ltgr %r3,%r3 31 ltgr %r3,%r3
@@ -42,7 +44,7 @@ __kernel_clock_gettime:
42 alg %r0,__VDSO_WTOM_SEC(%r5) 44 alg %r0,__VDSO_WTOM_SEC(%r5)
43 clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ 45 clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
44 jne 0b 46 jne 0b
45 larl %r5,10f 47 larl %r5,13f
461: clg %r1,0(%r5) 481: clg %r1,0(%r5)
47 jl 2f 49 jl 2f
48 slg %r1,0(%r5) 50 slg %r1,0(%r5)
@@ -68,7 +70,7 @@ __kernel_clock_gettime:
68 lg %r0,__VDSO_XTIME_SEC(%r5) 70 lg %r0,__VDSO_XTIME_SEC(%r5)
69 clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ 71 clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
70 jne 5b 72 jne 5b
71 larl %r5,10f 73 larl %r5,13f
726: clg %r1,0(%r5) 746: clg %r1,0(%r5)
73 jl 7f 75 jl 7f
74 slg %r1,0(%r5) 76 slg %r1,0(%r5)
@@ -79,11 +81,38 @@ __kernel_clock_gettime:
798: lghi %r2,0 818: lghi %r2,0
80 br %r14 82 br %r14
81 83
84 /* CLOCK_THREAD_CPUTIME_ID for this thread */
859: icm %r0,15,__VDSO_ECTG_OK(%r5)
86 jz 12f
87 ear %r2,%a4
88 llilh %r4,0x0100
89 sar %a4,%r4
90 lghi %r4,0
91 sacf 512 /* Magic ectg instruction */
92 .insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4
93 sacf 0
94 sar %a4,%r2
95 algr %r1,%r0 /* r1 = cputime as TOD value */
96 mghi %r1,1000 /* convert to nanoseconds */
97 srlg %r1,%r1,12 /* r1 = cputime in nanosec */
98 lgr %r4,%r1
99 larl %r5,13f
100 srlg %r1,%r1,9 /* divide by 1000000000 */
101 mlg %r0,8(%r5)
102 srlg %r0,%r0,11 /* r0 = tv_sec */
103 stg %r0,0(%r3)
104 msg %r0,0(%r5) /* calculate tv_nsec */
105 slgr %r4,%r0 /* r4 = tv_nsec */
106 stg %r4,8(%r3)
107 lghi %r2,0
108 br %r14
109
82 /* Fallback to system call */ 110 /* Fallback to system call */
839: lghi %r1,__NR_clock_gettime 11112: lghi %r1,__NR_clock_gettime
84 svc 0 112 svc 0
85 br %r14 113 br %r14
86 114
8710: .quad 1000000000 11513: .quad 1000000000
11614: .quad 19342813113834067
88 .cfi_endproc 117 .cfi_endproc
89 .size __kernel_clock_gettime,.-__kernel_clock_gettime 118 .size __kernel_clock_gettime,.-__kernel_clock_gettime
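The new CLOCK_THREAD_CPUTIME_ID fast path above reads the per-cpu ECTG values,
converts the resulting CPU-timer delta from TOD format to nanoseconds
(multiply by 1000, shift right by 12, since bit 51 of the TOD format is one
microsecond) and splits it into seconds and nanoseconds; the
.quad 19342813113834067 constant (about 2^84 / 10^9) is the reciprocal it
multiplies by to avoid a division. A hedged C sketch of the same conversion,
using a plain division for clarity:

#include <stdint.h>
#include <time.h>

/*
 * Convert a thread cputime delta given in s390 TOD clock format
 * (bit 51 = 1 microsecond) into a struct timespec, the way the
 * vdso fast path above does.
 */
static void tod_cputime_to_timespec(uint64_t tod, struct timespec *ts)
{
	uint64_t ns = (tod * 1000) >> 12;  /* TOD units -> nanoseconds */

	ts->tv_sec  = ns / 1000000000ULL;
	ts->tv_nsec = ns % 1000000000ULL;
}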
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 75a6e62ea973..2fb36e462194 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -23,19 +23,43 @@
23#include <asm/s390_ext.h> 23#include <asm/s390_ext.h>
24#include <asm/timer.h> 24#include <asm/timer.h>
25#include <asm/irq_regs.h> 25#include <asm/irq_regs.h>
26#include <asm/cpu.h>
26 27
27static ext_int_info_t ext_int_info_timer; 28static ext_int_info_t ext_int_info_timer;
29
28static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer); 30static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer);
29 31
32DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = {
33 .lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock)
34};
35
36static inline __u64 get_vtimer(void)
37{
38 __u64 timer;
39
40 asm volatile("STPT %0" : "=m" (timer));
41 return timer;
42}
43
44static inline void set_vtimer(__u64 expires)
45{
46 __u64 timer;
47
48 asm volatile (" STPT %0\n" /* Store current cpu timer value */
49 " SPT %1" /* Set new value immediatly afterwards */
50 : "=m" (timer) : "m" (expires) );
51 S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
52 S390_lowcore.last_update_timer = expires;
53}
54
30/* 55/*
31 * Update process times based on virtual cpu times stored by entry.S 56 * Update process times based on virtual cpu times stored by entry.S
32 * to the lowcore fields user_timer, system_timer & steal_clock. 57 * to the lowcore fields user_timer, system_timer & steal_clock.
33 */ 58 */
34void account_process_tick(struct task_struct *tsk, int user_tick) 59static void do_account_vtime(struct task_struct *tsk, int hardirq_offset)
35{ 60{
36 cputime_t cputime; 61 struct thread_info *ti = task_thread_info(tsk);
37 __u64 timer, clock; 62 __u64 timer, clock, user, system, steal;
38 int rcu_user_flag;
39 63
40 timer = S390_lowcore.last_update_timer; 64 timer = S390_lowcore.last_update_timer;
41 clock = S390_lowcore.last_update_clock; 65 clock = S390_lowcore.last_update_clock;
@@ -44,50 +68,41 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
44 : "=m" (S390_lowcore.last_update_timer), 68 : "=m" (S390_lowcore.last_update_timer),
45 "=m" (S390_lowcore.last_update_clock) ); 69 "=m" (S390_lowcore.last_update_clock) );
46 S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; 70 S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
47 S390_lowcore.steal_clock += S390_lowcore.last_update_clock - clock; 71 S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock;
48 72
49 cputime = S390_lowcore.user_timer >> 12; 73 user = S390_lowcore.user_timer - ti->user_timer;
50 rcu_user_flag = cputime != 0; 74 S390_lowcore.steal_timer -= user;
51 S390_lowcore.user_timer -= cputime << 12; 75 ti->user_timer = S390_lowcore.user_timer;
52 S390_lowcore.steal_clock -= cputime << 12; 76 account_user_time(tsk, user, user);
53 account_user_time(tsk, cputime); 77
54 78 system = S390_lowcore.system_timer - ti->system_timer;
55 cputime = S390_lowcore.system_timer >> 12; 79 S390_lowcore.steal_timer -= system;
56 S390_lowcore.system_timer -= cputime << 12; 80 ti->system_timer = S390_lowcore.system_timer;
57 S390_lowcore.steal_clock -= cputime << 12; 81 account_system_time(tsk, hardirq_offset, system, system);
58 account_system_time(tsk, HARDIRQ_OFFSET, cputime); 82
59 83 steal = S390_lowcore.steal_timer;
60 cputime = S390_lowcore.steal_clock; 84 if ((s64) steal > 0) {
61 if ((__s64) cputime > 0) { 85 S390_lowcore.steal_timer = 0;
62 cputime >>= 12; 86 account_steal_time(steal);
63 S390_lowcore.steal_clock -= cputime << 12;
64 account_steal_time(tsk, cputime);
65 } 87 }
66} 88}
67 89
68/* 90void account_vtime(struct task_struct *prev, struct task_struct *next)
69 * Update process times based on virtual cpu times stored by entry.S
70 * to the lowcore fields user_timer, system_timer & steal_clock.
71 */
72void account_vtime(struct task_struct *tsk)
73{ 91{
74 cputime_t cputime; 92 struct thread_info *ti;
75 __u64 timer; 93
76 94 do_account_vtime(prev, 0);
77 timer = S390_lowcore.last_update_timer; 95 ti = task_thread_info(prev);
78 asm volatile (" STPT %0" /* Store current cpu timer value */ 96 ti->user_timer = S390_lowcore.user_timer;
79 : "=m" (S390_lowcore.last_update_timer) ); 97 ti->system_timer = S390_lowcore.system_timer;
80 S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; 98 ti = task_thread_info(next);
81 99 S390_lowcore.user_timer = ti->user_timer;
82 cputime = S390_lowcore.user_timer >> 12; 100 S390_lowcore.system_timer = ti->system_timer;
83 S390_lowcore.user_timer -= cputime << 12; 101}
84 S390_lowcore.steal_clock -= cputime << 12;
85 account_user_time(tsk, cputime);
86 102
87 cputime = S390_lowcore.system_timer >> 12; 103void account_process_tick(struct task_struct *tsk, int user_tick)
88 S390_lowcore.system_timer -= cputime << 12; 104{
89 S390_lowcore.steal_clock -= cputime << 12; 105 do_account_vtime(tsk, HARDIRQ_OFFSET);
90 account_system_time(tsk, 0, cputime);
91} 106}
92 107
93/* 108/*
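The rewritten accounting above keeps a per-thread snapshot of the lowcore
user/system timers in thread_info and charges only the delta since the last
snapshot, instead of repeatedly shaving 4096-unit chunks off the lowcore
counters; on a context switch account_vtime() carries the snapshots from prev
to next together with the lowcore values. A hedged user-space sketch of the
delta-snapshot idea (all names are illustrative):

#include <stdint.h>

struct cpu_timers {     /* stands in for the lowcore fields */
	uint64_t user_timer;
	uint64_t system_timer;
};

struct thread_snap {    /* stands in for the new thread_info fields */
	uint64_t user_timer;
	uint64_t system_timer;
};

static void account_delta(const struct cpu_timers *cpu, struct thread_snap *ti,
			  uint64_t *user_out, uint64_t *system_out)
{
	/* Charge only what accumulated since the last snapshot ... */
	*user_out = cpu->user_timer - ti->user_timer;
	*system_out = cpu->system_timer - ti->system_timer;

	/* ... and move the snapshot forward. */
	ti->user_timer = cpu->user_timer;
	ti->system_timer = cpu->system_timer;
}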
@@ -96,80 +111,131 @@ void account_vtime(struct task_struct *tsk)
96 */ 111 */
97void account_system_vtime(struct task_struct *tsk) 112void account_system_vtime(struct task_struct *tsk)
98{ 113{
99 cputime_t cputime; 114 struct thread_info *ti = task_thread_info(tsk);
100 __u64 timer; 115 __u64 timer, system;
101 116
102 timer = S390_lowcore.last_update_timer; 117 timer = S390_lowcore.last_update_timer;
103 asm volatile (" STPT %0" /* Store current cpu timer value */ 118 S390_lowcore.last_update_timer = get_vtimer();
104 : "=m" (S390_lowcore.last_update_timer) );
105 S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; 119 S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
106 120
107 cputime = S390_lowcore.system_timer >> 12; 121 system = S390_lowcore.system_timer - ti->system_timer;
108 S390_lowcore.system_timer -= cputime << 12; 122 S390_lowcore.steal_timer -= system;
109 S390_lowcore.steal_clock -= cputime << 12; 123 ti->system_timer = S390_lowcore.system_timer;
110 account_system_time(tsk, 0, cputime); 124 account_system_time(tsk, 0, system, system);
111} 125}
112EXPORT_SYMBOL_GPL(account_system_vtime); 126EXPORT_SYMBOL_GPL(account_system_vtime);
113 127
114static inline void set_vtimer(__u64 expires) 128void vtime_start_cpu(void)
115{
116 __u64 timer;
117
118 asm volatile (" STPT %0\n" /* Store current cpu timer value */
119 " SPT %1" /* Set new value immediatly afterwards */
120 : "=m" (timer) : "m" (expires) );
121 S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
122 S390_lowcore.last_update_timer = expires;
123
124 /* store expire time for this CPU timer */
125 __get_cpu_var(virt_cpu_timer).to_expire = expires;
126}
127
128void vtime_start_cpu_timer(void)
129{ 129{
130 struct vtimer_queue *vt_list; 130 struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
131 131 struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer);
132 vt_list = &__get_cpu_var(virt_cpu_timer); 132 __u64 idle_time, expires;
133 133
134 /* CPU timer interrupt is pending, don't reprogramm it */ 134 /* Account time spent with enabled wait psw loaded as idle time. */
135 if (vt_list->idle & 1LL<<63) 135 idle_time = S390_lowcore.int_clock - idle->idle_enter;
136 return; 136 account_idle_time(idle_time);
137 S390_lowcore.last_update_clock = S390_lowcore.int_clock;
138
139 /* Account system time spent going idle. */
140 S390_lowcore.system_timer += S390_lowcore.last_update_timer - vq->idle;
141 S390_lowcore.last_update_timer = S390_lowcore.async_enter_timer;
142
143 /* Restart vtime CPU timer */
144 if (vq->do_spt) {
145 /* Program old expire value but first save progress. */
146 expires = vq->idle - S390_lowcore.async_enter_timer;
147 expires += get_vtimer();
148 set_vtimer(expires);
149 } else {
150 /* Don't account the CPU timer delta while the cpu was idle. */
151 vq->elapsed -= vq->idle - S390_lowcore.async_enter_timer;
152 }
137 153
138 if (!list_empty(&vt_list->list)) 154 spin_lock(&idle->lock);
139 set_vtimer(vt_list->idle); 155 idle->idle_time += idle_time;
156 idle->idle_enter = 0ULL;
157 idle->idle_count++;
158 spin_unlock(&idle->lock);
140} 159}
141 160
142void vtime_stop_cpu_timer(void) 161void vtime_stop_cpu(void)
143{ 162{
144 struct vtimer_queue *vt_list; 163 struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
145 164 struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer);
146 vt_list = &__get_cpu_var(virt_cpu_timer); 165 psw_t psw;
147 166
148 /* nothing to do */ 167 /* Wait for external, I/O or machine check interrupt. */
149 if (list_empty(&vt_list->list)) { 168 psw.mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT;
150 vt_list->idle = VTIMER_MAX_SLICE; 169
151 goto fire; 170 /* Check if the CPU timer needs to be reprogrammed. */
171 if (vq->do_spt) {
172 __u64 vmax = VTIMER_MAX_SLICE;
173 /*
174 * The inline assembly is equivalent to
175 * vq->idle = get_cpu_timer();
176 * set_cpu_timer(VTIMER_MAX_SLICE);
177 * idle->idle_enter = get_clock();
178 * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
179 * PSW_MASK_IO | PSW_MASK_EXT);
180 * The difference is that the inline assembly makes sure that
 181 * the last three instructions are stpt, stck and lpsw in that
182 * order. This is done to increase the precision.
183 */
184 asm volatile(
185#ifndef CONFIG_64BIT
186 " basr 1,0\n"
187 "0: ahi 1,1f-0b\n"
188 " st 1,4(%2)\n"
189#else /* CONFIG_64BIT */
190 " larl 1,1f\n"
191 " stg 1,8(%2)\n"
192#endif /* CONFIG_64BIT */
193 " stpt 0(%4)\n"
194 " spt 0(%5)\n"
195 " stck 0(%3)\n"
196#ifndef CONFIG_64BIT
197 " lpsw 0(%2)\n"
198#else /* CONFIG_64BIT */
199 " lpswe 0(%2)\n"
200#endif /* CONFIG_64BIT */
201 "1:"
202 : "=m" (idle->idle_enter), "=m" (vq->idle)
203 : "a" (&psw), "a" (&idle->idle_enter),
204 "a" (&vq->idle), "a" (&vmax), "m" (vmax), "m" (psw)
205 : "memory", "cc", "1");
206 } else {
207 /*
208 * The inline assembly is equivalent to
209 * vq->idle = get_cpu_timer();
210 * idle->idle_enter = get_clock();
211 * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
212 * PSW_MASK_IO | PSW_MASK_EXT);
213 * The difference is that the inline assembly makes sure that
 214 * the last three instructions are stpt, stck and lpsw in that
215 * order. This is done to increase the precision.
216 */
217 asm volatile(
218#ifndef CONFIG_64BIT
219 " basr 1,0\n"
220 "0: ahi 1,1f-0b\n"
221 " st 1,4(%2)\n"
222#else /* CONFIG_64BIT */
223 " larl 1,1f\n"
224 " stg 1,8(%2)\n"
225#endif /* CONFIG_64BIT */
226 " stpt 0(%4)\n"
227 " stck 0(%3)\n"
228#ifndef CONFIG_64BIT
229 " lpsw 0(%2)\n"
230#else /* CONFIG_64BIT */
231 " lpswe 0(%2)\n"
232#endif /* CONFIG_64BIT */
233 "1:"
234 : "=m" (idle->idle_enter), "=m" (vq->idle)
235 : "a" (&psw), "a" (&idle->idle_enter),
236 "a" (&vq->idle), "m" (psw)
237 : "memory", "cc", "1");
152 } 238 }
153
154 /* store the actual expire value */
155 asm volatile ("STPT %0" : "=m" (vt_list->idle));
156
157 /*
158 * If the CPU timer is negative we don't reprogramm
159 * it because we will get instantly an interrupt.
160 */
161 if (vt_list->idle & 1LL<<63)
162 return;
163
164 vt_list->offset += vt_list->to_expire - vt_list->idle;
165
166 /*
167 * We cannot halt the CPU timer, we just write a value that
168 * nearly never expires (only after 71 years) and re-write
169 * the stored expire value if we continue the timer
170 */
171 fire:
172 set_vtimer(VTIMER_MAX_SLICE);
173} 239}
174 240
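vtime_stop_cpu above stores the CPU-timer value (vq->idle) and the TOD clock
(idle->idle_enter) as the very last instructions before loading the
enabled-wait PSW; vtime_start_cpu later charges the wall-clock gap up to the
wakeup interrupt as idle time and keeps the CPU-timer ticks consumed during
the wait from being billed to the virtual timers. A hedged sketch of the
wakeup-side arithmetic (async_enter_timer stands for the CPU-timer value
stored by the interrupt entry code; names are illustrative):

#include <stdint.h>

struct idle_state {
	uint64_t idle_enter;         /* TOD clock right before the wait PSW */
	uint64_t cpu_timer_at_idle;  /* CPU timer right before the wait PSW */
};

/*
 * Returns the idle time to account; *timer_progress is what the CPU timer
 * consumed while the cpu was waiting (it counts down).
 */
static uint64_t idle_wakeup(struct idle_state *st, uint64_t int_clock,
			    uint64_t async_enter_timer,
			    uint64_t *timer_progress)
{
	uint64_t idle_time = int_clock - st->idle_enter;

	*timer_progress = st->cpu_timer_at_idle - async_enter_timer;
	st->idle_enter = 0;
	return idle_time;
}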
175/* 241/*
@@ -195,30 +261,23 @@ static void list_add_sorted(struct vtimer_list *timer, struct list_head *head)
195 */ 261 */
196static void do_callbacks(struct list_head *cb_list) 262static void do_callbacks(struct list_head *cb_list)
197{ 263{
198 struct vtimer_queue *vt_list; 264 struct vtimer_queue *vq;
199 struct vtimer_list *event, *tmp; 265 struct vtimer_list *event, *tmp;
200 void (*fn)(unsigned long);
201 unsigned long data;
202 266
203 if (list_empty(cb_list)) 267 if (list_empty(cb_list))
204 return; 268 return;
205 269
206 vt_list = &__get_cpu_var(virt_cpu_timer); 270 vq = &__get_cpu_var(virt_cpu_timer);
207 271
208 list_for_each_entry_safe(event, tmp, cb_list, entry) { 272 list_for_each_entry_safe(event, tmp, cb_list, entry) {
209 fn = event->function; 273 list_del_init(&event->entry);
210 data = event->data; 274 (event->function)(event->data);
211 fn(data); 275 if (event->interval) {
212 276 /* Recharge interval timer */
213 if (!event->interval) 277 event->expires = event->interval + vq->elapsed;
214 /* delete one shot timer */ 278 spin_lock(&vq->lock);
215 list_del_init(&event->entry); 279 list_add_sorted(event, &vq->list);
216 else { 280 spin_unlock(&vq->lock);
217 /* move interval timer back to list */
218 spin_lock(&vt_list->lock);
219 list_del_init(&event->entry);
220 list_add_sorted(event, &vt_list->list);
221 spin_unlock(&vt_list->lock);
222 } 281 }
223 } 282 }
224} 283}
@@ -228,64 +287,57 @@ static void do_callbacks(struct list_head *cb_list)
228 */ 287 */
229static void do_cpu_timer_interrupt(__u16 error_code) 288static void do_cpu_timer_interrupt(__u16 error_code)
230{ 289{
231 __u64 next, delta; 290 struct vtimer_queue *vq;
232 struct vtimer_queue *vt_list;
233 struct vtimer_list *event, *tmp; 291 struct vtimer_list *event, *tmp;
234 struct list_head *ptr; 292 struct list_head cb_list; /* the callback queue */
235 /* the callback queue */ 293 __u64 elapsed, next;
236 struct list_head cb_list;
237 294
238 INIT_LIST_HEAD(&cb_list); 295 INIT_LIST_HEAD(&cb_list);
239 vt_list = &__get_cpu_var(virt_cpu_timer); 296 vq = &__get_cpu_var(virt_cpu_timer);
240 297
241 /* walk timer list, fire all expired events */ 298 /* walk timer list, fire all expired events */
242 spin_lock(&vt_list->lock); 299 spin_lock(&vq->lock);
243 300
244 if (vt_list->to_expire < VTIMER_MAX_SLICE) 301 elapsed = vq->elapsed + (vq->timer - S390_lowcore.async_enter_timer);
245 vt_list->offset += vt_list->to_expire; 302 BUG_ON((s64) elapsed < 0);
246 303 vq->elapsed = 0;
247 list_for_each_entry_safe(event, tmp, &vt_list->list, entry) { 304 list_for_each_entry_safe(event, tmp, &vq->list, entry) {
248 if (event->expires > vt_list->offset) 305 if (event->expires < elapsed)
249 /* found first unexpired event, leave */ 306 /* move expired timer to the callback queue */
250 break; 307 list_move_tail(&event->entry, &cb_list);
251 308 else
252 /* re-charge interval timer, we have to add the offset */ 309 event->expires -= elapsed;
253 if (event->interval)
254 event->expires = event->interval + vt_list->offset;
255
256 /* move expired timer to the callback queue */
257 list_move_tail(&event->entry, &cb_list);
258 } 310 }
259 spin_unlock(&vt_list->lock); 311 spin_unlock(&vq->lock);
312
313 vq->do_spt = list_empty(&cb_list);
260 do_callbacks(&cb_list); 314 do_callbacks(&cb_list);
261 315
262 /* next event is first in list */ 316 /* next event is first in list */
263 spin_lock(&vt_list->lock); 317 next = VTIMER_MAX_SLICE;
264 if (!list_empty(&vt_list->list)) { 318 spin_lock(&vq->lock);
265 ptr = vt_list->list.next; 319 if (!list_empty(&vq->list)) {
266 event = list_entry(ptr, struct vtimer_list, entry); 320 event = list_first_entry(&vq->list, struct vtimer_list, entry);
267 next = event->expires - vt_list->offset; 321 next = event->expires;
268 322 } else
269 /* add the expired time from this interrupt handler 323 vq->do_spt = 0;
270 * and the callback functions 324 spin_unlock(&vq->lock);
271 */ 325 /*
272 asm volatile ("STPT %0" : "=m" (delta)); 326 * To improve precision add the time spent by the
273 delta = 0xffffffffffffffffLL - delta + 1; 327 * interrupt handler to the elapsed time.
274 vt_list->offset += delta; 328 * Note: CPU timer counts down and we got an interrupt,
275 next -= delta; 329 * the current content is negative
276 } else { 330 */
277 vt_list->offset = 0; 331 elapsed = S390_lowcore.async_enter_timer - get_vtimer();
278 next = VTIMER_MAX_SLICE; 332 set_vtimer(next - elapsed);
279 } 333 vq->timer = next - elapsed;
280 spin_unlock(&vt_list->lock); 334 vq->elapsed = elapsed;
281 set_vtimer(next);
282} 335}
283 336
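do_cpu_timer_interrupt above turns the time consumed since the timer was last
programmed into an "elapsed" value, moves every entry whose relative expiry is
below it onto a private callback list and rebases the survivors; before
reprogramming it also adds the time spent inside the handler itself so the
next expiry stays precise. A hedged sketch of the sweep, with a plain array
standing in for the kernel's linked list:

#include <stdint.h>
#include <stddef.h>

/*
 * Split expired entries off and rebase the rest. Returns the number of
 * expired entries copied to expired_out; survivors are compacted to the
 * front of expires[] with 'elapsed' already subtracted.
 */
static size_t sweep_expired(uint64_t *expires, size_t n, uint64_t elapsed,
			    uint64_t *expired_out)
{
	size_t nexpired = 0, nkept = 0;

	for (size_t i = 0; i < n; i++) {
		if (expires[i] < elapsed)
			expired_out[nexpired++] = expires[i];
		else
			expires[nkept++] = expires[i] - elapsed;
	}
	return nexpired;
}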
284void init_virt_timer(struct vtimer_list *timer) 337void init_virt_timer(struct vtimer_list *timer)
285{ 338{
286 timer->function = NULL; 339 timer->function = NULL;
287 INIT_LIST_HEAD(&timer->entry); 340 INIT_LIST_HEAD(&timer->entry);
288 spin_lock_init(&timer->lock);
289} 341}
290EXPORT_SYMBOL(init_virt_timer); 342EXPORT_SYMBOL(init_virt_timer);
291 343
@@ -299,44 +351,40 @@ static inline int vtimer_pending(struct vtimer_list *timer)
299 */ 351 */
300static void internal_add_vtimer(struct vtimer_list *timer) 352static void internal_add_vtimer(struct vtimer_list *timer)
301{ 353{
354 struct vtimer_queue *vq;
302 unsigned long flags; 355 unsigned long flags;
303 __u64 done; 356 __u64 left, expires;
304 struct vtimer_list *event;
305 struct vtimer_queue *vt_list;
306 357
307 vt_list = &per_cpu(virt_cpu_timer, timer->cpu); 358 vq = &per_cpu(virt_cpu_timer, timer->cpu);
308 spin_lock_irqsave(&vt_list->lock, flags); 359 spin_lock_irqsave(&vq->lock, flags);
309 360
310 BUG_ON(timer->cpu != smp_processor_id()); 361 BUG_ON(timer->cpu != smp_processor_id());
311 362
312 /* if list is empty we only have to set the timer */ 363 if (list_empty(&vq->list)) {
313 if (list_empty(&vt_list->list)) { 364 /* First timer on this cpu, just program it. */
314 /* reset the offset, this may happen if the last timer was 365 list_add(&timer->entry, &vq->list);
315 * just deleted by mod_virt_timer and the interrupt 366 set_vtimer(timer->expires);
316 * didn't happen until here 367 vq->timer = timer->expires;
317 */ 368 vq->elapsed = 0;
318 vt_list->offset = 0; 369 } else {
319 goto fire; 370 /* Check progress of old timers. */
371 expires = timer->expires;
372 left = get_vtimer();
373 if (likely((s64) expires < (s64) left)) {
374 /* The new timer expires before the current timer. */
375 set_vtimer(expires);
376 vq->elapsed += vq->timer - left;
377 vq->timer = expires;
378 } else {
379 vq->elapsed += vq->timer - left;
380 vq->timer = left;
381 }
382 /* Insert new timer into per cpu list. */
383 timer->expires += vq->elapsed;
384 list_add_sorted(timer, &vq->list);
320 } 385 }
321 386
322 /* save progress */ 387 spin_unlock_irqrestore(&vq->lock, flags);
323 asm volatile ("STPT %0" : "=m" (done));
324
325 /* calculate completed work */
326 done = vt_list->to_expire - done + vt_list->offset;
327 vt_list->offset = 0;
328
329 list_for_each_entry(event, &vt_list->list, entry)
330 event->expires -= done;
331
332 fire:
333 list_add_sorted(timer, &vt_list->list);
334
335 /* get first element, which is the next vtimer slice */
336 event = list_entry(vt_list->list.next, struct vtimer_list, entry);
337
338 set_vtimer(event->expires);
339 spin_unlock_irqrestore(&vt_list->lock, flags);
340 /* release CPU acquired in prepare_vtimer or mod_virt_timer() */ 388 /* release CPU acquired in prepare_vtimer or mod_virt_timer() */
341 put_cpu(); 389 put_cpu();
342} 390}
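When internal_add_vtimer above queues a timer onto a non-empty list it first
credits the ticks the currently programmed timer has already consumed to
vq->elapsed, then either reprograms the hardware (the new timer fires first)
or leaves it alone, and finally stores the new expiry relative to the
accumulated elapsed time, so nothing has to be subtracted from every entry in
the list. A hedged sketch of that bookkeeping (the result of get_vtimer() is
passed in as "remaining"; names are illustrative):

#include <stdint.h>

struct queue_state {
	uint64_t timer;    /* value last programmed into the CPU timer */
	uint64_t elapsed;  /* time consumed by expired timers so far */
};

static uint64_t queue_new_timer(struct queue_state *vq, uint64_t expires,
				uint64_t remaining, int *reprogram)
{
	/* Credit what the currently programmed timer already ran. */
	vq->elapsed += vq->timer - remaining;

	if ((int64_t) expires < (int64_t) remaining) {
		vq->timer = expires;   /* new timer fires first */
		*reprogram = 1;
	} else {
		vq->timer = remaining; /* keep the current programming */
		*reprogram = 0;
	}
	/* List entries are stored relative to the elapsed total. */
	return expires + vq->elapsed;
}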
@@ -381,14 +429,15 @@ EXPORT_SYMBOL(add_virt_timer_periodic);
381 * If we change a pending timer the function must be called on the CPU 429 * If we change a pending timer the function must be called on the CPU
382 * where the timer is running on, e.g. by smp_call_function_single() 430 * where the timer is running on, e.g. by smp_call_function_single()
383 * 431 *
384 * The original mod_timer adds the timer if it is not pending. For compatibility 432 * The original mod_timer adds the timer if it is not pending. For
385 * we do the same. The timer will be added on the current CPU as a oneshot timer. 433 * compatibility we do the same. The timer will be added on the current
434 * CPU as a oneshot timer.
386 * 435 *
387 * returns whether it has modified a pending timer (1) or not (0) 436 * returns whether it has modified a pending timer (1) or not (0)
388 */ 437 */
389int mod_virt_timer(struct vtimer_list *timer, __u64 expires) 438int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
390{ 439{
391 struct vtimer_queue *vt_list; 440 struct vtimer_queue *vq;
392 unsigned long flags; 441 unsigned long flags;
393 int cpu; 442 int cpu;
394 443
@@ -404,17 +453,17 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
404 return 1; 453 return 1;
405 454
406 cpu = get_cpu(); 455 cpu = get_cpu();
407 vt_list = &per_cpu(virt_cpu_timer, cpu); 456 vq = &per_cpu(virt_cpu_timer, cpu);
408 457
409 /* check if we run on the right CPU */ 458 /* check if we run on the right CPU */
410 BUG_ON(timer->cpu != cpu); 459 BUG_ON(timer->cpu != cpu);
411 460
412 /* disable interrupts before test if timer is pending */ 461 /* disable interrupts before test if timer is pending */
413 spin_lock_irqsave(&vt_list->lock, flags); 462 spin_lock_irqsave(&vq->lock, flags);
414 463
415 /* if timer isn't pending add it on the current CPU */ 464 /* if timer isn't pending add it on the current CPU */
416 if (!vtimer_pending(timer)) { 465 if (!vtimer_pending(timer)) {
417 spin_unlock_irqrestore(&vt_list->lock, flags); 466 spin_unlock_irqrestore(&vq->lock, flags);
418 /* we do not activate an interval timer with mod_virt_timer */ 467 /* we do not activate an interval timer with mod_virt_timer */
419 timer->interval = 0; 468 timer->interval = 0;
420 timer->expires = expires; 469 timer->expires = expires;
@@ -431,7 +480,7 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
431 timer->interval = expires; 480 timer->interval = expires;
432 481
433 /* the timer can't expire anymore so we can release the lock */ 482 /* the timer can't expire anymore so we can release the lock */
434 spin_unlock_irqrestore(&vt_list->lock, flags); 483 spin_unlock_irqrestore(&vq->lock, flags);
435 internal_add_vtimer(timer); 484 internal_add_vtimer(timer);
436 return 1; 485 return 1;
437} 486}
@@ -445,25 +494,19 @@ EXPORT_SYMBOL(mod_virt_timer);
445int del_virt_timer(struct vtimer_list *timer) 494int del_virt_timer(struct vtimer_list *timer)
446{ 495{
447 unsigned long flags; 496 unsigned long flags;
448 struct vtimer_queue *vt_list; 497 struct vtimer_queue *vq;
449 498
450 /* check if timer is pending */ 499 /* check if timer is pending */
451 if (!vtimer_pending(timer)) 500 if (!vtimer_pending(timer))
452 return 0; 501 return 0;
453 502
454 vt_list = &per_cpu(virt_cpu_timer, timer->cpu); 503 vq = &per_cpu(virt_cpu_timer, timer->cpu);
455 spin_lock_irqsave(&vt_list->lock, flags); 504 spin_lock_irqsave(&vq->lock, flags);
456 505
457 /* we don't interrupt a running timer, just let it expire! */ 506 /* we don't interrupt a running timer, just let it expire! */
458 list_del_init(&timer->entry); 507 list_del_init(&timer->entry);
459 508
460 /* last timer removed */ 509 spin_unlock_irqrestore(&vq->lock, flags);
461 if (list_empty(&vt_list->list)) {
462 vt_list->to_expire = 0;
463 vt_list->offset = 0;
464 }
465
466 spin_unlock_irqrestore(&vt_list->lock, flags);
467 return 1; 510 return 1;
468} 511}
469EXPORT_SYMBOL(del_virt_timer); 512EXPORT_SYMBOL(del_virt_timer);
@@ -473,24 +516,19 @@ EXPORT_SYMBOL(del_virt_timer);
473 */ 516 */
474void init_cpu_vtimer(void) 517void init_cpu_vtimer(void)
475{ 518{
476 struct vtimer_queue *vt_list; 519 struct vtimer_queue *vq;
477 520
478 /* kick the virtual timer */ 521 /* kick the virtual timer */
479 S390_lowcore.exit_timer = VTIMER_MAX_SLICE;
480 S390_lowcore.last_update_timer = VTIMER_MAX_SLICE;
481 asm volatile ("SPT %0" : : "m" (S390_lowcore.last_update_timer));
482 asm volatile ("STCK %0" : "=m" (S390_lowcore.last_update_clock)); 522 asm volatile ("STCK %0" : "=m" (S390_lowcore.last_update_clock));
523 asm volatile ("STPT %0" : "=m" (S390_lowcore.last_update_timer));
524
525 /* initialize per cpu vtimer structure */
526 vq = &__get_cpu_var(virt_cpu_timer);
527 INIT_LIST_HEAD(&vq->list);
528 spin_lock_init(&vq->lock);
483 529
484 /* enable cpu timer interrupts */ 530 /* enable cpu timer interrupts */
485 __ctl_set_bit(0,10); 531 __ctl_set_bit(0,10);
486
487 vt_list = &__get_cpu_var(virt_cpu_timer);
488 INIT_LIST_HEAD(&vt_list->list);
489 spin_lock_init(&vt_list->lock);
490 vt_list->to_expire = 0;
491 vt_list->offset = 0;
492 vt_list->idle = 0;
493
494} 532}
495 533
496void __init vtime_init(void) 534void __init vtime_init(void)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 8b00eb2ddf57..be8497186b96 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -113,8 +113,6 @@ long kvm_arch_dev_ioctl(struct file *filp,
113int kvm_dev_ioctl_check_extension(long ext) 113int kvm_dev_ioctl_check_extension(long ext)
114{ 114{
115 switch (ext) { 115 switch (ext) {
116 case KVM_CAP_USER_MEMORY:
117 return 1;
118 default: 116 default:
119 return 0; 117 return 0;
120 } 118 }
@@ -185,8 +183,6 @@ struct kvm *kvm_arch_create_vm(void)
185 debug_register_view(kvm->arch.dbf, &debug_sprintf_view); 183 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
186 VM_EVENT(kvm, 3, "%s", "vm created"); 184 VM_EVENT(kvm, 3, "%s", "vm created");
187 185
188 try_module_get(THIS_MODULE);
189
190 return kvm; 186 return kvm;
191out_nodbf: 187out_nodbf:
192 free_page((unsigned long)(kvm->arch.sca)); 188 free_page((unsigned long)(kvm->arch.sca));
@@ -196,13 +192,33 @@ out_nokvm:
196 return ERR_PTR(rc); 192 return ERR_PTR(rc);
197} 193}
198 194
195void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
196{
197 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
198 free_page((unsigned long)(vcpu->arch.sie_block));
199 kvm_vcpu_uninit(vcpu);
200 kfree(vcpu);
201}
202
203static void kvm_free_vcpus(struct kvm *kvm)
204{
205 unsigned int i;
206
207 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
208 if (kvm->vcpus[i]) {
209 kvm_arch_vcpu_destroy(kvm->vcpus[i]);
210 kvm->vcpus[i] = NULL;
211 }
212 }
213}
214
199void kvm_arch_destroy_vm(struct kvm *kvm) 215void kvm_arch_destroy_vm(struct kvm *kvm)
200{ 216{
201 debug_unregister(kvm->arch.dbf); 217 kvm_free_vcpus(kvm);
202 kvm_free_physmem(kvm); 218 kvm_free_physmem(kvm);
203 free_page((unsigned long)(kvm->arch.sca)); 219 free_page((unsigned long)(kvm->arch.sca));
220 debug_unregister(kvm->arch.dbf);
204 kfree(kvm); 221 kfree(kvm);
205 module_put(THIS_MODULE);
206} 222}
207 223
208/* Section: vcpu related */ 224/* Section: vcpu related */
@@ -213,8 +229,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
213 229
214void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 230void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
215{ 231{
 216 /* kvm common code refers to this, but does'nt call it */ 232 /* Nothing to do */
217 BUG();
218} 233}
219 234
220void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 235void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -308,8 +323,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
308 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, 323 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
309 vcpu->arch.sie_block); 324 vcpu->arch.sie_block);
310 325
311 try_module_get(THIS_MODULE);
312
313 return vcpu; 326 return vcpu;
314out_free_cpu: 327out_free_cpu:
315 kfree(vcpu); 328 kfree(vcpu);
@@ -317,14 +330,6 @@ out_nomem:
317 return ERR_PTR(rc); 330 return ERR_PTR(rc);
318} 331}
319 332
320void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
321{
322 VCPU_EVENT(vcpu, 3, "%s", "destroy cpu");
323 free_page((unsigned long)(vcpu->arch.sie_block));
324 kfree(vcpu);
325 module_put(THIS_MODULE);
326}
327
328int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 333int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
329{ 334{
330 /* kvm common code refers to this, but never calls it */ 335 /* kvm common code refers to this, but never calls it */
diff --git a/arch/sh/kernel/init_task.c b/arch/sh/kernel/init_task.c
index b151a25cb14d..80c35ff71d56 100644
--- a/arch/sh/kernel/init_task.c
+++ b/arch/sh/kernel/init_task.c
@@ -7,7 +7,6 @@
7#include <asm/uaccess.h> 7#include <asm/uaccess.h>
8#include <asm/pgtable.h> 8#include <asm/pgtable.h>
9 9
10static struct fs_struct init_fs = INIT_FS;
11static struct signal_struct init_signals = INIT_SIGNALS(init_signals); 10static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
12static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 11static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
13struct pt_regs fake_swapper_regs; 12struct pt_regs fake_swapper_regs;
diff --git a/arch/sparc/kernel/init_task.c b/arch/sparc/kernel/init_task.c
index 62126e4cec54..f28cb8278e98 100644
--- a/arch/sparc/kernel/init_task.c
+++ b/arch/sparc/kernel/init_task.c
@@ -8,7 +8,6 @@
8#include <asm/pgtable.h> 8#include <asm/pgtable.h>
9#include <asm/uaccess.h> 9#include <asm/uaccess.h>
10 10
11static struct fs_struct init_fs = INIT_FS;
12static struct signal_struct init_signals = INIT_SIGNALS(init_signals); 11static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
13static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 12static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
14struct mm_struct init_mm = INIT_MM(init_mm); 13struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/um/kernel/init_task.c b/arch/um/kernel/init_task.c
index 910eda8fca18..806d381947bf 100644
--- a/arch/um/kernel/init_task.c
+++ b/arch/um/kernel/init_task.c
@@ -10,7 +10,6 @@
10#include "linux/mqueue.h" 10#include "linux/mqueue.h"
11#include "asm/uaccess.h" 11#include "asm/uaccess.h"
12 12
13static struct fs_struct init_fs = INIT_FS;
14struct mm_struct init_mm = INIT_MM(init_mm); 13struct mm_struct init_mm = INIT_MM(init_mm);
15static struct signal_struct init_signals = INIT_SIGNALS(init_signals); 14static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
16static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 15static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 249d1e0824b5..862adb9bf0d4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -586,6 +586,16 @@ config AMD_IOMMU
586 your BIOS for an option to enable it or if you have an IVRS ACPI 586 your BIOS for an option to enable it or if you have an IVRS ACPI
587 table. 587 table.
588 588
589config AMD_IOMMU_STATS
590 bool "Export AMD IOMMU statistics to debugfs"
591 depends on AMD_IOMMU
592 select DEBUG_FS
593 help
594 This option enables code in the AMD IOMMU driver to collect various
595	  statistics about what's happening in the driver and exports that
596 information to userspace via debugfs.
597 If unsure, say N.
598
589# need this always selected by IOMMU for the VIA workaround 599# need this always selected by IOMMU for the VIA workaround
590config SWIOTLB 600config SWIOTLB
591 def_bool y if X86_64 601 def_bool y if X86_64
@@ -599,6 +609,9 @@ config SWIOTLB
599config IOMMU_HELPER 609config IOMMU_HELPER
600 def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU) 610 def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
601 611
612config IOMMU_API
613 def_bool (AMD_IOMMU || DMAR)
614
602config MAXSMP 615config MAXSMP
603 bool "Configure Maximum number of SMP Processors and NUMA Nodes" 616 bool "Configure Maximum number of SMP Processors and NUMA Nodes"
604 depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL 617 depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index b195f85526e3..9dabd00e9805 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -24,15 +24,14 @@
24#include <asm/ucontext.h> 24#include <asm/ucontext.h>
25#include <asm/uaccess.h> 25#include <asm/uaccess.h>
26#include <asm/i387.h> 26#include <asm/i387.h>
27#include <asm/ia32.h>
28#include <asm/ptrace.h> 27#include <asm/ptrace.h>
29#include <asm/ia32_unistd.h> 28#include <asm/ia32_unistd.h>
30#include <asm/user32.h> 29#include <asm/user32.h>
31#include <asm/sigcontext32.h> 30#include <asm/sigcontext32.h>
32#include <asm/proto.h> 31#include <asm/proto.h>
33#include <asm/vdso.h> 32#include <asm/vdso.h>
34
35#include <asm/sigframe.h> 33#include <asm/sigframe.h>
34#include <asm/sys_ia32.h>
36 35
37#define DEBUG_SIG 0 36#define DEBUG_SIG 0
38 37
diff --git a/arch/x86/ia32/ipc32.c b/arch/x86/ia32/ipc32.c
index d21991ce606c..29cdcd02ead3 100644
--- a/arch/x86/ia32/ipc32.c
+++ b/arch/x86/ia32/ipc32.c
@@ -8,6 +8,7 @@
8#include <linux/shm.h> 8#include <linux/shm.h>
9#include <linux/ipc.h> 9#include <linux/ipc.h>
10#include <linux/compat.h> 10#include <linux/compat.h>
11#include <asm/sys_ia32.h>
11 12
12asmlinkage long sys32_ipc(u32 call, int first, int second, int third, 13asmlinkage long sys32_ipc(u32 call, int first, int second, int third,
13 compat_uptr_t ptr, u32 fifth) 14 compat_uptr_t ptr, u32 fifth)
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 2e09dcd3c0a6..6c0d7f6231af 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -44,8 +44,8 @@
44#include <asm/types.h> 44#include <asm/types.h>
45#include <asm/uaccess.h> 45#include <asm/uaccess.h>
46#include <asm/atomic.h> 46#include <asm/atomic.h>
47#include <asm/ia32.h>
48#include <asm/vgtod.h> 47#include <asm/vgtod.h>
48#include <asm/sys_ia32.h>
49 49
50#define AA(__x) ((unsigned long)(__x)) 50#define AA(__x) ((unsigned long)(__x))
51 51
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index ac302a2fa339..95c8cd9d22b5 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -190,16 +190,23 @@
190/* FIXME: move this macro to <linux/pci.h> */ 190/* FIXME: move this macro to <linux/pci.h> */
191#define PCI_BUS(x) (((x) >> 8) & 0xff) 191#define PCI_BUS(x) (((x) >> 8) & 0xff)
192 192
193/* Protection domain flags */
194#define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */
195#define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops
196 domain for an IOMMU */
197
193/* 198/*
194 * This structure contains generic data for IOMMU protection domains 199 * This structure contains generic data for IOMMU protection domains
195 * independent of their use. 200 * independent of their use.
196 */ 201 */
197struct protection_domain { 202struct protection_domain {
198 spinlock_t lock; /* mostly used to lock the page table*/ 203 spinlock_t lock; /* mostly used to lock the page table*/
199 u16 id; /* the domain id written to the device table */ 204 u16 id; /* the domain id written to the device table */
200 int mode; /* paging mode (0-6 levels) */ 205 int mode; /* paging mode (0-6 levels) */
201 u64 *pt_root; /* page table root pointer */ 206 u64 *pt_root; /* page table root pointer */
202 void *priv; /* private data */ 207 unsigned long flags; /* flags to find out type of domain */
208 unsigned dev_cnt; /* devices assigned to this domain */
209 void *priv; /* private data */
203}; 210};
204 211
205/* 212/*
@@ -295,7 +302,7 @@ struct amd_iommu {
295 bool int_enabled; 302 bool int_enabled;
296 303
297 /* if one, we need to send a completion wait command */ 304 /* if one, we need to send a completion wait command */
298 int need_sync; 305 bool need_sync;
299 306
300 /* default dma_ops domain for that IOMMU */ 307 /* default dma_ops domain for that IOMMU */
301 struct dma_ops_domain *default_dom; 308 struct dma_ops_domain *default_dom;
@@ -374,7 +381,7 @@ extern struct protection_domain **amd_iommu_pd_table;
374extern unsigned long *amd_iommu_pd_alloc_bitmap; 381extern unsigned long *amd_iommu_pd_alloc_bitmap;
375 382
376/* will be 1 if device isolation is enabled */ 383/* will be 1 if device isolation is enabled */
377extern int amd_iommu_isolate; 384extern bool amd_iommu_isolate;
378 385
379/* 386/*
380 * If true, the addresses will be flushed on unmap time, not when 387 * If true, the addresses will be flushed on unmap time, not when
@@ -382,18 +389,6 @@ extern int amd_iommu_isolate;
382 */ 389 */
383extern bool amd_iommu_unmap_flush; 390extern bool amd_iommu_unmap_flush;
384 391
385/* takes a PCI device id and prints it out in a readable form */
386static inline void print_devid(u16 devid, int nl)
387{
388 int bus = devid >> 8;
389 int dev = devid >> 3 & 0x1f;
390 int fn = devid & 0x07;
391
392 printk("%02x:%02x.%x", bus, dev, fn);
393 if (nl)
394 printk("\n");
395}
396
397/* takes bus and device/function and returns the device id 392/* takes bus and device/function and returns the device id
398 * FIXME: should that be in generic PCI code? */ 393 * FIXME: should that be in generic PCI code? */
399static inline u16 calc_devid(u8 bus, u8 devfn) 394static inline u16 calc_devid(u8 bus, u8 devfn)
@@ -401,4 +396,32 @@ static inline u16 calc_devid(u8 bus, u8 devfn)
401 return (((u16)bus) << 8) | devfn; 396 return (((u16)bus) << 8) | devfn;
402} 397}
403 398
399#ifdef CONFIG_AMD_IOMMU_STATS
400
401struct __iommu_counter {
402 char *name;
403 struct dentry *dent;
404 u64 value;
405};
406
407#define DECLARE_STATS_COUNTER(nm) \
408 static struct __iommu_counter nm = { \
409 .name = #nm, \
410 }
411
412#define INC_STATS_COUNTER(name) name.value += 1
413#define ADD_STATS_COUNTER(name, x) name.value += (x)
414#define SUB_STATS_COUNTER(name, x) name.value -= (x)
415
416#else /* CONFIG_AMD_IOMMU_STATS */
417
418#define DECLARE_STATS_COUNTER(name)
419#define INC_STATS_COUNTER(name)
420#define ADD_STATS_COUNTER(name, x)
421#define SUB_STATS_COUNTER(name, x)
422
423static inline void amd_iommu_stats_init(void) { }
424
425#endif /* CONFIG_AMD_IOMMU_STATS */
426
404#endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ 427#endif /* _ASM_X86_AMD_IOMMU_TYPES_H */
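As a rough illustration of the intended usage pattern (not part of the patch; the counter name my_op is hypothetical, the real counters are declared in arch/x86/kernel/amd_iommu.c further down), the statistics macros above boil down to plain u64 arithmetic when CONFIG_AMD_IOMMU_STATS=y and compile away entirely otherwise:

/* sketch only, assuming CONFIG_AMD_IOMMU_STATS=y and this header included */
DECLARE_STATS_COUNTER(my_op);            /* static struct __iommu_counter my_op = { .name = "my_op" }; */

static void my_op_account(size_t bytes)
{
        INC_STATS_COUNTER(my_op);        /* my_op.value += 1     */
        ADD_STATS_COUNTER(my_op, bytes); /* my_op.value += bytes */
}

/* With the option disabled all of these macros expand to nothing, so the same
 * code compiles to no-ops without any #ifdefs at the call sites. */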
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 25caa0738af5..ab1d51a8855e 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -54,7 +54,6 @@ extern int disable_apic;
54extern int is_vsmp_box(void); 54extern int is_vsmp_box(void);
55extern void xapic_wait_icr_idle(void); 55extern void xapic_wait_icr_idle(void);
56extern u32 safe_xapic_wait_icr_idle(void); 56extern u32 safe_xapic_wait_icr_idle(void);
57extern u64 xapic_icr_read(void);
58extern void xapic_icr_write(u32, u32); 57extern void xapic_icr_write(u32, u32);
59extern int setup_profiling_timer(unsigned int); 58extern int setup_profiling_timer(unsigned int);
60 59
@@ -93,7 +92,7 @@ static inline u32 native_apic_msr_read(u32 reg)
93} 92}
94 93
95#ifndef CONFIG_X86_32 94#ifndef CONFIG_X86_32
96extern int x2apic, x2apic_preenabled; 95extern int x2apic;
97extern void check_x2apic(void); 96extern void check_x2apic(void);
98extern void enable_x2apic(void); 97extern void enable_x2apic(void);
99extern void enable_IR_x2apic(void); 98extern void enable_IR_x2apic(void);
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index a2e545c91c35..ca5ffb2856b6 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -90,6 +90,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size);
90 90
91#endif /* CONFIG_X86_32 */ 91#endif /* CONFIG_X86_32 */
92 92
93extern int add_efi_memmap;
93extern void efi_reserve_early(void); 94extern void efi_reserve_early(void);
94extern void efi_call_phys_prelog(void); 95extern void efi_call_phys_prelog(void);
95extern void efi_call_phys_epilog(void); 96extern void efi_call_phys_epilog(void);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8346be87cfa1..730843d1d2fb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -21,6 +21,7 @@
21 21
22#include <asm/pvclock-abi.h> 22#include <asm/pvclock-abi.h>
23#include <asm/desc.h> 23#include <asm/desc.h>
24#include <asm/mtrr.h>
24 25
25#define KVM_MAX_VCPUS 16 26#define KVM_MAX_VCPUS 16
26#define KVM_MEMORY_SLOTS 32 27#define KVM_MEMORY_SLOTS 32
@@ -86,6 +87,7 @@
86#define KVM_MIN_FREE_MMU_PAGES 5 87#define KVM_MIN_FREE_MMU_PAGES 5
87#define KVM_REFILL_PAGES 25 88#define KVM_REFILL_PAGES 25
88#define KVM_MAX_CPUID_ENTRIES 40 89#define KVM_MAX_CPUID_ENTRIES 40
90#define KVM_NR_FIXED_MTRR_REGION 88
89#define KVM_NR_VAR_MTRR 8 91#define KVM_NR_VAR_MTRR 8
90 92
91extern spinlock_t kvm_lock; 93extern spinlock_t kvm_lock;
@@ -180,6 +182,8 @@ struct kvm_mmu_page {
180 struct list_head link; 182 struct list_head link;
181 struct hlist_node hash_link; 183 struct hlist_node hash_link;
182 184
185 struct list_head oos_link;
186
183 /* 187 /*
184 * The following two entries are used to key the shadow page in the 188 * The following two entries are used to key the shadow page in the
185 * hash table. 189 * hash table.
@@ -190,13 +194,16 @@ struct kvm_mmu_page {
190 u64 *spt; 194 u64 *spt;
191 /* hold the gfn of each spte inside spt */ 195 /* hold the gfn of each spte inside spt */
192 gfn_t *gfns; 196 gfn_t *gfns;
193 unsigned long slot_bitmap; /* One bit set per slot which has memory 197 /*
194 * in this shadow page. 198 * One bit set per slot which has memory
195 */ 199 * in this shadow page.
200 */
201 DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
196 int multimapped; /* More than one parent_pte? */ 202 int multimapped; /* More than one parent_pte? */
197 int root_count; /* Currently serving as active root */ 203 int root_count; /* Currently serving as active root */
198 bool unsync; 204 bool unsync;
199 bool unsync_children; 205 bool global;
206 unsigned int unsync_children;
200 union { 207 union {
201 u64 *parent_pte; /* !multimapped */ 208 u64 *parent_pte; /* !multimapped */
202 struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */ 209 struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
@@ -327,8 +334,10 @@ struct kvm_vcpu_arch {
327 334
328 bool nmi_pending; 335 bool nmi_pending;
329 bool nmi_injected; 336 bool nmi_injected;
337 bool nmi_window_open;
330 338
331 u64 mtrr[0x100]; 339 struct mtrr_state_type mtrr_state;
340 u32 pat;
332}; 341};
333 342
334struct kvm_mem_alias { 343struct kvm_mem_alias {
@@ -350,11 +359,13 @@ struct kvm_arch{
350 */ 359 */
351 struct list_head active_mmu_pages; 360 struct list_head active_mmu_pages;
352 struct list_head assigned_dev_head; 361 struct list_head assigned_dev_head;
353 struct dmar_domain *intel_iommu_domain; 362 struct list_head oos_global_pages;
363 struct iommu_domain *iommu_domain;
354 struct kvm_pic *vpic; 364 struct kvm_pic *vpic;
355 struct kvm_ioapic *vioapic; 365 struct kvm_ioapic *vioapic;
356 struct kvm_pit *vpit; 366 struct kvm_pit *vpit;
357 struct hlist_head irq_ack_notifier_list; 367 struct hlist_head irq_ack_notifier_list;
368 int vapics_in_nmi_mode;
358 369
359 int round_robin_prev_vcpu; 370 int round_robin_prev_vcpu;
360 unsigned int tss_addr; 371 unsigned int tss_addr;
@@ -378,6 +389,7 @@ struct kvm_vm_stat {
378 u32 mmu_recycled; 389 u32 mmu_recycled;
379 u32 mmu_cache_miss; 390 u32 mmu_cache_miss;
380 u32 mmu_unsync; 391 u32 mmu_unsync;
392 u32 mmu_unsync_global;
381 u32 remote_tlb_flush; 393 u32 remote_tlb_flush;
382 u32 lpages; 394 u32 lpages;
383}; 395};
@@ -397,6 +409,7 @@ struct kvm_vcpu_stat {
397 u32 halt_exits; 409 u32 halt_exits;
398 u32 halt_wakeup; 410 u32 halt_wakeup;
399 u32 request_irq_exits; 411 u32 request_irq_exits;
412 u32 request_nmi_exits;
400 u32 irq_exits; 413 u32 irq_exits;
401 u32 host_state_reload; 414 u32 host_state_reload;
402 u32 efer_reload; 415 u32 efer_reload;
@@ -405,6 +418,7 @@ struct kvm_vcpu_stat {
405 u32 insn_emulation_fail; 418 u32 insn_emulation_fail;
406 u32 hypercalls; 419 u32 hypercalls;
407 u32 irq_injections; 420 u32 irq_injections;
421 u32 nmi_injections;
408}; 422};
409 423
410struct descriptor_table { 424struct descriptor_table {
@@ -477,6 +491,7 @@ struct kvm_x86_ops {
477 491
478 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); 492 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
479 int (*get_tdp_level)(void); 493 int (*get_tdp_level)(void);
494 int (*get_mt_mask_shift)(void);
480}; 495};
481 496
482extern struct kvm_x86_ops *kvm_x86_ops; 497extern struct kvm_x86_ops *kvm_x86_ops;
@@ -490,7 +505,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu);
490void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); 505void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
491void kvm_mmu_set_base_ptes(u64 base_pte); 506void kvm_mmu_set_base_ptes(u64 base_pte);
492void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, 507void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
493 u64 dirty_mask, u64 nx_mask, u64 x_mask); 508 u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask);
494 509
495int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); 510int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
496void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); 511void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
@@ -587,12 +602,14 @@ unsigned long segment_base(u16 selector);
587 602
588void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); 603void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
589void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, 604void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
590 const u8 *new, int bytes); 605 const u8 *new, int bytes,
606 bool guest_initiated);
591int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); 607int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
592void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); 608void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
593int kvm_mmu_load(struct kvm_vcpu *vcpu); 609int kvm_mmu_load(struct kvm_vcpu *vcpu);
594void kvm_mmu_unload(struct kvm_vcpu *vcpu); 610void kvm_mmu_unload(struct kvm_vcpu *vcpu);
595void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); 611void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
612void kvm_mmu_sync_global(struct kvm_vcpu *vcpu);
596 613
597int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); 614int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
598 615
@@ -607,6 +624,8 @@ void kvm_disable_tdp(void);
607int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); 624int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
608int complete_pio(struct kvm_vcpu *vcpu); 625int complete_pio(struct kvm_vcpu *vcpu);
609 626
627struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn);
628
610static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) 629static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
611{ 630{
612 struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); 631 struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
@@ -702,18 +721,6 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
702 kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); 721 kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
703} 722}
704 723
705#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30"
706#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2"
707#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3"
708#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30"
709#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0"
710#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0"
711#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4"
712#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4"
713#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30"
714#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
715#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
716
717#define MSR_IA32_TIME_STAMP_COUNTER 0x010 724#define MSR_IA32_TIME_STAMP_COUNTER 0x010
718 725
719#define TSS_IOPB_BASE_OFFSET 0x66 726#define TSS_IOPB_BASE_OFFSET 0x66
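One note on the slot_bitmap change in struct kvm_mmu_page above: DECLARE_BITMAP() sizes the array from the slot count instead of hard-wiring a single unsigned long. An illustrative expansion (the KVM_PRIVATE_MEM_SLOTS value is an assumption here):

/*
 * From <linux/types.h>:  DECLARE_BITMAP(name, bits)
 *                          expands to  unsigned long name[BITS_TO_LONGS(bits)];
 *
 *   old:  unsigned long slot_bitmap;            -- at most BITS_PER_LONG slots
 *   new:  unsigned long slot_bitmap[BITS_TO_LONGS(KVM_MEMORY_SLOTS +
 *                                                 KVM_PRIVATE_MEM_SLOTS)];
 *
 * With KVM_MEMORY_SLOTS = 32 and an assumed KVM_PRIVATE_MEM_SLOTS = 4 that is
 * 36 bits: two longs on 32-bit hosts, one on 64-bit hosts, so the slot count
 * is no longer silently capped by the word size.
 */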
diff --git a/arch/x86/include/asm/kvm_x86_emulate.h b/arch/x86/include/asm/kvm_x86_emulate.h
index 25179a29f208..6a159732881a 100644
--- a/arch/x86/include/asm/kvm_x86_emulate.h
+++ b/arch/x86/include/asm/kvm_x86_emulate.h
@@ -123,6 +123,7 @@ struct decode_cache {
123 u8 ad_bytes; 123 u8 ad_bytes;
124 u8 rex_prefix; 124 u8 rex_prefix;
125 struct operand src; 125 struct operand src;
126 struct operand src2;
126 struct operand dst; 127 struct operand dst;
127 bool has_seg_override; 128 bool has_seg_override;
128 u8 seg_override; 129 u8 seg_override;
@@ -146,22 +147,18 @@ struct x86_emulate_ctxt {
146 /* Register state before/after emulation. */ 147 /* Register state before/after emulation. */
147 struct kvm_vcpu *vcpu; 148 struct kvm_vcpu *vcpu;
148 149
149 /* Linear faulting address (if emulating a page-faulting instruction) */
150 unsigned long eflags; 150 unsigned long eflags;
151
152 /* Emulated execution mode, represented by an X86EMUL_MODE value. */ 151 /* Emulated execution mode, represented by an X86EMUL_MODE value. */
153 int mode; 152 int mode;
154
155 u32 cs_base; 153 u32 cs_base;
156 154
157 /* decode cache */ 155 /* decode cache */
158
159 struct decode_cache decode; 156 struct decode_cache decode;
160}; 157};
161 158
162/* Repeat String Operation Prefix */ 159/* Repeat String Operation Prefix */
163#define REPE_PREFIX 1 160#define REPE_PREFIX 1
164#define REPNE_PREFIX 2 161#define REPNE_PREFIX 2
165 162
166/* Execution mode, passed to the emulator. */ 163/* Execution mode, passed to the emulator. */
167#define X86EMUL_MODE_REAL 0 /* Real mode. */ 164#define X86EMUL_MODE_REAL 0 /* Real mode. */
@@ -170,7 +167,7 @@ struct x86_emulate_ctxt {
170#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ 167#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */
171 168
172/* Host execution mode. */ 169/* Host execution mode. */
173#if defined(__i386__) 170#if defined(CONFIG_X86_32)
174#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 171#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32
175#elif defined(CONFIG_X86_64) 172#elif defined(CONFIG_X86_64)
176#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 173#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 91885c28f66b..62d14ce3cd00 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -6,13 +6,13 @@
6#include <asm/mpspec_def.h> 6#include <asm/mpspec_def.h>
7 7
8extern int apic_version[MAX_APICS]; 8extern int apic_version[MAX_APICS];
9extern int pic_mode;
9 10
10#ifdef CONFIG_X86_32 11#ifdef CONFIG_X86_32
11#include <mach_mpspec.h> 12#include <mach_mpspec.h>
12 13
13extern unsigned int def_to_bigsmp; 14extern unsigned int def_to_bigsmp;
14extern u8 apicid_2_node[]; 15extern u8 apicid_2_node[];
15extern int pic_mode;
16 16
17#ifdef CONFIG_X86_NUMAQ 17#ifdef CONFIG_X86_NUMAQ
18extern int mp_bus_id_to_node[MAX_MP_BUSSES]; 18extern int mp_bus_id_to_node[MAX_MP_BUSSES];
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index 7c1e4258b31e..cb988aab716d 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -57,6 +57,31 @@ struct mtrr_gentry {
57}; 57};
58#endif /* !__i386__ */ 58#endif /* !__i386__ */
59 59
60struct mtrr_var_range {
61 u32 base_lo;
62 u32 base_hi;
63 u32 mask_lo;
64 u32 mask_hi;
65};
66
67/* In the Intel processor's MTRR interface, the MTRR type is always held in
68 an 8 bit field: */
69typedef u8 mtrr_type;
70
71#define MTRR_NUM_FIXED_RANGES 88
72#define MTRR_MAX_VAR_RANGES 256
73
74struct mtrr_state_type {
75 struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES];
76 mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES];
77 unsigned char enabled;
78 unsigned char have_fixed;
79 mtrr_type def_type;
80};
81
82#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
83#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
84
60/* These are the various ioctls */ 85/* These are the various ioctls */
61#define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry) 86#define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry)
62#define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry) 87#define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry)
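The MTRRphysBase_MSR()/MTRRphysMask_MSR() helpers above encode the fact that each variable-range MTRR occupies a consecutive MSR pair starting at 0x200. A small hedged sketch of filling one mtrr_var_range entry (the helper name read_var_range is illustrative only):

#include <asm/msr.h>    /* rdmsr() */
#include <asm/mtrr.h>   /* struct mtrr_var_range, MTRRphys*_MSR() */

/* MTRRphysBase_MSR(0) == 0x200, MTRRphysMask_MSR(0) == 0x201,
 * MTRRphysBase_MSR(7) == 0x20e, MTRRphysMask_MSR(7) == 0x20f, and so on. */
static void read_var_range(unsigned int reg, struct mtrr_var_range *vr)
{
        rdmsr(MTRRphysBase_MSR(reg), vr->base_lo, vr->base_hi);
        rdmsr(MTRRphysMask_MSR(reg), vr->mask_lo, vr->mask_hi);
}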
diff --git a/arch/x86/pci/pci.h b/arch/x86/include/asm/pci_x86.h
index 1959018aac02..e60fd3e14bdf 100644
--- a/arch/x86/pci/pci.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -57,7 +57,8 @@ extern struct pci_ops pci_root_ops;
57struct irq_info { 57struct irq_info {
58 u8 bus, devfn; /* Bus, device and function */ 58 u8 bus, devfn; /* Bus, device and function */
59 struct { 59 struct {
60 u8 link; /* IRQ line ID, chipset dependent, 0=not routed */ 60 u8 link; /* IRQ line ID, chipset dependent,
61 0 = not routed */
61 u16 bitmap; /* Available IRQs */ 62 u16 bitmap; /* Available IRQs */
62 } __attribute__((packed)) irq[4]; 63 } __attribute__((packed)) irq[4];
63 u8 slot; /* Slot number, 0=onboard */ 64 u8 slot; /* Slot number, 0=onboard */
@@ -69,11 +70,13 @@ struct irq_routing_table {
69 u16 version; /* PIRQ_VERSION */ 70 u16 version; /* PIRQ_VERSION */
70 u16 size; /* Table size in bytes */ 71 u16 size; /* Table size in bytes */
71 u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */ 72 u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */
72 u16 exclusive_irqs; /* IRQs devoted exclusively to PCI usage */ 73 u16 exclusive_irqs; /* IRQs devoted exclusively to
73 u16 rtr_vendor, rtr_device; /* Vendor and device ID of interrupt router */ 74 PCI usage */
75 u16 rtr_vendor, rtr_device; /* Vendor and device ID of
76 interrupt router */
74 u32 miniport_data; /* Crap */ 77 u32 miniport_data; /* Crap */
75 u8 rfu[11]; 78 u8 rfu[11];
76 u8 checksum; /* Modulo 256 checksum must give zero */ 79 u8 checksum; /* Modulo 256 checksum must give 0 */
77 struct irq_info slots[0]; 80 struct irq_info slots[0];
78} __attribute__((packed)); 81} __attribute__((packed));
79 82
@@ -148,15 +151,15 @@ static inline unsigned int mmio_config_readl(void __iomem *pos)
148 151
149static inline void mmio_config_writeb(void __iomem *pos, u8 val) 152static inline void mmio_config_writeb(void __iomem *pos, u8 val)
150{ 153{
151 asm volatile("movb %%al,(%1)" :: "a" (val), "r" (pos) : "memory"); 154 asm volatile("movb %%al,(%1)" : : "a" (val), "r" (pos) : "memory");
152} 155}
153 156
154static inline void mmio_config_writew(void __iomem *pos, u16 val) 157static inline void mmio_config_writew(void __iomem *pos, u16 val)
155{ 158{
156 asm volatile("movw %%ax,(%1)" :: "a" (val), "r" (pos) : "memory"); 159 asm volatile("movw %%ax,(%1)" : : "a" (val), "r" (pos) : "memory");
157} 160}
158 161
159static inline void mmio_config_writel(void __iomem *pos, u32 val) 162static inline void mmio_config_writel(void __iomem *pos, u32 val)
160{ 163{
161 asm volatile("movl %%eax,(%1)" :: "a" (val), "r" (pos) : "memory"); 164 asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory");
162} 165}
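A minimal usage sketch for the mmio_config_* accessors above, assuming 'mmcfg' already points at the ioremap()ed MMCONFIG window (the function name and the zero bus/devfn are illustrative only):

#include <linux/pci_regs.h>   /* PCI_VENDOR_ID */

/* Read the 32-bit vendor/device word of bus 0, devfn 0. The offset follows
 * the usual MMCONFIG layout: (bus << 20) | (devfn << 12) | register. */
static u32 read_vendor_device(void __iomem *mmcfg)
{
        return mmio_config_readl(mmcfg + ((0 << 20) | (0 << 12) | PCI_VENDOR_ID));
}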
diff --git a/arch/x86/kvm/svm.h b/arch/x86/include/asm/svm.h
index 1b8afa78e869..1b8afa78e869 100644
--- a/arch/x86/kvm/svm.h
+++ b/arch/x86/include/asm/svm.h
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
new file mode 100644
index 000000000000..ffb08be2a530
--- /dev/null
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -0,0 +1,101 @@
1/*
2 * sys_ia32.h - Linux ia32 syscall interfaces
3 *
4 * Copyright (c) 2008 Jaswinder Singh Rajput
5 *
6 * This file is released under the GPLv2.
7 * See the file COPYING for more details.
8 */
9
10#ifndef _ASM_X86_SYS_IA32_H
11#define _ASM_X86_SYS_IA32_H
12
13#include <linux/compiler.h>
14#include <linux/linkage.h>
15#include <linux/types.h>
16#include <linux/signal.h>
17#include <asm/compat.h>
18#include <asm/ia32.h>
19
20/* ia32/sys_ia32.c */
21asmlinkage long sys32_truncate64(char __user *, unsigned long, unsigned long);
22asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long);
23
24asmlinkage long sys32_stat64(char __user *, struct stat64 __user *);
25asmlinkage long sys32_lstat64(char __user *, struct stat64 __user *);
26asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *);
27asmlinkage long sys32_fstatat(unsigned int, char __user *,
28 struct stat64 __user *, int);
29struct mmap_arg_struct;
30asmlinkage long sys32_mmap(struct mmap_arg_struct __user *);
31asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long);
32
33asmlinkage long sys32_pipe(int __user *);
34struct sigaction32;
35struct old_sigaction32;
36asmlinkage long sys32_rt_sigaction(int, struct sigaction32 __user *,
37 struct sigaction32 __user *, unsigned int);
38asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *,
39 struct old_sigaction32 __user *);
40asmlinkage long sys32_rt_sigprocmask(int, compat_sigset_t __user *,
41 compat_sigset_t __user *, unsigned int);
42asmlinkage long sys32_alarm(unsigned int);
43
44struct sel_arg_struct;
45asmlinkage long sys32_old_select(struct sel_arg_struct __user *);
46asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int);
47asmlinkage long sys32_sysfs(int, u32, u32);
48
49asmlinkage long sys32_sched_rr_get_interval(compat_pid_t,
50 struct compat_timespec __user *);
51asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t);
52asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *);
53
54#ifdef CONFIG_SYSCTL_SYSCALL
55struct sysctl_ia32;
56asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *);
57#endif
58
59asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32);
60asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32);
61
62asmlinkage long sys32_personality(unsigned long);
63asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32);
64
65asmlinkage long sys32_mmap2(unsigned long, unsigned long, unsigned long,
66 unsigned long, unsigned long, unsigned long);
67
68struct oldold_utsname;
69struct old_utsname;
70asmlinkage long sys32_olduname(struct oldold_utsname __user *);
71long sys32_uname(struct old_utsname __user *);
72
73long sys32_ustat(unsigned, struct ustat32 __user *);
74
75asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *,
76 compat_uptr_t __user *, struct pt_regs *);
77asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *);
78
79long sys32_lseek(unsigned int, int, unsigned int);
80long sys32_kill(int, int);
81long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int);
82long sys32_vm86_warning(void);
83long sys32_lookup_dcookie(u32, u32, char __user *, size_t);
84
85asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t);
86asmlinkage long sys32_sync_file_range(int, unsigned, unsigned,
87 unsigned, unsigned, int);
88asmlinkage long sys32_fadvise64(int, unsigned, unsigned, size_t, int);
89asmlinkage long sys32_fallocate(int, int, unsigned,
90 unsigned, unsigned, unsigned);
91
92/* ia32/ia32_signal.c */
93asmlinkage long sys32_sigsuspend(int, int, old_sigset_t);
94asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *,
95 stack_ia32_t __user *, struct pt_regs *);
96asmlinkage long sys32_sigreturn(struct pt_regs *);
97asmlinkage long sys32_rt_sigreturn(struct pt_regs *);
98
99/* ia32/ipc32.c */
100asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32);
101#endif /* _ASM_X86_SYS_IA32_H */
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index e2363253bbbf..50423c7b56b2 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -133,61 +133,61 @@ struct bau_msg_payload {
133 * see table 4.2.3.0.1 in broacast_assist spec. 133 * see table 4.2.3.0.1 in broacast_assist spec.
134 */ 134 */
135struct bau_msg_header { 135struct bau_msg_header {
136 int dest_subnodeid:6; /* must be zero */ 136 unsigned int dest_subnodeid:6; /* must be zero */
137 /* bits 5:0 */ 137 /* bits 5:0 */
138 int base_dest_nodeid:15; /* nasid>>1 (pnode) of first bit in node_map */ 138 unsigned int base_dest_nodeid:15; /* nasid>>1 (pnode) of */
139 /* bits 20:6 */ 139 /* bits 20:6 */ /* first bit in node_map */
140 int command:8; /* message type */ 140 unsigned int command:8; /* message type */
141 /* bits 28:21 */ 141 /* bits 28:21 */
142 /* 0x38: SN3net EndPoint Message */ 142 /* 0x38: SN3net EndPoint Message */
143 int rsvd_1:3; /* must be zero */ 143 unsigned int rsvd_1:3; /* must be zero */
144 /* bits 31:29 */ 144 /* bits 31:29 */
145 /* int will align on 32 bits */ 145 /* int will align on 32 bits */
146 int rsvd_2:9; /* must be zero */ 146 unsigned int rsvd_2:9; /* must be zero */
147 /* bits 40:32 */ 147 /* bits 40:32 */
148 /* Suppl_A is 56-41 */ 148 /* Suppl_A is 56-41 */
149 int payload_2a:8; /* becomes byte 16 of msg */ 149 unsigned int payload_2a:8;/* becomes byte 16 of msg */
150 /* bits 48:41 */ /* not currently using */ 150 /* bits 48:41 */ /* not currently using */
151 int payload_2b:8; /* becomes byte 17 of msg */ 151 unsigned int payload_2b:8;/* becomes byte 17 of msg */
152 /* bits 56:49 */ /* not currently using */ 152 /* bits 56:49 */ /* not currently using */
153 /* Address field (96:57) is never used as an 153 /* Address field (96:57) is never used as an
154 address (these are address bits 42:3) */ 154 address (these are address bits 42:3) */
155 int rsvd_3:1; /* must be zero */ 155 unsigned int rsvd_3:1; /* must be zero */
156 /* bit 57 */ 156 /* bit 57 */
157 /* address bits 27:4 are payload */ 157 /* address bits 27:4 are payload */
158 /* these 24 bits become bytes 12-14 of msg */ 158 /* these 24 bits become bytes 12-14 of msg */
159 int replied_to:1; /* sent as 0 by the source to byte 12 */ 159 unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */
160 /* bit 58 */ 160 /* bit 58 */
161 161
162 int payload_1a:5; /* not currently used */ 162 unsigned int payload_1a:5;/* not currently used */
163 /* bits 63:59 */ 163 /* bits 63:59 */
164 int payload_1b:8; /* not currently used */ 164 unsigned int payload_1b:8;/* not currently used */
165 /* bits 71:64 */ 165 /* bits 71:64 */
166 int payload_1c:8; /* not currently used */ 166 unsigned int payload_1c:8;/* not currently used */
167 /* bits 79:72 */ 167 /* bits 79:72 */
168 int payload_1d:2; /* not currently used */ 168 unsigned int payload_1d:2;/* not currently used */
169 /* bits 81:80 */ 169 /* bits 81:80 */
170 170
171 int rsvd_4:7; /* must be zero */ 171 unsigned int rsvd_4:7; /* must be zero */
172 /* bits 88:82 */ 172 /* bits 88:82 */
173 int sw_ack_flag:1; /* software acknowledge flag */ 173 unsigned int sw_ack_flag:1;/* software acknowledge flag */
174 /* bit 89 */ 174 /* bit 89 */
175 /* INTD trasactions at destination are to 175 /* INTD trasactions at destination are to
176 wait for software acknowledge */ 176 wait for software acknowledge */
177 int rsvd_5:6; /* must be zero */ 177 unsigned int rsvd_5:6; /* must be zero */
178 /* bits 95:90 */ 178 /* bits 95:90 */
179 int rsvd_6:5; /* must be zero */ 179 unsigned int rsvd_6:5; /* must be zero */
180 /* bits 100:96 */ 180 /* bits 100:96 */
181 int int_both:1; /* if 1, interrupt both sockets on the blade */ 181 unsigned int int_both:1;/* if 1, interrupt both sockets on the blade */
182 /* bit 101*/ 182 /* bit 101*/
183 int fairness:3; /* usually zero */ 183 unsigned int fairness:3;/* usually zero */
184 /* bits 104:102 */ 184 /* bits 104:102 */
185 int multilevel:1; /* multi-level multicast format */ 185 unsigned int multilevel:1; /* multi-level multicast format */
186 /* bit 105 */ 186 /* bit 105 */
187 /* 0 for TLB: endpoint multi-unicast messages */ 187 /* 0 for TLB: endpoint multi-unicast messages */
188 int chaining:1; /* next descriptor is part of this activation*/ 188 unsigned int chaining:1;/* next descriptor is part of this activation*/
189 /* bit 106 */ 189 /* bit 106 */
190 int rsvd_7:21; /* must be zero */ 190 unsigned int rsvd_7:21; /* must be zero */
191 /* bits 127:107 */ 191 /* bits 127:107 */
192}; 192};
193 193
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
new file mode 100644
index 000000000000..593636275238
--- /dev/null
+++ b/arch/x86/include/asm/virtext.h
@@ -0,0 +1,132 @@
1/* CPU virtualization extensions handling
2 *
3 * This should carry the code for handling CPU virtualization extensions
4 * that needs to live in the kernel core.
5 *
6 * Author: Eduardo Habkost <ehabkost@redhat.com>
7 *
8 * Copyright (C) 2008, Red Hat Inc.
9 *
10 * Contains code from KVM, Copyright (C) 2006 Qumranet, Inc.
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2. See
13 * the COPYING file in the top-level directory.
14 */
15#ifndef _ASM_X86_VIRTEX_H
16#define _ASM_X86_VIRTEX_H
17
18#include <asm/processor.h>
19#include <asm/system.h>
20
21#include <asm/vmx.h>
22#include <asm/svm.h>
23
24/*
25 * VMX functions:
26 */
27
28static inline int cpu_has_vmx(void)
29{
30 unsigned long ecx = cpuid_ecx(1);
31 return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
32}
33
34
35/** Disable VMX on the current CPU
36 *
37 * vmxoff causes an undefined-opcode exception if vmxon was not run
38 * on the CPU previously. Only call this function if you know VMX
39 * is enabled.
40 */
41static inline void cpu_vmxoff(void)
42{
43 asm volatile (ASM_VMX_VMXOFF : : : "cc");
44 write_cr4(read_cr4() & ~X86_CR4_VMXE);
45}
46
47static inline int cpu_vmx_enabled(void)
48{
49 return read_cr4() & X86_CR4_VMXE;
50}
51
52/** Disable VMX if it is enabled on the current CPU
53 *
54 * You shouldn't call this if cpu_has_vmx() returns 0.
55 */
56static inline void __cpu_emergency_vmxoff(void)
57{
58 if (cpu_vmx_enabled())
59 cpu_vmxoff();
60}
61
62/** Disable VMX if it is supported and enabled on the current CPU
63 */
64static inline void cpu_emergency_vmxoff(void)
65{
66 if (cpu_has_vmx())
67 __cpu_emergency_vmxoff();
68}
69
70
71
72
73/*
74 * SVM functions:
75 */
76
77/** Check if the CPU has SVM support
78 *
79 * You can use the 'msg' arg to get a message describing the problem,
80 * if the function returns zero. Simply pass NULL if you are not interested
81 * in the messages; gcc should take care of not generating code for
82 * the messages in this case.
83 */
84static inline int cpu_has_svm(const char **msg)
85{
86 uint32_t eax, ebx, ecx, edx;
87
88 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
89 if (msg)
90 *msg = "not amd";
91 return 0;
92 }
93
94 cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
95 if (eax < SVM_CPUID_FUNC) {
96 if (msg)
97 *msg = "can't execute cpuid_8000000a";
98 return 0;
99 }
100
101 cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
102 if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) {
103 if (msg)
104 *msg = "svm not available";
105 return 0;
106 }
107 return 1;
108}
109
110
111/** Disable SVM on the current CPU
112 *
113 * You should call this only if cpu_has_svm() returned true.
114 */
115static inline void cpu_svm_disable(void)
116{
117 uint64_t efer;
118
119 wrmsrl(MSR_VM_HSAVE_PA, 0);
120 rdmsrl(MSR_EFER, efer);
121 wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
122}
123
124/** Makes sure SVM is disabled, if it is supported on the CPU
125 */
126static inline void cpu_emergency_svm_disable(void)
127{
128 if (cpu_has_svm(NULL))
129 cpu_svm_disable();
130}
131
132#endif /* _ASM_X86_VIRTEX_H */
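A brief sketch of how the helpers in this new header are meant to be combined on an emergency shutdown path (the wrapper function and its call site are hypothetical; the helpers are the ones defined above):

#include <asm/virtext.h>

/* Force the CPU out of VMX/SVM operation, e.g. before jumping into a crash
 * kernel, without caring which extension (if any) is actually in use. */
static void cpu_emergency_disable_virtualization(void)
{
        cpu_emergency_vmxoff();        /* no-op unless VMX is supported and enabled */
        cpu_emergency_svm_disable();   /* no-op unless SVM is supported */
}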
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/include/asm/vmx.h
index ec5edc339da6..d0238e6151d8 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -63,10 +63,13 @@
63 63
64#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 64#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200
65#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 65#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000
66#define VM_EXIT_SAVE_IA32_PAT 0x00040000
67#define VM_EXIT_LOAD_IA32_PAT 0x00080000
66 68
67#define VM_ENTRY_IA32E_MODE 0x00000200 69#define VM_ENTRY_IA32E_MODE 0x00000200
68#define VM_ENTRY_SMM 0x00000400 70#define VM_ENTRY_SMM 0x00000400
69#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 71#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800
72#define VM_ENTRY_LOAD_IA32_PAT 0x00004000
70 73
71/* VMCS Encodings */ 74/* VMCS Encodings */
72enum vmcs_field { 75enum vmcs_field {
@@ -112,6 +115,8 @@ enum vmcs_field {
112 VMCS_LINK_POINTER_HIGH = 0x00002801, 115 VMCS_LINK_POINTER_HIGH = 0x00002801,
113 GUEST_IA32_DEBUGCTL = 0x00002802, 116 GUEST_IA32_DEBUGCTL = 0x00002802,
114 GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, 117 GUEST_IA32_DEBUGCTL_HIGH = 0x00002803,
118 GUEST_IA32_PAT = 0x00002804,
119 GUEST_IA32_PAT_HIGH = 0x00002805,
115 GUEST_PDPTR0 = 0x0000280a, 120 GUEST_PDPTR0 = 0x0000280a,
116 GUEST_PDPTR0_HIGH = 0x0000280b, 121 GUEST_PDPTR0_HIGH = 0x0000280b,
117 GUEST_PDPTR1 = 0x0000280c, 122 GUEST_PDPTR1 = 0x0000280c,
@@ -120,6 +125,8 @@ enum vmcs_field {
120 GUEST_PDPTR2_HIGH = 0x0000280f, 125 GUEST_PDPTR2_HIGH = 0x0000280f,
121 GUEST_PDPTR3 = 0x00002810, 126 GUEST_PDPTR3 = 0x00002810,
122 GUEST_PDPTR3_HIGH = 0x00002811, 127 GUEST_PDPTR3_HIGH = 0x00002811,
128 HOST_IA32_PAT = 0x00002c00,
129 HOST_IA32_PAT_HIGH = 0x00002c01,
123 PIN_BASED_VM_EXEC_CONTROL = 0x00004000, 130 PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
124 CPU_BASED_VM_EXEC_CONTROL = 0x00004002, 131 CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
125 EXCEPTION_BITMAP = 0x00004004, 132 EXCEPTION_BITMAP = 0x00004004,
@@ -331,8 +338,9 @@ enum vmcs_field {
331 338
332#define AR_RESERVD_MASK 0xfffe0f00 339#define AR_RESERVD_MASK 0xfffe0f00
333 340
334#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 341#define TSS_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 0)
335#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10 342#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 1)
343#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 2)
336 344
337#define VMX_NR_VPIDS (1 << 16) 345#define VMX_NR_VPIDS (1 << 16)
338#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 346#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1
@@ -356,4 +364,19 @@ enum vmcs_field {
356 364
357#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul 365#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
358 366
367
368#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30"
369#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2"
370#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3"
371#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30"
372#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0"
373#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0"
374#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4"
375#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4"
376#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30"
377#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
378#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
379
380
381
359#endif 382#endif
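For orientation, the hand-encoded ASM_VMX_* opcodes above are meant to be wrapped in inline asm by their callers; a simplified sketch in the style of KVM's VMCS read accessor (error handling omitted, function name illustrative):

static inline unsigned long vmcs_read_sketch(unsigned long field)
{
        unsigned long value;

        /* VMREAD: field selector in rdx/edx, result delivered in rax/eax */
        asm volatile(ASM_VMX_VMREAD_RDX_RAX
                     : "=a" (value) : "d" (field) : "cc");
        return value;
}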
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 2e2da717b350..5113c080f0c4 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -20,8 +20,12 @@
20#include <linux/pci.h> 20#include <linux/pci.h>
21#include <linux/gfp.h> 21#include <linux/gfp.h>
22#include <linux/bitops.h> 22#include <linux/bitops.h>
23#include <linux/debugfs.h>
23#include <linux/scatterlist.h> 24#include <linux/scatterlist.h>
24#include <linux/iommu-helper.h> 25#include <linux/iommu-helper.h>
26#ifdef CONFIG_IOMMU_API
27#include <linux/iommu.h>
28#endif
25#include <asm/proto.h> 29#include <asm/proto.h>
26#include <asm/iommu.h> 30#include <asm/iommu.h>
27#include <asm/gart.h> 31#include <asm/gart.h>
@@ -38,6 +42,10 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock);
38static LIST_HEAD(iommu_pd_list); 42static LIST_HEAD(iommu_pd_list);
39static DEFINE_SPINLOCK(iommu_pd_list_lock); 43static DEFINE_SPINLOCK(iommu_pd_list_lock);
40 44
45#ifdef CONFIG_IOMMU_API
46static struct iommu_ops amd_iommu_ops;
47#endif
48
41/* 49/*
42 * general struct to manage commands send to an IOMMU 50 * general struct to manage commands send to an IOMMU
43 */ 51 */
@@ -47,6 +55,68 @@ struct iommu_cmd {
47 55
48static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, 56static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
49 struct unity_map_entry *e); 57 struct unity_map_entry *e);
58static struct dma_ops_domain *find_protection_domain(u16 devid);
59
60
61#ifdef CONFIG_AMD_IOMMU_STATS
62
63/*
64 * Initialization code for statistics collection
65 */
66
67DECLARE_STATS_COUNTER(compl_wait);
68DECLARE_STATS_COUNTER(cnt_map_single);
69DECLARE_STATS_COUNTER(cnt_unmap_single);
70DECLARE_STATS_COUNTER(cnt_map_sg);
71DECLARE_STATS_COUNTER(cnt_unmap_sg);
72DECLARE_STATS_COUNTER(cnt_alloc_coherent);
73DECLARE_STATS_COUNTER(cnt_free_coherent);
74DECLARE_STATS_COUNTER(cross_page);
75DECLARE_STATS_COUNTER(domain_flush_single);
76DECLARE_STATS_COUNTER(domain_flush_all);
77DECLARE_STATS_COUNTER(alloced_io_mem);
78DECLARE_STATS_COUNTER(total_map_requests);
79
80static struct dentry *stats_dir;
81static struct dentry *de_isolate;
82static struct dentry *de_fflush;
83
84static void amd_iommu_stats_add(struct __iommu_counter *cnt)
85{
86 if (stats_dir == NULL)
87 return;
88
89 cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir,
90 &cnt->value);
91}
92
93static void amd_iommu_stats_init(void)
94{
95 stats_dir = debugfs_create_dir("amd-iommu", NULL);
96 if (stats_dir == NULL)
97 return;
98
99 de_isolate = debugfs_create_bool("isolation", 0444, stats_dir,
100 (u32 *)&amd_iommu_isolate);
101
102 de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir,
103 (u32 *)&amd_iommu_unmap_flush);
104
105 amd_iommu_stats_add(&compl_wait);
106 amd_iommu_stats_add(&cnt_map_single);
107 amd_iommu_stats_add(&cnt_unmap_single);
108 amd_iommu_stats_add(&cnt_map_sg);
109 amd_iommu_stats_add(&cnt_unmap_sg);
110 amd_iommu_stats_add(&cnt_alloc_coherent);
111 amd_iommu_stats_add(&cnt_free_coherent);
112 amd_iommu_stats_add(&cross_page);
113 amd_iommu_stats_add(&domain_flush_single);
114 amd_iommu_stats_add(&domain_flush_all);
115 amd_iommu_stats_add(&alloced_io_mem);
116 amd_iommu_stats_add(&total_map_requests);
117}
118
119#endif
50 120
51/* returns !0 if the IOMMU is caching non-present entries in its TLB */ 121/* returns !0 if the IOMMU is caching non-present entries in its TLB */
52static int iommu_has_npcache(struct amd_iommu *iommu) 122static int iommu_has_npcache(struct amd_iommu *iommu)
@@ -189,13 +259,55 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
189 spin_lock_irqsave(&iommu->lock, flags); 259 spin_lock_irqsave(&iommu->lock, flags);
190 ret = __iommu_queue_command(iommu, cmd); 260 ret = __iommu_queue_command(iommu, cmd);
191 if (!ret) 261 if (!ret)
192 iommu->need_sync = 1; 262 iommu->need_sync = true;
193 spin_unlock_irqrestore(&iommu->lock, flags); 263 spin_unlock_irqrestore(&iommu->lock, flags);
194 264
195 return ret; 265 return ret;
196} 266}
197 267
198/* 268/*
269 * This function waits until an IOMMU has completed a completion
270 * wait command
271 */
272static void __iommu_wait_for_completion(struct amd_iommu *iommu)
273{
274 int ready = 0;
275 unsigned status = 0;
276 unsigned long i = 0;
277
278 INC_STATS_COUNTER(compl_wait);
279
280 while (!ready && (i < EXIT_LOOP_COUNT)) {
281 ++i;
282 /* wait for the bit to become one */
283 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
284 ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
285 }
286
287 /* set bit back to zero */
288 status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
289 writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
290
291 if (unlikely(i == EXIT_LOOP_COUNT))
292 panic("AMD IOMMU: Completion wait loop failed\n");
293}
294
295/*
296 * This function queues a completion wait command into the command
297 * buffer of an IOMMU
298 */
299static int __iommu_completion_wait(struct amd_iommu *iommu)
300{
301 struct iommu_cmd cmd;
302
303 memset(&cmd, 0, sizeof(cmd));
304 cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
305 CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
306
307 return __iommu_queue_command(iommu, &cmd);
308}
309
310/*
199 * This function is called whenever we need to ensure that the IOMMU has 311 * This function is called whenever we need to ensure that the IOMMU has
200 * completed execution of all commands we sent. It sends a 312 * completed execution of all commands we sent. It sends a
201 * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs 313 * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs
@@ -204,40 +316,22 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
204 */ 316 */
205static int iommu_completion_wait(struct amd_iommu *iommu) 317static int iommu_completion_wait(struct amd_iommu *iommu)
206{ 318{
207 int ret = 0, ready = 0; 319 int ret = 0;
208 unsigned status = 0; 320 unsigned long flags;
209 struct iommu_cmd cmd;
210 unsigned long flags, i = 0;
211
212 memset(&cmd, 0, sizeof(cmd));
213 cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
214 CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
215 321
216 spin_lock_irqsave(&iommu->lock, flags); 322 spin_lock_irqsave(&iommu->lock, flags);
217 323
218 if (!iommu->need_sync) 324 if (!iommu->need_sync)
219 goto out; 325 goto out;
220 326
221 iommu->need_sync = 0; 327 ret = __iommu_completion_wait(iommu);
222 328
223 ret = __iommu_queue_command(iommu, &cmd); 329 iommu->need_sync = false;
224 330
225 if (ret) 331 if (ret)
226 goto out; 332 goto out;
227 333
228 while (!ready && (i < EXIT_LOOP_COUNT)) { 334 __iommu_wait_for_completion(iommu);
229 ++i;
230 /* wait for the bit to become one */
231 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
232 ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
233 }
234
235 /* set bit back to zero */
236 status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
237 writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
238
239 if (unlikely(i == EXIT_LOOP_COUNT))
240 panic("AMD IOMMU: Completion wait loop failed\n");
241 335
242out: 336out:
243 spin_unlock_irqrestore(&iommu->lock, flags); 337 spin_unlock_irqrestore(&iommu->lock, flags);
@@ -264,6 +358,21 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
264 return ret; 358 return ret;
265} 359}
266 360
361static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
362 u16 domid, int pde, int s)
363{
364 memset(cmd, 0, sizeof(*cmd));
365 address &= PAGE_MASK;
366 CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
367 cmd->data[1] |= domid;
368 cmd->data[2] = lower_32_bits(address);
369 cmd->data[3] = upper_32_bits(address);
370 if (s) /* size bit - we flush more than one 4kb page */
371 cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
372	if (pde) /* PDE bit - we want to flush everything, not only the PTEs */
373 cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
374}
375
267/* 376/*
268 * Generic command send function for invalidaing TLB entries 377 * Generic command send function for invalidaing TLB entries
269 */ 378 */
@@ -273,16 +382,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
273 struct iommu_cmd cmd; 382 struct iommu_cmd cmd;
274 int ret; 383 int ret;
275 384
276 memset(&cmd, 0, sizeof(cmd)); 385 __iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s);
277 address &= PAGE_MASK;
278 CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
279 cmd.data[1] |= domid;
280 cmd.data[2] = lower_32_bits(address);
281 cmd.data[3] = upper_32_bits(address);
282 if (s) /* size bit - we flush more than one 4kb page */
283 cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
284 if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
285 cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
286 386
287 ret = iommu_queue_command(iommu, &cmd); 387 ret = iommu_queue_command(iommu, &cmd);
288 388
@@ -321,9 +421,35 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
321{ 421{
322 u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; 422 u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
323 423
424 INC_STATS_COUNTER(domain_flush_single);
425
324 iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); 426 iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
325} 427}
326 428
429/*
430 * This function is used to flush the IO/TLB for a given protection domain
431 * on every IOMMU in the system
432 */
433static void iommu_flush_domain(u16 domid)
434{
435 unsigned long flags;
436 struct amd_iommu *iommu;
437 struct iommu_cmd cmd;
438
439 INC_STATS_COUNTER(domain_flush_all);
440
441 __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
442 domid, 1, 1);
443
444 list_for_each_entry(iommu, &amd_iommu_list, list) {
445 spin_lock_irqsave(&iommu->lock, flags);
446 __iommu_queue_command(iommu, &cmd);
447 __iommu_completion_wait(iommu);
448 __iommu_wait_for_completion(iommu);
449 spin_unlock_irqrestore(&iommu->lock, flags);
450 }
451}
452
327/**************************************************************************** 453/****************************************************************************
328 * 454 *
329 * The functions below are used the create the page table mappings for 455 * The functions below are used the create the page table mappings for
@@ -338,10 +464,10 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
338 * supporting all features of AMD IOMMU page tables like level skipping 464 * supporting all features of AMD IOMMU page tables like level skipping
339 * and full 64 bit address spaces. 465 * and full 64 bit address spaces.
340 */ 466 */
341static int iommu_map(struct protection_domain *dom, 467static int iommu_map_page(struct protection_domain *dom,
342 unsigned long bus_addr, 468 unsigned long bus_addr,
343 unsigned long phys_addr, 469 unsigned long phys_addr,
344 int prot) 470 int prot)
345{ 471{
346 u64 __pte, *pte, *page; 472 u64 __pte, *pte, *page;
347 473
@@ -388,6 +514,28 @@ static int iommu_map(struct protection_domain *dom,
388 return 0; 514 return 0;
389} 515}
390 516
517static void iommu_unmap_page(struct protection_domain *dom,
518 unsigned long bus_addr)
519{
520 u64 *pte;
521
522 pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
523
524 if (!IOMMU_PTE_PRESENT(*pte))
525 return;
526
527 pte = IOMMU_PTE_PAGE(*pte);
528 pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
529
530 if (!IOMMU_PTE_PRESENT(*pte))
531 return;
532
533 pte = IOMMU_PTE_PAGE(*pte);
534 pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
535
536 *pte = 0;
537}
538
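iommu_map_page() and the new iommu_unmap_page() above form a symmetric pair over the three-level page table; a small hedged sketch of using them together (wrapper name illustrative, protection flags as defined in amd_iommu_types.h):

/* Identity-map one page read/write into a protection domain, then tear the
 * mapping down again. Error handling beyond the map failure is omitted. */
static int map_then_unmap(struct protection_domain *dom, unsigned long addr)
{
        int ret = iommu_map_page(dom, addr, addr, IOMMU_PROT_IR | IOMMU_PROT_IW);

        if (ret)
                return ret;

        iommu_unmap_page(dom, addr);
        return 0;
}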
391/* 539/*
392 * This function checks if a specific unity mapping entry is needed for 540 * This function checks if a specific unity mapping entry is needed for
393 * this specific IOMMU. 541 * this specific IOMMU.
@@ -440,7 +588,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
440 588
441 for (addr = e->address_start; addr < e->address_end; 589 for (addr = e->address_start; addr < e->address_end;
442 addr += PAGE_SIZE) { 590 addr += PAGE_SIZE) {
443 ret = iommu_map(&dma_dom->domain, addr, addr, e->prot); 591 ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot);
444 if (ret) 592 if (ret)
445 return ret; 593 return ret;
446 /* 594 /*
@@ -571,6 +719,16 @@ static u16 domain_id_alloc(void)
571 return id; 719 return id;
572} 720}
573 721
722static void domain_id_free(int id)
723{
724 unsigned long flags;
725
726 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
727 if (id > 0 && id < MAX_DOMAIN_ID)
728 __clear_bit(id, amd_iommu_pd_alloc_bitmap);
729 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
730}
731
574/* 732/*
575 * Used to reserve address ranges in the aperture (e.g. for exclusion 733 * Used to reserve address ranges in the aperture (e.g. for exclusion
576 * ranges. 734 * ranges.
@@ -587,12 +745,12 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
587 iommu_area_reserve(dom->bitmap, start_page, pages); 745 iommu_area_reserve(dom->bitmap, start_page, pages);
588} 746}
589 747
590static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) 748static void free_pagetable(struct protection_domain *domain)
591{ 749{
592 int i, j; 750 int i, j;
593 u64 *p1, *p2, *p3; 751 u64 *p1, *p2, *p3;
594 752
595 p1 = dma_dom->domain.pt_root; 753 p1 = domain->pt_root;
596 754
597 if (!p1) 755 if (!p1)
598 return; 756 return;
@@ -613,6 +771,8 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
613 } 771 }
614 772
615 free_page((unsigned long)p1); 773 free_page((unsigned long)p1);
774
775 domain->pt_root = NULL;
616} 776}
617 777
618/* 778/*
@@ -624,7 +784,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
624 if (!dom) 784 if (!dom)
625 return; 785 return;
626 786
627 dma_ops_free_pagetable(dom); 787 free_pagetable(&dom->domain);
628 788
629 kfree(dom->pte_pages); 789 kfree(dom->pte_pages);
630 790
@@ -663,6 +823,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
663 goto free_dma_dom; 823 goto free_dma_dom;
664 dma_dom->domain.mode = PAGE_MODE_3_LEVEL; 824 dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
665 dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); 825 dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
826 dma_dom->domain.flags = PD_DMA_OPS_MASK;
666 dma_dom->domain.priv = dma_dom; 827 dma_dom->domain.priv = dma_dom;
667 if (!dma_dom->domain.pt_root) 828 if (!dma_dom->domain.pt_root)
668 goto free_dma_dom; 829 goto free_dma_dom;
@@ -725,6 +886,15 @@ free_dma_dom:
725} 886}
726 887
727/* 888/*
889 * little helper function to check whether a given protection domain is a
890 * dma_ops domain
891 */
892static bool dma_ops_domain(struct protection_domain *domain)
893{
894 return domain->flags & PD_DMA_OPS_MASK;
895}
896
897/*
728 * Find out the protection domain structure for a given PCI device. This 898 * Find out the protection domain structure for a given PCI device. This
729 * will give us the pointer to the page table root for example. 899 * will give us the pointer to the page table root for example.
730 */ 900 */
@@ -744,14 +914,15 @@ static struct protection_domain *domain_for_device(u16 devid)
744 * If a device is not yet associated with a domain, this function does 914 * If a device is not yet associated with a domain, this function does
745 * assigns it visible for the hardware 915 * assigns it visible for the hardware
746 */ 916 */
747static void set_device_domain(struct amd_iommu *iommu, 917static void attach_device(struct amd_iommu *iommu,
748 struct protection_domain *domain, 918 struct protection_domain *domain,
749 u16 devid) 919 u16 devid)
750{ 920{
751 unsigned long flags; 921 unsigned long flags;
752
753 u64 pte_root = virt_to_phys(domain->pt_root); 922 u64 pte_root = virt_to_phys(domain->pt_root);
754 923
924 domain->dev_cnt += 1;
925
755 pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) 926 pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
756 << DEV_ENTRY_MODE_SHIFT; 927 << DEV_ENTRY_MODE_SHIFT;
757 pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; 928 pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
@@ -767,6 +938,116 @@ static void set_device_domain(struct amd_iommu *iommu,
767 iommu_queue_inv_dev_entry(iommu, devid); 938 iommu_queue_inv_dev_entry(iommu, devid);
768} 939}
769 940
941/*
942 * Removes a device from a protection domain (unlocked)
943 */
944static void __detach_device(struct protection_domain *domain, u16 devid)
945{
946
947 /* lock domain */
948 spin_lock(&domain->lock);
949
950 /* remove domain from the lookup table */
951 amd_iommu_pd_table[devid] = NULL;
952
953 /* remove entry from the device table seen by the hardware */
954 amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
955 amd_iommu_dev_table[devid].data[1] = 0;
956 amd_iommu_dev_table[devid].data[2] = 0;
957
958 /* decrease reference counter */
959 domain->dev_cnt -= 1;
960
961 /* ready */
962 spin_unlock(&domain->lock);
963}
964
965/*
966 * Removes a device from a protection domain (with devtable_lock held)
967 */
968static void detach_device(struct protection_domain *domain, u16 devid)
969{
970 unsigned long flags;
971
972 /* lock device table */
973 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
974 __detach_device(domain, devid);
975 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
976}
977
978static int device_change_notifier(struct notifier_block *nb,
979 unsigned long action, void *data)
980{
981 struct device *dev = data;
982 struct pci_dev *pdev = to_pci_dev(dev);
983 u16 devid = calc_devid(pdev->bus->number, pdev->devfn);
984 struct protection_domain *domain;
985 struct dma_ops_domain *dma_domain;
986 struct amd_iommu *iommu;
987 int order = amd_iommu_aperture_order;
988 unsigned long flags;
989
990 if (devid > amd_iommu_last_bdf)
991 goto out;
992
993 devid = amd_iommu_alias_table[devid];
994
995 iommu = amd_iommu_rlookup_table[devid];
996 if (iommu == NULL)
997 goto out;
998
999 domain = domain_for_device(devid);
1000
1001 if (domain && !dma_ops_domain(domain))
1002 WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound "
1003 "to a non-dma-ops domain\n", dev_name(dev));
1004
1005 switch (action) {
1006 case BUS_NOTIFY_BOUND_DRIVER:
1007 if (domain)
1008 goto out;
1009 dma_domain = find_protection_domain(devid);
1010 if (!dma_domain)
1011 dma_domain = iommu->default_dom;
1012 attach_device(iommu, &dma_domain->domain, devid);
1013 printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
1014 "device %s\n", dma_domain->domain.id, dev_name(dev));
1015 break;
1016 case BUS_NOTIFY_UNBIND_DRIVER:
1017 if (!domain)
1018 goto out;
1019 detach_device(domain, devid);
1020 break;
1021 case BUS_NOTIFY_ADD_DEVICE:
1022 /* allocate a protection domain if a device is added */
1023 dma_domain = find_protection_domain(devid);
1024 if (dma_domain)
1025 goto out;
1026 dma_domain = dma_ops_domain_alloc(iommu, order);
1027 if (!dma_domain)
1028 goto out;
1029 dma_domain->target_dev = devid;
1030
1031 spin_lock_irqsave(&iommu_pd_list_lock, flags);
1032 list_add_tail(&dma_domain->list, &iommu_pd_list);
1033 spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
1034
1035 break;
1036 default:
1037 goto out;
1038 }
1039
1040 iommu_queue_inv_dev_entry(iommu, devid);
1041 iommu_completion_wait(iommu);
1042
1043out:
1044 return 0;
1045}
1046
1047struct notifier_block device_nb = {
1048 .notifier_call = device_change_notifier,
1049};
1050
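The notifier above follows the standard driver-core pattern for watching a bus. A minimal, self-contained sketch of that pattern (the names below are illustrative and not part of this patch):

#include <linux/device.h>
#include <linux/notifier.h>
#include <linux/pci.h>

static int example_notifier(struct notifier_block *nb,
			    unsigned long action, void *data)
{
	struct device *dev = data;

	switch (action) {
	case BUS_NOTIFY_ADD_DEVICE:
		/* a new device appeared on the bus */
		pr_info("example: added %s\n", dev_name(dev));
		break;
	case BUS_NOTIFY_BOUND_DRIVER:
	case BUS_NOTIFY_UNBIND_DRIVER:
		/* a driver was bound to or unbound from the device */
		pr_info("example: driver change on %s\n", dev_name(dev));
		break;
	}

	return 0;
}

static struct notifier_block example_nb = {
	.notifier_call = example_notifier,
};

/* registered once at init time, e.g.:
 *	bus_register_notifier(&pci_bus_type, &example_nb);
 */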
770/***************************************************************************** 1051/*****************************************************************************
771 * 1052 *
772 * The next functions belong to the dma_ops mapping/unmapping code. 1053 * The next functions belong to the dma_ops mapping/unmapping code.
@@ -802,7 +1083,6 @@ static struct dma_ops_domain *find_protection_domain(u16 devid)
802 list_for_each_entry(entry, &iommu_pd_list, list) { 1083 list_for_each_entry(entry, &iommu_pd_list, list) {
803 if (entry->target_dev == devid) { 1084 if (entry->target_dev == devid) {
804 ret = entry; 1085 ret = entry;
805 list_del(&ret->list);
806 break; 1086 break;
807 } 1087 }
808 } 1088 }
@@ -853,14 +1133,13 @@ static int get_device_resources(struct device *dev,
853 if (!dma_dom) 1133 if (!dma_dom)
854 dma_dom = (*iommu)->default_dom; 1134 dma_dom = (*iommu)->default_dom;
855 *domain = &dma_dom->domain; 1135 *domain = &dma_dom->domain;
856 set_device_domain(*iommu, *domain, *bdf); 1136 attach_device(*iommu, *domain, *bdf);
857 printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " 1137 printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
858 "device ", (*domain)->id); 1138 "device %s\n", (*domain)->id, dev_name(dev));
859 print_devid(_bdf, 1);
860 } 1139 }
861 1140
862 if (domain_for_device(_bdf) == NULL) 1141 if (domain_for_device(_bdf) == NULL)
863 set_device_domain(*iommu, *domain, _bdf); 1142 attach_device(*iommu, *domain, _bdf);
864 1143
865 return 1; 1144 return 1;
866} 1145}
@@ -946,6 +1225,11 @@ static dma_addr_t __map_single(struct device *dev,
946 pages = iommu_num_pages(paddr, size, PAGE_SIZE); 1225 pages = iommu_num_pages(paddr, size, PAGE_SIZE);
947 paddr &= PAGE_MASK; 1226 paddr &= PAGE_MASK;
948 1227
1228 INC_STATS_COUNTER(total_map_requests);
1229
1230 if (pages > 1)
1231 INC_STATS_COUNTER(cross_page);
1232
949 if (align) 1233 if (align)
950 align_mask = (1UL << get_order(size)) - 1; 1234 align_mask = (1UL << get_order(size)) - 1;
951 1235
@@ -962,6 +1246,8 @@ static dma_addr_t __map_single(struct device *dev,
962 } 1246 }
963 address += offset; 1247 address += offset;
964 1248
1249 ADD_STATS_COUNTER(alloced_io_mem, size);
1250
965 if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { 1251 if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
966 iommu_flush_tlb(iommu, dma_dom->domain.id); 1252 iommu_flush_tlb(iommu, dma_dom->domain.id);
967 dma_dom->need_flush = false; 1253 dma_dom->need_flush = false;
@@ -998,6 +1284,8 @@ static void __unmap_single(struct amd_iommu *iommu,
998 start += PAGE_SIZE; 1284 start += PAGE_SIZE;
999 } 1285 }
1000 1286
1287 SUB_STATS_COUNTER(alloced_io_mem, size);
1288
1001 dma_ops_free_addresses(dma_dom, dma_addr, pages); 1289 dma_ops_free_addresses(dma_dom, dma_addr, pages);
1002 1290
1003 if (amd_iommu_unmap_flush || dma_dom->need_flush) { 1291 if (amd_iommu_unmap_flush || dma_dom->need_flush) {
@@ -1019,6 +1307,8 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
1019 dma_addr_t addr; 1307 dma_addr_t addr;
1020 u64 dma_mask; 1308 u64 dma_mask;
1021 1309
1310 INC_STATS_COUNTER(cnt_map_single);
1311
1022 if (!check_device(dev)) 1312 if (!check_device(dev))
1023 return bad_dma_address; 1313 return bad_dma_address;
1024 1314
@@ -1030,6 +1320,9 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
1030 /* device not handled by any AMD IOMMU */ 1320 /* device not handled by any AMD IOMMU */
1031 return (dma_addr_t)paddr; 1321 return (dma_addr_t)paddr;
1032 1322
1323 if (!dma_ops_domain(domain))
1324 return bad_dma_address;
1325
1033 spin_lock_irqsave(&domain->lock, flags); 1326 spin_lock_irqsave(&domain->lock, flags);
1034 addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, 1327 addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false,
1035 dma_mask); 1328 dma_mask);
@@ -1055,11 +1348,16 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
1055 struct protection_domain *domain; 1348 struct protection_domain *domain;
1056 u16 devid; 1349 u16 devid;
1057 1350
1351 INC_STATS_COUNTER(cnt_unmap_single);
1352
1058 if (!check_device(dev) || 1353 if (!check_device(dev) ||
1059 !get_device_resources(dev, &iommu, &domain, &devid)) 1354 !get_device_resources(dev, &iommu, &domain, &devid))
1060 /* device not handled by any AMD IOMMU */ 1355 /* device not handled by any AMD IOMMU */
1061 return; 1356 return;
1062 1357
1358 if (!dma_ops_domain(domain))
1359 return;
1360
1063 spin_lock_irqsave(&domain->lock, flags); 1361 spin_lock_irqsave(&domain->lock, flags);
1064 1362
1065 __unmap_single(iommu, domain->priv, dma_addr, size, dir); 1363 __unmap_single(iommu, domain->priv, dma_addr, size, dir);
@@ -1104,6 +1402,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
1104 int mapped_elems = 0; 1402 int mapped_elems = 0;
1105 u64 dma_mask; 1403 u64 dma_mask;
1106 1404
1405 INC_STATS_COUNTER(cnt_map_sg);
1406
1107 if (!check_device(dev)) 1407 if (!check_device(dev))
1108 return 0; 1408 return 0;
1109 1409
@@ -1114,6 +1414,9 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
1114 if (!iommu || !domain) 1414 if (!iommu || !domain)
1115 return map_sg_no_iommu(dev, sglist, nelems, dir); 1415 return map_sg_no_iommu(dev, sglist, nelems, dir);
1116 1416
1417 if (!dma_ops_domain(domain))
1418 return 0;
1419
1117 spin_lock_irqsave(&domain->lock, flags); 1420 spin_lock_irqsave(&domain->lock, flags);
1118 1421
1119 for_each_sg(sglist, s, nelems, i) { 1422 for_each_sg(sglist, s, nelems, i) {
@@ -1163,10 +1466,15 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
1163 u16 devid; 1466 u16 devid;
1164 int i; 1467 int i;
1165 1468
1469 INC_STATS_COUNTER(cnt_unmap_sg);
1470
1166 if (!check_device(dev) || 1471 if (!check_device(dev) ||
1167 !get_device_resources(dev, &iommu, &domain, &devid)) 1472 !get_device_resources(dev, &iommu, &domain, &devid))
1168 return; 1473 return;
1169 1474
1475 if (!dma_ops_domain(domain))
1476 return;
1477
1170 spin_lock_irqsave(&domain->lock, flags); 1478 spin_lock_irqsave(&domain->lock, flags);
1171 1479
1172 for_each_sg(sglist, s, nelems, i) { 1480 for_each_sg(sglist, s, nelems, i) {
@@ -1194,6 +1502,8 @@ static void *alloc_coherent(struct device *dev, size_t size,
1194 phys_addr_t paddr; 1502 phys_addr_t paddr;
1195 u64 dma_mask = dev->coherent_dma_mask; 1503 u64 dma_mask = dev->coherent_dma_mask;
1196 1504
1505 INC_STATS_COUNTER(cnt_alloc_coherent);
1506
1197 if (!check_device(dev)) 1507 if (!check_device(dev))
1198 return NULL; 1508 return NULL;
1199 1509
@@ -1212,6 +1522,9 @@ static void *alloc_coherent(struct device *dev, size_t size,
1212 return virt_addr; 1522 return virt_addr;
1213 } 1523 }
1214 1524
1525 if (!dma_ops_domain(domain))
1526 goto out_free;
1527
1215 if (!dma_mask) 1528 if (!dma_mask)
1216 dma_mask = *dev->dma_mask; 1529 dma_mask = *dev->dma_mask;
1217 1530
@@ -1220,18 +1533,20 @@ static void *alloc_coherent(struct device *dev, size_t size,
1220 *dma_addr = __map_single(dev, iommu, domain->priv, paddr, 1533 *dma_addr = __map_single(dev, iommu, domain->priv, paddr,
1221 size, DMA_BIDIRECTIONAL, true, dma_mask); 1534 size, DMA_BIDIRECTIONAL, true, dma_mask);
1222 1535
1223 if (*dma_addr == bad_dma_address) { 1536 if (*dma_addr == bad_dma_address)
1224 free_pages((unsigned long)virt_addr, get_order(size)); 1537 goto out_free;
1225 virt_addr = NULL;
1226 goto out;
1227 }
1228 1538
1229 iommu_completion_wait(iommu); 1539 iommu_completion_wait(iommu);
1230 1540
1231out:
1232 spin_unlock_irqrestore(&domain->lock, flags); 1541 spin_unlock_irqrestore(&domain->lock, flags);
1233 1542
1234 return virt_addr; 1543 return virt_addr;
1544
1545out_free:
1546
1547 free_pages((unsigned long)virt_addr, get_order(size));
1548
1549 return NULL;
1235} 1550}
1236 1551
1237/* 1552/*
@@ -1245,6 +1560,8 @@ static void free_coherent(struct device *dev, size_t size,
1245 struct protection_domain *domain; 1560 struct protection_domain *domain;
1246 u16 devid; 1561 u16 devid;
1247 1562
1563 INC_STATS_COUNTER(cnt_free_coherent);
1564
1248 if (!check_device(dev)) 1565 if (!check_device(dev))
1249 return; 1566 return;
1250 1567
@@ -1253,6 +1570,9 @@ static void free_coherent(struct device *dev, size_t size,
1253 if (!iommu || !domain) 1570 if (!iommu || !domain)
1254 goto free_mem; 1571 goto free_mem;
1255 1572
1573 if (!dma_ops_domain(domain))
1574 goto free_mem;
1575
1256 spin_lock_irqsave(&domain->lock, flags); 1576 spin_lock_irqsave(&domain->lock, flags);
1257 1577
1258 __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); 1578 __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
@@ -1296,7 +1616,7 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask)
1296 * we don't need to preallocate the protection domains anymore. 1616 * we don't need to preallocate the protection domains anymore.
1297 * For now we have to. 1617 * For now we have to.
1298 */ 1618 */
1299void prealloc_protection_domains(void) 1619static void prealloc_protection_domains(void)
1300{ 1620{
1301 struct pci_dev *dev = NULL; 1621 struct pci_dev *dev = NULL;
1302 struct dma_ops_domain *dma_dom; 1622 struct dma_ops_domain *dma_dom;
@@ -1305,7 +1625,7 @@ void prealloc_protection_domains(void)
1305 u16 devid; 1625 u16 devid;
1306 1626
1307 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { 1627 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
1308 devid = (dev->bus->number << 8) | dev->devfn; 1628 devid = calc_devid(dev->bus->number, dev->devfn);
1309 if (devid > amd_iommu_last_bdf) 1629 if (devid > amd_iommu_last_bdf)
1310 continue; 1630 continue;
1311 devid = amd_iommu_alias_table[devid]; 1631 devid = amd_iommu_alias_table[devid];
@@ -1352,6 +1672,7 @@ int __init amd_iommu_init_dma_ops(void)
1352 iommu->default_dom = dma_ops_domain_alloc(iommu, order); 1672 iommu->default_dom = dma_ops_domain_alloc(iommu, order);
1353 if (iommu->default_dom == NULL) 1673 if (iommu->default_dom == NULL)
1354 return -ENOMEM; 1674 return -ENOMEM;
1675 iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
1355 ret = iommu_init_unity_mappings(iommu); 1676 ret = iommu_init_unity_mappings(iommu);
1356 if (ret) 1677 if (ret)
1357 goto free_domains; 1678 goto free_domains;
@@ -1375,6 +1696,12 @@ int __init amd_iommu_init_dma_ops(void)
1375 /* Make the driver finally visible to the drivers */ 1696 /* Make the driver finally visible to the drivers */
1376 dma_ops = &amd_iommu_dma_ops; 1697 dma_ops = &amd_iommu_dma_ops;
1377 1698
1699 register_iommu(&amd_iommu_ops);
1700
1701 bus_register_notifier(&pci_bus_type, &device_nb);
1702
1703 amd_iommu_stats_init();
1704
1378 return 0; 1705 return 0;
1379 1706
1380free_domains: 1707free_domains:
@@ -1386,3 +1713,224 @@ free_domains:
1386 1713
1387 return ret; 1714 return ret;
1388} 1715}
1716
1717/*****************************************************************************
1718 *
1719 * The following functions belong to the exported interface of AMD IOMMU
1720 *
1721 * This interface allows access to lower level functions of the IOMMU
1722 * like protection domain handling and assignment of devices to domains
1723 * which is not possible with the dma_ops interface.
1724 *
1725 *****************************************************************************/
1726
1727static void cleanup_domain(struct protection_domain *domain)
1728{
1729 unsigned long flags;
1730 u16 devid;
1731
1732 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1733
1734 for (devid = 0; devid <= amd_iommu_last_bdf; ++devid)
1735 if (amd_iommu_pd_table[devid] == domain)
1736 __detach_device(domain, devid);
1737
1738 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1739}
1740
1741static int amd_iommu_domain_init(struct iommu_domain *dom)
1742{
1743 struct protection_domain *domain;
1744
1745 domain = kzalloc(sizeof(*domain), GFP_KERNEL);
1746 if (!domain)
1747 return -ENOMEM;
1748
1749 spin_lock_init(&domain->lock);
1750 domain->mode = PAGE_MODE_3_LEVEL;
1751 domain->id = domain_id_alloc();
1752 if (!domain->id)
1753 goto out_free;
1754 domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
1755 if (!domain->pt_root)
1756 goto out_free;
1757
1758 dom->priv = domain;
1759
1760 return 0;
1761
1762out_free:
1763 kfree(domain);
1764
1765 return -ENOMEM;
1766}
1767
1768static void amd_iommu_domain_destroy(struct iommu_domain *dom)
1769{
1770 struct protection_domain *domain = dom->priv;
1771
1772 if (!domain)
1773 return;
1774
1775 if (domain->dev_cnt > 0)
1776 cleanup_domain(domain);
1777
1778 BUG_ON(domain->dev_cnt != 0);
1779
1780 free_pagetable(domain);
1781
1782 domain_id_free(domain->id);
1783
1784 kfree(domain);
1785
1786 dom->priv = NULL;
1787}
1788
1789static void amd_iommu_detach_device(struct iommu_domain *dom,
1790 struct device *dev)
1791{
1792 struct protection_domain *domain = dom->priv;
1793 struct amd_iommu *iommu;
1794 struct pci_dev *pdev;
1795 u16 devid;
1796
1797 if (dev->bus != &pci_bus_type)
1798 return;
1799
1800 pdev = to_pci_dev(dev);
1801
1802 devid = calc_devid(pdev->bus->number, pdev->devfn);
1803
1804 if (devid > 0)
1805 detach_device(domain, devid);
1806
1807 iommu = amd_iommu_rlookup_table[devid];
1808 if (!iommu)
1809 return;
1810
1811 iommu_queue_inv_dev_entry(iommu, devid);
1812 iommu_completion_wait(iommu);
1813}
1814
1815static int amd_iommu_attach_device(struct iommu_domain *dom,
1816 struct device *dev)
1817{
1818 struct protection_domain *domain = dom->priv;
1819 struct protection_domain *old_domain;
1820 struct amd_iommu *iommu;
1821 struct pci_dev *pdev;
1822 u16 devid;
1823
1824 if (dev->bus != &pci_bus_type)
1825 return -EINVAL;
1826
1827 pdev = to_pci_dev(dev);
1828
1829 devid = calc_devid(pdev->bus->number, pdev->devfn);
1830
1831 if (devid >= amd_iommu_last_bdf ||
1832 devid != amd_iommu_alias_table[devid])
1833 return -EINVAL;
1834
1835 iommu = amd_iommu_rlookup_table[devid];
1836 if (!iommu)
1837 return -EINVAL;
1838
1839 old_domain = domain_for_device(devid);
1840 if (old_domain)
1841 return -EBUSY;
1842
1843 attach_device(iommu, domain, devid);
1844
1845 iommu_completion_wait(iommu);
1846
1847 return 0;
1848}
1849
1850static int amd_iommu_map_range(struct iommu_domain *dom,
1851 unsigned long iova, phys_addr_t paddr,
1852 size_t size, int iommu_prot)
1853{
1854 struct protection_domain *domain = dom->priv;
1855 unsigned long i, npages = iommu_num_pages(paddr, size, PAGE_SIZE);
1856 int prot = 0;
1857 int ret;
1858
1859 if (iommu_prot & IOMMU_READ)
1860 prot |= IOMMU_PROT_IR;
1861 if (iommu_prot & IOMMU_WRITE)
1862 prot |= IOMMU_PROT_IW;
1863
1864 iova &= PAGE_MASK;
1865 paddr &= PAGE_MASK;
1866
1867 for (i = 0; i < npages; ++i) {
1868 ret = iommu_map_page(domain, iova, paddr, prot);
1869 if (ret)
1870 return ret;
1871
1872 iova += PAGE_SIZE;
1873 paddr += PAGE_SIZE;
1874 }
1875
1876 return 0;
1877}
1878
1879static void amd_iommu_unmap_range(struct iommu_domain *dom,
1880 unsigned long iova, size_t size)
1881{
1882
1883 struct protection_domain *domain = dom->priv;
1884 unsigned long i, npages = iommu_num_pages(iova, size, PAGE_SIZE);
1885
1886 iova &= PAGE_MASK;
1887
1888 for (i = 0; i < npages; ++i) {
1889 iommu_unmap_page(domain, iova);
1890 iova += PAGE_SIZE;
1891 }
1892
1893 iommu_flush_domain(domain->id);
1894}
1895
1896static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
1897 unsigned long iova)
1898{
1899 struct protection_domain *domain = dom->priv;
1900 unsigned long offset = iova & ~PAGE_MASK;
1901 phys_addr_t paddr;
1902 u64 *pte;
1903
1904 pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)];
1905
1906 if (!IOMMU_PTE_PRESENT(*pte))
1907 return 0;
1908
1909 pte = IOMMU_PTE_PAGE(*pte);
1910 pte = &pte[IOMMU_PTE_L1_INDEX(iova)];
1911
1912 if (!IOMMU_PTE_PRESENT(*pte))
1913 return 0;
1914
1915 pte = IOMMU_PTE_PAGE(*pte);
1916 pte = &pte[IOMMU_PTE_L0_INDEX(iova)];
1917
1918 if (!IOMMU_PTE_PRESENT(*pte))
1919 return 0;
1920
1921 paddr = *pte & IOMMU_PAGE_MASK;
1922 paddr |= offset;
1923
1924 return paddr;
1925}
1926
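To make the walk above concrete: assuming the usual 512-entry levels, bits 38:30, 29:21 and 20:12 of the IOVA select the L2, L1 and L0 entries respectively, and a worked example looks like this:

/*
 * Worked example (index layout as assumed above):
 *
 *	iova   = 0x12345678
 *	offset = iova & ~PAGE_MASK    = 0x678
 *	L2 idx = (iova >> 30) & 0x1ff = 0
 *	L1 idx = (iova >> 21) & 0x1ff = 0x091
 *	L0 idx = (iova >> 12) & 0x1ff = 0x145
 *
 * If the L0 PTE maps physical page 0x87654000, the function returns
 * (0x87654000 & IOMMU_PAGE_MASK) | 0x678 = 0x87654678; if any level
 * is not present, it returns 0.
 */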
1927static struct iommu_ops amd_iommu_ops = {
1928 .domain_init = amd_iommu_domain_init,
1929 .domain_destroy = amd_iommu_domain_destroy,
1930 .attach_dev = amd_iommu_attach_device,
1931 .detach_dev = amd_iommu_detach_device,
1932 .map = amd_iommu_map_range,
1933 .unmap = amd_iommu_unmap_range,
1934 .iova_to_phys = amd_iommu_iova_to_phys,
1935};
1936
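With register_iommu(&amd_iommu_ops) added above in amd_iommu_init_dma_ops(), other kernel code can drive these callbacks through the generic IOMMU API. A minimal sketch of such a consumer, assuming the interface of this kernel generation (iommu_domain_alloc(), iommu_attach_device(), iommu_map_range(), iommu_iova_to_phys()):

#include <linux/iommu.h>
#include <linux/pci.h>

static int example_use_iommu(struct pci_dev *pdev, phys_addr_t paddr)
{
	struct iommu_domain *dom;
	int ret;

	dom = iommu_domain_alloc();		/* -> amd_iommu_domain_init() */
	if (!dom)
		return -ENOMEM;

	ret = iommu_attach_device(dom, &pdev->dev); /* -> amd_iommu_attach_device() */
	if (ret)
		goto out_free;

	/* map one page at IOVA 0 with read/write permission */
	ret = iommu_map_range(dom, 0, paddr, PAGE_SIZE,
			      IOMMU_READ | IOMMU_WRITE);
	if (ret)
		goto out_detach;

	WARN_ON(iommu_iova_to_phys(dom, 0) != paddr);
	return 0;

out_detach:
	iommu_detach_device(dom, &pdev->dev);
out_free:
	iommu_domain_free(dom);
	return ret;
}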
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index c625800c55ca..42c33cebf00f 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -122,7 +122,8 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have
122LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings 122LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
123 we find in ACPI */ 123 we find in ACPI */
124unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ 124unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
125int amd_iommu_isolate = 1; /* if 1, device isolation is enabled */ 125bool amd_iommu_isolate = true; /* if true, device isolation is
126 enabled */
126bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ 127bool amd_iommu_unmap_flush; /* if true, flush on every unmap */
127 128
128LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the 129LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
@@ -243,20 +244,16 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
243} 244}
244 245
245/* Function to enable the hardware */ 246/* Function to enable the hardware */
246void __init iommu_enable(struct amd_iommu *iommu) 247static void __init iommu_enable(struct amd_iommu *iommu)
247{ 248{
248 printk(KERN_INFO "AMD IOMMU: Enabling IOMMU " 249 printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n",
249 "at %02x:%02x.%x cap 0x%hx\n", 250 dev_name(&iommu->dev->dev), iommu->cap_ptr);
250 iommu->dev->bus->number,
251 PCI_SLOT(iommu->dev->devfn),
252 PCI_FUNC(iommu->dev->devfn),
253 iommu->cap_ptr);
254 251
255 iommu_feature_enable(iommu, CONTROL_IOMMU_EN); 252 iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
256} 253}
257 254
258/* Function to enable IOMMU event logging and event interrupts */ 255/* Function to enable IOMMU event logging and event interrupts */
259void __init iommu_enable_event_logging(struct amd_iommu *iommu) 256static void __init iommu_enable_event_logging(struct amd_iommu *iommu)
260{ 257{
261 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); 258 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
262 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); 259 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
@@ -1218,9 +1215,9 @@ static int __init parse_amd_iommu_options(char *str)
1218{ 1215{
1219 for (; *str; ++str) { 1216 for (; *str; ++str) {
1220 if (strncmp(str, "isolate", 7) == 0) 1217 if (strncmp(str, "isolate", 7) == 0)
1221 amd_iommu_isolate = 1; 1218 amd_iommu_isolate = true;
1222 if (strncmp(str, "share", 5) == 0) 1219 if (strncmp(str, "share", 5) == 0)
1223 amd_iommu_isolate = 0; 1220 amd_iommu_isolate = false;
1224 if (strncmp(str, "fullflush", 9) == 0) 1221 if (strncmp(str, "fullflush", 9) == 0)
1225 amd_iommu_unmap_flush = true; 1222 amd_iommu_unmap_flush = true;
1226 } 1223 }
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 99589245fd8d..b13d3c4dbd42 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -98,8 +98,8 @@ __setup("apicpmtimer", setup_apicpmtimer);
98#ifdef HAVE_X2APIC 98#ifdef HAVE_X2APIC
99int x2apic; 99int x2apic;
100/* x2apic enabled before OS handover */ 100/* x2apic enabled before OS handover */
101int x2apic_preenabled; 101static int x2apic_preenabled;
102int disable_x2apic; 102static int disable_x2apic;
103static __init int setup_nox2apic(char *str) 103static __init int setup_nox2apic(char *str)
104{ 104{
105 disable_x2apic = 1; 105 disable_x2apic = 1;
@@ -226,7 +226,7 @@ void xapic_icr_write(u32 low, u32 id)
226 apic_write(APIC_ICR, low); 226 apic_write(APIC_ICR, low);
227} 227}
228 228
229u64 xapic_icr_read(void) 229static u64 xapic_icr_read(void)
230{ 230{
231 u32 icr1, icr2; 231 u32 icr1, icr2;
232 232
@@ -266,7 +266,7 @@ void x2apic_icr_write(u32 low, u32 id)
266 wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); 266 wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
267} 267}
268 268
269u64 x2apic_icr_read(void) 269static u64 x2apic_icr_read(void)
270{ 270{
271 unsigned long val; 271 unsigned long val;
272 272
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index 2a0a2a3cac26..f63882728d91 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -25,7 +25,7 @@
25#include <asm/uv/bios.h> 25#include <asm/uv/bios.h>
26#include <asm/uv/uv_hub.h> 26#include <asm/uv/uv_hub.h>
27 27
28struct uv_systab uv_systab; 28static struct uv_systab uv_systab;
29 29
30s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) 30s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
31{ 31{
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 4e8d77f01eeb..b59ddcc88cd8 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -14,14 +14,6 @@
14#include <asm/pat.h> 14#include <asm/pat.h>
15#include "mtrr.h" 15#include "mtrr.h"
16 16
17struct mtrr_state {
18 struct mtrr_var_range var_ranges[MAX_VAR_RANGES];
19 mtrr_type fixed_ranges[NUM_FIXED_RANGES];
20 unsigned char enabled;
21 unsigned char have_fixed;
22 mtrr_type def_type;
23};
24
25struct fixed_range_block { 17struct fixed_range_block {
26 int base_msr; /* start address of an MTRR block */ 18 int base_msr; /* start address of an MTRR block */
27 int ranges; /* number of MTRRs in this block */ 19 int ranges; /* number of MTRRs in this block */
@@ -35,10 +27,12 @@ static struct fixed_range_block fixed_range_blocks[] = {
35}; 27};
36 28
37static unsigned long smp_changes_mask; 29static unsigned long smp_changes_mask;
38static struct mtrr_state mtrr_state = {};
39static int mtrr_state_set; 30static int mtrr_state_set;
40u64 mtrr_tom2; 31u64 mtrr_tom2;
41 32
33struct mtrr_state_type mtrr_state = {};
34EXPORT_SYMBOL_GPL(mtrr_state);
35
42#undef MODULE_PARAM_PREFIX 36#undef MODULE_PARAM_PREFIX
43#define MODULE_PARAM_PREFIX "mtrr." 37#define MODULE_PARAM_PREFIX "mtrr."
44 38
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 1159e269e596..d259e5d2e054 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -49,7 +49,7 @@
49 49
50u32 num_var_ranges = 0; 50u32 num_var_ranges = 0;
51 51
52unsigned int mtrr_usage_table[MAX_VAR_RANGES]; 52unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
53static DEFINE_MUTEX(mtrr_mutex); 53static DEFINE_MUTEX(mtrr_mutex);
54 54
55u64 size_or_mask, size_and_mask; 55u64 size_or_mask, size_and_mask;
@@ -574,7 +574,7 @@ struct mtrr_value {
574 unsigned long lsize; 574 unsigned long lsize;
575}; 575};
576 576
577static struct mtrr_value mtrr_state[MAX_VAR_RANGES]; 577static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES];
578 578
579static int mtrr_save(struct sys_device * sysdev, pm_message_t state) 579static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
580{ 580{
@@ -824,16 +824,14 @@ static int enable_mtrr_cleanup __initdata =
824 824
825static int __init disable_mtrr_cleanup_setup(char *str) 825static int __init disable_mtrr_cleanup_setup(char *str)
826{ 826{
827 if (enable_mtrr_cleanup != -1) 827 enable_mtrr_cleanup = 0;
828 enable_mtrr_cleanup = 0;
829 return 0; 828 return 0;
830} 829}
831early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); 830early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
832 831
833static int __init enable_mtrr_cleanup_setup(char *str) 832static int __init enable_mtrr_cleanup_setup(char *str)
834{ 833{
835 if (enable_mtrr_cleanup != -1) 834 enable_mtrr_cleanup = 1;
836 enable_mtrr_cleanup = 1;
837 return 0; 835 return 0;
838} 836}
839early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); 837early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 2dc4ec656b23..ffd60409cc6d 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -8,11 +8,6 @@
8#define MTRRcap_MSR 0x0fe 8#define MTRRcap_MSR 0x0fe
9#define MTRRdefType_MSR 0x2ff 9#define MTRRdefType_MSR 0x2ff
10 10
11#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
12#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
13
14#define NUM_FIXED_RANGES 88
15#define MAX_VAR_RANGES 256
16#define MTRRfix64K_00000_MSR 0x250 11#define MTRRfix64K_00000_MSR 0x250
17#define MTRRfix16K_80000_MSR 0x258 12#define MTRRfix16K_80000_MSR 0x258
18#define MTRRfix16K_A0000_MSR 0x259 13#define MTRRfix16K_A0000_MSR 0x259
@@ -29,11 +24,7 @@
29#define MTRR_CHANGE_MASK_VARIABLE 0x02 24#define MTRR_CHANGE_MASK_VARIABLE 0x02
30#define MTRR_CHANGE_MASK_DEFTYPE 0x04 25#define MTRR_CHANGE_MASK_DEFTYPE 0x04
31 26
32/* In the Intel processor's MTRR interface, the MTRR type is always held in 27extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
33 an 8 bit field: */
34typedef u8 mtrr_type;
35
36extern unsigned int mtrr_usage_table[MAX_VAR_RANGES];
37 28
38struct mtrr_ops { 29struct mtrr_ops {
39 u32 vendor; 30 u32 vendor;
@@ -70,13 +61,6 @@ struct set_mtrr_context {
70 u32 ccr3; 61 u32 ccr3;
71}; 62};
72 63
73struct mtrr_var_range {
74 u32 base_lo;
75 u32 base_hi;
76 u32 mask_lo;
77 u32 mask_hi;
78};
79
80void set_mtrr_done(struct set_mtrr_context *ctxt); 64void set_mtrr_done(struct set_mtrr_context *ctxt);
81void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); 65void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
82void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); 66void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
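The net effect of the MTRR changes above is that mtrr_state becomes a shared struct mtrr_state_type (declared in asm/mtrr.h) that code outside the MTRR driver, such as KVM, can consult. A minimal sketch of such a consumer, assuming mtrr_state_type keeps the fields of the struct removed from generic.c and that num_var_ranges is visible to the caller:

#include <linux/kernel.h>
#include <asm/mtrr.h>

static void example_dump_mtrrs(void)
{
	unsigned int i;

	pr_info("MTRR default type %u, enable flags %#x\n",
		mtrr_state.def_type, mtrr_state.enabled);

	/* variable ranges with the PhysMask valid bit (bit 11) set are active */
	for (i = 0; i < num_var_ranges; i++)
		if (mtrr_state.var_ranges[i].mask_lo & (1 << 11))
			pr_info("  var MTRR %u: base_lo=%#x mask_lo=%#x\n", i,
				mtrr_state.var_ranges[i].base_lo,
				mtrr_state.var_ranges[i].mask_lo);
}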
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 62a3c23bd703..2ac1f0c2beb3 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -39,10 +39,10 @@
39#include <linux/device.h> 39#include <linux/device.h>
40#include <linux/cpu.h> 40#include <linux/cpu.h>
41#include <linux/notifier.h> 41#include <linux/notifier.h>
42#include <linux/uaccess.h>
42 43
43#include <asm/processor.h> 44#include <asm/processor.h>
44#include <asm/msr.h> 45#include <asm/msr.h>
45#include <asm/uaccess.h>
46#include <asm/system.h> 46#include <asm/system.h>
47 47
48static struct class *cpuid_class; 48static struct class *cpuid_class;
@@ -82,7 +82,7 @@ static loff_t cpuid_seek(struct file *file, loff_t offset, int orig)
82} 82}
83 83
84static ssize_t cpuid_read(struct file *file, char __user *buf, 84static ssize_t cpuid_read(struct file *file, char __user *buf,
85 size_t count, loff_t * ppos) 85 size_t count, loff_t *ppos)
86{ 86{
87 char __user *tmp = buf; 87 char __user *tmp = buf;
88 struct cpuid_regs cmd; 88 struct cpuid_regs cmd;
@@ -117,7 +117,7 @@ static int cpuid_open(struct inode *inode, struct file *file)
117 unsigned int cpu; 117 unsigned int cpu;
118 struct cpuinfo_x86 *c; 118 struct cpuinfo_x86 *c;
119 int ret = 0; 119 int ret = 0;
120 120
121 lock_kernel(); 121 lock_kernel();
122 122
123 cpu = iminor(file->f_path.dentry->d_inode); 123 cpu = iminor(file->f_path.dentry->d_inode);
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index d84a852e4cd7..c689d19e35ab 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -26,6 +26,7 @@
26#include <linux/kdebug.h> 26#include <linux/kdebug.h>
27#include <asm/smp.h> 27#include <asm/smp.h>
28#include <asm/reboot.h> 28#include <asm/reboot.h>
29#include <asm/virtext.h>
29 30
30#include <mach_ipi.h> 31#include <mach_ipi.h>
31 32
@@ -49,6 +50,15 @@ static void kdump_nmi_callback(int cpu, struct die_args *args)
49#endif 50#endif
50 crash_save_cpu(regs, cpu); 51 crash_save_cpu(regs, cpu);
51 52
53 /* Disable VMX or SVM if needed.
54 *
55 * We need to disable virtualization on all CPUs.
56 * Having VMX or SVM enabled on any CPU may break rebooting
57 * after the kdump kernel has finished its task.
58 */
59 cpu_emergency_vmxoff();
60 cpu_emergency_svm_disable();
61
52 disable_local_APIC(); 62 disable_local_APIC();
53} 63}
54 64
@@ -80,6 +90,14 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
80 local_irq_disable(); 90 local_irq_disable();
81 91
82 kdump_nmi_shootdown_cpus(); 92 kdump_nmi_shootdown_cpus();
93
94 /* Booting the kdump kernel with VMX or SVM enabled won't work,
95 * because (among other limitations) we can't disable paging
96 * with the virt flags.
97 */
98 cpu_emergency_vmxoff();
99 cpu_emergency_svm_disable();
100
83 lapic_shutdown(); 101 lapic_shutdown();
84#if defined(CONFIG_X86_IO_APIC) 102#if defined(CONFIG_X86_IO_APIC)
85 disable_IO_APIC(); 103 disable_IO_APIC();
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 23b138e31e9c..504ad198e4ad 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -886,7 +886,7 @@ asmlinkage void early_printk(const char *fmt, ...)
886 va_list ap; 886 va_list ap;
887 887
888 va_start(ap, fmt); 888 va_start(ap, fmt);
889 n = vscnprintf(buf, 512, fmt, ap); 889 n = vscnprintf(buf, sizeof(buf), fmt, ap);
890 early_console->write(early_console, buf, n); 890 early_console->write(early_console, buf, n);
891 va_end(ap); 891 va_end(ap);
892} 892}
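The one-line change above replaces a hard-coded 512 with sizeof(buf), so the bound keeps tracking the buffer if its size ever changes. The same defensive pattern in a small self-contained form (plain C, not kernel code):

#include <stdarg.h>
#include <stdio.h>

static char buf[512];

static int format_message(const char *fmt, ...)
{
	va_list ap;
	int n;

	va_start(ap, fmt);
	/* sizeof(buf) stays correct even if the buffer is resized later */
	n = vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);

	return n;
}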
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
index 62895cf315ff..21bcc0e098ba 100644
--- a/arch/x86/kernel/genx2apic_phys.c
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -161,12 +161,12 @@ static unsigned int phys_pkg_id(int index_msb)
161 return current_cpu_data.initial_apicid >> index_msb; 161 return current_cpu_data.initial_apicid >> index_msb;
162} 162}
163 163
164void x2apic_send_IPI_self(int vector) 164static void x2apic_send_IPI_self(int vector)
165{ 165{
166 apic_write(APIC_SELF_IPI, vector); 166 apic_write(APIC_SELF_IPI, vector);
167} 167}
168 168
169void init_x2apic_ldr(void) 169static void init_x2apic_ldr(void)
170{ 170{
171 return; 171 return;
172} 172}
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 388e05a5fc17..b9a4d8c4b935 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -27,7 +27,7 @@
27#include <asm/trampoline.h> 27#include <asm/trampoline.h>
28 28
29/* boot cpu pda */ 29/* boot cpu pda */
30static struct x8664_pda _boot_cpu_pda __read_mostly; 30static struct x8664_pda _boot_cpu_pda;
31 31
32#ifdef CONFIG_SMP 32#ifdef CONFIG_SMP
33/* 33/*
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index d39918076bb4..df3bf269beab 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -10,7 +10,6 @@
10#include <asm/pgtable.h> 10#include <asm/pgtable.h>
11#include <asm/desc.h> 11#include <asm/desc.h>
12 12
13static struct fs_struct init_fs = INIT_FS;
14static struct signal_struct init_signals = INIT_SIGNALS(init_signals); 13static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
15static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 14static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
16struct mm_struct init_mm = INIT_MM(init_mm); 15struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index a25c3f76b8ac..3639442aa7a4 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -170,7 +170,7 @@ static struct irq_cfg irq_cfgx[NR_IRQS] = {
170 [15] = { .vector = IRQ15_VECTOR, }, 170 [15] = { .vector = IRQ15_VECTOR, },
171}; 171};
172 172
173void __init arch_early_irq_init(void) 173int __init arch_early_irq_init(void)
174{ 174{
175 struct irq_cfg *cfg; 175 struct irq_cfg *cfg;
176 struct irq_desc *desc; 176 struct irq_desc *desc;
@@ -188,6 +188,8 @@ void __init arch_early_irq_init(void)
188 if (i < NR_IRQS_LEGACY) 188 if (i < NR_IRQS_LEGACY)
189 cpumask_setall(cfg[i].domain); 189 cpumask_setall(cfg[i].domain);
190 } 190 }
191
192 return 0;
191} 193}
192 194
193#ifdef CONFIG_SPARSE_IRQ 195#ifdef CONFIG_SPARSE_IRQ
@@ -230,7 +232,7 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu)
230 return cfg; 232 return cfg;
231} 233}
232 234
233void arch_init_chip_data(struct irq_desc *desc, int cpu) 235int arch_init_chip_data(struct irq_desc *desc, int cpu)
234{ 236{
235 struct irq_cfg *cfg; 237 struct irq_cfg *cfg;
236 238
@@ -242,6 +244,8 @@ void arch_init_chip_data(struct irq_desc *desc, int cpu)
242 BUG_ON(1); 244 BUG_ON(1);
243 } 245 }
244 } 246 }
247
248 return 0;
245} 249}
246 250
247#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC 251#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
@@ -702,7 +706,7 @@ static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
702} 706}
703 707
704#ifdef CONFIG_X86_64 708#ifdef CONFIG_X86_64
705void io_apic_sync(struct irq_pin_list *entry) 709static void io_apic_sync(struct irq_pin_list *entry)
706{ 710{
707 /* 711 /*
708 * Synchronize the IO-APIC and the CPU by doing 712 * Synchronize the IO-APIC and the CPU by doing
@@ -1400,8 +1404,6 @@ void __setup_vector_irq(int cpu)
1400 1404
1401 /* Mark the inuse vectors */ 1405 /* Mark the inuse vectors */
1402 for_each_irq_desc(irq, desc) { 1406 for_each_irq_desc(irq, desc) {
1403 if (!desc)
1404 continue;
1405 cfg = desc->chip_data; 1407 cfg = desc->chip_data;
1406 if (!cpumask_test_cpu(cpu, cfg->domain)) 1408 if (!cpumask_test_cpu(cpu, cfg->domain))
1407 continue; 1409 continue;
@@ -1783,8 +1785,6 @@ __apicdebuginit(void) print_IO_APIC(void)
1783 for_each_irq_desc(irq, desc) { 1785 for_each_irq_desc(irq, desc) {
1784 struct irq_pin_list *entry; 1786 struct irq_pin_list *entry;
1785 1787
1786 if (!desc)
1787 continue;
1788 cfg = desc->chip_data; 1788 cfg = desc->chip_data;
1789 entry = cfg->irq_2_pin; 1789 entry = cfg->irq_2_pin;
1790 if (!entry) 1790 if (!entry)
@@ -2425,9 +2425,6 @@ static void ir_irq_migration(struct work_struct *work)
2425 struct irq_desc *desc; 2425 struct irq_desc *desc;
2426 2426
2427 for_each_irq_desc(irq, desc) { 2427 for_each_irq_desc(irq, desc) {
2428 if (!desc)
2429 continue;
2430
2431 if (desc->status & IRQ_MOVE_PENDING) { 2428 if (desc->status & IRQ_MOVE_PENDING) {
2432 unsigned long flags; 2429 unsigned long flags;
2433 2430
@@ -2713,9 +2710,6 @@ static inline void init_IO_APIC_traps(void)
2713 * 0x80, because int 0x80 is hm, kind of importantish. ;) 2710 * 0x80, because int 0x80 is hm, kind of importantish. ;)
2714 */ 2711 */
2715 for_each_irq_desc(irq, desc) { 2712 for_each_irq_desc(irq, desc) {
2716 if (!desc)
2717 continue;
2718
2719 cfg = desc->chip_data; 2713 cfg = desc->chip_data;
2720 if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { 2714 if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
2721 /* 2715 /*
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index e169ae9b6a62..652fce6d2cce 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -89,17 +89,17 @@ static cycle_t kvm_clock_read(void)
89 */ 89 */
90static unsigned long kvm_get_tsc_khz(void) 90static unsigned long kvm_get_tsc_khz(void)
91{ 91{
92 return preset_lpj; 92 struct pvclock_vcpu_time_info *src;
93 src = &per_cpu(hv_clock, 0);
94 return pvclock_tsc_khz(src);
93} 95}
94 96
95static void kvm_get_preset_lpj(void) 97static void kvm_get_preset_lpj(void)
96{ 98{
97 struct pvclock_vcpu_time_info *src;
98 unsigned long khz; 99 unsigned long khz;
99 u64 lpj; 100 u64 lpj;
100 101
101 src = &per_cpu(hv_clock, 0); 102 khz = kvm_get_tsc_khz();
102 khz = pvclock_tsc_khz(src);
103 103
104 lpj = ((u64)khz * 1000); 104 lpj = ((u64)khz * 1000);
105 do_div(lpj, HZ); 105 do_div(lpj, HZ);
@@ -194,5 +194,7 @@ void __init kvmclock_init(void)
194#endif 194#endif
195 kvm_get_preset_lpj(); 195 kvm_get_preset_lpj();
196 clocksource_register(&kvm_clock); 196 clocksource_register(&kvm_clock);
197 pv_info.paravirt_enabled = 1;
198 pv_info.name = "KVM";
197 } 199 }
198} 200}
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index eee32b43fee3..71f1d99a635d 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -12,8 +12,8 @@
12#include <linux/mm.h> 12#include <linux/mm.h>
13#include <linux/smp.h> 13#include <linux/smp.h>
14#include <linux/vmalloc.h> 14#include <linux/vmalloc.h>
15#include <linux/uaccess.h>
15 16
16#include <asm/uaccess.h>
17#include <asm/system.h> 17#include <asm/system.h>
18#include <asm/ldt.h> 18#include <asm/ldt.h>
19#include <asm/desc.h> 19#include <asm/desc.h>
@@ -93,7 +93,7 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
93 if (err < 0) 93 if (err < 0)
94 return err; 94 return err;
95 95
96 for(i = 0; i < old->size; i++) 96 for (i = 0; i < old->size; i++)
97 write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); 97 write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
98 return 0; 98 return 0;
99} 99}
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index efc2f361fe85..666e43df51f9 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -13,8 +13,7 @@
13#include <asm/msr.h> 13#include <asm/msr.h>
14#include <asm/acpi.h> 14#include <asm/acpi.h>
15#include <asm/mmconfig.h> 15#include <asm/mmconfig.h>
16 16#include <asm/pci_x86.h>
17#include "../pci/pci.h"
18 17
19struct pci_hostbridge_probe { 18struct pci_hostbridge_probe {
20 u32 bus; 19 u32 bus;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 45e3b69808ba..c5c5b8df1dbc 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -16,14 +16,14 @@
16#include <linux/bitops.h> 16#include <linux/bitops.h>
17#include <linux/acpi.h> 17#include <linux/acpi.h>
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/smp.h>
20#include <linux/acpi.h>
19 21
20#include <asm/smp.h>
21#include <asm/mtrr.h> 22#include <asm/mtrr.h>
22#include <asm/mpspec.h> 23#include <asm/mpspec.h>
23#include <asm/pgalloc.h> 24#include <asm/pgalloc.h>
24#include <asm/io_apic.h> 25#include <asm/io_apic.h>
25#include <asm/proto.h> 26#include <asm/proto.h>
26#include <asm/acpi.h>
27#include <asm/bios_ebda.h> 27#include <asm/bios_ebda.h>
28#include <asm/e820.h> 28#include <asm/e820.h>
29#include <asm/trampoline.h> 29#include <asm/trampoline.h>
@@ -95,8 +95,8 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
95#endif 95#endif
96 96
97 if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { 97 if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) {
98 set_bit(m->mpc_busid, mp_bus_not_pci); 98 set_bit(m->mpc_busid, mp_bus_not_pci);
99#if defined(CONFIG_EISA) || defined (CONFIG_MCA) 99#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
100 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; 100 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
101#endif 101#endif
102 } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { 102 } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
@@ -104,7 +104,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
104 x86_quirks->mpc_oem_pci_bus(m); 104 x86_quirks->mpc_oem_pci_bus(m);
105 105
106 clear_bit(m->mpc_busid, mp_bus_not_pci); 106 clear_bit(m->mpc_busid, mp_bus_not_pci);
107#if defined(CONFIG_EISA) || defined (CONFIG_MCA) 107#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
108 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; 108 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
109 } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { 109 } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) {
110 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; 110 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 8bd1bf9622a7..45a09ccdc214 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -26,11 +26,10 @@
26#include <linux/kernel_stat.h> 26#include <linux/kernel_stat.h>
27#include <linux/kdebug.h> 27#include <linux/kdebug.h>
28#include <linux/smp.h> 28#include <linux/smp.h>
29#include <linux/nmi.h>
29 30
30#include <asm/i8259.h> 31#include <asm/i8259.h>
31#include <asm/io_apic.h> 32#include <asm/io_apic.h>
32#include <asm/smp.h>
33#include <asm/nmi.h>
34#include <asm/proto.h> 33#include <asm/proto.h>
35#include <asm/timer.h> 34#include <asm/timer.h>
36 35
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index a35eaa379ff6..00c2bcd41463 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -52,7 +52,7 @@ static u32 *iommu_gatt_base; /* Remapping table */
52 * to trigger bugs with some popular PCI cards, in particular 3ware (but 52 * to trigger bugs with some popular PCI cards, in particular 3ware (but
53 * has also been seen with Qlogic at least). 53 * has also been seen with Qlogic at least).
54 */ 54 */
55int iommu_fullflush = 1; 55static int iommu_fullflush = 1;
56 56
57/* Allocation bitmap for the remapping area: */ 57/* Allocation bitmap for the remapping area: */
58static DEFINE_SPINLOCK(iommu_bitmap_lock); 58static DEFINE_SPINLOCK(iommu_bitmap_lock);
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index de4a9d643bee..2b46eb41643b 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -12,6 +12,8 @@
12#include <asm/proto.h> 12#include <asm/proto.h>
13#include <asm/reboot_fixups.h> 13#include <asm/reboot_fixups.h>
14#include <asm/reboot.h> 14#include <asm/reboot.h>
15#include <asm/pci_x86.h>
16#include <asm/virtext.h>
15 17
16#ifdef CONFIG_X86_32 18#ifdef CONFIG_X86_32
17# include <linux/dmi.h> 19# include <linux/dmi.h>
@@ -23,7 +25,6 @@
23 25
24#include <mach_ipi.h> 26#include <mach_ipi.h>
25 27
26
27/* 28/*
28 * Power off function, if any 29 * Power off function, if any
29 */ 30 */
@@ -39,6 +40,12 @@ int reboot_force;
39static int reboot_cpu = -1; 40static int reboot_cpu = -1;
40#endif 41#endif
41 42
43/* This is set if we need to go through the 'emergency' path.
44 * When machine_emergency_restart() is called, we may be in
45 * an inconsistent state and won't be able to clean up properly.
46 */
47static int reboot_emergency;
48
42/* This is set by the PCI code if either type 1 or type 2 PCI is detected */ 49/* This is set by the PCI code if either type 1 or type 2 PCI is detected */
43bool port_cf9_safe = false; 50bool port_cf9_safe = false;
44 51
@@ -368,6 +375,48 @@ static inline void kb_wait(void)
368 } 375 }
369} 376}
370 377
378static void vmxoff_nmi(int cpu, struct die_args *args)
379{
380 cpu_emergency_vmxoff();
381}
382
383/* Use NMIs as IPIs to tell all CPUs to disable virtualization
384 */
385static void emergency_vmx_disable_all(void)
386{
387 /* Just make sure we won't change CPUs while doing this */
388 local_irq_disable();
389
390 /* We need to disable VMX on all CPUs before rebooting, otherwise
391 * we risk hanging the machine, because CPUs ignore INIT
392 * signals when VMX is enabled.
393 *
394 * We can't take any locks and we may be in an inconsistent
395 * state, so we use NMIs as IPIs to tell the other CPUs to disable
396 * VMX and halt.
397 *
398 * For safety, we will avoid running the nmi_shootdown_cpus()
399 * stuff unnecessarily, but we don't have a way to check
400 * if other CPUs have VMX enabled. So we will call it only if the
401 * CPU we are running on has VMX enabled.
402 *
403 * We will miss cases where VMX is not enabled on all CPUs. This
404 * shouldn't do much harm because KVM always enables VMX on all
405 * CPUs anyway. But we can miss it in the small window where KVM
406 * is still enabling VMX.
407 */
408 if (cpu_has_vmx() && cpu_vmx_enabled()) {
409 /* Disable VMX on this CPU.
410 */
411 cpu_vmxoff();
412
413 /* Halt and disable VMX on the other CPUs */
414 nmi_shootdown_cpus(vmxoff_nmi);
415
416 }
417}
418
419
371void __attribute__((weak)) mach_reboot_fixups(void) 420void __attribute__((weak)) mach_reboot_fixups(void)
372{ 421{
373} 422}
@@ -376,6 +425,9 @@ static void native_machine_emergency_restart(void)
376{ 425{
377 int i; 426 int i;
378 427
428 if (reboot_emergency)
429 emergency_vmx_disable_all();
430
379 /* Tell the BIOS if we want cold or warm reboot */ 431 /* Tell the BIOS if we want cold or warm reboot */
380 *((unsigned short *)__va(0x472)) = reboot_mode; 432 *((unsigned short *)__va(0x472)) = reboot_mode;
381 433
@@ -482,13 +534,19 @@ void native_machine_shutdown(void)
482#endif 534#endif
483} 535}
484 536
537static void __machine_emergency_restart(int emergency)
538{
539 reboot_emergency = emergency;
540 machine_ops.emergency_restart();
541}
542
485static void native_machine_restart(char *__unused) 543static void native_machine_restart(char *__unused)
486{ 544{
487 printk("machine restart\n"); 545 printk("machine restart\n");
488 546
489 if (!reboot_force) 547 if (!reboot_force)
490 machine_shutdown(); 548 machine_shutdown();
491 machine_emergency_restart(); 549 __machine_emergency_restart(0);
492} 550}
493 551
494static void native_machine_halt(void) 552static void native_machine_halt(void)
@@ -532,7 +590,7 @@ void machine_shutdown(void)
532 590
533void machine_emergency_restart(void) 591void machine_emergency_restart(void)
534{ 592{
535 machine_ops.emergency_restart(); 593 __machine_emergency_restart(1);
536} 594}
537 595
538void machine_restart(char *cmd) 596void machine_restart(char *cmd)
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 6a00e5faaa74..f885023167e0 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -582,7 +582,6 @@ static int __init uv_ptc_init(void)
582static struct bau_control * __init uv_table_bases_init(int blade, int node) 582static struct bau_control * __init uv_table_bases_init(int blade, int node)
583{ 583{
584 int i; 584 int i;
585 int *ip;
586 struct bau_msg_status *msp; 585 struct bau_msg_status *msp;
587 struct bau_control *bau_tabp; 586 struct bau_control *bau_tabp;
588 587
@@ -599,13 +598,6 @@ static struct bau_control * __init uv_table_bases_init(int blade, int node)
599 bau_cpubits_clear(&msp->seen_by, (int) 598 bau_cpubits_clear(&msp->seen_by, (int)
600 uv_blade_nr_possible_cpus(blade)); 599 uv_blade_nr_possible_cpus(blade));
601 600
602 bau_tabp->watching =
603 kmalloc_node(sizeof(int) * DEST_NUM_RESOURCES, GFP_KERNEL, node);
604 BUG_ON(!bau_tabp->watching);
605
606 for (i = 0, ip = bau_tabp->watching; i < DEST_Q_SIZE; i++, ip++)
607 *ip = 0;
608
609 uv_bau_table_bases[blade] = bau_tabp; 601 uv_bau_table_bases[blade] = bau_tabp;
610 602
611 return bau_tabp; 603 return bau_tabp;
@@ -628,7 +620,6 @@ uv_table_bases_finish(int blade, int node, int cur_cpu,
628 bcp->bau_msg_head = bau_tablesp->va_queue_first; 620 bcp->bau_msg_head = bau_tablesp->va_queue_first;
629 bcp->va_queue_first = bau_tablesp->va_queue_first; 621 bcp->va_queue_first = bau_tablesp->va_queue_first;
630 bcp->va_queue_last = bau_tablesp->va_queue_last; 622 bcp->va_queue_last = bau_tablesp->va_queue_last;
631 bcp->watching = bau_tablesp->watching;
632 bcp->msg_statuses = bau_tablesp->msg_statuses; 623 bcp->msg_statuses = bau_tablesp->msg_statuses;
633 bcp->descriptor_base = adp; 624 bcp->descriptor_base = adp;
634 } 625 }
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 2d1f4c7e4052..ce6650eb64e9 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -292,8 +292,10 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
292 tsk->thread.error_code = error_code; 292 tsk->thread.error_code = error_code;
293 tsk->thread.trap_no = 8; 293 tsk->thread.trap_no = 8;
294 294
295 /* This is always a kernel trap and never fixable (and thus must 295 /*
296 never return). */ 296 * This is always a kernel trap and never fixable (and thus must
297 * never return).
298 */
297 for (;;) 299 for (;;)
298 die(str, regs, error_code); 300 die(str, regs, error_code);
299} 301}
@@ -520,9 +522,11 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
520} 522}
521 523
522#ifdef CONFIG_X86_64 524#ifdef CONFIG_X86_64
523/* Help handler running on IST stack to switch back to user stack 525/*
524 for scheduling or signal handling. The actual stack switch is done in 526 * Help handler running on IST stack to switch back to user stack
525 entry.S */ 527 * for scheduling or signal handling. The actual stack switch is done in
528 * entry.S
529 */
526asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) 530asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
527{ 531{
528 struct pt_regs *regs = eregs; 532 struct pt_regs *regs = eregs;
@@ -532,8 +536,10 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
532 /* Exception from user space */ 536 /* Exception from user space */
533 else if (user_mode(eregs)) 537 else if (user_mode(eregs))
534 regs = task_pt_regs(current); 538 regs = task_pt_regs(current);
535 /* Exception from kernel and interrupts are enabled. Move to 539 /*
536 kernel process stack. */ 540 * Exception from kernel and interrupts are enabled. Move to
541 * kernel process stack.
542 */
537 else if (eregs->flags & X86_EFLAGS_IF) 543 else if (eregs->flags & X86_EFLAGS_IF)
538 regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); 544 regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
539 if (eregs != regs) 545 if (eregs != regs)
@@ -685,12 +691,7 @@ void math_error(void __user *ip)
685 cwd = get_fpu_cwd(task); 691 cwd = get_fpu_cwd(task);
686 swd = get_fpu_swd(task); 692 swd = get_fpu_swd(task);
687 693
688 err = swd & ~cwd & 0x3f; 694 err = swd & ~cwd;
689
690#ifdef CONFIG_X86_32
691 if (!err)
692 return;
693#endif
694 695
695 if (err & 0x001) { /* Invalid op */ 696 if (err & 0x001) { /* Invalid op */
696 /* 697 /*
@@ -708,7 +709,11 @@ void math_error(void __user *ip)
708 } else if (err & 0x020) { /* Precision */ 709 } else if (err & 0x020) { /* Precision */
709 info.si_code = FPE_FLTRES; 710 info.si_code = FPE_FLTRES;
710 } else { 711 } else {
711 info.si_code = __SI_FAULT|SI_KERNEL; /* WTF? */ 712 /*
713 * If we're using IRQ 13, or supposedly even some trap 16
714 * implementations, it's possible we get a spurious trap...
715 */
716 return; /* Spurious trap, no error */
712 } 717 }
713 force_sig_info(SIGFPE, &info, task); 718 force_sig_info(SIGFPE, &info, task);
714} 719}
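The rewritten err computation reports only exceptions that are both raised in the FPU status word and unmasked in the control word. A short worked example using the x87 bit layout (IE=0x001, DE=0x002, ZE=0x004, OE=0x008, UE=0x010, PE=0x020):

/*
 *	swd = 0x0004;		zero-divide flag raised
 *	cwd = 0x037b;		default control word with ZM (0x004) cleared,
 *				i.e. zero-divide is unmasked
 *	err = swd & ~cwd;	== 0x0004, reported as FPE_FLTDIV
 *
 * With the default control word 0x037f (all six exceptions masked) the
 * same status word gives err == 0, which now falls through to the
 * spurious-trap return instead of raising a bogus SIGFPE.
 */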
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 15c3e6999182..2b54fe002e94 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -159,7 +159,7 @@ int save_i387_xstate(void __user *buf)
159 * Restore the extended state if present. Otherwise, restore the FP/SSE 159 * Restore the extended state if present. Otherwise, restore the FP/SSE
160 * state. 160 * state.
161 */ 161 */
162int restore_user_xstate(void __user *buf) 162static int restore_user_xstate(void __user *buf)
163{ 163{
164 struct _fpx_sw_bytes fx_sw_user; 164 struct _fpx_sw_bytes fx_sw_user;
165 u64 mask; 165 u64 mask;
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index c02343594b4d..d3ec292f00f2 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -7,8 +7,8 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
7ifeq ($(CONFIG_KVM_TRACE),y) 7ifeq ($(CONFIG_KVM_TRACE),y)
8common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o) 8common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
9endif 9endif
10ifeq ($(CONFIG_DMAR),y) 10ifeq ($(CONFIG_IOMMU_API),y)
11common-objs += $(addprefix ../../../virt/kvm/, vtd.o) 11common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
12endif 12endif
13 13
14EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm 14EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 59ebd37ad79e..e665d1c623ca 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -603,10 +603,29 @@ void kvm_free_pit(struct kvm *kvm)
603 603
604static void __inject_pit_timer_intr(struct kvm *kvm) 604static void __inject_pit_timer_intr(struct kvm *kvm)
605{ 605{
606 struct kvm_vcpu *vcpu;
607 int i;
608
606 mutex_lock(&kvm->lock); 609 mutex_lock(&kvm->lock);
607 kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); 610 kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
608 kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); 611 kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
609 mutex_unlock(&kvm->lock); 612 mutex_unlock(&kvm->lock);
613
614 /*
615 * Provides NMI watchdog support via Virtual Wire mode.
616 * The route is: PIT -> PIC -> LVT0 in NMI mode.
617 *
618 * Note: Our Virtual Wire implementation is simplified, only
619 * propagating PIT interrupts to all VCPUs when they have set
620 * LVT0 to NMI delivery. Other PIC interrupts are just sent to
621 * VCPU0, and only if its LVT0 is in EXTINT mode.
622 */
623 if (kvm->arch.vapics_in_nmi_mode > 0)
624 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
625 vcpu = kvm->vcpus[i];
626 if (vcpu)
627 kvm_apic_nmi_wd_deliver(vcpu);
628 }
610} 629}
611 630
612void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) 631void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 17e41e165f1a..179dcb0103fd 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -26,10 +26,40 @@
26 * Port from Qemu. 26 * Port from Qemu.
27 */ 27 */
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/bitops.h>
29#include "irq.h" 30#include "irq.h"
30 31
31#include <linux/kvm_host.h> 32#include <linux/kvm_host.h>
32 33
34static void pic_lock(struct kvm_pic *s)
35{
36 spin_lock(&s->lock);
37}
38
39static void pic_unlock(struct kvm_pic *s)
40{
41 struct kvm *kvm = s->kvm;
42 unsigned acks = s->pending_acks;
43 bool wakeup = s->wakeup_needed;
44 struct kvm_vcpu *vcpu;
45
46 s->pending_acks = 0;
47 s->wakeup_needed = false;
48
49 spin_unlock(&s->lock);
50
51 while (acks) {
52 kvm_notify_acked_irq(kvm, __ffs(acks));
53 acks &= acks - 1;
54 }
55
56 if (wakeup) {
57 vcpu = s->kvm->vcpus[0];
58 if (vcpu)
59 kvm_vcpu_kick(vcpu);
60 }
61}
62
33static void pic_clear_isr(struct kvm_kpic_state *s, int irq) 63static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
34{ 64{
35 s->isr &= ~(1 << irq); 65 s->isr &= ~(1 << irq);
@@ -136,17 +166,21 @@ static void pic_update_irq(struct kvm_pic *s)
136 166
137void kvm_pic_update_irq(struct kvm_pic *s) 167void kvm_pic_update_irq(struct kvm_pic *s)
138{ 168{
169 pic_lock(s);
139 pic_update_irq(s); 170 pic_update_irq(s);
171 pic_unlock(s);
140} 172}
141 173
142void kvm_pic_set_irq(void *opaque, int irq, int level) 174void kvm_pic_set_irq(void *opaque, int irq, int level)
143{ 175{
144 struct kvm_pic *s = opaque; 176 struct kvm_pic *s = opaque;
145 177
178 pic_lock(s);
146 if (irq >= 0 && irq < PIC_NUM_PINS) { 179 if (irq >= 0 && irq < PIC_NUM_PINS) {
147 pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); 180 pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
148 pic_update_irq(s); 181 pic_update_irq(s);
149 } 182 }
183 pic_unlock(s);
150} 184}
151 185
152/* 186/*
@@ -172,6 +206,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
172 int irq, irq2, intno; 206 int irq, irq2, intno;
173 struct kvm_pic *s = pic_irqchip(kvm); 207 struct kvm_pic *s = pic_irqchip(kvm);
174 208
209 pic_lock(s);
175 irq = pic_get_irq(&s->pics[0]); 210 irq = pic_get_irq(&s->pics[0]);
176 if (irq >= 0) { 211 if (irq >= 0) {
177 pic_intack(&s->pics[0], irq); 212 pic_intack(&s->pics[0], irq);
@@ -196,6 +231,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
196 intno = s->pics[0].irq_base + irq; 231 intno = s->pics[0].irq_base + irq;
197 } 232 }
198 pic_update_irq(s); 233 pic_update_irq(s);
234 pic_unlock(s);
199 kvm_notify_acked_irq(kvm, irq); 235 kvm_notify_acked_irq(kvm, irq);
200 236
201 return intno; 237 return intno;
@@ -203,7 +239,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
203 239
204void kvm_pic_reset(struct kvm_kpic_state *s) 240void kvm_pic_reset(struct kvm_kpic_state *s)
205{ 241{
206 int irq, irqbase; 242 int irq, irqbase, n;
207 struct kvm *kvm = s->pics_state->irq_request_opaque; 243 struct kvm *kvm = s->pics_state->irq_request_opaque;
208 struct kvm_vcpu *vcpu0 = kvm->vcpus[0]; 244 struct kvm_vcpu *vcpu0 = kvm->vcpus[0];
209 245
@@ -214,8 +250,10 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
214 250
215 for (irq = 0; irq < PIC_NUM_PINS/2; irq++) { 251 for (irq = 0; irq < PIC_NUM_PINS/2; irq++) {
216 if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0)) 252 if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
217 if (s->irr & (1 << irq) || s->isr & (1 << irq)) 253 if (s->irr & (1 << irq) || s->isr & (1 << irq)) {
218 kvm_notify_acked_irq(kvm, irq+irqbase); 254 n = irq + irqbase;
255 s->pics_state->pending_acks |= 1 << n;
256 }
219 } 257 }
220 s->last_irr = 0; 258 s->last_irr = 0;
221 s->irr = 0; 259 s->irr = 0;
@@ -406,6 +444,7 @@ static void picdev_write(struct kvm_io_device *this,
406 printk(KERN_ERR "PIC: non byte write\n"); 444 printk(KERN_ERR "PIC: non byte write\n");
407 return; 445 return;
408 } 446 }
447 pic_lock(s);
409 switch (addr) { 448 switch (addr) {
410 case 0x20: 449 case 0x20:
411 case 0x21: 450 case 0x21:
@@ -418,6 +457,7 @@ static void picdev_write(struct kvm_io_device *this,
418 elcr_ioport_write(&s->pics[addr & 1], addr, data); 457 elcr_ioport_write(&s->pics[addr & 1], addr, data);
419 break; 458 break;
420 } 459 }
460 pic_unlock(s);
421} 461}
422 462
423static void picdev_read(struct kvm_io_device *this, 463static void picdev_read(struct kvm_io_device *this,
@@ -431,6 +471,7 @@ static void picdev_read(struct kvm_io_device *this,
431 printk(KERN_ERR "PIC: non byte read\n"); 471 printk(KERN_ERR "PIC: non byte read\n");
432 return; 472 return;
433 } 473 }
474 pic_lock(s);
434 switch (addr) { 475 switch (addr) {
435 case 0x20: 476 case 0x20:
436 case 0x21: 477 case 0x21:
@@ -444,6 +485,7 @@ static void picdev_read(struct kvm_io_device *this,
444 break; 485 break;
445 } 486 }
446 *(unsigned char *)val = data; 487 *(unsigned char *)val = data;
488 pic_unlock(s);
447} 489}
448 490
449/* 491/*
@@ -459,7 +501,7 @@ static void pic_irq_request(void *opaque, int level)
459 s->output = level; 501 s->output = level;
460 if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { 502 if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) {
461 s->pics[0].isr_ack &= ~(1 << irq); 503 s->pics[0].isr_ack &= ~(1 << irq);
462 kvm_vcpu_kick(vcpu); 504 s->wakeup_needed = true;
463 } 505 }
464} 506}
465 507
@@ -469,6 +511,8 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
469 s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); 511 s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
470 if (!s) 512 if (!s)
471 return NULL; 513 return NULL;
514 spin_lock_init(&s->lock);
515 s->kvm = kvm;
472 s->pics[0].elcr_mask = 0xf8; 516 s->pics[0].elcr_mask = 0xf8;
473 s->pics[1].elcr_mask = 0xde; 517 s->pics[1].elcr_mask = 0xde;
474 s->irq_request = pic_irq_request; 518 s->irq_request = pic_irq_request;
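
The new pic_lock()/pic_unlock() pair puts the PIC state under a spinlock while deferring the callbacks that must not run under that lock (kvm_notify_acked_irq() and the VCPU kick) until after it is dropped. A standalone sketch of the same deferral pattern, with illustrative stand-in names rather than the patch's own:

/* Record work under the lock, run the callbacks lock-free afterwards. */
#include <linux/spinlock.h>
#include <linux/bitops.h>

struct my_dev {
	spinlock_t lock;
	unsigned pending;		/* bitmask of deferred acks */
};

static void do_ack(int irq)
{
	/* stand-in for kvm_notify_acked_irq() in the patch */
}

static void my_dev_flush(struct my_dev *d)
{
	unsigned pending;

	spin_lock(&d->lock);
	pending = d->pending;		/* snapshot under the lock */
	d->pending = 0;
	spin_unlock(&d->lock);

	while (pending) {		/* callbacks run without the lock held */
		do_ack(__ffs(pending));
		pending &= pending - 1;
	}
}
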
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index f17c8f5bbf31..2bf32a03ceec 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -25,6 +25,7 @@
25#include <linux/mm_types.h> 25#include <linux/mm_types.h>
26#include <linux/hrtimer.h> 26#include <linux/hrtimer.h>
27#include <linux/kvm_host.h> 27#include <linux/kvm_host.h>
28#include <linux/spinlock.h>
28 29
29#include "iodev.h" 30#include "iodev.h"
30#include "ioapic.h" 31#include "ioapic.h"
@@ -59,6 +60,10 @@ struct kvm_kpic_state {
59}; 60};
60 61
61struct kvm_pic { 62struct kvm_pic {
63 spinlock_t lock;
64 bool wakeup_needed;
65 unsigned pending_acks;
66 struct kvm *kvm;
62 struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ 67 struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
63 irq_request_func *irq_request; 68 irq_request_func *irq_request;
64 void *irq_request_opaque; 69 void *irq_request_opaque;
@@ -87,6 +92,7 @@ void kvm_pic_reset(struct kvm_kpic_state *s);
87void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec); 92void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
88void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); 93void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
89void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); 94void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
95void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
90void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); 96void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
91void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu); 97void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
92void __kvm_migrate_timers(struct kvm_vcpu *vcpu); 98void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h
index 65ef0fc2c036..8e5ee99551f6 100644
--- a/arch/x86/kvm/kvm_svm.h
+++ b/arch/x86/kvm/kvm_svm.h
@@ -7,7 +7,7 @@
7#include <linux/kvm_host.h> 7#include <linux/kvm_host.h>
8#include <asm/msr.h> 8#include <asm/msr.h>
9 9
10#include "svm.h" 10#include <asm/svm.h>
11 11
12static const u32 host_save_user_msrs[] = { 12static const u32 host_save_user_msrs[] = {
13#ifdef CONFIG_X86_64 13#ifdef CONFIG_X86_64
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0fc3cab48943..afac68c0815c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -130,6 +130,11 @@ static inline int apic_lvtt_period(struct kvm_lapic *apic)
130 return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC; 130 return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC;
131} 131}
132 132
133static inline int apic_lvt_nmi_mode(u32 lvt_val)
134{
135 return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
136}
137
133static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { 138static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
134 LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */ 139 LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */
135 LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ 140 LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */
@@ -354,6 +359,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
354 359
355 case APIC_DM_NMI: 360 case APIC_DM_NMI:
356 kvm_inject_nmi(vcpu); 361 kvm_inject_nmi(vcpu);
362 kvm_vcpu_kick(vcpu);
357 break; 363 break;
358 364
359 case APIC_DM_INIT: 365 case APIC_DM_INIT:
@@ -380,6 +386,14 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
380 } 386 }
381 break; 387 break;
382 388
389 case APIC_DM_EXTINT:
390 /*
391 * Should only be called by kvm_apic_local_deliver() with LVT0,
 392 * before the NMI watchdog was enabled. Already handled by
393 * kvm_apic_accept_pic_intr().
394 */
395 break;
396
383 default: 397 default:
384 printk(KERN_ERR "TODO: unsupported delivery mode %x\n", 398 printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
385 delivery_mode); 399 delivery_mode);
@@ -663,6 +677,20 @@ static void start_apic_timer(struct kvm_lapic *apic)
663 apic->timer.period))); 677 apic->timer.period)));
664} 678}
665 679
680static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
681{
682 int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0));
683
684 if (apic_lvt_nmi_mode(lvt0_val)) {
685 if (!nmi_wd_enabled) {
 686 apic_debug("Received NMI setting on APIC_LVT0 "
687 "for cpu %d\n", apic->vcpu->vcpu_id);
688 apic->vcpu->kvm->arch.vapics_in_nmi_mode++;
689 }
690 } else if (nmi_wd_enabled)
691 apic->vcpu->kvm->arch.vapics_in_nmi_mode--;
692}
693
666static void apic_mmio_write(struct kvm_io_device *this, 694static void apic_mmio_write(struct kvm_io_device *this,
667 gpa_t address, int len, const void *data) 695 gpa_t address, int len, const void *data)
668{ 696{
@@ -743,10 +771,11 @@ static void apic_mmio_write(struct kvm_io_device *this,
743 apic_set_reg(apic, APIC_ICR2, val & 0xff000000); 771 apic_set_reg(apic, APIC_ICR2, val & 0xff000000);
744 break; 772 break;
745 773
774 case APIC_LVT0:
775 apic_manage_nmi_watchdog(apic, val);
746 case APIC_LVTT: 776 case APIC_LVTT:
747 case APIC_LVTTHMR: 777 case APIC_LVTTHMR:
748 case APIC_LVTPC: 778 case APIC_LVTPC:
749 case APIC_LVT0:
750 case APIC_LVT1: 779 case APIC_LVT1:
751 case APIC_LVTERR: 780 case APIC_LVTERR:
752 /* TODO: Check vector */ 781 /* TODO: Check vector */
@@ -961,12 +990,26 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu)
961 return 0; 990 return 0;
962} 991}
963 992
964static int __inject_apic_timer_irq(struct kvm_lapic *apic) 993static int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
994{
995 u32 reg = apic_get_reg(apic, lvt_type);
996 int vector, mode, trig_mode;
997
998 if (apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
999 vector = reg & APIC_VECTOR_MASK;
1000 mode = reg & APIC_MODE_MASK;
1001 trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
1002 return __apic_accept_irq(apic, mode, vector, 1, trig_mode);
1003 }
1004 return 0;
1005}
1006
1007void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
965{ 1008{
966 int vector; 1009 struct kvm_lapic *apic = vcpu->arch.apic;
967 1010
968 vector = apic_lvt_vector(apic, APIC_LVTT); 1011 if (apic)
969 return __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0); 1012 kvm_apic_local_deliver(apic, APIC_LVT0);
970} 1013}
971 1014
972static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) 1015static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
@@ -1061,9 +1104,8 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
1061{ 1104{
1062 struct kvm_lapic *apic = vcpu->arch.apic; 1105 struct kvm_lapic *apic = vcpu->arch.apic;
1063 1106
1064 if (apic && apic_lvt_enabled(apic, APIC_LVTT) && 1107 if (apic && atomic_read(&apic->timer.pending) > 0) {
1065 atomic_read(&apic->timer.pending) > 0) { 1108 if (kvm_apic_local_deliver(apic, APIC_LVTT))
1066 if (__inject_apic_timer_irq(apic))
1067 atomic_dec(&apic->timer.pending); 1109 atomic_dec(&apic->timer.pending);
1068 } 1110 }
1069} 1111}
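
apic_lvt_nmi_mode() is true only when the LVT entry both selects NMI delivery and is left unmasked; a few concrete register values make the mask arithmetic explicit (constants are the architectural APIC encodings; the check itself is not part of the patch):

#include <assert.h>
#include <stdint.h>

#define APIC_MODE_MASK	0x00700u
#define APIC_DM_NMI	0x00400u
#define APIC_LVT_MASKED	0x10000u

static int lvt_nmi_mode(uint32_t lvt_val)
{
	return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
}

int main(void)
{
	assert(lvt_nmi_mode(0x00400));	/* NMI delivery, unmasked: watchdog on */
	assert(!lvt_nmi_mode(0x10400));	/* NMI delivery but masked */
	assert(!lvt_nmi_mode(0x00030));	/* fixed delivery, vector 0x30 */
	return 0;
}
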
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 410ddbc1aa2e..83f11c7474a1 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -17,7 +17,6 @@
17 * 17 *
18 */ 18 */
19 19
20#include "vmx.h"
21#include "mmu.h" 20#include "mmu.h"
22 21
23#include <linux/kvm_host.h> 22#include <linux/kvm_host.h>
@@ -33,6 +32,7 @@
33#include <asm/page.h> 32#include <asm/page.h>
34#include <asm/cmpxchg.h> 33#include <asm/cmpxchg.h>
35#include <asm/io.h> 34#include <asm/io.h>
35#include <asm/vmx.h>
36 36
37/* 37/*
38 * When setting this variable to true it enables Two-Dimensional-Paging 38 * When setting this variable to true it enables Two-Dimensional-Paging
@@ -168,6 +168,7 @@ static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
168static u64 __read_mostly shadow_user_mask; 168static u64 __read_mostly shadow_user_mask;
169static u64 __read_mostly shadow_accessed_mask; 169static u64 __read_mostly shadow_accessed_mask;
170static u64 __read_mostly shadow_dirty_mask; 170static u64 __read_mostly shadow_dirty_mask;
171static u64 __read_mostly shadow_mt_mask;
171 172
172void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) 173void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
173{ 174{
@@ -183,13 +184,14 @@ void kvm_mmu_set_base_ptes(u64 base_pte)
183EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes); 184EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);
184 185
185void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, 186void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
186 u64 dirty_mask, u64 nx_mask, u64 x_mask) 187 u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask)
187{ 188{
188 shadow_user_mask = user_mask; 189 shadow_user_mask = user_mask;
189 shadow_accessed_mask = accessed_mask; 190 shadow_accessed_mask = accessed_mask;
190 shadow_dirty_mask = dirty_mask; 191 shadow_dirty_mask = dirty_mask;
191 shadow_nx_mask = nx_mask; 192 shadow_nx_mask = nx_mask;
192 shadow_x_mask = x_mask; 193 shadow_x_mask = x_mask;
194 shadow_mt_mask = mt_mask;
193} 195}
194EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); 196EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
195 197
@@ -384,7 +386,9 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn)
384{ 386{
385 int *write_count; 387 int *write_count;
386 388
387 write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); 389 gfn = unalias_gfn(kvm, gfn);
390 write_count = slot_largepage_idx(gfn,
391 gfn_to_memslot_unaliased(kvm, gfn));
388 *write_count += 1; 392 *write_count += 1;
389} 393}
390 394
@@ -392,16 +396,20 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
392{ 396{
393 int *write_count; 397 int *write_count;
394 398
395 write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); 399 gfn = unalias_gfn(kvm, gfn);
400 write_count = slot_largepage_idx(gfn,
401 gfn_to_memslot_unaliased(kvm, gfn));
396 *write_count -= 1; 402 *write_count -= 1;
397 WARN_ON(*write_count < 0); 403 WARN_ON(*write_count < 0);
398} 404}
399 405
400static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn) 406static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn)
401{ 407{
402 struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); 408 struct kvm_memory_slot *slot;
403 int *largepage_idx; 409 int *largepage_idx;
404 410
411 gfn = unalias_gfn(kvm, gfn);
412 slot = gfn_to_memslot_unaliased(kvm, gfn);
405 if (slot) { 413 if (slot) {
406 largepage_idx = slot_largepage_idx(gfn, slot); 414 largepage_idx = slot_largepage_idx(gfn, slot);
407 return *largepage_idx; 415 return *largepage_idx;
@@ -613,7 +621,7 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
613 return NULL; 621 return NULL;
614} 622}
615 623
616static void rmap_write_protect(struct kvm *kvm, u64 gfn) 624static int rmap_write_protect(struct kvm *kvm, u64 gfn)
617{ 625{
618 unsigned long *rmapp; 626 unsigned long *rmapp;
619 u64 *spte; 627 u64 *spte;
@@ -659,8 +667,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
659 spte = rmap_next(kvm, rmapp, spte); 667 spte = rmap_next(kvm, rmapp, spte);
660 } 668 }
661 669
662 if (write_protected) 670 return write_protected;
663 kvm_flush_remote_tlbs(kvm);
664} 671}
665 672
666static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) 673static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
@@ -786,9 +793,11 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
786 sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); 793 sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
787 set_page_private(virt_to_page(sp->spt), (unsigned long)sp); 794 set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
788 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); 795 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
796 INIT_LIST_HEAD(&sp->oos_link);
789 ASSERT(is_empty_shadow_page(sp->spt)); 797 ASSERT(is_empty_shadow_page(sp->spt));
790 sp->slot_bitmap = 0; 798 bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
791 sp->multimapped = 0; 799 sp->multimapped = 0;
800 sp->global = 1;
792 sp->parent_pte = parent_pte; 801 sp->parent_pte = parent_pte;
793 --vcpu->kvm->arch.n_free_mmu_pages; 802 --vcpu->kvm->arch.n_free_mmu_pages;
794 return sp; 803 return sp;
@@ -900,8 +909,9 @@ static void kvm_mmu_update_unsync_bitmap(u64 *spte)
900 struct kvm_mmu_page *sp = page_header(__pa(spte)); 909 struct kvm_mmu_page *sp = page_header(__pa(spte));
901 910
902 index = spte - sp->spt; 911 index = spte - sp->spt;
903 __set_bit(index, sp->unsync_child_bitmap); 912 if (!__test_and_set_bit(index, sp->unsync_child_bitmap))
904 sp->unsync_children = 1; 913 sp->unsync_children++;
914 WARN_ON(!sp->unsync_children);
905} 915}
906 916
907static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) 917static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp)
@@ -928,7 +938,6 @@ static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp)
928 938
929static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 939static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
930{ 940{
931 sp->unsync_children = 1;
932 kvm_mmu_update_parents_unsync(sp); 941 kvm_mmu_update_parents_unsync(sp);
933 return 1; 942 return 1;
934} 943}
@@ -959,38 +968,66 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
959{ 968{
960} 969}
961 970
971#define KVM_PAGE_ARRAY_NR 16
972
973struct kvm_mmu_pages {
974 struct mmu_page_and_offset {
975 struct kvm_mmu_page *sp;
976 unsigned int idx;
977 } page[KVM_PAGE_ARRAY_NR];
978 unsigned int nr;
979};
980
962#define for_each_unsync_children(bitmap, idx) \ 981#define for_each_unsync_children(bitmap, idx) \
963 for (idx = find_first_bit(bitmap, 512); \ 982 for (idx = find_first_bit(bitmap, 512); \
964 idx < 512; \ 983 idx < 512; \
965 idx = find_next_bit(bitmap, 512, idx+1)) 984 idx = find_next_bit(bitmap, 512, idx+1))
966 985
967static int mmu_unsync_walk(struct kvm_mmu_page *sp, 986int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
968 struct kvm_unsync_walk *walker) 987 int idx)
969{ 988{
970 int i, ret; 989 int i;
971 990
972 if (!sp->unsync_children) 991 if (sp->unsync)
973 return 0; 992 for (i=0; i < pvec->nr; i++)
993 if (pvec->page[i].sp == sp)
994 return 0;
995
996 pvec->page[pvec->nr].sp = sp;
997 pvec->page[pvec->nr].idx = idx;
998 pvec->nr++;
999 return (pvec->nr == KVM_PAGE_ARRAY_NR);
1000}
1001
1002static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
1003 struct kvm_mmu_pages *pvec)
1004{
1005 int i, ret, nr_unsync_leaf = 0;
974 1006
975 for_each_unsync_children(sp->unsync_child_bitmap, i) { 1007 for_each_unsync_children(sp->unsync_child_bitmap, i) {
976 u64 ent = sp->spt[i]; 1008 u64 ent = sp->spt[i];
977 1009
978 if (is_shadow_present_pte(ent)) { 1010 if (is_shadow_present_pte(ent) && !is_large_pte(ent)) {
979 struct kvm_mmu_page *child; 1011 struct kvm_mmu_page *child;
980 child = page_header(ent & PT64_BASE_ADDR_MASK); 1012 child = page_header(ent & PT64_BASE_ADDR_MASK);
981 1013
982 if (child->unsync_children) { 1014 if (child->unsync_children) {
983 ret = mmu_unsync_walk(child, walker); 1015 if (mmu_pages_add(pvec, child, i))
984 if (ret) 1016 return -ENOSPC;
1017
1018 ret = __mmu_unsync_walk(child, pvec);
1019 if (!ret)
1020 __clear_bit(i, sp->unsync_child_bitmap);
1021 else if (ret > 0)
1022 nr_unsync_leaf += ret;
1023 else
985 return ret; 1024 return ret;
986 __clear_bit(i, sp->unsync_child_bitmap);
987 } 1025 }
988 1026
989 if (child->unsync) { 1027 if (child->unsync) {
990 ret = walker->entry(child, walker); 1028 nr_unsync_leaf++;
991 __clear_bit(i, sp->unsync_child_bitmap); 1029 if (mmu_pages_add(pvec, child, i))
992 if (ret) 1030 return -ENOSPC;
993 return ret;
994 } 1031 }
995 } 1032 }
996 } 1033 }
@@ -998,7 +1035,17 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp,
998 if (find_first_bit(sp->unsync_child_bitmap, 512) == 512) 1035 if (find_first_bit(sp->unsync_child_bitmap, 512) == 512)
999 sp->unsync_children = 0; 1036 sp->unsync_children = 0;
1000 1037
1001 return 0; 1038 return nr_unsync_leaf;
1039}
1040
1041static int mmu_unsync_walk(struct kvm_mmu_page *sp,
1042 struct kvm_mmu_pages *pvec)
1043{
1044 if (!sp->unsync_children)
1045 return 0;
1046
1047 mmu_pages_add(pvec, sp, 0);
1048 return __mmu_unsync_walk(sp, pvec);
1002} 1049}
1003 1050
1004static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) 1051static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
@@ -1021,10 +1068,18 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
1021 return NULL; 1068 return NULL;
1022} 1069}
1023 1070
1071static void kvm_unlink_unsync_global(struct kvm *kvm, struct kvm_mmu_page *sp)
1072{
1073 list_del(&sp->oos_link);
1074 --kvm->stat.mmu_unsync_global;
1075}
1076
1024static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) 1077static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
1025{ 1078{
1026 WARN_ON(!sp->unsync); 1079 WARN_ON(!sp->unsync);
1027 sp->unsync = 0; 1080 sp->unsync = 0;
1081 if (sp->global)
1082 kvm_unlink_unsync_global(kvm, sp);
1028 --kvm->stat.mmu_unsync; 1083 --kvm->stat.mmu_unsync;
1029} 1084}
1030 1085
@@ -1037,7 +1092,8 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1037 return 1; 1092 return 1;
1038 } 1093 }
1039 1094
1040 rmap_write_protect(vcpu->kvm, sp->gfn); 1095 if (rmap_write_protect(vcpu->kvm, sp->gfn))
1096 kvm_flush_remote_tlbs(vcpu->kvm);
1041 kvm_unlink_unsync_page(vcpu->kvm, sp); 1097 kvm_unlink_unsync_page(vcpu->kvm, sp);
1042 if (vcpu->arch.mmu.sync_page(vcpu, sp)) { 1098 if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
1043 kvm_mmu_zap_page(vcpu->kvm, sp); 1099 kvm_mmu_zap_page(vcpu->kvm, sp);
@@ -1048,30 +1104,89 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1048 return 0; 1104 return 0;
1049} 1105}
1050 1106
1051struct sync_walker { 1107struct mmu_page_path {
1052 struct kvm_vcpu *vcpu; 1108 struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1];
1053 struct kvm_unsync_walk walker; 1109 unsigned int idx[PT64_ROOT_LEVEL-1];
1054}; 1110};
1055 1111
1056static int mmu_sync_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk) 1112#define for_each_sp(pvec, sp, parents, i) \
1113 for (i = mmu_pages_next(&pvec, &parents, -1), \
1114 sp = pvec.page[i].sp; \
1115 i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \
1116 i = mmu_pages_next(&pvec, &parents, i))
1117
1118int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents,
1119 int i)
1057{ 1120{
1058 struct sync_walker *sync_walk = container_of(walk, struct sync_walker, 1121 int n;
1059 walker);
1060 struct kvm_vcpu *vcpu = sync_walk->vcpu;
1061 1122
1062 kvm_sync_page(vcpu, sp); 1123 for (n = i+1; n < pvec->nr; n++) {
1063 return (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock)); 1124 struct kvm_mmu_page *sp = pvec->page[n].sp;
1125
1126 if (sp->role.level == PT_PAGE_TABLE_LEVEL) {
1127 parents->idx[0] = pvec->page[n].idx;
1128 return n;
1129 }
1130
1131 parents->parent[sp->role.level-2] = sp;
1132 parents->idx[sp->role.level-1] = pvec->page[n].idx;
1133 }
1134
1135 return n;
1064} 1136}
1065 1137
1066static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 1138void mmu_pages_clear_parents(struct mmu_page_path *parents)
1067{ 1139{
1068 struct sync_walker walker = { 1140 struct kvm_mmu_page *sp;
1069 .walker = { .entry = mmu_sync_fn, }, 1141 unsigned int level = 0;
1070 .vcpu = vcpu, 1142
1071 }; 1143 do {
1144 unsigned int idx = parents->idx[level];
1145
1146 sp = parents->parent[level];
1147 if (!sp)
1148 return;
1149
1150 --sp->unsync_children;
1151 WARN_ON((int)sp->unsync_children < 0);
1152 __clear_bit(idx, sp->unsync_child_bitmap);
1153 level++;
1154 } while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children);
1155}
1156
1157static void kvm_mmu_pages_init(struct kvm_mmu_page *parent,
1158 struct mmu_page_path *parents,
1159 struct kvm_mmu_pages *pvec)
1160{
1161 parents->parent[parent->role.level-1] = NULL;
1162 pvec->nr = 0;
1163}
1164
1165static void mmu_sync_children(struct kvm_vcpu *vcpu,
1166 struct kvm_mmu_page *parent)
1167{
1168 int i;
1169 struct kvm_mmu_page *sp;
1170 struct mmu_page_path parents;
1171 struct kvm_mmu_pages pages;
1172
1173 kvm_mmu_pages_init(parent, &parents, &pages);
1174 while (mmu_unsync_walk(parent, &pages)) {
1175 int protected = 0;
1072 1176
1073 while (mmu_unsync_walk(sp, &walker.walker)) 1177 for_each_sp(pages, sp, parents, i)
1178 protected |= rmap_write_protect(vcpu->kvm, sp->gfn);
1179
1180 if (protected)
1181 kvm_flush_remote_tlbs(vcpu->kvm);
1182
1183 for_each_sp(pages, sp, parents, i) {
1184 kvm_sync_page(vcpu, sp);
1185 mmu_pages_clear_parents(&parents);
1186 }
1074 cond_resched_lock(&vcpu->kvm->mmu_lock); 1187 cond_resched_lock(&vcpu->kvm->mmu_lock);
1188 kvm_mmu_pages_init(parent, &parents, &pages);
1189 }
1075} 1190}
1076 1191
1077static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, 1192static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
@@ -1129,7 +1244,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
1129 sp->role = role; 1244 sp->role = role;
1130 hlist_add_head(&sp->hash_link, bucket); 1245 hlist_add_head(&sp->hash_link, bucket);
1131 if (!metaphysical) { 1246 if (!metaphysical) {
1132 rmap_write_protect(vcpu->kvm, gfn); 1247 if (rmap_write_protect(vcpu->kvm, gfn))
1248 kvm_flush_remote_tlbs(vcpu->kvm);
1133 account_shadowed(vcpu->kvm, gfn); 1249 account_shadowed(vcpu->kvm, gfn);
1134 } 1250 }
1135 if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) 1251 if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte)
@@ -1153,6 +1269,8 @@ static int walk_shadow(struct kvm_shadow_walk *walker,
1153 if (level == PT32E_ROOT_LEVEL) { 1269 if (level == PT32E_ROOT_LEVEL) {
1154 shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; 1270 shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
1155 shadow_addr &= PT64_BASE_ADDR_MASK; 1271 shadow_addr &= PT64_BASE_ADDR_MASK;
1272 if (!shadow_addr)
1273 return 1;
1156 --level; 1274 --level;
1157 } 1275 }
1158 1276
@@ -1237,33 +1355,29 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
1237 } 1355 }
1238} 1356}
1239 1357
1240struct zap_walker { 1358static int mmu_zap_unsync_children(struct kvm *kvm,
1241 struct kvm_unsync_walk walker; 1359 struct kvm_mmu_page *parent)
1242 struct kvm *kvm;
1243 int zapped;
1244};
1245
1246static int mmu_zap_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk)
1247{ 1360{
1248 struct zap_walker *zap_walk = container_of(walk, struct zap_walker, 1361 int i, zapped = 0;
1249 walker); 1362 struct mmu_page_path parents;
1250 kvm_mmu_zap_page(zap_walk->kvm, sp); 1363 struct kvm_mmu_pages pages;
1251 zap_walk->zapped = 1;
1252 return 0;
1253}
1254 1364
1255static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *sp) 1365 if (parent->role.level == PT_PAGE_TABLE_LEVEL)
1256{
1257 struct zap_walker walker = {
1258 .walker = { .entry = mmu_zap_fn, },
1259 .kvm = kvm,
1260 .zapped = 0,
1261 };
1262
1263 if (sp->role.level == PT_PAGE_TABLE_LEVEL)
1264 return 0; 1366 return 0;
1265 mmu_unsync_walk(sp, &walker.walker); 1367
1266 return walker.zapped; 1368 kvm_mmu_pages_init(parent, &parents, &pages);
1369 while (mmu_unsync_walk(parent, &pages)) {
1370 struct kvm_mmu_page *sp;
1371
1372 for_each_sp(pages, sp, parents, i) {
1373 kvm_mmu_zap_page(kvm, sp);
1374 mmu_pages_clear_parents(&parents);
1375 }
1376 zapped += pages.nr;
1377 kvm_mmu_pages_init(parent, &parents, &pages);
1378 }
1379
1380 return zapped;
1267} 1381}
1268 1382
1269static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) 1383static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
@@ -1362,7 +1476,7 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
1362 int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn)); 1476 int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn));
1363 struct kvm_mmu_page *sp = page_header(__pa(pte)); 1477 struct kvm_mmu_page *sp = page_header(__pa(pte));
1364 1478
1365 __set_bit(slot, &sp->slot_bitmap); 1479 __set_bit(slot, sp->slot_bitmap);
1366} 1480}
1367 1481
1368static void mmu_convert_notrap(struct kvm_mmu_page *sp) 1482static void mmu_convert_notrap(struct kvm_mmu_page *sp)
@@ -1393,6 +1507,110 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
1393 return page; 1507 return page;
1394} 1508}
1395 1509
1510/*
1511 * The function is based on mtrr_type_lookup() in
1512 * arch/x86/kernel/cpu/mtrr/generic.c
1513 */
1514static int get_mtrr_type(struct mtrr_state_type *mtrr_state,
1515 u64 start, u64 end)
1516{
1517 int i;
1518 u64 base, mask;
1519 u8 prev_match, curr_match;
1520 int num_var_ranges = KVM_NR_VAR_MTRR;
1521
1522 if (!mtrr_state->enabled)
1523 return 0xFF;
1524
 1525 /* Make end inclusive instead of exclusive */
1526 end--;
1527
1528 /* Look in fixed ranges. Just return the type as per start */
1529 if (mtrr_state->have_fixed && (start < 0x100000)) {
1530 int idx;
1531
1532 if (start < 0x80000) {
1533 idx = 0;
1534 idx += (start >> 16);
1535 return mtrr_state->fixed_ranges[idx];
1536 } else if (start < 0xC0000) {
1537 idx = 1 * 8;
1538 idx += ((start - 0x80000) >> 14);
1539 return mtrr_state->fixed_ranges[idx];
1540 } else if (start < 0x1000000) {
1541 idx = 3 * 8;
1542 idx += ((start - 0xC0000) >> 12);
1543 return mtrr_state->fixed_ranges[idx];
1544 }
1545 }
1546
1547 /*
1548 * Look in variable ranges
 1549 * Look for multiple ranges matching this address and pick the type
1550 * as per MTRR precedence
1551 */
1552 if (!(mtrr_state->enabled & 2))
1553 return mtrr_state->def_type;
1554
1555 prev_match = 0xFF;
1556 for (i = 0; i < num_var_ranges; ++i) {
1557 unsigned short start_state, end_state;
1558
1559 if (!(mtrr_state->var_ranges[i].mask_lo & (1 << 11)))
1560 continue;
1561
1562 base = (((u64)mtrr_state->var_ranges[i].base_hi) << 32) +
1563 (mtrr_state->var_ranges[i].base_lo & PAGE_MASK);
1564 mask = (((u64)mtrr_state->var_ranges[i].mask_hi) << 32) +
1565 (mtrr_state->var_ranges[i].mask_lo & PAGE_MASK);
1566
1567 start_state = ((start & mask) == (base & mask));
1568 end_state = ((end & mask) == (base & mask));
1569 if (start_state != end_state)
1570 return 0xFE;
1571
1572 if ((start & mask) != (base & mask))
1573 continue;
1574
1575 curr_match = mtrr_state->var_ranges[i].base_lo & 0xff;
1576 if (prev_match == 0xFF) {
1577 prev_match = curr_match;
1578 continue;
1579 }
1580
1581 if (prev_match == MTRR_TYPE_UNCACHABLE ||
1582 curr_match == MTRR_TYPE_UNCACHABLE)
1583 return MTRR_TYPE_UNCACHABLE;
1584
1585 if ((prev_match == MTRR_TYPE_WRBACK &&
1586 curr_match == MTRR_TYPE_WRTHROUGH) ||
1587 (prev_match == MTRR_TYPE_WRTHROUGH &&
1588 curr_match == MTRR_TYPE_WRBACK)) {
1589 prev_match = MTRR_TYPE_WRTHROUGH;
1590 curr_match = MTRR_TYPE_WRTHROUGH;
1591 }
1592
1593 if (prev_match != curr_match)
1594 return MTRR_TYPE_UNCACHABLE;
1595 }
1596
1597 if (prev_match != 0xFF)
1598 return prev_match;
1599
1600 return mtrr_state->def_type;
1601}
1602
1603static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
1604{
1605 u8 mtrr;
1606
1607 mtrr = get_mtrr_type(&vcpu->arch.mtrr_state, gfn << PAGE_SHIFT,
1608 (gfn << PAGE_SHIFT) + PAGE_SIZE);
1609 if (mtrr == 0xfe || mtrr == 0xff)
1610 mtrr = MTRR_TYPE_WRBACK;
1611 return mtrr;
1612}
1613
1396static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 1614static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1397{ 1615{
1398 unsigned index; 1616 unsigned index;
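
get_mtrr_type() resolves addresses below 1 MB through the fixed-range MTRRs: eight 64 KB entries, then sixteen 16 KB entries, then sixty-four 4 KB entries, which is what the index arithmetic above encodes. A standalone check of that arithmetic (not part of the patch):

#include <assert.h>
#include <stdint.h>

static int fixed_range_index(uint64_t start)
{
	if (start < 0x80000)
		return start >> 16;				/* 8 x 64 KB entries */
	if (start < 0xC0000)
		return 1 * 8 + ((start - 0x80000) >> 14);	/* 16 x 16 KB entries */
	return 3 * 8 + ((start - 0xC0000) >> 12);		/* 64 x 4 KB entries */
}

int main(void)
{
	assert(fixed_range_index(0x03000) == 0);	/* first 64 KB range */
	assert(fixed_range_index(0x9C000) == 15);	/* eighth 16 KB range -> idx 15 */
	assert(fixed_range_index(0xC8000) == 32);	/* ninth 4 KB range -> idx 32 */
	return 0;
}
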
@@ -1409,9 +1627,15 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1409 if (s->role.word != sp->role.word) 1627 if (s->role.word != sp->role.word)
1410 return 1; 1628 return 1;
1411 } 1629 }
1412 kvm_mmu_mark_parents_unsync(vcpu, sp);
1413 ++vcpu->kvm->stat.mmu_unsync; 1630 ++vcpu->kvm->stat.mmu_unsync;
1414 sp->unsync = 1; 1631 sp->unsync = 1;
1632
1633 if (sp->global) {
1634 list_add(&sp->oos_link, &vcpu->kvm->arch.oos_global_pages);
1635 ++vcpu->kvm->stat.mmu_unsync_global;
1636 } else
1637 kvm_mmu_mark_parents_unsync(vcpu, sp);
1638
1415 mmu_convert_notrap(sp); 1639 mmu_convert_notrap(sp);
1416 return 0; 1640 return 0;
1417} 1641}
@@ -1437,11 +1661,24 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
1437static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, 1661static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1438 unsigned pte_access, int user_fault, 1662 unsigned pte_access, int user_fault,
1439 int write_fault, int dirty, int largepage, 1663 int write_fault, int dirty, int largepage,
1440 gfn_t gfn, pfn_t pfn, bool speculative, 1664 int global, gfn_t gfn, pfn_t pfn, bool speculative,
1441 bool can_unsync) 1665 bool can_unsync)
1442{ 1666{
1443 u64 spte; 1667 u64 spte;
1444 int ret = 0; 1668 int ret = 0;
1669 u64 mt_mask = shadow_mt_mask;
1670 struct kvm_mmu_page *sp = page_header(__pa(shadow_pte));
1671
1672 if (!(vcpu->arch.cr4 & X86_CR4_PGE))
1673 global = 0;
1674 if (!global && sp->global) {
1675 sp->global = 0;
1676 if (sp->unsync) {
1677 kvm_unlink_unsync_global(vcpu->kvm, sp);
1678 kvm_mmu_mark_parents_unsync(vcpu, sp);
1679 }
1680 }
1681
1445 /* 1682 /*
1446 * We don't set the accessed bit, since we sometimes want to see 1683 * We don't set the accessed bit, since we sometimes want to see
1447 * whether the guest actually used the pte (in order to detect 1684 * whether the guest actually used the pte (in order to detect
@@ -1460,6 +1697,11 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1460 spte |= shadow_user_mask; 1697 spte |= shadow_user_mask;
1461 if (largepage) 1698 if (largepage)
1462 spte |= PT_PAGE_SIZE_MASK; 1699 spte |= PT_PAGE_SIZE_MASK;
1700 if (mt_mask) {
1701 mt_mask = get_memory_type(vcpu, gfn) <<
1702 kvm_x86_ops->get_mt_mask_shift();
1703 spte |= mt_mask;
1704 }
1463 1705
1464 spte |= (u64)pfn << PAGE_SHIFT; 1706 spte |= (u64)pfn << PAGE_SHIFT;
1465 1707
@@ -1474,6 +1716,15 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1474 1716
1475 spte |= PT_WRITABLE_MASK; 1717 spte |= PT_WRITABLE_MASK;
1476 1718
1719 /*
1720 * Optimization: for pte sync, if spte was writable the hash
1721 * lookup is unnecessary (and expensive). Write protection
 1722 * is the responsibility of mmu_get_page / kvm_sync_page.
 1723 * The same reasoning applies to dirty page accounting.
1724 */
1725 if (!can_unsync && is_writeble_pte(*shadow_pte))
1726 goto set_pte;
1727
1477 if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { 1728 if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
1478 pgprintk("%s: found shadow page for %lx, marking ro\n", 1729 pgprintk("%s: found shadow page for %lx, marking ro\n",
1479 __func__, gfn); 1730 __func__, gfn);
@@ -1495,8 +1746,8 @@ set_pte:
1495static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, 1746static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1496 unsigned pt_access, unsigned pte_access, 1747 unsigned pt_access, unsigned pte_access,
1497 int user_fault, int write_fault, int dirty, 1748 int user_fault, int write_fault, int dirty,
1498 int *ptwrite, int largepage, gfn_t gfn, 1749 int *ptwrite, int largepage, int global,
1499 pfn_t pfn, bool speculative) 1750 gfn_t gfn, pfn_t pfn, bool speculative)
1500{ 1751{
1501 int was_rmapped = 0; 1752 int was_rmapped = 0;
1502 int was_writeble = is_writeble_pte(*shadow_pte); 1753 int was_writeble = is_writeble_pte(*shadow_pte);
@@ -1529,7 +1780,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1529 } 1780 }
1530 } 1781 }
1531 if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, 1782 if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
1532 dirty, largepage, gfn, pfn, speculative, true)) { 1783 dirty, largepage, global, gfn, pfn, speculative, true)) {
1533 if (write_fault) 1784 if (write_fault)
1534 *ptwrite = 1; 1785 *ptwrite = 1;
1535 kvm_x86_ops->tlb_flush(vcpu); 1786 kvm_x86_ops->tlb_flush(vcpu);
@@ -1586,7 +1837,7 @@ static int direct_map_entry(struct kvm_shadow_walk *_walk,
1586 || (walk->largepage && level == PT_DIRECTORY_LEVEL)) { 1837 || (walk->largepage && level == PT_DIRECTORY_LEVEL)) {
1587 mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL, 1838 mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL,
1588 0, walk->write, 1, &walk->pt_write, 1839 0, walk->write, 1, &walk->pt_write,
1589 walk->largepage, gfn, walk->pfn, false); 1840 walk->largepage, 0, gfn, walk->pfn, false);
1590 ++vcpu->stat.pf_fixed; 1841 ++vcpu->stat.pf_fixed;
1591 return 1; 1842 return 1;
1592 } 1843 }
@@ -1773,6 +2024,15 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
1773 } 2024 }
1774} 2025}
1775 2026
2027static void mmu_sync_global(struct kvm_vcpu *vcpu)
2028{
2029 struct kvm *kvm = vcpu->kvm;
2030 struct kvm_mmu_page *sp, *n;
2031
2032 list_for_each_entry_safe(sp, n, &kvm->arch.oos_global_pages, oos_link)
2033 kvm_sync_page(vcpu, sp);
2034}
2035
1776void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) 2036void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
1777{ 2037{
1778 spin_lock(&vcpu->kvm->mmu_lock); 2038 spin_lock(&vcpu->kvm->mmu_lock);
@@ -1780,6 +2040,13 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
1780 spin_unlock(&vcpu->kvm->mmu_lock); 2040 spin_unlock(&vcpu->kvm->mmu_lock);
1781} 2041}
1782 2042
2043void kvm_mmu_sync_global(struct kvm_vcpu *vcpu)
2044{
2045 spin_lock(&vcpu->kvm->mmu_lock);
2046 mmu_sync_global(vcpu);
2047 spin_unlock(&vcpu->kvm->mmu_lock);
2048}
2049
1783static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) 2050static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
1784{ 2051{
1785 return vaddr; 2052 return vaddr;
@@ -2178,7 +2445,8 @@ static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn)
2178} 2445}
2179 2446
2180void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, 2447void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
2181 const u8 *new, int bytes) 2448 const u8 *new, int bytes,
2449 bool guest_initiated)
2182{ 2450{
2183 gfn_t gfn = gpa >> PAGE_SHIFT; 2451 gfn_t gfn = gpa >> PAGE_SHIFT;
2184 struct kvm_mmu_page *sp; 2452 struct kvm_mmu_page *sp;
@@ -2204,15 +2472,17 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
2204 kvm_mmu_free_some_pages(vcpu); 2472 kvm_mmu_free_some_pages(vcpu);
2205 ++vcpu->kvm->stat.mmu_pte_write; 2473 ++vcpu->kvm->stat.mmu_pte_write;
2206 kvm_mmu_audit(vcpu, "pre pte write"); 2474 kvm_mmu_audit(vcpu, "pre pte write");
2207 if (gfn == vcpu->arch.last_pt_write_gfn 2475 if (guest_initiated) {
2208 && !last_updated_pte_accessed(vcpu)) { 2476 if (gfn == vcpu->arch.last_pt_write_gfn
2209 ++vcpu->arch.last_pt_write_count; 2477 && !last_updated_pte_accessed(vcpu)) {
2210 if (vcpu->arch.last_pt_write_count >= 3) 2478 ++vcpu->arch.last_pt_write_count;
2211 flooded = 1; 2479 if (vcpu->arch.last_pt_write_count >= 3)
2212 } else { 2480 flooded = 1;
2213 vcpu->arch.last_pt_write_gfn = gfn; 2481 } else {
2214 vcpu->arch.last_pt_write_count = 1; 2482 vcpu->arch.last_pt_write_gfn = gfn;
2215 vcpu->arch.last_pte_updated = NULL; 2483 vcpu->arch.last_pt_write_count = 1;
2484 vcpu->arch.last_pte_updated = NULL;
2485 }
2216 } 2486 }
2217 index = kvm_page_table_hashfn(gfn); 2487 index = kvm_page_table_hashfn(gfn);
2218 bucket = &vcpu->kvm->arch.mmu_page_hash[index]; 2488 bucket = &vcpu->kvm->arch.mmu_page_hash[index];
@@ -2352,9 +2622,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
2352 2622
2353void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) 2623void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
2354{ 2624{
2355 spin_lock(&vcpu->kvm->mmu_lock);
2356 vcpu->arch.mmu.invlpg(vcpu, gva); 2625 vcpu->arch.mmu.invlpg(vcpu, gva);
2357 spin_unlock(&vcpu->kvm->mmu_lock);
2358 kvm_mmu_flush_tlb(vcpu); 2626 kvm_mmu_flush_tlb(vcpu);
2359 ++vcpu->stat.invlpg; 2627 ++vcpu->stat.invlpg;
2360} 2628}
@@ -2451,7 +2719,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
2451 int i; 2719 int i;
2452 u64 *pt; 2720 u64 *pt;
2453 2721
2454 if (!test_bit(slot, &sp->slot_bitmap)) 2722 if (!test_bit(slot, sp->slot_bitmap))
2455 continue; 2723 continue;
2456 2724
2457 pt = sp->spt; 2725 pt = sp->spt;
@@ -2860,8 +3128,8 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)
2860 if (sp->role.metaphysical) 3128 if (sp->role.metaphysical)
2861 continue; 3129 continue;
2862 3130
2863 slot = gfn_to_memslot(vcpu->kvm, sp->gfn);
2864 gfn = unalias_gfn(vcpu->kvm, sp->gfn); 3131 gfn = unalias_gfn(vcpu->kvm, sp->gfn);
3132 slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn);
2865 rmapp = &slot->rmap[gfn - slot->base_gfn]; 3133 rmapp = &slot->rmap[gfn - slot->base_gfn];
2866 if (*rmapp) 3134 if (*rmapp)
2867 printk(KERN_ERR "%s: (%s) shadow page has writable" 3135 printk(KERN_ERR "%s: (%s) shadow page has writable"
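
A recurring change in the mmu.c hunks above is that rmap_write_protect() no longer flushes the remote TLBs itself; it just reports whether it write-protected anything, and callers such as mmu_sync_children() OR the results over a whole batch and issue a single kvm_flush_remote_tlbs(). A standalone sketch of that batching idea, with illustrative stub helpers:

#include <stdbool.h>

static bool protect_one(int gfn)	/* stand-in for rmap_write_protect() */
{
	(void)gfn;
	return true;
}

static void flush_all(void)		/* stand-in for kvm_flush_remote_tlbs() */
{
}

static void protect_batch(const int *gfns, int n)
{
	bool protected = false;
	int i;

	for (i = 0; i < n; i++)
		protected |= protect_one(gfns[i]);

	if (protected)			/* at most one flush per batch */
		flush_all();
}
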
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 84eee43bbe74..9fd78b6e17ad 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -82,6 +82,7 @@ struct shadow_walker {
82 int *ptwrite; 82 int *ptwrite;
83 pfn_t pfn; 83 pfn_t pfn;
84 u64 *sptep; 84 u64 *sptep;
85 gpa_t pte_gpa;
85}; 86};
86 87
87static gfn_t gpte_to_gfn(pt_element_t gpte) 88static gfn_t gpte_to_gfn(pt_element_t gpte)
@@ -222,7 +223,7 @@ walk:
222 if (ret) 223 if (ret)
223 goto walk; 224 goto walk;
224 pte |= PT_DIRTY_MASK; 225 pte |= PT_DIRTY_MASK;
225 kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte)); 226 kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 0);
226 walker->ptes[walker->level - 1] = pte; 227 walker->ptes[walker->level - 1] = pte;
227 } 228 }
228 229
@@ -274,7 +275,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
274 return; 275 return;
275 kvm_get_pfn(pfn); 276 kvm_get_pfn(pfn);
276 mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, 277 mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
277 gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), 278 gpte & PT_DIRTY_MASK, NULL, largepage,
279 gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte),
278 pfn, true); 280 pfn, true);
279} 281}
280 282
@@ -301,8 +303,9 @@ static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw,
301 mmu_set_spte(vcpu, sptep, access, gw->pte_access & access, 303 mmu_set_spte(vcpu, sptep, access, gw->pte_access & access,
302 sw->user_fault, sw->write_fault, 304 sw->user_fault, sw->write_fault,
303 gw->ptes[gw->level-1] & PT_DIRTY_MASK, 305 gw->ptes[gw->level-1] & PT_DIRTY_MASK,
304 sw->ptwrite, sw->largepage, gw->gfn, sw->pfn, 306 sw->ptwrite, sw->largepage,
305 false); 307 gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
308 gw->gfn, sw->pfn, false);
306 sw->sptep = sptep; 309 sw->sptep = sptep;
307 return 1; 310 return 1;
308 } 311 }
@@ -466,10 +469,22 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw,
466 struct kvm_vcpu *vcpu, u64 addr, 469 struct kvm_vcpu *vcpu, u64 addr,
467 u64 *sptep, int level) 470 u64 *sptep, int level)
468{ 471{
472 struct shadow_walker *sw =
473 container_of(_sw, struct shadow_walker, walker);
469 474
470 if (level == PT_PAGE_TABLE_LEVEL) { 475 /* FIXME: properly handle invlpg on large guest pages */
471 if (is_shadow_present_pte(*sptep)) 476 if (level == PT_PAGE_TABLE_LEVEL ||
477 ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
478 struct kvm_mmu_page *sp = page_header(__pa(sptep));
479
480 sw->pte_gpa = (sp->gfn << PAGE_SHIFT);
481 sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
482
483 if (is_shadow_present_pte(*sptep)) {
472 rmap_remove(vcpu->kvm, sptep); 484 rmap_remove(vcpu->kvm, sptep);
485 if (is_large_pte(*sptep))
486 --vcpu->kvm->stat.lpages;
487 }
473 set_shadow_pte(sptep, shadow_trap_nonpresent_pte); 488 set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
474 return 1; 489 return 1;
475 } 490 }
@@ -480,11 +495,26 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw,
480 495
481static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) 496static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
482{ 497{
498 pt_element_t gpte;
483 struct shadow_walker walker = { 499 struct shadow_walker walker = {
484 .walker = { .entry = FNAME(shadow_invlpg_entry), }, 500 .walker = { .entry = FNAME(shadow_invlpg_entry), },
501 .pte_gpa = -1,
485 }; 502 };
486 503
504 spin_lock(&vcpu->kvm->mmu_lock);
487 walk_shadow(&walker.walker, vcpu, gva); 505 walk_shadow(&walker.walker, vcpu, gva);
506 spin_unlock(&vcpu->kvm->mmu_lock);
507 if (walker.pte_gpa == -1)
508 return;
509 if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte,
510 sizeof(pt_element_t)))
511 return;
512 if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) {
513 if (mmu_topup_memory_caches(vcpu))
514 return;
515 kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte,
516 sizeof(pt_element_t), 0);
517 }
488} 518}
489 519
490static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) 520static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
@@ -580,7 +610,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
580 nr_present++; 610 nr_present++;
581 pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); 611 pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
582 set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, 612 set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
583 is_dirty_pte(gpte), 0, gfn, 613 is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn,
584 spte_to_pfn(sp->spt[i]), true, false); 614 spte_to_pfn(sp->spt[i]), true, false);
585 } 615 }
586 616
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 9c4ce657d963..1452851ae258 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -28,6 +28,8 @@
28 28
29#include <asm/desc.h> 29#include <asm/desc.h>
30 30
31#include <asm/virtext.h>
32
31#define __ex(x) __kvm_handle_fault_on_reboot(x) 33#define __ex(x) __kvm_handle_fault_on_reboot(x)
32 34
33MODULE_AUTHOR("Qumranet"); 35MODULE_AUTHOR("Qumranet");
@@ -245,34 +247,19 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
245 247
246static int has_svm(void) 248static int has_svm(void)
247{ 249{
248 uint32_t eax, ebx, ecx, edx; 250 const char *msg;
249
250 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
251 printk(KERN_INFO "has_svm: not amd\n");
252 return 0;
253 }
254 251
255 cpuid(0x80000000, &eax, &ebx, &ecx, &edx); 252 if (!cpu_has_svm(&msg)) {
 256 if (eax < SVM_CPUID_FUNC) { 253 printk(KERN_INFO "has_svm: %s\n", msg);
257 printk(KERN_INFO "has_svm: can't execute cpuid_8000000a\n");
258 return 0; 254 return 0;
259 } 255 }
260 256
261 cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
262 if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) {
263 printk(KERN_DEBUG "has_svm: svm not available\n");
264 return 0;
265 }
266 return 1; 257 return 1;
267} 258}
268 259
269static void svm_hardware_disable(void *garbage) 260static void svm_hardware_disable(void *garbage)
270{ 261{
271 uint64_t efer; 262 cpu_svm_disable();
272
273 wrmsrl(MSR_VM_HSAVE_PA, 0);
274 rdmsrl(MSR_EFER, efer);
275 wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
276} 263}
277 264
278static void svm_hardware_enable(void *garbage) 265static void svm_hardware_enable(void *garbage)
@@ -772,6 +759,22 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
772 var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; 759 var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
773 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; 760 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
774 var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; 761 var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
762
763 /*
764 * SVM always stores 0 for the 'G' bit in the CS selector in
765 * the VMCB on a VMEXIT. This hurts cross-vendor migration:
766 * Intel's VMENTRY has a check on the 'G' bit.
767 */
768 if (seg == VCPU_SREG_CS)
769 var->g = s->limit > 0xfffff;
770
771 /*
772 * Work around a bug where the busy flag in the tr selector
773 * isn't exposed
774 */
775 if (seg == VCPU_SREG_TR)
776 var->type |= 0x2;
777
775 var->unusable = !var->present; 778 var->unusable = !var->present;
776} 779}
777 780
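
The CS fix-up in svm_get_segment() relies on a simple invariant: with G=0 the 20-bit limit cannot exceed 0xfffff, so any larger effective limit must have come from a 4 KB-granular (G=1) descriptor. A small standalone check of that reasoning (not part of the patch):

#include <assert.h>
#include <stdint.h>

/* Reconstruct the granularity bit from the effective limit, as the
 * patch does for CS after SVM has zeroed the G bit on #VMEXIT. */
static int guess_g_bit(uint32_t limit)
{
	return limit > 0xfffff;
}

int main(void)
{
	assert(guess_g_bit(0xffffffff) == 1);	/* flat 4 GB segment: G must be 1 */
	assert(guess_g_bit(0x0000ffff) == 0);	/* 64 KB byte-granular segment */
	return 0;
}
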
@@ -1099,6 +1102,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1099 rep = (io_info & SVM_IOIO_REP_MASK) != 0; 1102 rep = (io_info & SVM_IOIO_REP_MASK) != 0;
1100 down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; 1103 down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0;
1101 1104
1105 skip_emulated_instruction(&svm->vcpu);
1102 return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); 1106 return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
1103} 1107}
1104 1108
@@ -1912,6 +1916,11 @@ static int get_npt_level(void)
1912#endif 1916#endif
1913} 1917}
1914 1918
1919static int svm_get_mt_mask_shift(void)
1920{
1921 return 0;
1922}
1923
1915static struct kvm_x86_ops svm_x86_ops = { 1924static struct kvm_x86_ops svm_x86_ops = {
1916 .cpu_has_kvm_support = has_svm, 1925 .cpu_has_kvm_support = has_svm,
1917 .disabled_by_bios = is_disabled, 1926 .disabled_by_bios = is_disabled,
@@ -1967,6 +1976,7 @@ static struct kvm_x86_ops svm_x86_ops = {
1967 1976
1968 .set_tss_addr = svm_set_tss_addr, 1977 .set_tss_addr = svm_set_tss_addr,
1969 .get_tdp_level = get_npt_level, 1978 .get_tdp_level = get_npt_level,
1979 .get_mt_mask_shift = svm_get_mt_mask_shift,
1970}; 1980};
1971 1981
1972static int __init svm_init(void) 1982static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a4018b01e1f9..6259d7467648 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -16,7 +16,6 @@
16 */ 16 */
17 17
18#include "irq.h" 18#include "irq.h"
19#include "vmx.h"
20#include "mmu.h" 19#include "mmu.h"
21 20
22#include <linux/kvm_host.h> 21#include <linux/kvm_host.h>
@@ -31,6 +30,8 @@
31 30
32#include <asm/io.h> 31#include <asm/io.h>
33#include <asm/desc.h> 32#include <asm/desc.h>
33#include <asm/vmx.h>
34#include <asm/virtext.h>
34 35
35#define __ex(x) __kvm_handle_fault_on_reboot(x) 36#define __ex(x) __kvm_handle_fault_on_reboot(x)
36 37
@@ -90,6 +91,11 @@ struct vcpu_vmx {
90 } rmode; 91 } rmode;
91 int vpid; 92 int vpid;
92 bool emulation_required; 93 bool emulation_required;
94
95 /* Support for vnmi-less CPUs */
96 int soft_vnmi_blocked;
97 ktime_t entry_time;
98 s64 vnmi_blocked_time;
93}; 99};
94 100
95static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) 101static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -122,7 +128,7 @@ static struct vmcs_config {
122 u32 vmentry_ctrl; 128 u32 vmentry_ctrl;
123} vmcs_config; 129} vmcs_config;
124 130
125struct vmx_capability { 131static struct vmx_capability {
126 u32 ept; 132 u32 ept;
127 u32 vpid; 133 u32 vpid;
128} vmx_capability; 134} vmx_capability;
@@ -957,6 +963,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
957 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data); 963 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data);
958 964
959 break; 965 break;
966 case MSR_IA32_CR_PAT:
967 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
968 vmcs_write64(GUEST_IA32_PAT, data);
969 vcpu->arch.pat = data;
970 break;
971 }
972 /* Otherwise falls through to kvm_set_msr_common */
960 default: 973 default:
961 vmx_load_host_state(vmx); 974 vmx_load_host_state(vmx);
962 msr = find_msr_entry(vmx, msr_index); 975 msr = find_msr_entry(vmx, msr_index);
@@ -1032,8 +1045,7 @@ static int vmx_get_irq(struct kvm_vcpu *vcpu)
1032 1045
1033static __init int cpu_has_kvm_support(void) 1046static __init int cpu_has_kvm_support(void)
1034{ 1047{
1035 unsigned long ecx = cpuid_ecx(1); 1048 return cpu_has_vmx();
1036 return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
1037} 1049}
1038 1050
1039static __init int vmx_disabled_by_bios(void) 1051static __init int vmx_disabled_by_bios(void)
@@ -1079,13 +1091,22 @@ static void vmclear_local_vcpus(void)
1079 __vcpu_clear(vmx); 1091 __vcpu_clear(vmx);
1080} 1092}
1081 1093
1082static void hardware_disable(void *garbage) 1094
1095/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot()
1096 * tricks.
1097 */
1098static void kvm_cpu_vmxoff(void)
1083{ 1099{
1084 vmclear_local_vcpus();
1085 asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); 1100 asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
1086 write_cr4(read_cr4() & ~X86_CR4_VMXE); 1101 write_cr4(read_cr4() & ~X86_CR4_VMXE);
1087} 1102}
1088 1103
1104static void hardware_disable(void *garbage)
1105{
1106 vmclear_local_vcpus();
1107 kvm_cpu_vmxoff();
1108}
1109
1089static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, 1110static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
1090 u32 msr, u32 *result) 1111 u32 msr, u32 *result)
1091{ 1112{
@@ -1176,12 +1197,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
1176#ifdef CONFIG_X86_64 1197#ifdef CONFIG_X86_64
1177 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; 1198 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
1178#endif 1199#endif
1179 opt = 0; 1200 opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT;
1180 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, 1201 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
1181 &_vmexit_control) < 0) 1202 &_vmexit_control) < 0)
1182 return -EIO; 1203 return -EIO;
1183 1204
1184 min = opt = 0; 1205 min = 0;
1206 opt = VM_ENTRY_LOAD_IA32_PAT;
1185 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, 1207 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
1186 &_vmentry_control) < 0) 1208 &_vmentry_control) < 0)
1187 return -EIO; 1209 return -EIO;
@@ -2087,8 +2109,9 @@ static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr)
2087 */ 2109 */
2088static int vmx_vcpu_setup(struct vcpu_vmx *vmx) 2110static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2089{ 2111{
2090 u32 host_sysenter_cs; 2112 u32 host_sysenter_cs, msr_low, msr_high;
2091 u32 junk; 2113 u32 junk;
2114 u64 host_pat;
2092 unsigned long a; 2115 unsigned long a;
2093 struct descriptor_table dt; 2116 struct descriptor_table dt;
2094 int i; 2117 int i;
@@ -2176,6 +2199,20 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2176 rdmsrl(MSR_IA32_SYSENTER_EIP, a); 2199 rdmsrl(MSR_IA32_SYSENTER_EIP, a);
2177 vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */ 2200 vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */
2178 2201
2202 if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
2203 rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
2204 host_pat = msr_low | ((u64) msr_high << 32);
2205 vmcs_write64(HOST_IA32_PAT, host_pat);
2206 }
2207 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
2208 rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
2209 host_pat = msr_low | ((u64) msr_high << 32);
 2210 /* Write the default value, following the host PAT */
 2211 vmcs_write64(GUEST_IA32_PAT, host_pat);
 2212 /* Keep arch.pat in sync with GUEST_IA32_PAT */
2213 vmx->vcpu.arch.pat = host_pat;
2214 }
2215
2179 for (i = 0; i < NR_VMX_MSR; ++i) { 2216 for (i = 0; i < NR_VMX_MSR; ++i) {
2180 u32 index = vmx_msr_index[i]; 2217 u32 index = vmx_msr_index[i];
2181 u32 data_low, data_high; 2218 u32 data_low, data_high;
@@ -2230,6 +2267,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2230 2267
2231 vmx->vcpu.arch.rmode.active = 0; 2268 vmx->vcpu.arch.rmode.active = 0;
2232 2269
2270 vmx->soft_vnmi_blocked = 0;
2271
2233 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); 2272 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
2234 kvm_set_cr8(&vmx->vcpu, 0); 2273 kvm_set_cr8(&vmx->vcpu, 0);
2235 msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; 2274 msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
@@ -2335,6 +2374,29 @@ out:
2335 return ret; 2374 return ret;
2336} 2375}
2337 2376
2377static void enable_irq_window(struct kvm_vcpu *vcpu)
2378{
2379 u32 cpu_based_vm_exec_control;
2380
2381 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
2382 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
2383 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
2384}
2385
2386static void enable_nmi_window(struct kvm_vcpu *vcpu)
2387{
2388 u32 cpu_based_vm_exec_control;
2389
2390 if (!cpu_has_virtual_nmis()) {
2391 enable_irq_window(vcpu);
2392 return;
2393 }
2394
2395 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
2396 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
2397 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
2398}
2399
2338static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) 2400static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
2339{ 2401{
2340 struct vcpu_vmx *vmx = to_vmx(vcpu); 2402 struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2358,10 +2420,54 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
2358 2420
2359static void vmx_inject_nmi(struct kvm_vcpu *vcpu) 2421static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
2360{ 2422{
2423 struct vcpu_vmx *vmx = to_vmx(vcpu);
2424
2425 if (!cpu_has_virtual_nmis()) {
2426 /*
2427 * Tracking the NMI-blocked state in software is built upon
2428 * finding the next open IRQ window. This, in turn, depends on
2429 * well-behaving guests: They have to keep IRQs disabled at
2430 * least as long as the NMI handler runs. Otherwise we may
2431 * cause NMI nesting, maybe breaking the guest. But as this is
2432 * highly unlikely, we can live with the residual risk.
2433 */
2434 vmx->soft_vnmi_blocked = 1;
2435 vmx->vnmi_blocked_time = 0;
2436 }
2437
2438 ++vcpu->stat.nmi_injections;
2439 if (vcpu->arch.rmode.active) {
2440 vmx->rmode.irq.pending = true;
2441 vmx->rmode.irq.vector = NMI_VECTOR;
2442 vmx->rmode.irq.rip = kvm_rip_read(vcpu);
2443 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
2444 NMI_VECTOR | INTR_TYPE_SOFT_INTR |
2445 INTR_INFO_VALID_MASK);
2446 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
2447 kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
2448 return;
2449 }
2361 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 2450 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
2362 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); 2451 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
2363} 2452}
2364 2453
2454static void vmx_update_window_states(struct kvm_vcpu *vcpu)
2455{
2456 u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
2457
2458 vcpu->arch.nmi_window_open =
2459 !(guest_intr & (GUEST_INTR_STATE_STI |
2460 GUEST_INTR_STATE_MOV_SS |
2461 GUEST_INTR_STATE_NMI));
2462 if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
2463 vcpu->arch.nmi_window_open = 0;
2464
2465 vcpu->arch.interrupt_window_open =
2466 ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
2467 !(guest_intr & (GUEST_INTR_STATE_STI |
2468 GUEST_INTR_STATE_MOV_SS)));
2469}
2470
2365static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) 2471static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
2366{ 2472{
2367 int word_index = __ffs(vcpu->arch.irq_summary); 2473 int word_index = __ffs(vcpu->arch.irq_summary);
@@ -2374,40 +2480,49 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
2374 kvm_queue_interrupt(vcpu, irq); 2480 kvm_queue_interrupt(vcpu, irq);
2375} 2481}
2376 2482
2377
2378static void do_interrupt_requests(struct kvm_vcpu *vcpu, 2483static void do_interrupt_requests(struct kvm_vcpu *vcpu,
2379 struct kvm_run *kvm_run) 2484 struct kvm_run *kvm_run)
2380{ 2485{
2381 u32 cpu_based_vm_exec_control; 2486 vmx_update_window_states(vcpu);
2382
2383 vcpu->arch.interrupt_window_open =
2384 ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
2385 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
2386 2487
2387 if (vcpu->arch.interrupt_window_open && 2488 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
2388 vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending) 2489 if (vcpu->arch.interrupt.pending) {
2389 kvm_do_inject_irq(vcpu); 2490 enable_nmi_window(vcpu);
2491 } else if (vcpu->arch.nmi_window_open) {
2492 vcpu->arch.nmi_pending = false;
2493 vcpu->arch.nmi_injected = true;
2494 } else {
2495 enable_nmi_window(vcpu);
2496 return;
2497 }
2498 }
2499 if (vcpu->arch.nmi_injected) {
2500 vmx_inject_nmi(vcpu);
2501 if (vcpu->arch.nmi_pending)
2502 enable_nmi_window(vcpu);
2503 else if (vcpu->arch.irq_summary
2504 || kvm_run->request_interrupt_window)
2505 enable_irq_window(vcpu);
2506 return;
2507 }
2390 2508
2391 if (vcpu->arch.interrupt_window_open && vcpu->arch.interrupt.pending) 2509 if (vcpu->arch.interrupt_window_open) {
2392 vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); 2510 if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
2511 kvm_do_inject_irq(vcpu);
2393 2512
2394 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); 2513 if (vcpu->arch.interrupt.pending)
2514 vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
2515 }
2395 if (!vcpu->arch.interrupt_window_open && 2516 if (!vcpu->arch.interrupt_window_open &&
2396 (vcpu->arch.irq_summary || kvm_run->request_interrupt_window)) 2517 (vcpu->arch.irq_summary || kvm_run->request_interrupt_window))
2397 /* 2518 enable_irq_window(vcpu);
2398 * Interrupts blocked. Wait for unblock.
2399 */
2400 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
2401 else
2402 cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
2403 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
2404} 2519}
2405 2520
2406static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) 2521static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
2407{ 2522{
2408 int ret; 2523 int ret;
2409 struct kvm_userspace_memory_region tss_mem = { 2524 struct kvm_userspace_memory_region tss_mem = {
2410 .slot = 8, 2525 .slot = TSS_PRIVATE_MEMSLOT,
2411 .guest_phys_addr = addr, 2526 .guest_phys_addr = addr,
2412 .memory_size = PAGE_SIZE * 3, 2527 .memory_size = PAGE_SIZE * 3,
2413 .flags = 0, 2528 .flags = 0,
@@ -2492,7 +2607,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2492 set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary); 2607 set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
2493 } 2608 }
2494 2609
2495 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */ 2610 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
2496 return 1; /* already handled by vmx_vcpu_run() */ 2611 return 1; /* already handled by vmx_vcpu_run() */
2497 2612
2498 if (is_no_device(intr_info)) { 2613 if (is_no_device(intr_info)) {
@@ -2581,6 +2696,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2581 rep = (exit_qualification & 32) != 0; 2696 rep = (exit_qualification & 32) != 0;
2582 port = exit_qualification >> 16; 2697 port = exit_qualification >> 16;
2583 2698
2699 skip_emulated_instruction(vcpu);
2584 return kvm_emulate_pio(vcpu, kvm_run, in, size, port); 2700 return kvm_emulate_pio(vcpu, kvm_run, in, size, port);
2585} 2701}
2586 2702
@@ -2767,6 +2883,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
2767 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); 2883 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
2768 2884
2769 KVMTRACE_0D(PEND_INTR, vcpu, handler); 2885 KVMTRACE_0D(PEND_INTR, vcpu, handler);
2886 ++vcpu->stat.irq_window_exits;
2770 2887
2771 /* 2888 /*
2772 * If the user space waits to inject interrupts, exit as soon as 2889 * If the user space waits to inject interrupts, exit as soon as
@@ -2775,7 +2892,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
2775 if (kvm_run->request_interrupt_window && 2892 if (kvm_run->request_interrupt_window &&
2776 !vcpu->arch.irq_summary) { 2893 !vcpu->arch.irq_summary) {
2777 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; 2894 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
2778 ++vcpu->stat.irq_window_exits;
2779 return 0; 2895 return 0;
2780 } 2896 }
2781 return 1; 2897 return 1;
@@ -2832,6 +2948,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2832 2948
2833static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 2949static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2834{ 2950{
2951 struct vcpu_vmx *vmx = to_vmx(vcpu);
2835 unsigned long exit_qualification; 2952 unsigned long exit_qualification;
2836 u16 tss_selector; 2953 u16 tss_selector;
2837 int reason; 2954 int reason;
@@ -2839,6 +2956,15 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2839 exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 2956 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
2840 2957
2841 reason = (u32)exit_qualification >> 30; 2958 reason = (u32)exit_qualification >> 30;
2959 if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected &&
2960 (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
2961 (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK)
2962 == INTR_TYPE_NMI_INTR) {
2963 vcpu->arch.nmi_injected = false;
2964 if (cpu_has_virtual_nmis())
2965 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
2966 GUEST_INTR_STATE_NMI);
2967 }
2842 tss_selector = exit_qualification; 2968 tss_selector = exit_qualification;
2843 2969
2844 return kvm_task_switch(vcpu, tss_selector, reason); 2970 return kvm_task_switch(vcpu, tss_selector, reason);
@@ -2927,16 +3053,12 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
2927 while (!guest_state_valid(vcpu)) { 3053 while (!guest_state_valid(vcpu)) {
2928 err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); 3054 err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
2929 3055
2930 switch (err) { 3056 if (err == EMULATE_DO_MMIO)
2931 case EMULATE_DONE: 3057 break;
2932 break; 3058
2933 case EMULATE_DO_MMIO: 3059 if (err != EMULATE_DONE) {
2934 kvm_report_emulation_failure(vcpu, "mmio"); 3060 kvm_report_emulation_failure(vcpu, "emulation failure");
2935 /* TODO: Handle MMIO */ 3061 return;
2936 return;
2937 default:
2938 kvm_report_emulation_failure(vcpu, "emulation failure");
2939 return;
2940 } 3062 }
2941 3063
2942 if (signal_pending(current)) 3064 if (signal_pending(current))
@@ -2948,8 +3070,10 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
2948 local_irq_disable(); 3070 local_irq_disable();
2949 preempt_disable(); 3071 preempt_disable();
2950 3072
2951 /* Guest state should be valid now, no more emulation should be needed */ 3073 /* Guest state should be valid now unless we need to
2952 vmx->emulation_required = 0; 3074 * emulate an MMIO */
3075 if (guest_state_valid(vcpu))
3076 vmx->emulation_required = 0;
2953} 3077}
2954 3078
2955/* 3079/*
@@ -2996,6 +3120,11 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2996 KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu), 3120 KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu),
2997 (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit); 3121 (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit);
2998 3122
3123 /* If we need to emulate an MMIO from handle_invalid_guest_state,
3124 * we just return 0 */
3125 if (vmx->emulation_required && emulate_invalid_guest_state)
3126 return 0;
3127
2999 /* Accesses to CR3 don't cause a VM exit in paging mode, so we need 3128 /* Accesses to CR3 don't cause a VM exit in paging mode, so we need
3000 * to sync with the guest's real CR3. */ 3129 * to sync with the guest's real CR3. */
3001 if (vm_need_ept() && is_paging(vcpu)) { 3130 if (vm_need_ept() && is_paging(vcpu)) {
@@ -3012,9 +3141,32 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
3012 3141
3013 if ((vectoring_info & VECTORING_INFO_VALID_MASK) && 3142 if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
3014 (exit_reason != EXIT_REASON_EXCEPTION_NMI && 3143 (exit_reason != EXIT_REASON_EXCEPTION_NMI &&
3015 exit_reason != EXIT_REASON_EPT_VIOLATION)) 3144 exit_reason != EXIT_REASON_EPT_VIOLATION &&
3016 printk(KERN_WARNING "%s: unexpected, valid vectoring info and " 3145 exit_reason != EXIT_REASON_TASK_SWITCH))
3017 "exit reason is 0x%x\n", __func__, exit_reason); 3146 printk(KERN_WARNING "%s: unexpected, valid vectoring info "
3147 "(0x%x) and exit reason is 0x%x\n",
3148 __func__, vectoring_info, exit_reason);
3149
3150 if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) {
3151 if (vcpu->arch.interrupt_window_open) {
3152 vmx->soft_vnmi_blocked = 0;
3153 vcpu->arch.nmi_window_open = 1;
3154 } else if (vmx->vnmi_blocked_time > 1000000000LL &&
3155 vcpu->arch.nmi_pending) {
3156 /*
3157 * This CPU doesn't help us find the end of an
3158 * NMI-blocked window if the guest runs with IRQs
3159 * disabled. So we pull the trigger after 1 s of
3160 * futile waiting, but inform the user about it.
3161 */
3162 printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
3163 "state on VCPU %d after 1 s timeout\n",
3164 __func__, vcpu->vcpu_id);
3165 vmx->soft_vnmi_blocked = 0;
3166 vmx->vcpu.arch.nmi_window_open = 1;
3167 }
3168 }
3169
3018 if (exit_reason < kvm_vmx_max_exit_handlers 3170 if (exit_reason < kvm_vmx_max_exit_handlers
3019 && kvm_vmx_exit_handlers[exit_reason]) 3171 && kvm_vmx_exit_handlers[exit_reason])
3020 return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); 3172 return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
@@ -3042,51 +3194,6 @@ static void update_tpr_threshold(struct kvm_vcpu *vcpu)
3042 vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4); 3194 vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4);
3043} 3195}
3044 3196
3045static void enable_irq_window(struct kvm_vcpu *vcpu)
3046{
3047 u32 cpu_based_vm_exec_control;
3048
3049 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
3050 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
3051 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
3052}
3053
3054static void enable_nmi_window(struct kvm_vcpu *vcpu)
3055{
3056 u32 cpu_based_vm_exec_control;
3057
3058 if (!cpu_has_virtual_nmis())
3059 return;
3060
3061 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
3062 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
3063 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
3064}
3065
3066static int vmx_nmi_enabled(struct kvm_vcpu *vcpu)
3067{
3068 u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
3069 return !(guest_intr & (GUEST_INTR_STATE_NMI |
3070 GUEST_INTR_STATE_MOV_SS |
3071 GUEST_INTR_STATE_STI));
3072}
3073
3074static int vmx_irq_enabled(struct kvm_vcpu *vcpu)
3075{
3076 u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
3077 return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS |
3078 GUEST_INTR_STATE_STI)) &&
3079 (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
3080}
3081
3082static void enable_intr_window(struct kvm_vcpu *vcpu)
3083{
3084 if (vcpu->arch.nmi_pending)
3085 enable_nmi_window(vcpu);
3086 else if (kvm_cpu_has_interrupt(vcpu))
3087 enable_irq_window(vcpu);
3088}
3089
3090static void vmx_complete_interrupts(struct vcpu_vmx *vmx) 3197static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
3091{ 3198{
3092 u32 exit_intr_info; 3199 u32 exit_intr_info;
@@ -3109,7 +3216,9 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
3109 if (unblock_nmi && vector != DF_VECTOR) 3216 if (unblock_nmi && vector != DF_VECTOR)
3110 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, 3217 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
3111 GUEST_INTR_STATE_NMI); 3218 GUEST_INTR_STATE_NMI);
3112 } 3219 } else if (unlikely(vmx->soft_vnmi_blocked))
3220 vmx->vnmi_blocked_time +=
3221 ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
3113 3222
3114 idt_vectoring_info = vmx->idt_vectoring_info; 3223 idt_vectoring_info = vmx->idt_vectoring_info;
3115 idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; 3224 idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
@@ -3147,26 +3256,29 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
3147{ 3256{
3148 update_tpr_threshold(vcpu); 3257 update_tpr_threshold(vcpu);
3149 3258
3150 if (cpu_has_virtual_nmis()) { 3259 vmx_update_window_states(vcpu);
3151 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { 3260
3152 if (vcpu->arch.interrupt.pending) { 3261 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
3153 enable_nmi_window(vcpu); 3262 if (vcpu->arch.interrupt.pending) {
3154 } else if (vmx_nmi_enabled(vcpu)) { 3263 enable_nmi_window(vcpu);
3155 vcpu->arch.nmi_pending = false; 3264 } else if (vcpu->arch.nmi_window_open) {
3156 vcpu->arch.nmi_injected = true; 3265 vcpu->arch.nmi_pending = false;
3157 } else { 3266 vcpu->arch.nmi_injected = true;
3158 enable_intr_window(vcpu); 3267 } else {
3159 return; 3268 enable_nmi_window(vcpu);
3160 }
3161 }
3162 if (vcpu->arch.nmi_injected) {
3163 vmx_inject_nmi(vcpu);
3164 enable_intr_window(vcpu);
3165 return; 3269 return;
3166 } 3270 }
3167 } 3271 }
3272 if (vcpu->arch.nmi_injected) {
3273 vmx_inject_nmi(vcpu);
3274 if (vcpu->arch.nmi_pending)
3275 enable_nmi_window(vcpu);
3276 else if (kvm_cpu_has_interrupt(vcpu))
3277 enable_irq_window(vcpu);
3278 return;
3279 }
3168 if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) { 3280 if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) {
3169 if (vmx_irq_enabled(vcpu)) 3281 if (vcpu->arch.interrupt_window_open)
3170 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu)); 3282 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
3171 else 3283 else
3172 enable_irq_window(vcpu); 3284 enable_irq_window(vcpu);
@@ -3174,6 +3286,8 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
3174 if (vcpu->arch.interrupt.pending) { 3286 if (vcpu->arch.interrupt.pending) {
3175 vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); 3287 vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
3176 kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr); 3288 kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
3289 if (kvm_cpu_has_interrupt(vcpu))
3290 enable_irq_window(vcpu);
3177 } 3291 }
3178} 3292}
3179 3293
@@ -3213,6 +3327,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3213 struct vcpu_vmx *vmx = to_vmx(vcpu); 3327 struct vcpu_vmx *vmx = to_vmx(vcpu);
3214 u32 intr_info; 3328 u32 intr_info;
3215 3329
3330 /* Record the guest's net vcpu time for enforced NMI injections. */
3331 if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
3332 vmx->entry_time = ktime_get();
3333
3216 /* Handle invalid guest state instead of entering VMX */ 3334 /* Handle invalid guest state instead of entering VMX */
3217 if (vmx->emulation_required && emulate_invalid_guest_state) { 3335 if (vmx->emulation_required && emulate_invalid_guest_state) {
3218 handle_invalid_guest_state(vcpu, kvm_run); 3336 handle_invalid_guest_state(vcpu, kvm_run);
@@ -3327,9 +3445,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3327 if (vmx->rmode.irq.pending) 3445 if (vmx->rmode.irq.pending)
3328 fixup_rmode_irq(vmx); 3446 fixup_rmode_irq(vmx);
3329 3447
3330 vcpu->arch.interrupt_window_open = 3448 vmx_update_window_states(vcpu);
3331 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
3332 (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0;
3333 3449
3334 asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); 3450 asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
3335 vmx->launched = 1; 3451 vmx->launched = 1;
@@ -3337,7 +3453,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3337 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 3453 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
3338 3454
3339 /* We need to handle NMIs before interrupts are enabled */ 3455 /* We need to handle NMIs before interrupts are enabled */
3340 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 && 3456 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
3341 (intr_info & INTR_INFO_VALID_MASK)) { 3457 (intr_info & INTR_INFO_VALID_MASK)) {
3342 KVMTRACE_0D(NMI, vcpu, handler); 3458 KVMTRACE_0D(NMI, vcpu, handler);
3343 asm("int $2"); 3459 asm("int $2");
@@ -3455,6 +3571,11 @@ static int get_ept_level(void)
3455 return VMX_EPT_DEFAULT_GAW + 1; 3571 return VMX_EPT_DEFAULT_GAW + 1;
3456} 3572}
3457 3573
3574static int vmx_get_mt_mask_shift(void)
3575{
3576 return VMX_EPT_MT_EPTE_SHIFT;
3577}
3578
3458static struct kvm_x86_ops vmx_x86_ops = { 3579static struct kvm_x86_ops vmx_x86_ops = {
3459 .cpu_has_kvm_support = cpu_has_kvm_support, 3580 .cpu_has_kvm_support = cpu_has_kvm_support,
3460 .disabled_by_bios = vmx_disabled_by_bios, 3581 .disabled_by_bios = vmx_disabled_by_bios,
@@ -3510,6 +3631,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
3510 3631
3511 .set_tss_addr = vmx_set_tss_addr, 3632 .set_tss_addr = vmx_set_tss_addr,
3512 .get_tdp_level = get_ept_level, 3633 .get_tdp_level = get_ept_level,
3634 .get_mt_mask_shift = vmx_get_mt_mask_shift,
3513}; 3635};
3514 3636
3515static int __init vmx_init(void) 3637static int __init vmx_init(void)
@@ -3566,10 +3688,10 @@ static int __init vmx_init(void)
3566 bypass_guest_pf = 0; 3688 bypass_guest_pf = 0;
3567 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | 3689 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
3568 VMX_EPT_WRITABLE_MASK | 3690 VMX_EPT_WRITABLE_MASK |
3569 VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT |
3570 VMX_EPT_IGMT_BIT); 3691 VMX_EPT_IGMT_BIT);
3571 kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, 3692 kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
3572 VMX_EPT_EXECUTABLE_MASK); 3693 VMX_EPT_EXECUTABLE_MASK,
3694 VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
3573 kvm_enable_tdp(); 3695 kvm_enable_tdp();
3574 } else 3696 } else
3575 kvm_disable_tdp(); 3697 kvm_disable_tdp();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f1f8ff2f1fa2..cc17546a2406 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -34,11 +34,13 @@
34#include <linux/module.h> 34#include <linux/module.h>
35#include <linux/mman.h> 35#include <linux/mman.h>
36#include <linux/highmem.h> 36#include <linux/highmem.h>
37#include <linux/iommu.h>
37#include <linux/intel-iommu.h> 38#include <linux/intel-iommu.h>
38 39
39#include <asm/uaccess.h> 40#include <asm/uaccess.h>
40#include <asm/msr.h> 41#include <asm/msr.h>
41#include <asm/desc.h> 42#include <asm/desc.h>
43#include <asm/mtrr.h>
42 44
43#define MAX_IO_MSRS 256 45#define MAX_IO_MSRS 256
44#define CR0_RESERVED_BITS \ 46#define CR0_RESERVED_BITS \
@@ -86,6 +88,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
86 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 88 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
87 { "hypercalls", VCPU_STAT(hypercalls) }, 89 { "hypercalls", VCPU_STAT(hypercalls) },
88 { "request_irq", VCPU_STAT(request_irq_exits) }, 90 { "request_irq", VCPU_STAT(request_irq_exits) },
91 { "request_nmi", VCPU_STAT(request_nmi_exits) },
89 { "irq_exits", VCPU_STAT(irq_exits) }, 92 { "irq_exits", VCPU_STAT(irq_exits) },
90 { "host_state_reload", VCPU_STAT(host_state_reload) }, 93 { "host_state_reload", VCPU_STAT(host_state_reload) },
91 { "efer_reload", VCPU_STAT(efer_reload) }, 94 { "efer_reload", VCPU_STAT(efer_reload) },
@@ -93,6 +96,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
93 { "insn_emulation", VCPU_STAT(insn_emulation) }, 96 { "insn_emulation", VCPU_STAT(insn_emulation) },
94 { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, 97 { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
95 { "irq_injections", VCPU_STAT(irq_injections) }, 98 { "irq_injections", VCPU_STAT(irq_injections) },
99 { "nmi_injections", VCPU_STAT(nmi_injections) },
96 { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, 100 { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
97 { "mmu_pte_write", VM_STAT(mmu_pte_write) }, 101 { "mmu_pte_write", VM_STAT(mmu_pte_write) },
98 { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, 102 { "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
@@ -101,6 +105,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
101 { "mmu_recycled", VM_STAT(mmu_recycled) }, 105 { "mmu_recycled", VM_STAT(mmu_recycled) },
102 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, 106 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
103 { "mmu_unsync", VM_STAT(mmu_unsync) }, 107 { "mmu_unsync", VM_STAT(mmu_unsync) },
108 { "mmu_unsync_global", VM_STAT(mmu_unsync_global) },
104 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, 109 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
105 { "largepages", VM_STAT(lpages) }, 110 { "largepages", VM_STAT(lpages) },
106 { NULL } 111 { NULL }
@@ -312,6 +317,7 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
312 kvm_x86_ops->set_cr0(vcpu, cr0); 317 kvm_x86_ops->set_cr0(vcpu, cr0);
313 vcpu->arch.cr0 = cr0; 318 vcpu->arch.cr0 = cr0;
314 319
320 kvm_mmu_sync_global(vcpu);
315 kvm_mmu_reset_context(vcpu); 321 kvm_mmu_reset_context(vcpu);
316 return; 322 return;
317} 323}
@@ -355,6 +361,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
355 } 361 }
356 kvm_x86_ops->set_cr4(vcpu, cr4); 362 kvm_x86_ops->set_cr4(vcpu, cr4);
357 vcpu->arch.cr4 = cr4; 363 vcpu->arch.cr4 = cr4;
364 kvm_mmu_sync_global(vcpu);
358 kvm_mmu_reset_context(vcpu); 365 kvm_mmu_reset_context(vcpu);
359} 366}
360EXPORT_SYMBOL_GPL(kvm_set_cr4); 367EXPORT_SYMBOL_GPL(kvm_set_cr4);
@@ -449,7 +456,7 @@ static u32 msrs_to_save[] = {
449 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, 456 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
450#endif 457#endif
451 MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, 458 MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
452 MSR_IA32_PERF_STATUS, 459 MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT
453}; 460};
454 461
455static unsigned num_msrs_to_save; 462static unsigned num_msrs_to_save;
@@ -648,10 +655,38 @@ static bool msr_mtrr_valid(unsigned msr)
648 655
649static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) 656static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
650{ 657{
658 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
659
651 if (!msr_mtrr_valid(msr)) 660 if (!msr_mtrr_valid(msr))
652 return 1; 661 return 1;
653 662
654 vcpu->arch.mtrr[msr - 0x200] = data; 663 if (msr == MSR_MTRRdefType) {
664 vcpu->arch.mtrr_state.def_type = data;
665 vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
666 } else if (msr == MSR_MTRRfix64K_00000)
667 p[0] = data;
668 else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
669 p[1 + msr - MSR_MTRRfix16K_80000] = data;
670 else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
671 p[3 + msr - MSR_MTRRfix4K_C0000] = data;
672 else if (msr == MSR_IA32_CR_PAT)
673 vcpu->arch.pat = data;
674 else { /* Variable MTRRs */
675 int idx, is_mtrr_mask;
676 u64 *pt;
677
678 idx = (msr - 0x200) / 2;
679 is_mtrr_mask = msr - 0x200 - 2 * idx;
680 if (!is_mtrr_mask)
681 pt =
682 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
683 else
684 pt =
685 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
686 *pt = data;
687 }
688
689 kvm_mmu_reset_context(vcpu);
655 return 0; 690 return 0;
656} 691}
657 692
@@ -747,10 +782,37 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
747 782
748static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 783static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
749{ 784{
785 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
786
750 if (!msr_mtrr_valid(msr)) 787 if (!msr_mtrr_valid(msr))
751 return 1; 788 return 1;
752 789
753 *pdata = vcpu->arch.mtrr[msr - 0x200]; 790 if (msr == MSR_MTRRdefType)
791 *pdata = vcpu->arch.mtrr_state.def_type +
792 (vcpu->arch.mtrr_state.enabled << 10);
793 else if (msr == MSR_MTRRfix64K_00000)
794 *pdata = p[0];
795 else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
796 *pdata = p[1 + msr - MSR_MTRRfix16K_80000];
797 else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
798 *pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
799 else if (msr == MSR_IA32_CR_PAT)
800 *pdata = vcpu->arch.pat;
801 else { /* Variable MTRRs */
802 int idx, is_mtrr_mask;
803 u64 *pt;
804
805 idx = (msr - 0x200) / 2;
806 is_mtrr_mask = msr - 0x200 - 2 * idx;
807 if (!is_mtrr_mask)
808 pt =
809 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
810 else
811 pt =
812 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
813 *pdata = *pt;
814 }
815
754 return 0; 816 return 0;
755} 817}
756 818
@@ -903,7 +965,6 @@ int kvm_dev_ioctl_check_extension(long ext)
903 case KVM_CAP_IRQCHIP: 965 case KVM_CAP_IRQCHIP:
904 case KVM_CAP_HLT: 966 case KVM_CAP_HLT:
905 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: 967 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
906 case KVM_CAP_USER_MEMORY:
907 case KVM_CAP_SET_TSS_ADDR: 968 case KVM_CAP_SET_TSS_ADDR:
908 case KVM_CAP_EXT_CPUID: 969 case KVM_CAP_EXT_CPUID:
909 case KVM_CAP_CLOCKSOURCE: 970 case KVM_CAP_CLOCKSOURCE:
@@ -929,7 +990,7 @@ int kvm_dev_ioctl_check_extension(long ext)
929 r = !tdp_enabled; 990 r = !tdp_enabled;
930 break; 991 break;
931 case KVM_CAP_IOMMU: 992 case KVM_CAP_IOMMU:
932 r = intel_iommu_found(); 993 r = iommu_found();
933 break; 994 break;
934 default: 995 default:
935 r = 0; 996 r = 0;
@@ -1188,6 +1249,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1188 int t, times = entry->eax & 0xff; 1249 int t, times = entry->eax & 0xff;
1189 1250
1190 entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; 1251 entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
1252 entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
1191 for (t = 1; t < times && *nent < maxnent; ++t) { 1253 for (t = 1; t < times && *nent < maxnent; ++t) {
1192 do_cpuid_1_ent(&entry[t], function, 0); 1254 do_cpuid_1_ent(&entry[t], function, 0);
1193 entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; 1255 entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
@@ -1218,7 +1280,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1218 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1280 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
1219 /* read more entries until level_type is zero */ 1281 /* read more entries until level_type is zero */
1220 for (i = 1; *nent < maxnent; ++i) { 1282 for (i = 1; *nent < maxnent; ++i) {
1221 level_type = entry[i - 1].ecx & 0xff; 1283 level_type = entry[i - 1].ecx & 0xff00;
1222 if (!level_type) 1284 if (!level_type)
1223 break; 1285 break;
1224 do_cpuid_1_ent(&entry[i], function, i); 1286 do_cpuid_1_ent(&entry[i], function, i);
@@ -1318,6 +1380,15 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
1318 return 0; 1380 return 0;
1319} 1381}
1320 1382
1383static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
1384{
1385 vcpu_load(vcpu);
1386 kvm_inject_nmi(vcpu);
1387 vcpu_put(vcpu);
1388
1389 return 0;
1390}
1391
1321static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu, 1392static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
1322 struct kvm_tpr_access_ctl *tac) 1393 struct kvm_tpr_access_ctl *tac)
1323{ 1394{
@@ -1377,6 +1448,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
1377 r = 0; 1448 r = 0;
1378 break; 1449 break;
1379 } 1450 }
1451 case KVM_NMI: {
1452 r = kvm_vcpu_ioctl_nmi(vcpu);
1453 if (r)
1454 goto out;
1455 r = 0;
1456 break;
1457 }
1380 case KVM_SET_CPUID: { 1458 case KVM_SET_CPUID: {
1381 struct kvm_cpuid __user *cpuid_arg = argp; 1459 struct kvm_cpuid __user *cpuid_arg = argp;
1382 struct kvm_cpuid cpuid; 1460 struct kvm_cpuid cpuid;
@@ -1968,7 +2046,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
1968 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); 2046 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
1969 if (ret < 0) 2047 if (ret < 0)
1970 return 0; 2048 return 0;
1971 kvm_mmu_pte_write(vcpu, gpa, val, bytes); 2049 kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1);
1972 return 1; 2050 return 1;
1973} 2051}
1974 2052
@@ -2404,8 +2482,6 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2404 val = kvm_register_read(vcpu, VCPU_REGS_RAX); 2482 val = kvm_register_read(vcpu, VCPU_REGS_RAX);
2405 memcpy(vcpu->arch.pio_data, &val, 4); 2483 memcpy(vcpu->arch.pio_data, &val, 4);
2406 2484
2407 kvm_x86_ops->skip_emulated_instruction(vcpu);
2408
2409 pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in); 2485 pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
2410 if (pio_dev) { 2486 if (pio_dev) {
2411 kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data); 2487 kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
@@ -2541,7 +2617,7 @@ int kvm_arch_init(void *opaque)
2541 kvm_mmu_set_nonpresent_ptes(0ull, 0ull); 2617 kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
2542 kvm_mmu_set_base_ptes(PT_PRESENT_MASK); 2618 kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
2543 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, 2619 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
2544 PT_DIRTY_MASK, PT64_NX_MASK, 0); 2620 PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);
2545 return 0; 2621 return 0;
2546 2622
2547out: 2623out:
@@ -2729,7 +2805,7 @@ static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
2729 2805
2730 e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT; 2806 e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
2731 /* when no next entry is found, the current entry[i] is reselected */ 2807 /* when no next entry is found, the current entry[i] is reselected */
2732 for (j = i + 1; j == i; j = (j + 1) % nent) { 2808 for (j = i + 1; ; j = (j + 1) % nent) {
2733 struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j]; 2809 struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
2734 if (ej->function == e->function) { 2810 if (ej->function == e->function) {
2735 ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; 2811 ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
@@ -2973,7 +3049,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2973 pr_debug("vcpu %d received sipi with vector # %x\n", 3049 pr_debug("vcpu %d received sipi with vector # %x\n",
2974 vcpu->vcpu_id, vcpu->arch.sipi_vector); 3050 vcpu->vcpu_id, vcpu->arch.sipi_vector);
2975 kvm_lapic_reset(vcpu); 3051 kvm_lapic_reset(vcpu);
2976 r = kvm_x86_ops->vcpu_reset(vcpu); 3052 r = kvm_arch_vcpu_reset(vcpu);
2977 if (r) 3053 if (r)
2978 return r; 3054 return r;
2979 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 3055 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -3275,9 +3351,9 @@ static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
3275 kvm_desct->padding = 0; 3351 kvm_desct->padding = 0;
3276} 3352}
3277 3353
3278static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu, 3354static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu,
3279 u16 selector, 3355 u16 selector,
3280 struct descriptor_table *dtable) 3356 struct descriptor_table *dtable)
3281{ 3357{
3282 if (selector & 1 << 2) { 3358 if (selector & 1 << 2) {
3283 struct kvm_segment kvm_seg; 3359 struct kvm_segment kvm_seg;
@@ -3302,7 +3378,7 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3302 struct descriptor_table dtable; 3378 struct descriptor_table dtable;
3303 u16 index = selector >> 3; 3379 u16 index = selector >> 3;
3304 3380
3305 get_segment_descritptor_dtable(vcpu, selector, &dtable); 3381 get_segment_descriptor_dtable(vcpu, selector, &dtable);
3306 3382
3307 if (dtable.limit < index * 8 + 7) { 3383 if (dtable.limit < index * 8 + 7) {
3308 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); 3384 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
@@ -3321,7 +3397,7 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3321 struct descriptor_table dtable; 3397 struct descriptor_table dtable;
3322 u16 index = selector >> 3; 3398 u16 index = selector >> 3;
3323 3399
3324 get_segment_descritptor_dtable(vcpu, selector, &dtable); 3400 get_segment_descriptor_dtable(vcpu, selector, &dtable);
3325 3401
3326 if (dtable.limit < index * 8 + 7) 3402 if (dtable.limit < index * 8 + 7)
3327 return 1; 3403 return 1;
@@ -3900,6 +3976,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
3900 /* We do fxsave: this must be aligned. */ 3976 /* We do fxsave: this must be aligned. */
3901 BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF); 3977 BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF);
3902 3978
3979 vcpu->arch.mtrr_state.have_fixed = 1;
3903 vcpu_load(vcpu); 3980 vcpu_load(vcpu);
3904 r = kvm_arch_vcpu_reset(vcpu); 3981 r = kvm_arch_vcpu_reset(vcpu);
3905 if (r == 0) 3982 if (r == 0)
@@ -3925,6 +4002,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
3925 4002
3926int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) 4003int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
3927{ 4004{
4005 vcpu->arch.nmi_pending = false;
4006 vcpu->arch.nmi_injected = false;
4007
3928 return kvm_x86_ops->vcpu_reset(vcpu); 4008 return kvm_x86_ops->vcpu_reset(vcpu);
3929} 4009}
3930 4010
@@ -4012,6 +4092,7 @@ struct kvm *kvm_arch_create_vm(void)
4012 return ERR_PTR(-ENOMEM); 4092 return ERR_PTR(-ENOMEM);
4013 4093
4014 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 4094 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
4095 INIT_LIST_HEAD(&kvm->arch.oos_global_pages);
4015 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); 4096 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
4016 4097
4017 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ 4098 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
@@ -4048,8 +4129,8 @@ static void kvm_free_vcpus(struct kvm *kvm)
4048 4129
4049void kvm_arch_destroy_vm(struct kvm *kvm) 4130void kvm_arch_destroy_vm(struct kvm *kvm)
4050{ 4131{
4051 kvm_iommu_unmap_guest(kvm);
4052 kvm_free_all_assigned_devices(kvm); 4132 kvm_free_all_assigned_devices(kvm);
4133 kvm_iommu_unmap_guest(kvm);
4053 kvm_free_pit(kvm); 4134 kvm_free_pit(kvm);
4054 kfree(kvm->arch.vpic); 4135 kfree(kvm->arch.vpic);
4055 kfree(kvm->arch.vioapic); 4136 kfree(kvm->arch.vioapic);
@@ -4127,7 +4208,8 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
4127int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 4208int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
4128{ 4209{
4129 return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE 4210 return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
4130 || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED; 4211 || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
4212 || vcpu->arch.nmi_pending;
4131} 4213}
4132 4214
4133static void vcpu_kick_intr(void *info) 4215static void vcpu_kick_intr(void *info)
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index ea051173b0da..d174db7a3370 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -58,6 +58,7 @@
58#define SrcMem32 (4<<4) /* Memory operand (32-bit). */ 58#define SrcMem32 (4<<4) /* Memory operand (32-bit). */
59#define SrcImm (5<<4) /* Immediate operand. */ 59#define SrcImm (5<<4) /* Immediate operand. */
60#define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */ 60#define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */
61#define SrcOne (7<<4) /* Implied '1' */
61#define SrcMask (7<<4) 62#define SrcMask (7<<4)
62/* Generic ModRM decode. */ 63/* Generic ModRM decode. */
63#define ModRM (1<<7) 64#define ModRM (1<<7)
@@ -70,17 +71,23 @@
70#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ 71#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */
71#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ 72#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */
72#define GroupMask 0xff /* Group number stored in bits 0:7 */ 73#define GroupMask 0xff /* Group number stored in bits 0:7 */
74/* Source 2 operand type */
75#define Src2None (0<<29)
76#define Src2CL (1<<29)
77#define Src2ImmByte (2<<29)
78#define Src2One (3<<29)
79#define Src2Mask (7<<29)
73 80
74enum { 81enum {
75 Group1_80, Group1_81, Group1_82, Group1_83, 82 Group1_80, Group1_81, Group1_82, Group1_83,
76 Group1A, Group3_Byte, Group3, Group4, Group5, Group7, 83 Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
77}; 84};
78 85
79static u16 opcode_table[256] = { 86static u32 opcode_table[256] = {
80 /* 0x00 - 0x07 */ 87 /* 0x00 - 0x07 */
81 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 88 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
82 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 89 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
83 0, 0, 0, 0, 90 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
84 /* 0x08 - 0x0F */ 91 /* 0x08 - 0x0F */
85 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 92 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
86 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 93 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
@@ -195,7 +202,7 @@ static u16 opcode_table[256] = {
195 ImplicitOps, ImplicitOps, Group | Group4, Group | Group5, 202 ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
196}; 203};
197 204
198static u16 twobyte_table[256] = { 205static u32 twobyte_table[256] = {
199 /* 0x00 - 0x0F */ 206 /* 0x00 - 0x0F */
200 0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0, 207 0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0,
201 ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 208 ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
@@ -230,9 +237,14 @@ static u16 twobyte_table[256] = {
230 /* 0x90 - 0x9F */ 237 /* 0x90 - 0x9F */
231 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 238 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
232 /* 0xA0 - 0xA7 */ 239 /* 0xA0 - 0xA7 */
233 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0, 240 0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
241 DstMem | SrcReg | Src2ImmByte | ModRM,
242 DstMem | SrcReg | Src2CL | ModRM, 0, 0,
234 /* 0xA8 - 0xAF */ 243 /* 0xA8 - 0xAF */
235 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, ModRM, 0, 244 0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
245 DstMem | SrcReg | Src2ImmByte | ModRM,
246 DstMem | SrcReg | Src2CL | ModRM,
247 ModRM, 0,
236 /* 0xB0 - 0xB7 */ 248 /* 0xB0 - 0xB7 */
237 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0, 249 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0,
238 DstMem | SrcReg | ModRM | BitOp, 250 DstMem | SrcReg | ModRM | BitOp,
@@ -253,7 +265,7 @@ static u16 twobyte_table[256] = {
253 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 265 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
254}; 266};
255 267
256static u16 group_table[] = { 268static u32 group_table[] = {
257 [Group1_80*8] = 269 [Group1_80*8] =
258 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 270 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
259 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 271 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
@@ -297,9 +309,9 @@ static u16 group_table[] = {
297 SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp, 309 SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp,
298}; 310};
299 311
300static u16 group2_table[] = { 312static u32 group2_table[] = {
301 [Group7*8] = 313 [Group7*8] =
302 SrcNone | ModRM, 0, 0, 0, 314 SrcNone | ModRM, 0, 0, SrcNone | ModRM,
303 SrcNone | ModRM | DstMem | Mov, 0, 315 SrcNone | ModRM | DstMem | Mov, 0,
304 SrcMem16 | ModRM | Mov, 0, 316 SrcMem16 | ModRM | Mov, 0,
305}; 317};
@@ -359,49 +371,48 @@ static u16 group2_table[] = {
359 "andl %"_msk",%"_LO32 _tmp"; " \ 371 "andl %"_msk",%"_LO32 _tmp"; " \
360 "orl %"_LO32 _tmp",%"_sav"; " 372 "orl %"_LO32 _tmp",%"_sav"; "
361 373
374#ifdef CONFIG_X86_64
375#define ON64(x) x
376#else
377#define ON64(x)
378#endif
379
380#define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix) \
381 do { \
382 __asm__ __volatile__ ( \
383 _PRE_EFLAGS("0", "4", "2") \
384 _op _suffix " %"_x"3,%1; " \
385 _POST_EFLAGS("0", "4", "2") \
386 : "=m" (_eflags), "=m" ((_dst).val), \
387 "=&r" (_tmp) \
388 : _y ((_src).val), "i" (EFLAGS_MASK)); \
389 } while (0)
390
391
362/* Raw emulation: instruction has two explicit operands. */ 392/* Raw emulation: instruction has two explicit operands. */
363#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \ 393#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
364 do { \ 394 do { \
365 unsigned long _tmp; \ 395 unsigned long _tmp; \
366 \ 396 \
367 switch ((_dst).bytes) { \ 397 switch ((_dst).bytes) { \
368 case 2: \ 398 case 2: \
369 __asm__ __volatile__ ( \ 399 ____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \
370 _PRE_EFLAGS("0", "4", "2") \ 400 break; \
371 _op"w %"_wx"3,%1; " \ 401 case 4: \
372 _POST_EFLAGS("0", "4", "2") \ 402 ____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \
373 : "=m" (_eflags), "=m" ((_dst).val), \ 403 break; \
374 "=&r" (_tmp) \ 404 case 8: \
375 : _wy ((_src).val), "i" (EFLAGS_MASK)); \ 405 ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q")); \
376 break; \ 406 break; \
377 case 4: \ 407 } \
378 __asm__ __volatile__ ( \
379 _PRE_EFLAGS("0", "4", "2") \
380 _op"l %"_lx"3,%1; " \
381 _POST_EFLAGS("0", "4", "2") \
382 : "=m" (_eflags), "=m" ((_dst).val), \
383 "=&r" (_tmp) \
384 : _ly ((_src).val), "i" (EFLAGS_MASK)); \
385 break; \
386 case 8: \
387 __emulate_2op_8byte(_op, _src, _dst, \
388 _eflags, _qx, _qy); \
389 break; \
390 } \
391 } while (0) 408 } while (0)
392 409
393#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \ 410#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
394 do { \ 411 do { \
395 unsigned long __tmp; \ 412 unsigned long _tmp; \
396 switch ((_dst).bytes) { \ 413 switch ((_dst).bytes) { \
397 case 1: \ 414 case 1: \
398 __asm__ __volatile__ ( \ 415 ____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b"); \
399 _PRE_EFLAGS("0", "4", "2") \
400 _op"b %"_bx"3,%1; " \
401 _POST_EFLAGS("0", "4", "2") \
402 : "=m" (_eflags), "=m" ((_dst).val), \
403 "=&r" (__tmp) \
404 : _by ((_src).val), "i" (EFLAGS_MASK)); \
405 break; \ 416 break; \
406 default: \ 417 default: \
407 __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ 418 __emulate_2op_nobyte(_op, _src, _dst, _eflags, \
@@ -425,71 +436,68 @@ static u16 group2_table[] = {
425 __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ 436 __emulate_2op_nobyte(_op, _src, _dst, _eflags, \
426 "w", "r", _LO32, "r", "", "r") 437 "w", "r", _LO32, "r", "", "r")
427 438
428/* Instruction has only one explicit operand (no source operand). */ 439/* Instruction has three operands and one operand is stored in ECX register */
429#define emulate_1op(_op, _dst, _eflags) \ 440#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \
430 do { \ 441 do { \
431 unsigned long _tmp; \ 442 unsigned long _tmp; \
432 \ 443 _type _clv = (_cl).val; \
433 switch ((_dst).bytes) { \ 444 _type _srcv = (_src).val; \
434 case 1: \ 445 _type _dstv = (_dst).val; \
435 __asm__ __volatile__ ( \ 446 \
436 _PRE_EFLAGS("0", "3", "2") \ 447 __asm__ __volatile__ ( \
437 _op"b %1; " \ 448 _PRE_EFLAGS("0", "5", "2") \
438 _POST_EFLAGS("0", "3", "2") \ 449 _op _suffix " %4,%1 \n" \
439 : "=m" (_eflags), "=m" ((_dst).val), \ 450 _POST_EFLAGS("0", "5", "2") \
440 "=&r" (_tmp) \ 451 : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \
441 : "i" (EFLAGS_MASK)); \ 452 : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \
442 break; \ 453 ); \
443 case 2: \ 454 \
444 __asm__ __volatile__ ( \ 455 (_cl).val = (unsigned long) _clv; \
445 _PRE_EFLAGS("0", "3", "2") \ 456 (_src).val = (unsigned long) _srcv; \
446 _op"w %1; " \ 457 (_dst).val = (unsigned long) _dstv; \
447 _POST_EFLAGS("0", "3", "2") \
448 : "=m" (_eflags), "=m" ((_dst).val), \
449 "=&r" (_tmp) \
450 : "i" (EFLAGS_MASK)); \
451 break; \
452 case 4: \
453 __asm__ __volatile__ ( \
454 _PRE_EFLAGS("0", "3", "2") \
455 _op"l %1; " \
456 _POST_EFLAGS("0", "3", "2") \
457 : "=m" (_eflags), "=m" ((_dst).val), \
458 "=&r" (_tmp) \
459 : "i" (EFLAGS_MASK)); \
460 break; \
461 case 8: \
462 __emulate_1op_8byte(_op, _dst, _eflags); \
463 break; \
464 } \
465 } while (0) 458 } while (0)
466 459
467/* Emulate an instruction with quadword operands (x86/64 only). */ 460#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \
468#if defined(CONFIG_X86_64) 461 do { \
469#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) \ 462 switch ((_dst).bytes) { \
470 do { \ 463 case 2: \
471 __asm__ __volatile__ ( \ 464 __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
472 _PRE_EFLAGS("0", "4", "2") \ 465 "w", unsigned short); \
473 _op"q %"_qx"3,%1; " \ 466 break; \
474 _POST_EFLAGS("0", "4", "2") \ 467 case 4: \
475 : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ 468 __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
476 : _qy ((_src).val), "i" (EFLAGS_MASK)); \ 469 "l", unsigned int); \
470 break; \
471 case 8: \
472 ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
473 "q", unsigned long)); \
474 break; \
475 } \
477 } while (0) 476 } while (0)
478 477
479#define __emulate_1op_8byte(_op, _dst, _eflags) \ 478#define __emulate_1op(_op, _dst, _eflags, _suffix) \
480 do { \ 479 do { \
481 __asm__ __volatile__ ( \ 480 unsigned long _tmp; \
482 _PRE_EFLAGS("0", "3", "2") \ 481 \
483 _op"q %1; " \ 482 __asm__ __volatile__ ( \
484 _POST_EFLAGS("0", "3", "2") \ 483 _PRE_EFLAGS("0", "3", "2") \
485 : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ 484 _op _suffix " %1; " \
486 : "i" (EFLAGS_MASK)); \ 485 _POST_EFLAGS("0", "3", "2") \
486 : "=m" (_eflags), "+m" ((_dst).val), \
487 "=&r" (_tmp) \
488 : "i" (EFLAGS_MASK)); \
487 } while (0) 489 } while (0)
488 490
489#elif defined(__i386__) 491/* Instruction has only one explicit operand (no source operand). */
490#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) 492#define emulate_1op(_op, _dst, _eflags) \
491#define __emulate_1op_8byte(_op, _dst, _eflags) 493 do { \
492#endif /* __i386__ */ 494 switch ((_dst).bytes) { \
495 case 1: __emulate_1op(_op, _dst, _eflags, "b"); break; \
496 case 2: __emulate_1op(_op, _dst, _eflags, "w"); break; \
497 case 4: __emulate_1op(_op, _dst, _eflags, "l"); break; \
498 case 8: ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \
499 } \
500 } while (0)
493 501
494/* Fetch next part of the instruction being emulated. */ 502/* Fetch next part of the instruction being emulated. */
495#define insn_fetch(_type, _size, _eip) \ 503#define insn_fetch(_type, _size, _eip) \
@@ -1041,6 +1049,33 @@ done_prefixes:
1041 c->src.bytes = 1; 1049 c->src.bytes = 1;
1042 c->src.val = insn_fetch(s8, 1, c->eip); 1050 c->src.val = insn_fetch(s8, 1, c->eip);
1043 break; 1051 break;
1052 case SrcOne:
1053 c->src.bytes = 1;
1054 c->src.val = 1;
1055 break;
1056 }
1057
1058 /*
1059 * Decode and fetch the second source operand: register, memory
1060 * or immediate.
1061 */
1062 switch (c->d & Src2Mask) {
1063 case Src2None:
1064 break;
1065 case Src2CL:
1066 c->src2.bytes = 1;
1067 c->src2.val = c->regs[VCPU_REGS_RCX] & 0x8;
1068 break;
1069 case Src2ImmByte:
1070 c->src2.type = OP_IMM;
1071 c->src2.ptr = (unsigned long *)c->eip;
1072 c->src2.bytes = 1;
1073 c->src2.val = insn_fetch(u8, 1, c->eip);
1074 break;
1075 case Src2One:
1076 c->src2.bytes = 1;
1077 c->src2.val = 1;
1078 break;
1044 } 1079 }
1045 1080
1046 /* Decode and fetch the destination operand: register or memory. */ 1081 /* Decode and fetch the destination operand: register or memory. */
@@ -1100,20 +1135,33 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
1100 c->regs[VCPU_REGS_RSP]); 1135 c->regs[VCPU_REGS_RSP]);
1101} 1136}
1102 1137
1103static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, 1138static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1104 struct x86_emulate_ops *ops) 1139 struct x86_emulate_ops *ops)
1105{ 1140{
1106 struct decode_cache *c = &ctxt->decode; 1141 struct decode_cache *c = &ctxt->decode;
1107 int rc; 1142 int rc;
1108 1143
1109 rc = ops->read_std(register_address(c, ss_base(ctxt), 1144 rc = ops->read_emulated(register_address(c, ss_base(ctxt),
1110 c->regs[VCPU_REGS_RSP]), 1145 c->regs[VCPU_REGS_RSP]),
1111 &c->dst.val, c->dst.bytes, ctxt->vcpu); 1146 &c->src.val, c->src.bytes, ctxt->vcpu);
1112 if (rc != 0) 1147 if (rc != 0)
1113 return rc; 1148 return rc;
1114 1149
1115 register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->dst.bytes); 1150 register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.bytes);
1151 return rc;
1152}
1153
1154static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
1155 struct x86_emulate_ops *ops)
1156{
1157 struct decode_cache *c = &ctxt->decode;
1158 int rc;
1116 1159
1160 c->src.bytes = c->dst.bytes;
1161 rc = emulate_pop(ctxt, ops);
1162 if (rc != 0)
1163 return rc;
1164 c->dst.val = c->src.val;
1117 return 0; 1165 return 0;
1118} 1166}
1119 1167
@@ -1415,24 +1463,15 @@ special_insn:
1415 emulate_1op("dec", c->dst, ctxt->eflags); 1463 emulate_1op("dec", c->dst, ctxt->eflags);
1416 break; 1464 break;
1417 case 0x50 ... 0x57: /* push reg */ 1465 case 0x50 ... 0x57: /* push reg */
1418 c->dst.type = OP_MEM; 1466 emulate_push(ctxt);
1419 c->dst.bytes = c->op_bytes;
1420 c->dst.val = c->src.val;
1421 register_address_increment(c, &c->regs[VCPU_REGS_RSP],
1422 -c->op_bytes);
1423 c->dst.ptr = (void *) register_address(
1424 c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]);
1425 break; 1467 break;
1426 case 0x58 ... 0x5f: /* pop reg */ 1468 case 0x58 ... 0x5f: /* pop reg */
1427 pop_instruction: 1469 pop_instruction:
1428 if ((rc = ops->read_std(register_address(c, ss_base(ctxt), 1470 c->src.bytes = c->op_bytes;
1429 c->regs[VCPU_REGS_RSP]), c->dst.ptr, 1471 rc = emulate_pop(ctxt, ops);
1430 c->op_bytes, ctxt->vcpu)) != 0) 1472 if (rc != 0)
1431 goto done; 1473 goto done;
1432 1474 c->dst.val = c->src.val;
1433 register_address_increment(c, &c->regs[VCPU_REGS_RSP],
1434 c->op_bytes);
1435 c->dst.type = OP_NONE; /* Disable writeback. */
1436 break; 1475 break;
1437 case 0x63: /* movsxd */ 1476 case 0x63: /* movsxd */
1438 if (ctxt->mode != X86EMUL_MODE_PROT64) 1477 if (ctxt->mode != X86EMUL_MODE_PROT64)
@@ -1591,7 +1630,9 @@ special_insn:
1591 emulate_push(ctxt); 1630 emulate_push(ctxt);
1592 break; 1631 break;
1593 case 0x9d: /* popf */ 1632 case 0x9d: /* popf */
1633 c->dst.type = OP_REG;
1594 c->dst.ptr = (unsigned long *) &ctxt->eflags; 1634 c->dst.ptr = (unsigned long *) &ctxt->eflags;
1635 c->dst.bytes = c->op_bytes;
1595 goto pop_instruction; 1636 goto pop_instruction;
1596 case 0xa0 ... 0xa1: /* mov */ 1637 case 0xa0 ... 0xa1: /* mov */
1597 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; 1638 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
@@ -1689,7 +1730,9 @@ special_insn:
1689 emulate_grp2(ctxt); 1730 emulate_grp2(ctxt);
1690 break; 1731 break;
1691 case 0xc3: /* ret */ 1732 case 0xc3: /* ret */
1733 c->dst.type = OP_REG;
1692 c->dst.ptr = &c->eip; 1734 c->dst.ptr = &c->eip;
1735 c->dst.bytes = c->op_bytes;
1693 goto pop_instruction; 1736 goto pop_instruction;
1694 case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */ 1737 case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */
1695 mov: 1738 mov:
@@ -1778,7 +1821,7 @@ special_insn:
1778 c->eip = saved_eip; 1821 c->eip = saved_eip;
1779 goto cannot_emulate; 1822 goto cannot_emulate;
1780 } 1823 }
1781 return 0; 1824 break;
1782 case 0xf4: /* hlt */ 1825 case 0xf4: /* hlt */
1783 ctxt->vcpu->arch.halt_request = 1; 1826 ctxt->vcpu->arch.halt_request = 1;
1784 break; 1827 break;
@@ -1999,12 +2042,20 @@ twobyte_insn:
1999 c->src.val &= (c->dst.bytes << 3) - 1; 2042 c->src.val &= (c->dst.bytes << 3) - 1;
2000 emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags); 2043 emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags);
2001 break; 2044 break;
2045 case 0xa4: /* shld imm8, r, r/m */
2046 case 0xa5: /* shld cl, r, r/m */
2047 emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
2048 break;
2002 case 0xab: 2049 case 0xab:
2003 bts: /* bts */ 2050 bts: /* bts */
2004 /* only subword offset */ 2051 /* only subword offset */
2005 c->src.val &= (c->dst.bytes << 3) - 1; 2052 c->src.val &= (c->dst.bytes << 3) - 1;
2006 emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags); 2053 emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
2007 break; 2054 break;
2055 case 0xac: /* shrd imm8, r, r/m */
2056 case 0xad: /* shrd cl, r, r/m */
2057 emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags);
2058 break;
2008 case 0xae: /* clflush */ 2059 case 0xae: /* clflush */
2009 break; 2060 break;
2010 case 0xb0 ... 0xb1: /* cmpxchg */ 2061 case 0xb0 ... 0xb1: /* cmpxchg */
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c
index 37b9ae4d44c5..df167f265622 100644
--- a/arch/x86/mach-default/setup.c
+++ b/arch/x86/mach-default/setup.c
@@ -133,29 +133,28 @@ void __init time_init_hook(void)
  **/
 void mca_nmi_hook(void)
 {
-	/* If I recall correctly, there's a whole bunch of other things that
+	/*
+	 * If I recall correctly, there's a whole bunch of other things that
 	 * we can do to check for NMI problems, but that's all I know about
 	 * at the moment.
 	 */
-
-	printk("NMI generated from unknown source!\n");
+	pr_warning("NMI generated from unknown source!\n");
 }
 #endif
 
 static __init int no_ipi_broadcast(char *str)
 {
 	get_option(&str, &no_broadcast);
-	printk ("Using %s mode\n", no_broadcast ? "No IPI Broadcast" :
-							"IPI Broadcast");
+	pr_info("Using %s mode\n",
+		no_broadcast ? "No IPI Broadcast" : "IPI Broadcast");
 	return 1;
 }
-
 __setup("no_ipi_broadcast=", no_ipi_broadcast);
 
 static int __init print_ipi_mode(void)
 {
-	printk ("Using IPI %s mode\n", no_broadcast ? "No-Shortcut" :
-							"Shortcut");
+	pr_info("Using IPI %s mode\n",
+		no_broadcast ? "No-Shortcut" : "Shortcut");
 	return 0;
 }
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8655b5bb0963..f99a6c6c432e 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -435,8 +435,12 @@ static void __init set_highmem_pages_init(void)
 #endif /* !CONFIG_NUMA */
 
 #else
-# define permanent_kmaps_init(pgd_base) do { } while (0)
-# define set_highmem_pages_init() do { } while (0)
+static inline void permanent_kmaps_init(pgd_t *pgd_base)
+{
+}
+static inline void set_highmem_pages_init(void)
+{
+}
 #endif /* CONFIG_HIGHMEM */
 
 void __init native_pagetable_setup_start(pgd_t *base)
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 1d88d2b39771..9e5752fe4d15 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -4,7 +4,7 @@
 #include <linux/irq.h>
 #include <linux/dmi.h>
 #include <asm/numa.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 struct pci_root_info {
 	char *name;
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 22e057665e55..9bb09823b362 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -2,7 +2,7 @@
 #include <linux/pci.h>
 #include <linux/topology.h>
 #include <linux/cpu.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 #ifdef CONFIG_X86_64
 #include <asm/pci-direct.h>
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index bb1a01f089e2..62ddb73e09ed 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -14,8 +14,7 @@
 #include <asm/segment.h>
 #include <asm/io.h>
 #include <asm/smp.h>
-
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
 				PCI_PROBE_MMCONF;
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c
index 9a5af6c8fbe9..bd13c3e4c6db 100644
--- a/arch/x86/pci/direct.c
+++ b/arch/x86/pci/direct.c
@@ -5,7 +5,7 @@
 #include <linux/pci.h>
 #include <linux/init.h>
 #include <linux/dmi.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /*
  * Functions for accessing PCI base (first 256 bytes) and extended
diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c
index 86631ccbc25a..f6adf2c6d751 100644
--- a/arch/x86/pci/early.c
+++ b/arch/x86/pci/early.c
@@ -2,7 +2,7 @@
 #include <linux/pci.h>
 #include <asm/pci-direct.h>
 #include <asm/io.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /* Direct PCI access. This is used for PCI accesses in early boot before
    the PCI subsystem works. */
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 2051dc96b8e9..7d388d5cf548 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -6,8 +6,7 @@
 #include <linux/dmi.h>
 #include <linux/pci.h>
 #include <linux/init.h>
-#include "pci.h"
-
+#include <asm/pci_x86.h>
 
 static void __devinit pci_fixup_i450nx(struct pci_dev *d)
 {
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 844df0cbbd3e..e51bf2cda4b0 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -34,8 +34,8 @@
 
 #include <asm/pat.h>
 #include <asm/e820.h>
+#include <asm/pci_x86.h>
 
-#include "pci.h"
 
 static int
 skip_isa_ioresource_align(struct pci_dev *dev) {
diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c
index d6c950f81858..bec3b048e72b 100644
--- a/arch/x86/pci/init.c
+++ b/arch/x86/pci/init.c
@@ -1,6 +1,6 @@
 #include <linux/pci.h>
 #include <linux/init.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /* arch_initcall has too random ordering, so call the initializers
    in the right sequence from here. */
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index bf69dbe08bff..373b9afe6d44 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -16,8 +16,7 @@
 #include <asm/io_apic.h>
 #include <linux/irq.h>
 #include <linux/acpi.h>
-
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 #define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24))
 #define PIRQ_VERSION 0x0100
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index b722dd481b39..f1065b129e9c 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -3,7 +3,7 @@
  */
 #include <linux/init.h>
 #include <linux/pci.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /*
  * Discover remaining PCI buses in case there are peer host bridges.
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 654a2234f8f3..89bf9242c80a 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -15,8 +15,7 @@
 #include <linux/acpi.h>
 #include <linux/bitmap.h>
 #include <asm/e820.h>
-
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /* aperture is up to 256MB but BIOS may reserve less */
 #define MMCONFIG_APER_MIN (2 * 1024*1024)
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c
index f3c761dce695..8b2d561046a3 100644
--- a/arch/x86/pci/mmconfig_32.c
+++ b/arch/x86/pci/mmconfig_32.c
@@ -13,7 +13,7 @@
 #include <linux/init.h>
 #include <linux/acpi.h>
 #include <asm/e820.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /* Assume systems with more busses have correct MCFG */
 #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG))
diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c
index a1994163c99d..30007ffc8e11 100644
--- a/arch/x86/pci/mmconfig_64.c
+++ b/arch/x86/pci/mmconfig_64.c
@@ -10,8 +10,7 @@
 #include <linux/acpi.h>
 #include <linux/bitmap.h>
 #include <asm/e820.h>
-
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /* Static virtual mapping of the MMCONFIG aperture */
 struct mmcfg_virt {
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c
index 1177845d3186..2089354968a2 100644
--- a/arch/x86/pci/numaq_32.c
+++ b/arch/x86/pci/numaq_32.c
@@ -7,7 +7,7 @@
 #include <linux/nodemask.h>
 #include <mach_apic.h>
 #include <asm/mpspec.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 #define XQUAD_PORTIO_BASE 0xfe400000
 #define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */
diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c
index e11e9e803d5f..b889d824f7c6 100644
--- a/arch/x86/pci/olpc.c
+++ b/arch/x86/pci/olpc.c
@@ -29,7 +29,7 @@
 #include <linux/init.h>
 #include <asm/olpc.h>
 #include <asm/geode.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /*
  * In the tables below, the first two line (8 longwords) are the
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index 37472fc6f729..b82cae970dfd 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -6,9 +6,8 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/uaccess.h>
-#include "pci.h"
-#include "pci-functions.h"
-
+#include <asm/pci_x86.h>
+#include <asm/mach-default/pci-functions.h>
 
 /* BIOS32 signature: "_32_" */
 #define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c
index 42f4cb19faca..16d0c0eb0d19 100644
--- a/arch/x86/pci/visws.c
+++ b/arch/x86/pci/visws.c
@@ -9,11 +9,10 @@
 #include <linux/init.h>
 
 #include <asm/setup.h>
+#include <asm/pci_x86.h>
 #include <asm/visws/cobalt.h>
 #include <asm/visws/lithium.h>
 
-#include "pci.h"
-
 static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; }
 static void pci_visws_disable_irq(struct pci_dev *dev) { }
 
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 65d75a6be0ba..14f240623497 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -132,8 +132,7 @@ static void do_stolen_accounting(void)
 	*snap = state;
 
 	/* Add the appropriate number of ticks of stolen time,
-	   including any left-overs from last time.  Passing NULL to
-	   account_steal_time accounts the time as stolen. */
+	   including any left-overs from last time. */
 	stolen = runnable + offline + __get_cpu_var(residual_stolen);
 
 	if (stolen < 0)
@@ -141,11 +140,10 @@ static void do_stolen_accounting(void)
 
 	ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
 	__get_cpu_var(residual_stolen) = stolen;
-	account_steal_time(NULL, ticks);
+	account_steal_ticks(ticks);
 
 	/* Add the appropriate number of ticks of blocked time,
-	   including any left-overs from last time.  Passing idle to
-	   account_steal_time accounts the time as idle/wait. */
+	   including any left-overs from last time. */
 	blocked += __get_cpu_var(residual_blocked);
 
 	if (blocked < 0)
@@ -153,7 +151,7 @@ static void do_stolen_accounting(void)
 
 	ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
 	__get_cpu_var(residual_blocked) = blocked;
-	account_steal_time(idle_task(smp_processor_id()), ticks);
+	account_idle_ticks(ticks);
 }
 
 /*
diff --git a/arch/xtensa/kernel/init_task.c b/arch/xtensa/kernel/init_task.c
index 3df469dbe814..e07f5c9fcd35 100644
--- a/arch/xtensa/kernel/init_task.c
+++ b/arch/xtensa/kernel/init_task.c
@@ -21,7 +21,6 @@
 
 #include <asm/uaccess.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);