aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/include/asm/smp.h1
-rw-r--r--arch/alpha/kernel/Makefile2
-rw-r--r--arch/alpha/kernel/binfmt_loader.c51
-rw-r--r--arch/alpha/kernel/irq.c2
-rw-r--r--arch/alpha/kernel/process.c2
-rw-r--r--arch/alpha/kernel/smp.c7
-rw-r--r--arch/alpha/kernel/sys_dp264.c8
-rw-r--r--arch/alpha/kernel/sys_titan.c4
-rw-r--r--arch/arm/common/gic.c4
-rw-r--r--arch/arm/kernel/irq.c2
-rw-r--r--arch/arm/kernel/smp.c10
-rw-r--r--arch/arm/mach-at91/at91rm9200_time.c3
-rw-r--r--arch/arm/mach-at91/at91sam926x_time.c2
-rw-r--r--arch/arm/mach-davinci/time.c2
-rw-r--r--arch/arm/mach-imx/time.c2
-rw-r--r--arch/arm/mach-ixp4xx/common.c2
-rw-r--r--arch/arm/mach-msm/timer.c2
-rw-r--r--arch/arm/mach-ns9xxx/time-ns9360.c2
-rw-r--r--arch/arm/mach-omap1/time.c2
-rw-r--r--arch/arm/mach-omap1/timer32k.c2
-rw-r--r--arch/arm/mach-omap2/timer-gp.c2
-rw-r--r--arch/arm/mach-pxa/time.c2
-rw-r--r--arch/arm/mach-realview/core.c2
-rw-r--r--arch/arm/mach-realview/localtimer.c4
-rw-r--r--arch/arm/mach-sa1100/time.c2
-rw-r--r--arch/arm/mach-versatile/core.c2
-rw-r--r--arch/arm/oprofile/op_model_mpcore.c4
-rw-r--r--arch/arm/plat-mxc/time.c2
-rw-r--r--arch/arm/plat-orion/time.c2
-rw-r--r--arch/avr32/kernel/time.c2
-rw-r--r--arch/blackfin/kernel/time-ts.c2
-rw-r--r--arch/cris/arch-v32/kernel/irq.c4
-rw-r--r--arch/cris/arch-v32/kernel/smp.c4
-rw-r--r--arch/cris/include/asm/smp.h1
-rw-r--r--arch/ia64/hp/sim/hpsim_irq.c2
-rw-r--r--arch/ia64/include/asm/kvm.h6
-rw-r--r--arch/ia64/include/asm/kvm_host.h196
-rw-r--r--arch/ia64/include/asm/smp.h1
-rw-r--r--arch/ia64/include/asm/topology.h2
-rw-r--r--arch/ia64/kernel/iosapic.c12
-rw-r--r--arch/ia64/kernel/irq.c9
-rw-r--r--arch/ia64/kernel/msi_ia64.c12
-rw-r--r--arch/ia64/kernel/smpboot.c10
-rw-r--r--arch/ia64/kernel/time.c18
-rw-r--r--arch/ia64/kernel/topology.c2
-rw-r--r--arch/ia64/kvm/Makefile2
-rw-r--r--arch/ia64/kvm/asm-offsets.c11
-rw-r--r--arch/ia64/kvm/kvm-ia64.c107
-rw-r--r--arch/ia64/kvm/kvm_lib.c15
-rw-r--r--arch/ia64/kvm/kvm_minstate.h4
-rw-r--r--arch/ia64/kvm/misc.h3
-rw-r--r--arch/ia64/kvm/mmio.c38
-rw-r--r--arch/ia64/kvm/process.c29
-rw-r--r--arch/ia64/kvm/vcpu.c76
-rw-r--r--arch/ia64/kvm/vcpu.h5
-rw-r--r--arch/ia64/kvm/vmm.c29
-rw-r--r--arch/ia64/kvm/vmm_ivt.S1469
-rw-r--r--arch/ia64/kvm/vtlb.c4
-rw-r--r--arch/ia64/sn/kernel/irq.c6
-rw-r--r--arch/ia64/sn/kernel/msi_sn.c7
-rw-r--r--arch/m32r/Kconfig1
-rw-r--r--arch/m32r/kernel/smpboot.c6
-rw-r--r--arch/m68k/Kconfig1
-rw-r--r--arch/m68knommu/platform/coldfire/pit.c2
-rw-r--r--arch/mips/include/asm/irq.h3
-rw-r--r--arch/mips/include/asm/mach-ip27/topology.h1
-rw-r--r--arch/mips/include/asm/smp.h3
-rw-r--r--arch/mips/jazz/irq.c2
-rw-r--r--arch/mips/kernel/cevt-bcm1480.c4
-rw-r--r--arch/mips/kernel/cevt-ds1287.c2
-rw-r--r--arch/mips/kernel/cevt-gt641xx.c2
-rw-r--r--arch/mips/kernel/cevt-r4k.c2
-rw-r--r--arch/mips/kernel/cevt-sb1250.c4
-rw-r--r--arch/mips/kernel/cevt-smtc.c2
-rw-r--r--arch/mips/kernel/cevt-txx9.c2
-rw-r--r--arch/mips/kernel/i8253.c2
-rw-r--r--arch/mips/kernel/irq-gic.c6
-rw-r--r--arch/mips/kernel/smp-cmp.c6
-rw-r--r--arch/mips/kernel/smp-mt.c2
-rw-r--r--arch/mips/kernel/smp.c7
-rw-r--r--arch/mips/kernel/smtc.c6
-rw-r--r--arch/mips/mti-malta/malta-smtc.c6
-rw-r--r--arch/mips/nxp/pnx8550/common/time.c1
-rw-r--r--arch/mips/pmc-sierra/yosemite/smp.c6
-rw-r--r--arch/mips/sgi-ip27/ip27-smp.c2
-rw-r--r--arch/mips/sgi-ip27/ip27-timer.c2
-rw-r--r--arch/mips/sibyte/bcm1480/irq.c8
-rw-r--r--arch/mips/sibyte/bcm1480/smp.c8
-rw-r--r--arch/mips/sibyte/sb1250/irq.c8
-rw-r--r--arch/mips/sibyte/sb1250/smp.c8
-rw-r--r--arch/mips/sni/time.c2
-rw-r--r--arch/parisc/Kconfig1
-rw-r--r--arch/parisc/kernel/irq.c6
-rw-r--r--arch/parisc/kernel/smp.c15
-rw-r--r--arch/powerpc/include/asm/disassemble.h80
-rw-r--r--arch/powerpc/include/asm/kvm_44x.h61
-rw-r--r--arch/powerpc/include/asm/kvm_host.h116
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h83
-rw-r--r--arch/powerpc/include/asm/mmu-44x.h1
-rw-r--r--arch/powerpc/include/asm/topology.h1
-rw-r--r--arch/powerpc/kernel/asm-offsets.c21
-rw-r--r--arch/powerpc/kernel/irq.c2
-rw-r--r--arch/powerpc/kernel/process.c1
-rw-r--r--arch/powerpc/kernel/smp.c4
-rw-r--r--arch/powerpc/kernel/time.c20
-rw-r--r--arch/powerpc/kvm/44x.c228
-rw-r--r--arch/powerpc/kvm/44x_emulate.c371
-rw-r--r--arch/powerpc/kvm/44x_tlb.c463
-rw-r--r--arch/powerpc/kvm/44x_tlb.h26
-rw-r--r--arch/powerpc/kvm/Kconfig28
-rw-r--r--arch/powerpc/kvm/Makefile12
-rw-r--r--arch/powerpc/kvm/booke.c (renamed from arch/powerpc/kvm/booke_guest.c)418
-rw-r--r--arch/powerpc/kvm/booke.h60
-rw-r--r--arch/powerpc/kvm/booke_host.c83
-rw-r--r--arch/powerpc/kvm/booke_interrupts.S72
-rw-r--r--arch/powerpc/kvm/emulate.c447
-rw-r--r--arch/powerpc/kvm/powerpc.c130
-rw-r--r--arch/powerpc/kvm/timing.c239
-rw-r--r--arch/powerpc/kvm/timing.h102
-rw-r--r--arch/powerpc/platforms/pseries/xics.c4
-rw-r--r--arch/powerpc/sysdev/mpic.c4
-rw-r--r--arch/powerpc/sysdev/mpic.h2
-rw-r--r--arch/s390/Kconfig1
-rw-r--r--arch/s390/include/asm/cpu.h7
-rw-r--r--arch/s390/include/asm/cputime.h42
-rw-r--r--arch/s390/include/asm/lowcore.h49
-rw-r--r--arch/s390/include/asm/system.h4
-rw-r--r--arch/s390/include/asm/thread_info.h2
-rw-r--r--arch/s390/include/asm/timer.h16
-rw-r--r--arch/s390/include/asm/vdso.h15
-rw-r--r--arch/s390/kernel/asm-offsets.c5
-rw-r--r--arch/s390/kernel/entry.S5
-rw-r--r--arch/s390/kernel/entry64.S50
-rw-r--r--arch/s390/kernel/head64.S2
-rw-r--r--arch/s390/kernel/process.c43
-rw-r--r--arch/s390/kernel/s390_ext.c2
-rw-r--r--arch/s390/kernel/setup.c2
-rw-r--r--arch/s390/kernel/smp.c40
-rw-r--r--arch/s390/kernel/time.c2
-rw-r--r--arch/s390/kernel/vdso.c123
-rw-r--r--arch/s390/kernel/vdso64/clock_getres.S5
-rw-r--r--arch/s390/kernel/vdso64/clock_gettime.S39
-rw-r--r--arch/s390/kernel/vtime.c486
-rw-r--r--arch/s390/kvm/kvm-s390.c41
-rw-r--r--arch/sh/include/asm/smp.h2
-rw-r--r--arch/sh/include/asm/topology.h1
-rw-r--r--arch/sh/kernel/smp.c10
-rw-r--r--arch/sh/kernel/timers/timer-broadcast.c2
-rw-r--r--arch/sh/kernel/timers/timer-tmu.c2
-rw-r--r--arch/sparc/include/asm/smp_32.h2
-rw-r--r--arch/sparc/kernel/irq_64.c11
-rw-r--r--arch/sparc/kernel/of_device_64.c2
-rw-r--r--arch/sparc/kernel/pci_msi.c2
-rw-r--r--arch/sparc/kernel/smp_32.c6
-rw-r--r--arch/sparc/kernel/smp_64.c4
-rw-r--r--arch/sparc/kernel/sparc_ksyms_32.c4
-rw-r--r--arch/sparc/kernel/time_64.c2
-rw-r--r--arch/um/kernel/smp.c7
-rw-r--r--arch/um/kernel/time.c2
-rw-r--r--arch/x86/Kconfig13
-rw-r--r--arch/x86/ia32/ia32_signal.c3
-rw-r--r--arch/x86/ia32/ipc32.c1
-rw-r--r--arch/x86/ia32/sys_ia32.c2
-rw-r--r--arch/x86/include/asm/apic.h3
-rw-r--r--arch/x86/include/asm/bigsmp/apic.h32
-rw-r--r--arch/x86/include/asm/bigsmp/ipi.h13
-rw-r--r--arch/x86/include/asm/desc.h10
-rw-r--r--arch/x86/include/asm/efi.h1
-rw-r--r--arch/x86/include/asm/es7000/apic.h82
-rw-r--r--arch/x86/include/asm/es7000/ipi.h12
-rw-r--r--arch/x86/include/asm/genapic_32.h13
-rw-r--r--arch/x86/include/asm/genapic_64.h14
-rw-r--r--arch/x86/include/asm/ipi.h23
-rw-r--r--arch/x86/include/asm/irq.h3
-rw-r--r--arch/x86/include/asm/kvm_host.h45
-rw-r--r--arch/x86/include/asm/kvm_x86_emulate.h11
-rw-r--r--arch/x86/include/asm/mach-default/mach_apic.h28
-rw-r--r--arch/x86/include/asm/mach-default/mach_ipi.h18
-rw-r--r--arch/x86/include/asm/mach-generic/mach_apic.h1
-rw-r--r--arch/x86/include/asm/mpspec.h2
-rw-r--r--arch/x86/include/asm/mtrr.h25
-rw-r--r--arch/x86/include/asm/numaq/apic.h12
-rw-r--r--arch/x86/include/asm/numaq/ipi.h13
-rw-r--r--arch/x86/include/asm/pci_x86.h (renamed from arch/x86/pci/pci.h)17
-rw-r--r--arch/x86/include/asm/smp.h6
-rw-r--r--arch/x86/include/asm/summit/apic.h55
-rw-r--r--arch/x86/include/asm/summit/ipi.h9
-rw-r--r--arch/x86/include/asm/svm.h (renamed from arch/x86/kvm/svm.h)0
-rw-r--r--arch/x86/include/asm/sys_ia32.h101
-rw-r--r--arch/x86/include/asm/topology.h2
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h46
-rw-r--r--arch/x86/include/asm/virtext.h132
-rw-r--r--arch/x86/include/asm/vmx.h (renamed from arch/x86/kvm/vmx.h)27
-rw-r--r--arch/x86/kernel/amd_iommu.c2
-rw-r--r--arch/x86/kernel/amd_iommu_init.c4
-rw-r--r--arch/x86/kernel/apic.c42
-rw-r--r--arch/x86/kernel/bios_uv.c2
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c45
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd_64.c108
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c12
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c10
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.h18
-rw-r--r--arch/x86/kernel/cpuid.c6
-rw-r--r--arch/x86/kernel/crash.c18
-rw-r--r--arch/x86/kernel/early_printk.c2
-rw-r--r--arch/x86/kernel/genapic_flat_64.c107
-rw-r--r--arch/x86/kernel/genx2apic_cluster.c81
-rw-r--r--arch/x86/kernel/genx2apic_phys.c78
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c61
-rw-r--r--arch/x86/kernel/head64.c2
-rw-r--r--arch/x86/kernel/hpet.c8
-rw-r--r--arch/x86/kernel/i8253.c2
-rw-r--r--arch/x86/kernel/io_apic.c372
-rw-r--r--arch/x86/kernel/ipi.c28
-rw-r--r--arch/x86/kernel/irq.c3
-rw-r--r--arch/x86/kernel/irq_32.c13
-rw-r--r--arch/x86/kernel/irq_64.c15
-rw-r--r--arch/x86/kernel/irqinit_32.c16
-rw-r--r--arch/x86/kernel/irqinit_64.c13
-rw-r--r--arch/x86/kernel/kvmclock.c10
-rw-r--r--arch/x86/kernel/ldt.c4
-rw-r--r--arch/x86/kernel/mfgpt_32.c2
-rw-r--r--arch/x86/kernel/mmconf-fam10h_64.c3
-rw-r--r--arch/x86/kernel/mpparse.c10
-rw-r--r--arch/x86/kernel/nmi.c3
-rw-r--r--arch/x86/kernel/pci-gart_64.c2
-rw-r--r--arch/x86/kernel/reboot.c69
-rw-r--r--arch/x86/kernel/setup_percpu.c19
-rw-r--r--arch/x86/kernel/smp.c8
-rw-r--r--arch/x86/kernel/smpboot.c33
-rw-r--r--arch/x86/kernel/tlb_32.c2
-rw-r--r--arch/x86/kernel/tlb_64.c2
-rw-r--r--arch/x86/kernel/tlb_uv.c9
-rw-r--r--arch/x86/kernel/traps.c45
-rw-r--r--arch/x86/kernel/vmiclock_32.c2
-rw-r--r--arch/x86/kernel/xsave.c2
-rw-r--r--arch/x86/kvm/i8254.c19
-rw-r--r--arch/x86/kvm/i8259.c52
-rw-r--r--arch/x86/kvm/irq.h6
-rw-r--r--arch/x86/kvm/kvm_svm.h2
-rw-r--r--arch/x86/kvm/lapic.c58
-rw-r--r--arch/x86/kvm/mmu.c444
-rw-r--r--arch/x86/kvm/paging_tmpl.h44
-rw-r--r--arch/x86/kvm/svm.c48
-rw-r--r--arch/x86/kvm/vmx.c350
-rw-r--r--arch/x86/kvm/x86.c117
-rw-r--r--arch/x86/kvm/x86_emulate.c297
-rw-r--r--arch/x86/lguest/boot.c2
-rw-r--r--arch/x86/mach-default/setup.c15
-rw-r--r--arch/x86/mach-generic/bigsmp.c5
-rw-r--r--arch/x86/mach-generic/es7000.c5
-rw-r--r--arch/x86/mach-generic/numaq.c5
-rw-r--r--arch/x86/mach-generic/summit.c5
-rw-r--r--arch/x86/mach-voyager/voyager_smp.c9
-rw-r--r--arch/x86/mm/init_32.c8
-rw-r--r--arch/x86/mm/numa_64.c4
-rw-r--r--arch/x86/mm/srat_64.c2
-rw-r--r--arch/x86/pci/acpi.c2
-rw-r--r--arch/x86/pci/amd_bus.c2
-rw-r--r--arch/x86/pci/common.c3
-rw-r--r--arch/x86/pci/direct.c2
-rw-r--r--arch/x86/pci/early.c2
-rw-r--r--arch/x86/pci/fixup.c3
-rw-r--r--arch/x86/pci/i386.c2
-rw-r--r--arch/x86/pci/init.c2
-rw-r--r--arch/x86/pci/irq.c3
-rw-r--r--arch/x86/pci/legacy.c2
-rw-r--r--arch/x86/pci/mmconfig-shared.c3
-rw-r--r--arch/x86/pci/mmconfig_32.c2
-rw-r--r--arch/x86/pci/mmconfig_64.c3
-rw-r--r--arch/x86/pci/numaq_32.c2
-rw-r--r--arch/x86/pci/olpc.c2
-rw-r--r--arch/x86/pci/pcbios.c5
-rw-r--r--arch/x86/pci/visws.c3
-rw-r--r--arch/x86/xen/mmu.c20
-rw-r--r--arch/x86/xen/smp.c27
-rw-r--r--arch/x86/xen/suspend.c3
-rw-r--r--arch/x86/xen/time.c12
-rw-r--r--arch/x86/xen/xen-ops.h2
279 files changed, 6327 insertions, 3825 deletions
diff --git a/arch/alpha/include/asm/smp.h b/arch/alpha/include/asm/smp.h
index 544c69af8168..547e90951cec 100644
--- a/arch/alpha/include/asm/smp.h
+++ b/arch/alpha/include/asm/smp.h
@@ -45,7 +45,6 @@ extern struct cpuinfo_alpha cpu_data[NR_CPUS];
45#define raw_smp_processor_id() (current_thread_info()->cpu) 45#define raw_smp_processor_id() (current_thread_info()->cpu)
46 46
47extern int smp_num_cpus; 47extern int smp_num_cpus;
48#define cpu_possible_map cpu_present_map
49 48
50extern void arch_send_call_function_single_ipi(int cpu); 49extern void arch_send_call_function_single_ipi(int cpu);
51extern void arch_send_call_function_ipi(cpumask_t mask); 50extern void arch_send_call_function_ipi(cpumask_t mask);
diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile
index ac706c1d7ada..b4697759a123 100644
--- a/arch/alpha/kernel/Makefile
+++ b/arch/alpha/kernel/Makefile
@@ -8,7 +8,7 @@ EXTRA_CFLAGS := -Werror -Wno-sign-compare
8 8
9obj-y := entry.o traps.o process.o init_task.o osf_sys.o irq.o \ 9obj-y := entry.o traps.o process.o init_task.o osf_sys.o irq.o \
10 irq_alpha.o signal.o setup.o ptrace.o time.o \ 10 irq_alpha.o signal.o setup.o ptrace.o time.o \
11 alpha_ksyms.o systbls.o err_common.o io.o 11 alpha_ksyms.o systbls.o err_common.o io.o binfmt_loader.o
12 12
13obj-$(CONFIG_VGA_HOSE) += console.o 13obj-$(CONFIG_VGA_HOSE) += console.o
14obj-$(CONFIG_SMP) += smp.o 14obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/alpha/kernel/binfmt_loader.c b/arch/alpha/kernel/binfmt_loader.c
new file mode 100644
index 000000000000..4a0af906b00a
--- /dev/null
+++ b/arch/alpha/kernel/binfmt_loader.c
@@ -0,0 +1,51 @@
1#include <linux/init.h>
2#include <linux/fs.h>
3#include <linux/file.h>
4#include <linux/mm_types.h>
5#include <linux/binfmts.h>
6#include <linux/a.out.h>
7
8static int load_binary(struct linux_binprm *bprm, struct pt_regs *regs)
9{
10 struct exec *eh = (struct exec *)bprm->buf;
11 unsigned long loader;
12 struct file *file;
13 int retval;
14
15 if (eh->fh.f_magic != 0x183 || (eh->fh.f_flags & 0x3000) != 0x3000)
16 return -ENOEXEC;
17
18 if (bprm->loader)
19 return -ENOEXEC;
20
21 allow_write_access(bprm->file);
22 fput(bprm->file);
23 bprm->file = NULL;
24
25 loader = bprm->vma->vm_end - sizeof(void *);
26
27 file = open_exec("/sbin/loader");
28 retval = PTR_ERR(file);
29 if (IS_ERR(file))
30 return retval;
31
32 /* Remember if the application is TASO. */
33 bprm->taso = eh->ah.entry < 0x100000000UL;
34
35 bprm->file = file;
36 bprm->loader = loader;
37 retval = prepare_binprm(bprm);
38 if (retval < 0)
39 return retval;
40 return search_binary_handler(bprm,regs);
41}
42
43static struct linux_binfmt loader_format = {
44 .load_binary = load_binary,
45};
46
47static int __init init_loader_binfmt(void)
48{
49 return register_binfmt(&loader_format);
50}
51arch_initcall(init_loader_binfmt);
diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c
index c626a821cdcb..d0f1620007f7 100644
--- a/arch/alpha/kernel/irq.c
+++ b/arch/alpha/kernel/irq.c
@@ -55,7 +55,7 @@ int irq_select_affinity(unsigned int irq)
55 last_cpu = cpu; 55 last_cpu = cpu;
56 56
57 irq_desc[irq].affinity = cpumask_of_cpu(cpu); 57 irq_desc[irq].affinity = cpumask_of_cpu(cpu);
58 irq_desc[irq].chip->set_affinity(irq, cpumask_of_cpu(cpu)); 58 irq_desc[irq].chip->set_affinity(irq, cpumask_of(cpu));
59 return 0; 59 return 0;
60} 60}
61#endif /* CONFIG_SMP */ 61#endif /* CONFIG_SMP */
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 351407e07e71..f238370c907d 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -94,6 +94,7 @@ common_shutdown_1(void *generic_ptr)
94 flags |= 0x00040000UL; /* "remain halted" */ 94 flags |= 0x00040000UL; /* "remain halted" */
95 *pflags = flags; 95 *pflags = flags;
96 cpu_clear(cpuid, cpu_present_map); 96 cpu_clear(cpuid, cpu_present_map);
97 cpu_clear(cpuid, cpu_possible_map);
97 halt(); 98 halt();
98 } 99 }
99#endif 100#endif
@@ -120,6 +121,7 @@ common_shutdown_1(void *generic_ptr)
120#ifdef CONFIG_SMP 121#ifdef CONFIG_SMP
121 /* Wait for the secondaries to halt. */ 122 /* Wait for the secondaries to halt. */
122 cpu_clear(boot_cpuid, cpu_present_map); 123 cpu_clear(boot_cpuid, cpu_present_map);
124 cpu_clear(boot_cpuid, cpu_possible_map);
123 while (cpus_weight(cpu_present_map)) 125 while (cpus_weight(cpu_present_map))
124 barrier(); 126 barrier();
125#endif 127#endif
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index cf7da10097bb..d953e510f68d 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -70,11 +70,6 @@ enum ipi_message_type {
70/* Set to a secondary's cpuid when it comes online. */ 70/* Set to a secondary's cpuid when it comes online. */
71static int smp_secondary_alive __devinitdata = 0; 71static int smp_secondary_alive __devinitdata = 0;
72 72
73/* Which cpus ids came online. */
74cpumask_t cpu_online_map;
75
76EXPORT_SYMBOL(cpu_online_map);
77
78int smp_num_probed; /* Internal processor count */ 73int smp_num_probed; /* Internal processor count */
79int smp_num_cpus = 1; /* Number that came online. */ 74int smp_num_cpus = 1; /* Number that came online. */
80EXPORT_SYMBOL(smp_num_cpus); 75EXPORT_SYMBOL(smp_num_cpus);
@@ -440,6 +435,7 @@ setup_smp(void)
440 ((char *)cpubase + i*hwrpb->processor_size); 435 ((char *)cpubase + i*hwrpb->processor_size);
441 if ((cpu->flags & 0x1cc) == 0x1cc) { 436 if ((cpu->flags & 0x1cc) == 0x1cc) {
442 smp_num_probed++; 437 smp_num_probed++;
438 cpu_set(i, cpu_possible_map);
443 cpu_set(i, cpu_present_map); 439 cpu_set(i, cpu_present_map);
444 cpu->pal_revision = boot_cpu_palrev; 440 cpu->pal_revision = boot_cpu_palrev;
445 } 441 }
@@ -473,6 +469,7 @@ smp_prepare_cpus(unsigned int max_cpus)
473 469
474 /* Nothing to do on a UP box, or when told not to. */ 470 /* Nothing to do on a UP box, or when told not to. */
475 if (smp_num_probed == 1 || max_cpus == 0) { 471 if (smp_num_probed == 1 || max_cpus == 0) {
472 cpu_possible_map = cpumask_of_cpu(boot_cpuid);
476 cpu_present_map = cpumask_of_cpu(boot_cpuid); 473 cpu_present_map = cpumask_of_cpu(boot_cpuid);
477 printk(KERN_INFO "SMP mode deactivated.\n"); 474 printk(KERN_INFO "SMP mode deactivated.\n");
478 return; 475 return;
diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c
index c71b0fd7a61f..ab44c164d9d4 100644
--- a/arch/alpha/kernel/sys_dp264.c
+++ b/arch/alpha/kernel/sys_dp264.c
@@ -177,19 +177,19 @@ cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
177} 177}
178 178
179static void 179static void
180dp264_set_affinity(unsigned int irq, cpumask_t affinity) 180dp264_set_affinity(unsigned int irq, const struct cpumask *affinity)
181{ 181{
182 spin_lock(&dp264_irq_lock); 182 spin_lock(&dp264_irq_lock);
183 cpu_set_irq_affinity(irq, affinity); 183 cpu_set_irq_affinity(irq, *affinity);
184 tsunami_update_irq_hw(cached_irq_mask); 184 tsunami_update_irq_hw(cached_irq_mask);
185 spin_unlock(&dp264_irq_lock); 185 spin_unlock(&dp264_irq_lock);
186} 186}
187 187
188static void 188static void
189clipper_set_affinity(unsigned int irq, cpumask_t affinity) 189clipper_set_affinity(unsigned int irq, const struct cpumask *affinity)
190{ 190{
191 spin_lock(&dp264_irq_lock); 191 spin_lock(&dp264_irq_lock);
192 cpu_set_irq_affinity(irq - 16, affinity); 192 cpu_set_irq_affinity(irq - 16, *affinity);
193 tsunami_update_irq_hw(cached_irq_mask); 193 tsunami_update_irq_hw(cached_irq_mask);
194 spin_unlock(&dp264_irq_lock); 194 spin_unlock(&dp264_irq_lock);
195} 195}
diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c
index 52c91ccc1648..27f840a4ad3d 100644
--- a/arch/alpha/kernel/sys_titan.c
+++ b/arch/alpha/kernel/sys_titan.c
@@ -158,10 +158,10 @@ titan_cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
158} 158}
159 159
160static void 160static void
161titan_set_irq_affinity(unsigned int irq, cpumask_t affinity) 161titan_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
162{ 162{
163 spin_lock(&titan_irq_lock); 163 spin_lock(&titan_irq_lock);
164 titan_cpu_set_irq_affinity(irq - 16, affinity); 164 titan_cpu_set_irq_affinity(irq - 16, *affinity);
165 titan_update_irq_hw(titan_cached_irq_mask); 165 titan_update_irq_hw(titan_cached_irq_mask);
166 spin_unlock(&titan_irq_lock); 166 spin_unlock(&titan_irq_lock);
167} 167}
diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c
index 7fc9860a97d7..c6884ba1d5ed 100644
--- a/arch/arm/common/gic.c
+++ b/arch/arm/common/gic.c
@@ -109,11 +109,11 @@ static void gic_unmask_irq(unsigned int irq)
109} 109}
110 110
111#ifdef CONFIG_SMP 111#ifdef CONFIG_SMP
112static void gic_set_cpu(unsigned int irq, cpumask_t mask_val) 112static void gic_set_cpu(unsigned int irq, const struct cpumask *mask_val)
113{ 113{
114 void __iomem *reg = gic_dist_base(irq) + GIC_DIST_TARGET + (gic_irq(irq) & ~3); 114 void __iomem *reg = gic_dist_base(irq) + GIC_DIST_TARGET + (gic_irq(irq) & ~3);
115 unsigned int shift = (irq % 4) * 8; 115 unsigned int shift = (irq % 4) * 8;
116 unsigned int cpu = first_cpu(mask_val); 116 unsigned int cpu = cpumask_first(mask_val);
117 u32 val; 117 u32 val;
118 118
119 spin_lock(&irq_controller_lock); 119 spin_lock(&irq_controller_lock);
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 2f3eb795fa6e..7141cee1fab7 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -174,7 +174,7 @@ static void route_irq(struct irq_desc *desc, unsigned int irq, unsigned int cpu)
174 pr_debug("IRQ%u: moving from cpu%u to cpu%u\n", irq, desc->cpu, cpu); 174 pr_debug("IRQ%u: moving from cpu%u to cpu%u\n", irq, desc->cpu, cpu);
175 175
176 spin_lock_irq(&desc->lock); 176 spin_lock_irq(&desc->lock);
177 desc->chip->set_affinity(irq, cpumask_of_cpu(cpu)); 177 desc->chip->set_affinity(irq, cpumask_of(cpu));
178 spin_unlock_irq(&desc->lock); 178 spin_unlock_irq(&desc->lock);
179} 179}
180 180
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 019237d21622..55fa7ff96a3e 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -34,16 +34,6 @@
34#include <asm/ptrace.h> 34#include <asm/ptrace.h>
35 35
36/* 36/*
37 * bitmask of present and online CPUs.
38 * The present bitmask indicates that the CPU is physically present.
39 * The online bitmask indicates that the CPU is up and running.
40 */
41cpumask_t cpu_possible_map;
42EXPORT_SYMBOL(cpu_possible_map);
43cpumask_t cpu_online_map;
44EXPORT_SYMBOL(cpu_online_map);
45
46/*
47 * as from 2.5, kernels no longer have an init_tasks structure 37 * as from 2.5, kernels no longer have an init_tasks structure
48 * so we need some other way of telling a new secondary core 38 * so we need some other way of telling a new secondary core
49 * where to place its SVC stack 39 * where to place its SVC stack
diff --git a/arch/arm/mach-at91/at91rm9200_time.c b/arch/arm/mach-at91/at91rm9200_time.c
index d140eae53ded..1ff1bda0a894 100644
--- a/arch/arm/mach-at91/at91rm9200_time.c
+++ b/arch/arm/mach-at91/at91rm9200_time.c
@@ -178,7 +178,6 @@ static struct clock_event_device clkevt = {
178 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, 178 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
179 .shift = 32, 179 .shift = 32,
180 .rating = 150, 180 .rating = 150,
181 .cpumask = CPU_MASK_CPU0,
182 .set_next_event = clkevt32k_next_event, 181 .set_next_event = clkevt32k_next_event,
183 .set_mode = clkevt32k_mode, 182 .set_mode = clkevt32k_mode,
184}; 183};
@@ -206,7 +205,7 @@ void __init at91rm9200_timer_init(void)
206 clkevt.mult = div_sc(AT91_SLOW_CLOCK, NSEC_PER_SEC, clkevt.shift); 205 clkevt.mult = div_sc(AT91_SLOW_CLOCK, NSEC_PER_SEC, clkevt.shift);
207 clkevt.max_delta_ns = clockevent_delta2ns(AT91_ST_ALMV, &clkevt); 206 clkevt.max_delta_ns = clockevent_delta2ns(AT91_ST_ALMV, &clkevt);
208 clkevt.min_delta_ns = clockevent_delta2ns(2, &clkevt) + 1; 207 clkevt.min_delta_ns = clockevent_delta2ns(2, &clkevt) + 1;
209 clkevt.cpumask = cpumask_of_cpu(0); 208 clkevt.cpumask = cpumask_of(0);
210 clockevents_register_device(&clkevt); 209 clockevents_register_device(&clkevt);
211 210
212 /* register clocksource */ 211 /* register clocksource */
diff --git a/arch/arm/mach-at91/at91sam926x_time.c b/arch/arm/mach-at91/at91sam926x_time.c
index 122fd77ed580..b63e1d5f1bad 100644
--- a/arch/arm/mach-at91/at91sam926x_time.c
+++ b/arch/arm/mach-at91/at91sam926x_time.c
@@ -91,7 +91,6 @@ static struct clock_event_device pit_clkevt = {
91 .features = CLOCK_EVT_FEAT_PERIODIC, 91 .features = CLOCK_EVT_FEAT_PERIODIC,
92 .shift = 32, 92 .shift = 32,
93 .rating = 100, 93 .rating = 100,
94 .cpumask = CPU_MASK_CPU0,
95 .set_mode = pit_clkevt_mode, 94 .set_mode = pit_clkevt_mode,
96}; 95};
97 96
@@ -173,6 +172,7 @@ static void __init at91sam926x_pit_init(void)
173 172
174 /* Set up and register clockevents */ 173 /* Set up and register clockevents */
175 pit_clkevt.mult = div_sc(pit_rate, NSEC_PER_SEC, pit_clkevt.shift); 174 pit_clkevt.mult = div_sc(pit_rate, NSEC_PER_SEC, pit_clkevt.shift);
175 pit_clkevt.cpumask = cpumask_of(0);
176 clockevents_register_device(&pit_clkevt); 176 clockevents_register_device(&pit_clkevt);
177} 177}
178 178
diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c
index 3b9a296b5c4b..f8bcd29d17a6 100644
--- a/arch/arm/mach-davinci/time.c
+++ b/arch/arm/mach-davinci/time.c
@@ -322,7 +322,7 @@ static void __init davinci_timer_init(void)
322 clockevent_davinci.min_delta_ns = 322 clockevent_davinci.min_delta_ns =
323 clockevent_delta2ns(1, &clockevent_davinci); 323 clockevent_delta2ns(1, &clockevent_davinci);
324 324
325 clockevent_davinci.cpumask = cpumask_of_cpu(0); 325 clockevent_davinci.cpumask = cpumask_of(0);
326 clockevents_register_device(&clockevent_davinci); 326 clockevents_register_device(&clockevent_davinci);
327} 327}
328 328
diff --git a/arch/arm/mach-imx/time.c b/arch/arm/mach-imx/time.c
index a11765f5f23b..aff0ebcfa847 100644
--- a/arch/arm/mach-imx/time.c
+++ b/arch/arm/mach-imx/time.c
@@ -184,7 +184,7 @@ static int __init imx_clockevent_init(unsigned long rate)
184 clockevent_imx.min_delta_ns = 184 clockevent_imx.min_delta_ns =
185 clockevent_delta2ns(0xf, &clockevent_imx); 185 clockevent_delta2ns(0xf, &clockevent_imx);
186 186
187 clockevent_imx.cpumask = cpumask_of_cpu(0); 187 clockevent_imx.cpumask = cpumask_of(0);
188 188
189 clockevents_register_device(&clockevent_imx); 189 clockevents_register_device(&clockevent_imx);
190 190
diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c
index 7766f469456b..f4656d2ac8a8 100644
--- a/arch/arm/mach-ixp4xx/common.c
+++ b/arch/arm/mach-ixp4xx/common.c
@@ -487,7 +487,7 @@ static int __init ixp4xx_clockevent_init(void)
487 clockevent_delta2ns(0xfffffffe, &clockevent_ixp4xx); 487 clockevent_delta2ns(0xfffffffe, &clockevent_ixp4xx);
488 clockevent_ixp4xx.min_delta_ns = 488 clockevent_ixp4xx.min_delta_ns =
489 clockevent_delta2ns(0xf, &clockevent_ixp4xx); 489 clockevent_delta2ns(0xf, &clockevent_ixp4xx);
490 clockevent_ixp4xx.cpumask = cpumask_of_cpu(0); 490 clockevent_ixp4xx.cpumask = cpumask_of(0);
491 491
492 clockevents_register_device(&clockevent_ixp4xx); 492 clockevents_register_device(&clockevent_ixp4xx);
493 return 0; 493 return 0;
diff --git a/arch/arm/mach-msm/timer.c b/arch/arm/mach-msm/timer.c
index 345a14cb73c3..444d9c0f5ca6 100644
--- a/arch/arm/mach-msm/timer.c
+++ b/arch/arm/mach-msm/timer.c
@@ -182,7 +182,7 @@ static void __init msm_timer_init(void)
182 clockevent_delta2ns(0xf0000000 >> clock->shift, ce); 182 clockevent_delta2ns(0xf0000000 >> clock->shift, ce);
183 /* 4 gets rounded down to 3 */ 183 /* 4 gets rounded down to 3 */
184 ce->min_delta_ns = clockevent_delta2ns(4, ce); 184 ce->min_delta_ns = clockevent_delta2ns(4, ce);
185 ce->cpumask = cpumask_of_cpu(0); 185 ce->cpumask = cpumask_of(0);
186 186
187 cs->mult = clocksource_hz2mult(clock->freq, cs->shift); 187 cs->mult = clocksource_hz2mult(clock->freq, cs->shift);
188 res = clocksource_register(cs); 188 res = clocksource_register(cs);
diff --git a/arch/arm/mach-ns9xxx/time-ns9360.c b/arch/arm/mach-ns9xxx/time-ns9360.c
index a63424d083d9..41df69721769 100644
--- a/arch/arm/mach-ns9xxx/time-ns9360.c
+++ b/arch/arm/mach-ns9xxx/time-ns9360.c
@@ -173,7 +173,7 @@ static void __init ns9360_timer_init(void)
173 ns9360_clockevent_device.min_delta_ns = 173 ns9360_clockevent_device.min_delta_ns =
174 clockevent_delta2ns(1, &ns9360_clockevent_device); 174 clockevent_delta2ns(1, &ns9360_clockevent_device);
175 175
176 ns9360_clockevent_device.cpumask = cpumask_of_cpu(0); 176 ns9360_clockevent_device.cpumask = cpumask_of(0);
177 clockevents_register_device(&ns9360_clockevent_device); 177 clockevents_register_device(&ns9360_clockevent_device);
178 178
179 setup_irq(IRQ_NS9360_TIMER0 + TIMER_CLOCKEVENT, 179 setup_irq(IRQ_NS9360_TIMER0 + TIMER_CLOCKEVENT,
diff --git a/arch/arm/mach-omap1/time.c b/arch/arm/mach-omap1/time.c
index 2cf7e32bd293..495a32c287b4 100644
--- a/arch/arm/mach-omap1/time.c
+++ b/arch/arm/mach-omap1/time.c
@@ -173,7 +173,7 @@ static __init void omap_init_mpu_timer(unsigned long rate)
173 clockevent_mpu_timer1.min_delta_ns = 173 clockevent_mpu_timer1.min_delta_ns =
174 clockevent_delta2ns(1, &clockevent_mpu_timer1); 174 clockevent_delta2ns(1, &clockevent_mpu_timer1);
175 175
176 clockevent_mpu_timer1.cpumask = cpumask_of_cpu(0); 176 clockevent_mpu_timer1.cpumask = cpumask_of(0);
177 clockevents_register_device(&clockevent_mpu_timer1); 177 clockevents_register_device(&clockevent_mpu_timer1);
178} 178}
179 179
diff --git a/arch/arm/mach-omap1/timer32k.c b/arch/arm/mach-omap1/timer32k.c
index 705367ece174..fd3f7396e162 100644
--- a/arch/arm/mach-omap1/timer32k.c
+++ b/arch/arm/mach-omap1/timer32k.c
@@ -187,7 +187,7 @@ static __init void omap_init_32k_timer(void)
187 clockevent_32k_timer.min_delta_ns = 187 clockevent_32k_timer.min_delta_ns =
188 clockevent_delta2ns(1, &clockevent_32k_timer); 188 clockevent_delta2ns(1, &clockevent_32k_timer);
189 189
190 clockevent_32k_timer.cpumask = cpumask_of_cpu(0); 190 clockevent_32k_timer.cpumask = cpumask_of(0);
191 clockevents_register_device(&clockevent_32k_timer); 191 clockevents_register_device(&clockevent_32k_timer);
192} 192}
193 193
diff --git a/arch/arm/mach-omap2/timer-gp.c b/arch/arm/mach-omap2/timer-gp.c
index 589393bedade..ae6036300f60 100644
--- a/arch/arm/mach-omap2/timer-gp.c
+++ b/arch/arm/mach-omap2/timer-gp.c
@@ -120,7 +120,7 @@ static void __init omap2_gp_clockevent_init(void)
120 clockevent_gpt.min_delta_ns = 120 clockevent_gpt.min_delta_ns =
121 clockevent_delta2ns(1, &clockevent_gpt); 121 clockevent_delta2ns(1, &clockevent_gpt);
122 122
123 clockevent_gpt.cpumask = cpumask_of_cpu(0); 123 clockevent_gpt.cpumask = cpumask_of(0);
124 clockevents_register_device(&clockevent_gpt); 124 clockevents_register_device(&clockevent_gpt);
125} 125}
126 126
diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c
index 001624158519..95656a72268d 100644
--- a/arch/arm/mach-pxa/time.c
+++ b/arch/arm/mach-pxa/time.c
@@ -122,7 +122,6 @@ static struct clock_event_device ckevt_pxa_osmr0 = {
122 .features = CLOCK_EVT_FEAT_ONESHOT, 122 .features = CLOCK_EVT_FEAT_ONESHOT,
123 .shift = 32, 123 .shift = 32,
124 .rating = 200, 124 .rating = 200,
125 .cpumask = CPU_MASK_CPU0,
126 .set_next_event = pxa_osmr0_set_next_event, 125 .set_next_event = pxa_osmr0_set_next_event,
127 .set_mode = pxa_osmr0_set_mode, 126 .set_mode = pxa_osmr0_set_mode,
128}; 127};
@@ -163,6 +162,7 @@ static void __init pxa_timer_init(void)
163 clockevent_delta2ns(0x7fffffff, &ckevt_pxa_osmr0); 162 clockevent_delta2ns(0x7fffffff, &ckevt_pxa_osmr0);
164 ckevt_pxa_osmr0.min_delta_ns = 163 ckevt_pxa_osmr0.min_delta_ns =
165 clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_pxa_osmr0) + 1; 164 clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_pxa_osmr0) + 1;
165 ckevt_pxa_osmr0.cpumask = cpumask_of(0);
166 166
167 cksrc_pxa_oscr0.mult = 167 cksrc_pxa_oscr0.mult =
168 clocksource_hz2mult(clock_tick_rate, cksrc_pxa_oscr0.shift); 168 clocksource_hz2mult(clock_tick_rate, cksrc_pxa_oscr0.shift);
diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
index 5f1d55963ced..bd2aa4f16141 100644
--- a/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@ -624,7 +624,7 @@ static struct clock_event_device timer0_clockevent = {
624 .set_mode = timer_set_mode, 624 .set_mode = timer_set_mode,
625 .set_next_event = timer_set_next_event, 625 .set_next_event = timer_set_next_event,
626 .rating = 300, 626 .rating = 300,
627 .cpumask = CPU_MASK_ALL, 627 .cpumask = cpu_all_mask,
628}; 628};
629 629
630static void __init realview_clockevents_init(unsigned int timer_irq) 630static void __init realview_clockevents_init(unsigned int timer_irq)
diff --git a/arch/arm/mach-realview/localtimer.c b/arch/arm/mach-realview/localtimer.c
index 9019ef2e5611..67d6d9cc68b2 100644
--- a/arch/arm/mach-realview/localtimer.c
+++ b/arch/arm/mach-realview/localtimer.c
@@ -154,7 +154,7 @@ void __cpuinit local_timer_setup(void)
154 clk->set_mode = local_timer_set_mode; 154 clk->set_mode = local_timer_set_mode;
155 clk->set_next_event = local_timer_set_next_event; 155 clk->set_next_event = local_timer_set_next_event;
156 clk->irq = IRQ_LOCALTIMER; 156 clk->irq = IRQ_LOCALTIMER;
157 clk->cpumask = cpumask_of_cpu(cpu); 157 clk->cpumask = cpumask_of(cpu);
158 clk->shift = 20; 158 clk->shift = 20;
159 clk->mult = div_sc(mpcore_timer_rate, NSEC_PER_SEC, clk->shift); 159 clk->mult = div_sc(mpcore_timer_rate, NSEC_PER_SEC, clk->shift);
160 clk->max_delta_ns = clockevent_delta2ns(0xffffffff, clk); 160 clk->max_delta_ns = clockevent_delta2ns(0xffffffff, clk);
@@ -193,7 +193,7 @@ void __cpuinit local_timer_setup(void)
193 clk->rating = 200; 193 clk->rating = 200;
194 clk->set_mode = dummy_timer_set_mode; 194 clk->set_mode = dummy_timer_set_mode;
195 clk->broadcast = smp_timer_broadcast; 195 clk->broadcast = smp_timer_broadcast;
196 clk->cpumask = cpumask_of_cpu(cpu); 196 clk->cpumask = cpumask_of(cpu);
197 197
198 clockevents_register_device(clk); 198 clockevents_register_device(clk);
199} 199}
diff --git a/arch/arm/mach-sa1100/time.c b/arch/arm/mach-sa1100/time.c
index 8c5e727f3b75..711c0295c66f 100644
--- a/arch/arm/mach-sa1100/time.c
+++ b/arch/arm/mach-sa1100/time.c
@@ -73,7 +73,6 @@ static struct clock_event_device ckevt_sa1100_osmr0 = {
73 .features = CLOCK_EVT_FEAT_ONESHOT, 73 .features = CLOCK_EVT_FEAT_ONESHOT,
74 .shift = 32, 74 .shift = 32,
75 .rating = 200, 75 .rating = 200,
76 .cpumask = CPU_MASK_CPU0,
77 .set_next_event = sa1100_osmr0_set_next_event, 76 .set_next_event = sa1100_osmr0_set_next_event,
78 .set_mode = sa1100_osmr0_set_mode, 77 .set_mode = sa1100_osmr0_set_mode,
79}; 78};
@@ -110,6 +109,7 @@ static void __init sa1100_timer_init(void)
110 clockevent_delta2ns(0x7fffffff, &ckevt_sa1100_osmr0); 109 clockevent_delta2ns(0x7fffffff, &ckevt_sa1100_osmr0);
111 ckevt_sa1100_osmr0.min_delta_ns = 110 ckevt_sa1100_osmr0.min_delta_ns =
112 clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_sa1100_osmr0) + 1; 111 clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_sa1100_osmr0) + 1;
112 ckevt_sa1100_osmr0.cpumask = cpumask_of(0);
113 113
114 cksrc_sa1100_oscr.mult = 114 cksrc_sa1100_oscr.mult =
115 clocksource_hz2mult(CLOCK_TICK_RATE, cksrc_sa1100_oscr.shift); 115 clocksource_hz2mult(CLOCK_TICK_RATE, cksrc_sa1100_oscr.shift);
diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c
index df25aa138509..1c43494f5c42 100644
--- a/arch/arm/mach-versatile/core.c
+++ b/arch/arm/mach-versatile/core.c
@@ -1005,7 +1005,7 @@ static void __init versatile_timer_init(void)
1005 timer0_clockevent.min_delta_ns = 1005 timer0_clockevent.min_delta_ns =
1006 clockevent_delta2ns(0xf, &timer0_clockevent); 1006 clockevent_delta2ns(0xf, &timer0_clockevent);
1007 1007
1008 timer0_clockevent.cpumask = cpumask_of_cpu(0); 1008 timer0_clockevent.cpumask = cpumask_of(0);
1009 clockevents_register_device(&timer0_clockevent); 1009 clockevents_register_device(&timer0_clockevent);
1010} 1010}
1011 1011
diff --git a/arch/arm/oprofile/op_model_mpcore.c b/arch/arm/oprofile/op_model_mpcore.c
index 4de366e8b4c5..6d6bd5899240 100644
--- a/arch/arm/oprofile/op_model_mpcore.c
+++ b/arch/arm/oprofile/op_model_mpcore.c
@@ -260,10 +260,10 @@ static void em_stop(void)
260static void em_route_irq(int irq, unsigned int cpu) 260static void em_route_irq(int irq, unsigned int cpu)
261{ 261{
262 struct irq_desc *desc = irq_desc + irq; 262 struct irq_desc *desc = irq_desc + irq;
263 cpumask_t mask = cpumask_of_cpu(cpu); 263 const struct cpumask *mask = cpumask_of(cpu);
264 264
265 spin_lock_irq(&desc->lock); 265 spin_lock_irq(&desc->lock);
266 desc->affinity = mask; 266 desc->affinity = *mask;
267 desc->chip->set_affinity(irq, mask); 267 desc->chip->set_affinity(irq, mask);
268 spin_unlock_irq(&desc->lock); 268 spin_unlock_irq(&desc->lock);
269} 269}
diff --git a/arch/arm/plat-mxc/time.c b/arch/arm/plat-mxc/time.c
index fd28f5194f71..758a1293bcfa 100644
--- a/arch/arm/plat-mxc/time.c
+++ b/arch/arm/plat-mxc/time.c
@@ -190,7 +190,7 @@ static int __init mxc_clockevent_init(void)
190 clockevent_mxc.min_delta_ns = 190 clockevent_mxc.min_delta_ns =
191 clockevent_delta2ns(0xff, &clockevent_mxc); 191 clockevent_delta2ns(0xff, &clockevent_mxc);
192 192
193 clockevent_mxc.cpumask = cpumask_of_cpu(0); 193 clockevent_mxc.cpumask = cpumask_of(0);
194 194
195 clockevents_register_device(&clockevent_mxc); 195 clockevents_register_device(&clockevent_mxc);
196 196
diff --git a/arch/arm/plat-orion/time.c b/arch/arm/plat-orion/time.c
index 544d6b327f3a..6fa2923e6dca 100644
--- a/arch/arm/plat-orion/time.c
+++ b/arch/arm/plat-orion/time.c
@@ -149,7 +149,6 @@ static struct clock_event_device orion_clkevt = {
149 .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC, 149 .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
150 .shift = 32, 150 .shift = 32,
151 .rating = 300, 151 .rating = 300,
152 .cpumask = CPU_MASK_CPU0,
153 .set_next_event = orion_clkevt_next_event, 152 .set_next_event = orion_clkevt_next_event,
154 .set_mode = orion_clkevt_mode, 153 .set_mode = orion_clkevt_mode,
155}; 154};
@@ -199,5 +198,6 @@ void __init orion_time_init(unsigned int irq, unsigned int tclk)
199 orion_clkevt.mult = div_sc(tclk, NSEC_PER_SEC, orion_clkevt.shift); 198 orion_clkevt.mult = div_sc(tclk, NSEC_PER_SEC, orion_clkevt.shift);
200 orion_clkevt.max_delta_ns = clockevent_delta2ns(0xfffffffe, &orion_clkevt); 199 orion_clkevt.max_delta_ns = clockevent_delta2ns(0xfffffffe, &orion_clkevt);
201 orion_clkevt.min_delta_ns = clockevent_delta2ns(1, &orion_clkevt); 200 orion_clkevt.min_delta_ns = clockevent_delta2ns(1, &orion_clkevt);
201 orion_clkevt.cpumask = cpumask_of(0);
202 clockevents_register_device(&orion_clkevt); 202 clockevents_register_device(&orion_clkevt);
203} 203}
diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c
index 283481d74a5b..0ff46bf873b0 100644
--- a/arch/avr32/kernel/time.c
+++ b/arch/avr32/kernel/time.c
@@ -106,7 +106,6 @@ static struct clock_event_device comparator = {
106 .features = CLOCK_EVT_FEAT_ONESHOT, 106 .features = CLOCK_EVT_FEAT_ONESHOT,
107 .shift = 16, 107 .shift = 16,
108 .rating = 50, 108 .rating = 50,
109 .cpumask = CPU_MASK_CPU0,
110 .set_next_event = comparator_next_event, 109 .set_next_event = comparator_next_event,
111 .set_mode = comparator_mode, 110 .set_mode = comparator_mode,
112}; 111};
@@ -134,6 +133,7 @@ void __init time_init(void)
134 comparator.mult = div_sc(counter_hz, NSEC_PER_SEC, comparator.shift); 133 comparator.mult = div_sc(counter_hz, NSEC_PER_SEC, comparator.shift);
135 comparator.max_delta_ns = clockevent_delta2ns((u32)~0, &comparator); 134 comparator.max_delta_ns = clockevent_delta2ns((u32)~0, &comparator);
136 comparator.min_delta_ns = clockevent_delta2ns(50, &comparator) + 1; 135 comparator.min_delta_ns = clockevent_delta2ns(50, &comparator) + 1;
136 comparator.cpumask = cpumask_of(0);
137 137
138 sysreg_write(COMPARE, 0); 138 sysreg_write(COMPARE, 0);
139 timer_irqaction.dev_id = &comparator; 139 timer_irqaction.dev_id = &comparator;
diff --git a/arch/blackfin/kernel/time-ts.c b/arch/blackfin/kernel/time-ts.c
index e887efc86c29..0ed2badfd746 100644
--- a/arch/blackfin/kernel/time-ts.c
+++ b/arch/blackfin/kernel/time-ts.c
@@ -162,7 +162,6 @@ static struct clock_event_device clockevent_bfin = {
162 .name = "bfin_core_timer", 162 .name = "bfin_core_timer",
163 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, 163 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
164 .shift = 32, 164 .shift = 32,
165 .cpumask = CPU_MASK_CPU0,
166 .set_next_event = bfin_timer_set_next_event, 165 .set_next_event = bfin_timer_set_next_event,
167 .set_mode = bfin_timer_set_mode, 166 .set_mode = bfin_timer_set_mode,
168}; 167};
@@ -193,6 +192,7 @@ static int __init bfin_clockevent_init(void)
193 clockevent_bfin.mult = div_sc(timer_clk, NSEC_PER_SEC, clockevent_bfin.shift); 192 clockevent_bfin.mult = div_sc(timer_clk, NSEC_PER_SEC, clockevent_bfin.shift);
194 clockevent_bfin.max_delta_ns = clockevent_delta2ns(-1, &clockevent_bfin); 193 clockevent_bfin.max_delta_ns = clockevent_delta2ns(-1, &clockevent_bfin);
195 clockevent_bfin.min_delta_ns = clockevent_delta2ns(100, &clockevent_bfin); 194 clockevent_bfin.min_delta_ns = clockevent_delta2ns(100, &clockevent_bfin);
195 clockevent_bfin.cpumask = cpumask_of(0);
196 clockevents_register_device(&clockevent_bfin); 196 clockevents_register_device(&clockevent_bfin);
197 197
198 return 0; 198 return 0;
diff --git a/arch/cris/arch-v32/kernel/irq.c b/arch/cris/arch-v32/kernel/irq.c
index 173c141ac9ba..295131fee710 100644
--- a/arch/cris/arch-v32/kernel/irq.c
+++ b/arch/cris/arch-v32/kernel/irq.c
@@ -325,11 +325,11 @@ static void end_crisv32_irq(unsigned int irq)
325{ 325{
326} 326}
327 327
328void set_affinity_crisv32_irq(unsigned int irq, cpumask_t dest) 328void set_affinity_crisv32_irq(unsigned int irq, const struct cpumask *dest)
329{ 329{
330 unsigned long flags; 330 unsigned long flags;
331 spin_lock_irqsave(&irq_lock, flags); 331 spin_lock_irqsave(&irq_lock, flags);
332 irq_allocations[irq - FIRST_IRQ].mask = dest; 332 irq_allocations[irq - FIRST_IRQ].mask = *dest;
333 spin_unlock_irqrestore(&irq_lock, flags); 333 spin_unlock_irqrestore(&irq_lock, flags);
334} 334}
335 335
diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c
index 52e16c6436f9..9dac17334640 100644
--- a/arch/cris/arch-v32/kernel/smp.c
+++ b/arch/cris/arch-v32/kernel/smp.c
@@ -29,11 +29,7 @@
29spinlock_t cris_atomic_locks[] = { [0 ... LOCK_COUNT - 1] = SPIN_LOCK_UNLOCKED}; 29spinlock_t cris_atomic_locks[] = { [0 ... LOCK_COUNT - 1] = SPIN_LOCK_UNLOCKED};
30 30
31/* CPU masks */ 31/* CPU masks */
32cpumask_t cpu_online_map = CPU_MASK_NONE;
33EXPORT_SYMBOL(cpu_online_map);
34cpumask_t phys_cpu_present_map = CPU_MASK_NONE; 32cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
35cpumask_t cpu_possible_map;
36EXPORT_SYMBOL(cpu_possible_map);
37EXPORT_SYMBOL(phys_cpu_present_map); 33EXPORT_SYMBOL(phys_cpu_present_map);
38 34
39/* Variables used during SMP boot */ 35/* Variables used during SMP boot */
diff --git a/arch/cris/include/asm/smp.h b/arch/cris/include/asm/smp.h
index dba33aba3e95..c615a06dd757 100644
--- a/arch/cris/include/asm/smp.h
+++ b/arch/cris/include/asm/smp.h
@@ -4,7 +4,6 @@
4#include <linux/cpumask.h> 4#include <linux/cpumask.h>
5 5
6extern cpumask_t phys_cpu_present_map; 6extern cpumask_t phys_cpu_present_map;
7extern cpumask_t cpu_possible_map;
8 7
9#define raw_smp_processor_id() (current_thread_info()->cpu) 8#define raw_smp_processor_id() (current_thread_info()->cpu)
10 9
diff --git a/arch/ia64/hp/sim/hpsim_irq.c b/arch/ia64/hp/sim/hpsim_irq.c
index c2f58ff364e7..cc0a3182db3c 100644
--- a/arch/ia64/hp/sim/hpsim_irq.c
+++ b/arch/ia64/hp/sim/hpsim_irq.c
@@ -22,7 +22,7 @@ hpsim_irq_noop (unsigned int irq)
22} 22}
23 23
24static void 24static void
25hpsim_set_affinity_noop (unsigned int a, cpumask_t b) 25hpsim_set_affinity_noop(unsigned int a, const struct cpumask *b)
26{ 26{
27} 27}
28 28
diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h
index f38472ac2267..68aa6da807c1 100644
--- a/arch/ia64/include/asm/kvm.h
+++ b/arch/ia64/include/asm/kvm.h
@@ -166,8 +166,6 @@ struct saved_vpd {
166}; 166};
167 167
168struct kvm_regs { 168struct kvm_regs {
169 char *saved_guest;
170 char *saved_stack;
171 struct saved_vpd vpd; 169 struct saved_vpd vpd;
172 /*Arch-regs*/ 170 /*Arch-regs*/
173 int mp_state; 171 int mp_state;
@@ -200,6 +198,10 @@ struct kvm_regs {
200 unsigned long fp_psr; /*used for lazy float register */ 198 unsigned long fp_psr; /*used for lazy float register */
201 unsigned long saved_gp; 199 unsigned long saved_gp;
202 /*for phycial emulation */ 200 /*for phycial emulation */
201
202 union context saved_guest;
203
204 unsigned long reserved[64]; /* for future use */
203}; 205};
204 206
205struct kvm_sregs { 207struct kvm_sregs {
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index c60d324da540..0560f3fae538 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -23,17 +23,6 @@
23#ifndef __ASM_KVM_HOST_H 23#ifndef __ASM_KVM_HOST_H
24#define __ASM_KVM_HOST_H 24#define __ASM_KVM_HOST_H
25 25
26
27#include <linux/types.h>
28#include <linux/mm.h>
29#include <linux/kvm.h>
30#include <linux/kvm_para.h>
31#include <linux/kvm_types.h>
32
33#include <asm/pal.h>
34#include <asm/sal.h>
35
36#define KVM_MAX_VCPUS 4
37#define KVM_MEMORY_SLOTS 32 26#define KVM_MEMORY_SLOTS 32
38/* memory slots that does not exposed to userspace */ 27/* memory slots that does not exposed to userspace */
39#define KVM_PRIVATE_MEM_SLOTS 4 28#define KVM_PRIVATE_MEM_SLOTS 4
@@ -50,70 +39,132 @@
50#define EXIT_REASON_EXTERNAL_INTERRUPT 6 39#define EXIT_REASON_EXTERNAL_INTERRUPT 6
51#define EXIT_REASON_IPI 7 40#define EXIT_REASON_IPI 7
52#define EXIT_REASON_PTC_G 8 41#define EXIT_REASON_PTC_G 8
42#define EXIT_REASON_DEBUG 20
53 43
54/*Define vmm address space and vm data space.*/ 44/*Define vmm address space and vm data space.*/
55#define KVM_VMM_SIZE (16UL<<20) 45#define KVM_VMM_SIZE (__IA64_UL_CONST(16)<<20)
56#define KVM_VMM_SHIFT 24 46#define KVM_VMM_SHIFT 24
57#define KVM_VMM_BASE 0xD000000000000000UL 47#define KVM_VMM_BASE 0xD000000000000000
58#define VMM_SIZE (8UL<<20) 48#define VMM_SIZE (__IA64_UL_CONST(8)<<20)
59 49
60/* 50/*
61 * Define vm_buffer, used by PAL Services, base address. 51 * Define vm_buffer, used by PAL Services, base address.
62 * Note: vmbuffer is in the VMM-BLOCK, the size must be < 8M 52 * Note: vm_buffer is in the VMM-BLOCK, the size must be < 8M
63 */ 53 */
64#define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE) 54#define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE)
65#define KVM_VM_BUFFER_SIZE (8UL<<20) 55#define KVM_VM_BUFFER_SIZE (__IA64_UL_CONST(8)<<20)
66 56
67/*Define Virtual machine data layout.*/ 57/*
68#define KVM_VM_DATA_SHIFT 24 58 * kvm guest's data area looks as follow:
69#define KVM_VM_DATA_SIZE (1UL << KVM_VM_DATA_SHIFT) 59 *
70#define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VMM_SIZE) 60 * +----------------------+ ------- KVM_VM_DATA_SIZE
71 61 * | vcpu[n]'s data | | ___________________KVM_STK_OFFSET
72 62 * | | | / |
73#define KVM_P2M_BASE KVM_VM_DATA_BASE 63 * | .......... | | /vcpu's struct&stack |
74#define KVM_P2M_OFS 0 64 * | .......... | | /---------------------|---- 0
75#define KVM_P2M_SIZE (8UL << 20) 65 * | vcpu[5]'s data | | / vpd |
76 66 * | vcpu[4]'s data | |/-----------------------|
77#define KVM_VHPT_BASE (KVM_P2M_BASE + KVM_P2M_SIZE) 67 * | vcpu[3]'s data | / vtlb |
78#define KVM_VHPT_OFS KVM_P2M_SIZE 68 * | vcpu[2]'s data | /|------------------------|
79#define KVM_VHPT_BLOCK_SIZE (2UL << 20) 69 * | vcpu[1]'s data |/ | vhpt |
80#define VHPT_SHIFT 18 70 * | vcpu[0]'s data |____________________________|
81#define VHPT_SIZE (1UL << VHPT_SHIFT) 71 * +----------------------+ |
82#define VHPT_NUM_ENTRIES (1<<(VHPT_SHIFT-5)) 72 * | memory dirty log | |
83 73 * +----------------------+ |
84#define KVM_VTLB_BASE (KVM_VHPT_BASE+KVM_VHPT_BLOCK_SIZE) 74 * | vm's data struct | |
85#define KVM_VTLB_OFS (KVM_VHPT_OFS+KVM_VHPT_BLOCK_SIZE) 75 * +----------------------+ |
86#define KVM_VTLB_BLOCK_SIZE (1UL<<20) 76 * | | |
87#define VTLB_SHIFT 17 77 * | | |
88#define VTLB_SIZE (1UL<<VTLB_SHIFT) 78 * | | |
89#define VTLB_NUM_ENTRIES (1<<(VTLB_SHIFT-5)) 79 * | | |
90 80 * | | |
91#define KVM_VPD_BASE (KVM_VTLB_BASE+KVM_VTLB_BLOCK_SIZE) 81 * | | |
92#define KVM_VPD_OFS (KVM_VTLB_OFS+KVM_VTLB_BLOCK_SIZE) 82 * | | |
93#define KVM_VPD_BLOCK_SIZE (2UL<<20) 83 * | vm's p2m table | |
94#define VPD_SHIFT 16 84 * | | |
95#define VPD_SIZE (1UL<<VPD_SHIFT) 85 * | | |
96 86 * | | | |
97#define KVM_VCPU_BASE (KVM_VPD_BASE+KVM_VPD_BLOCK_SIZE) 87 * vm's data->| | | |
98#define KVM_VCPU_OFS (KVM_VPD_OFS+KVM_VPD_BLOCK_SIZE) 88 * +----------------------+ ------- 0
99#define KVM_VCPU_BLOCK_SIZE (2UL<<20) 89 * To support large memory, needs to increase the size of p2m.
100#define VCPU_SHIFT 18 90 * To support more vcpus, needs to ensure it has enough space to
101#define VCPU_SIZE (1UL<<VCPU_SHIFT) 91 * hold vcpus' data.
102#define MAX_VCPU_NUM KVM_VCPU_BLOCK_SIZE/VCPU_SIZE 92 */
103 93
104#define KVM_VM_BASE (KVM_VCPU_BASE+KVM_VCPU_BLOCK_SIZE) 94#define KVM_VM_DATA_SHIFT 26
105#define KVM_VM_OFS (KVM_VCPU_OFS+KVM_VCPU_BLOCK_SIZE) 95#define KVM_VM_DATA_SIZE (__IA64_UL_CONST(1) << KVM_VM_DATA_SHIFT)
106#define KVM_VM_BLOCK_SIZE (1UL<<19) 96#define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VM_DATA_SIZE)
107 97
108#define KVM_MEM_DIRTY_LOG_BASE (KVM_VM_BASE+KVM_VM_BLOCK_SIZE) 98#define KVM_P2M_BASE KVM_VM_DATA_BASE
109#define KVM_MEM_DIRTY_LOG_OFS (KVM_VM_OFS+KVM_VM_BLOCK_SIZE) 99#define KVM_P2M_SIZE (__IA64_UL_CONST(24) << 20)
110#define KVM_MEM_DIRTY_LOG_SIZE (1UL<<19) 100
111 101#define VHPT_SHIFT 16
112/* Get vpd, vhpt, tlb, vcpu, base*/ 102#define VHPT_SIZE (__IA64_UL_CONST(1) << VHPT_SHIFT)
113#define VPD_ADDR(n) (KVM_VPD_BASE+n*VPD_SIZE) 103#define VHPT_NUM_ENTRIES (__IA64_UL_CONST(1) << (VHPT_SHIFT-5))
114#define VHPT_ADDR(n) (KVM_VHPT_BASE+n*VHPT_SIZE) 104
115#define VTLB_ADDR(n) (KVM_VTLB_BASE+n*VTLB_SIZE) 105#define VTLB_SHIFT 16
116#define VCPU_ADDR(n) (KVM_VCPU_BASE+n*VCPU_SIZE) 106#define VTLB_SIZE (__IA64_UL_CONST(1) << VTLB_SHIFT)
107#define VTLB_NUM_ENTRIES (1UL << (VHPT_SHIFT-5))
108
109#define VPD_SHIFT 16
110#define VPD_SIZE (__IA64_UL_CONST(1) << VPD_SHIFT)
111
112#define VCPU_STRUCT_SHIFT 16
113#define VCPU_STRUCT_SIZE (__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT)
114
115#define KVM_STK_OFFSET VCPU_STRUCT_SIZE
116
117#define KVM_VM_STRUCT_SHIFT 19
118#define KVM_VM_STRUCT_SIZE (__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT)
119
120#define KVM_MEM_DIRY_LOG_SHIFT 19
121#define KVM_MEM_DIRTY_LOG_SIZE (__IA64_UL_CONST(1) << KVM_MEM_DIRY_LOG_SHIFT)
122
123#ifndef __ASSEMBLY__
124
125/*Define the max vcpus and memory for Guests.*/
126#define KVM_MAX_VCPUS (KVM_VM_DATA_SIZE - KVM_P2M_SIZE - KVM_VM_STRUCT_SIZE -\
127 KVM_MEM_DIRTY_LOG_SIZE) / sizeof(struct kvm_vcpu_data)
128#define KVM_MAX_MEM_SIZE (KVM_P2M_SIZE >> 3 << PAGE_SHIFT)
129
130#define VMM_LOG_LEN 256
131
132#include <linux/types.h>
133#include <linux/mm.h>
134#include <linux/kvm.h>
135#include <linux/kvm_para.h>
136#include <linux/kvm_types.h>
137
138#include <asm/pal.h>
139#include <asm/sal.h>
140#include <asm/page.h>
141
142struct kvm_vcpu_data {
143 char vcpu_vhpt[VHPT_SIZE];
144 char vcpu_vtlb[VTLB_SIZE];
145 char vcpu_vpd[VPD_SIZE];
146 char vcpu_struct[VCPU_STRUCT_SIZE];
147};
148
149struct kvm_vm_data {
150 char kvm_p2m[KVM_P2M_SIZE];
151 char kvm_vm_struct[KVM_VM_STRUCT_SIZE];
152 char kvm_mem_dirty_log[KVM_MEM_DIRTY_LOG_SIZE];
153 struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS];
154};
155
156#define VCPU_BASE(n) KVM_VM_DATA_BASE + \
157 offsetof(struct kvm_vm_data, vcpu_data[n])
158#define VM_BASE KVM_VM_DATA_BASE + \
159 offsetof(struct kvm_vm_data, kvm_vm_struct)
160#define KVM_MEM_DIRTY_LOG_BASE KVM_VM_DATA_BASE + \
161 offsetof(struct kvm_vm_data, kvm_mem_dirty_log)
162
163#define VHPT_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vhpt))
164#define VTLB_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vtlb))
165#define VPD_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vpd))
166#define VCPU_STRUCT_BASE(n) (VCPU_BASE(n) + \
167 offsetof(struct kvm_vcpu_data, vcpu_struct))
117 168
118/*IO section definitions*/ 169/*IO section definitions*/
119#define IOREQ_READ 1 170#define IOREQ_READ 1
@@ -389,6 +440,7 @@ struct kvm_vcpu_arch {
389 440
390 unsigned long opcode; 441 unsigned long opcode;
391 unsigned long cause; 442 unsigned long cause;
443 char log_buf[VMM_LOG_LEN];
392 union context host; 444 union context host;
393 union context guest; 445 union context guest;
394}; 446};
@@ -403,14 +455,13 @@ struct kvm_sal_data {
403}; 455};
404 456
405struct kvm_arch { 457struct kvm_arch {
458 spinlock_t dirty_log_lock;
459
406 unsigned long vm_base; 460 unsigned long vm_base;
407 unsigned long metaphysical_rr0; 461 unsigned long metaphysical_rr0;
408 unsigned long metaphysical_rr4; 462 unsigned long metaphysical_rr4;
409 unsigned long vmm_init_rr; 463 unsigned long vmm_init_rr;
410 unsigned long vhpt_base; 464
411 unsigned long vtlb_base;
412 unsigned long vpd_base;
413 spinlock_t dirty_log_lock;
414 struct kvm_ioapic *vioapic; 465 struct kvm_ioapic *vioapic;
415 struct kvm_vm_stat stat; 466 struct kvm_vm_stat stat;
416 struct kvm_sal_data rdv_sal_data; 467 struct kvm_sal_data rdv_sal_data;
@@ -512,7 +563,7 @@ struct kvm_pt_regs {
512 563
513static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v) 564static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v)
514{ 565{
515 return (struct kvm_pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1; 566 return (struct kvm_pt_regs *) ((unsigned long) v + KVM_STK_OFFSET) - 1;
516} 567}
517 568
518typedef int kvm_vmm_entry(void); 569typedef int kvm_vmm_entry(void);
@@ -531,5 +582,6 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
531void kvm_sal_emul(struct kvm_vcpu *vcpu); 582void kvm_sal_emul(struct kvm_vcpu *vcpu);
532 583
533static inline void kvm_inject_nmi(struct kvm_vcpu *vcpu) {} 584static inline void kvm_inject_nmi(struct kvm_vcpu *vcpu) {}
585#endif /* __ASSEMBLY__*/
534 586
535#endif 587#endif
diff --git a/arch/ia64/include/asm/smp.h b/arch/ia64/include/asm/smp.h
index 12d96e0cd513..21c402365d0e 100644
--- a/arch/ia64/include/asm/smp.h
+++ b/arch/ia64/include/asm/smp.h
@@ -57,7 +57,6 @@ extern struct smp_boot_data {
57 57
58extern char no_int_routing __devinitdata; 58extern char no_int_routing __devinitdata;
59 59
60extern cpumask_t cpu_online_map;
61extern cpumask_t cpu_core_map[NR_CPUS]; 60extern cpumask_t cpu_core_map[NR_CPUS];
62DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); 61DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
63extern int smp_num_siblings; 62extern int smp_num_siblings;
diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h
index 35bcb641c9e5..a3cc9f65f954 100644
--- a/arch/ia64/include/asm/topology.h
+++ b/arch/ia64/include/asm/topology.h
@@ -55,7 +55,6 @@
55void build_cpu_to_node_map(void); 55void build_cpu_to_node_map(void);
56 56
57#define SD_CPU_INIT (struct sched_domain) { \ 57#define SD_CPU_INIT (struct sched_domain) { \
58 .span = CPU_MASK_NONE, \
59 .parent = NULL, \ 58 .parent = NULL, \
60 .child = NULL, \ 59 .child = NULL, \
61 .groups = NULL, \ 60 .groups = NULL, \
@@ -80,7 +79,6 @@ void build_cpu_to_node_map(void);
80 79
81/* sched_domains SD_NODE_INIT for IA64 NUMA machines */ 80/* sched_domains SD_NODE_INIT for IA64 NUMA machines */
82#define SD_NODE_INIT (struct sched_domain) { \ 81#define SD_NODE_INIT (struct sched_domain) { \
83 .span = CPU_MASK_NONE, \
84 .parent = NULL, \ 82 .parent = NULL, \
85 .child = NULL, \ 83 .child = NULL, \
86 .groups = NULL, \ 84 .groups = NULL, \
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 5c4674ae8aea..c8adecd5b416 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -330,25 +330,25 @@ unmask_irq (unsigned int irq)
330 330
331 331
332static void 332static void
333iosapic_set_affinity (unsigned int irq, cpumask_t mask) 333iosapic_set_affinity(unsigned int irq, const struct cpumask *mask)
334{ 334{
335#ifdef CONFIG_SMP 335#ifdef CONFIG_SMP
336 u32 high32, low32; 336 u32 high32, low32;
337 int dest, rte_index; 337 int cpu, dest, rte_index;
338 int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0; 338 int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0;
339 struct iosapic_rte_info *rte; 339 struct iosapic_rte_info *rte;
340 struct iosapic *iosapic; 340 struct iosapic *iosapic;
341 341
342 irq &= (~IA64_IRQ_REDIRECTED); 342 irq &= (~IA64_IRQ_REDIRECTED);
343 343
344 cpus_and(mask, mask, cpu_online_map); 344 cpu = cpumask_first_and(cpu_online_mask, mask);
345 if (cpus_empty(mask)) 345 if (cpu >= nr_cpu_ids)
346 return; 346 return;
347 347
348 if (irq_prepare_move(irq, first_cpu(mask))) 348 if (irq_prepare_move(irq, cpu))
349 return; 349 return;
350 350
351 dest = cpu_physical_id(first_cpu(mask)); 351 dest = cpu_physical_id(cpu);
352 352
353 if (!iosapic_intr_info[irq].count) 353 if (!iosapic_intr_info[irq].count)
354 return; /* not an IOSAPIC interrupt */ 354 return; /* not an IOSAPIC interrupt */
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index 7fd18f54c056..0b6db53fedcf 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -133,7 +133,6 @@ unsigned int vectors_in_migration[NR_IRQS];
133 */ 133 */
134static void migrate_irqs(void) 134static void migrate_irqs(void)
135{ 135{
136 cpumask_t mask;
137 irq_desc_t *desc; 136 irq_desc_t *desc;
138 int irq, new_cpu; 137 int irq, new_cpu;
139 138
@@ -152,15 +151,14 @@ static void migrate_irqs(void)
152 if (desc->status == IRQ_PER_CPU) 151 if (desc->status == IRQ_PER_CPU)
153 continue; 152 continue;
154 153
155 cpus_and(mask, irq_desc[irq].affinity, cpu_online_map); 154 if (cpumask_any_and(&irq_desc[irq].affinity, cpu_online_mask)
156 if (any_online_cpu(mask) == NR_CPUS) { 155 >= nr_cpu_ids) {
157 /* 156 /*
158 * Save it for phase 2 processing 157 * Save it for phase 2 processing
159 */ 158 */
160 vectors_in_migration[irq] = irq; 159 vectors_in_migration[irq] = irq;
161 160
162 new_cpu = any_online_cpu(cpu_online_map); 161 new_cpu = any_online_cpu(cpu_online_map);
163 mask = cpumask_of_cpu(new_cpu);
164 162
165 /* 163 /*
166 * Al three are essential, currently WARN_ON.. maybe panic? 164 * Al three are essential, currently WARN_ON.. maybe panic?
@@ -168,7 +166,8 @@ static void migrate_irqs(void)
168 if (desc->chip && desc->chip->disable && 166 if (desc->chip && desc->chip->disable &&
169 desc->chip->enable && desc->chip->set_affinity) { 167 desc->chip->enable && desc->chip->set_affinity) {
170 desc->chip->disable(irq); 168 desc->chip->disable(irq);
171 desc->chip->set_affinity(irq, mask); 169 desc->chip->set_affinity(irq,
170 cpumask_of(new_cpu));
172 desc->chip->enable(irq); 171 desc->chip->enable(irq);
173 } else { 172 } else {
174 WARN_ON((!(desc->chip) || !(desc->chip->disable) || 173 WARN_ON((!(desc->chip) || !(desc->chip->disable) ||
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
index 702a09c13238..890339339035 100644
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -49,11 +49,12 @@
49static struct irq_chip ia64_msi_chip; 49static struct irq_chip ia64_msi_chip;
50 50
51#ifdef CONFIG_SMP 51#ifdef CONFIG_SMP
52static void ia64_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask) 52static void ia64_set_msi_irq_affinity(unsigned int irq,
53 const cpumask_t *cpu_mask)
53{ 54{
54 struct msi_msg msg; 55 struct msi_msg msg;
55 u32 addr, data; 56 u32 addr, data;
56 int cpu = first_cpu(cpu_mask); 57 int cpu = first_cpu(*cpu_mask);
57 58
58 if (!cpu_online(cpu)) 59 if (!cpu_online(cpu))
59 return; 60 return;
@@ -166,12 +167,11 @@ void arch_teardown_msi_irq(unsigned int irq)
166 167
167#ifdef CONFIG_DMAR 168#ifdef CONFIG_DMAR
168#ifdef CONFIG_SMP 169#ifdef CONFIG_SMP
169static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) 170static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
170{ 171{
171 struct irq_cfg *cfg = irq_cfg + irq; 172 struct irq_cfg *cfg = irq_cfg + irq;
172 struct msi_msg msg; 173 struct msi_msg msg;
173 int cpu = first_cpu(mask); 174 int cpu = cpumask_first(mask);
174
175 175
176 if (!cpu_online(cpu)) 176 if (!cpu_online(cpu))
177 return; 177 return;
@@ -187,7 +187,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
187 msg.address_lo |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu)); 187 msg.address_lo |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu));
188 188
189 dmar_msi_write(irq, &msg); 189 dmar_msi_write(irq, &msg);
190 irq_desc[irq].affinity = mask; 190 irq_desc[irq].affinity = *mask;
191} 191}
192#endif /* CONFIG_SMP */ 192#endif /* CONFIG_SMP */
193 193
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 1dcbb85fc4ee..11463994a7d5 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -131,12 +131,6 @@ struct task_struct *task_for_booting_cpu;
131 */ 131 */
132DEFINE_PER_CPU(int, cpu_state); 132DEFINE_PER_CPU(int, cpu_state);
133 133
134/* Bitmasks of currently online, and possible CPUs */
135cpumask_t cpu_online_map;
136EXPORT_SYMBOL(cpu_online_map);
137cpumask_t cpu_possible_map = CPU_MASK_NONE;
138EXPORT_SYMBOL(cpu_possible_map);
139
140cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; 134cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
141EXPORT_SYMBOL(cpu_core_map); 135EXPORT_SYMBOL(cpu_core_map);
142DEFINE_PER_CPU_SHARED_ALIGNED(cpumask_t, cpu_sibling_map); 136DEFINE_PER_CPU_SHARED_ALIGNED(cpumask_t, cpu_sibling_map);
@@ -688,7 +682,7 @@ int migrate_platform_irqs(unsigned int cpu)
688{ 682{
689 int new_cpei_cpu; 683 int new_cpei_cpu;
690 irq_desc_t *desc = NULL; 684 irq_desc_t *desc = NULL;
691 cpumask_t mask; 685 const struct cpumask *mask;
692 int retval = 0; 686 int retval = 0;
693 687
694 /* 688 /*
@@ -701,7 +695,7 @@ int migrate_platform_irqs(unsigned int cpu)
701 * Now re-target the CPEI to a different processor 695 * Now re-target the CPEI to a different processor
702 */ 696 */
703 new_cpei_cpu = any_online_cpu(cpu_online_map); 697 new_cpei_cpu = any_online_cpu(cpu_online_map);
704 mask = cpumask_of_cpu(new_cpei_cpu); 698 mask = cpumask_of(new_cpei_cpu);
705 set_cpei_target_cpu(new_cpei_cpu); 699 set_cpei_target_cpu(new_cpei_cpu);
706 desc = irq_desc + ia64_cpe_irq; 700 desc = irq_desc + ia64_cpe_irq;
707 /* 701 /*
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 65c10a42c88f..f0ebb342409d 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -93,13 +93,14 @@ void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next)
93 now = ia64_get_itc(); 93 now = ia64_get_itc();
94 94
95 delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp)); 95 delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp));
96 account_system_time(prev, 0, delta_stime); 96 if (idle_task(smp_processor_id()) != prev)
97 account_system_time_scaled(prev, delta_stime); 97 account_system_time(prev, 0, delta_stime, delta_stime);
98 else
99 account_idle_time(delta_stime);
98 100
99 if (pi->ac_utime) { 101 if (pi->ac_utime) {
100 delta_utime = cycle_to_cputime(pi->ac_utime); 102 delta_utime = cycle_to_cputime(pi->ac_utime);
101 account_user_time(prev, delta_utime); 103 account_user_time(prev, delta_utime, delta_utime);
102 account_user_time_scaled(prev, delta_utime);
103 } 104 }
104 105
105 pi->ac_stamp = ni->ac_stamp = now; 106 pi->ac_stamp = ni->ac_stamp = now;
@@ -122,8 +123,10 @@ void account_system_vtime(struct task_struct *tsk)
122 now = ia64_get_itc(); 123 now = ia64_get_itc();
123 124
124 delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp)); 125 delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
125 account_system_time(tsk, 0, delta_stime); 126 if (irq_count() || idle_task(smp_processor_id()) != tsk)
126 account_system_time_scaled(tsk, delta_stime); 127 account_system_time(tsk, 0, delta_stime, delta_stime);
128 else
129 account_idle_time(delta_stime);
127 ti->ac_stime = 0; 130 ti->ac_stime = 0;
128 131
129 ti->ac_stamp = now; 132 ti->ac_stamp = now;
@@ -143,8 +146,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
143 146
144 if (ti->ac_utime) { 147 if (ti->ac_utime) {
145 delta_utime = cycle_to_cputime(ti->ac_utime); 148 delta_utime = cycle_to_cputime(ti->ac_utime);
146 account_user_time(p, delta_utime); 149 account_user_time(p, delta_utime, delta_utime);
147 account_user_time_scaled(p, delta_utime);
148 ti->ac_utime = 0; 150 ti->ac_utime = 0;
149 } 151 }
150} 152}
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index c75b914f2d6b..a8d61a3e9a94 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -219,7 +219,7 @@ static ssize_t show_shared_cpu_map(struct cache_info *this_leaf, char *buf)
219 cpumask_t shared_cpu_map; 219 cpumask_t shared_cpu_map;
220 220
221 cpus_and(shared_cpu_map, this_leaf->shared_cpu_map, cpu_online_map); 221 cpus_and(shared_cpu_map, this_leaf->shared_cpu_map, cpu_online_map);
222 len = cpumask_scnprintf(buf, NR_CPUS+1, shared_cpu_map); 222 len = cpumask_scnprintf(buf, NR_CPUS+1, &shared_cpu_map);
223 len += sprintf(buf+len, "\n"); 223 len += sprintf(buf+len, "\n");
224 return len; 224 return len;
225} 225}
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index 92cef66ca268..76464dc312e6 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -60,7 +60,7 @@ obj-$(CONFIG_KVM) += kvm.o
60 60
61CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127 61CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127
62kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \ 62kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \
63 vtlb.o process.o 63 vtlb.o process.o kvm_lib.o
64#Add link memcpy and memset to avoid possible structure assignment error 64#Add link memcpy and memset to avoid possible structure assignment error
65kvm-intel-objs += memcpy.o memset.o 65kvm-intel-objs += memcpy.o memset.o
66obj-$(CONFIG_KVM_INTEL) += kvm-intel.o 66obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c
index 4e3dc13a619c..0c3564a7a033 100644
--- a/arch/ia64/kvm/asm-offsets.c
+++ b/arch/ia64/kvm/asm-offsets.c
@@ -24,19 +24,10 @@
24 24
25#include <linux/autoconf.h> 25#include <linux/autoconf.h>
26#include <linux/kvm_host.h> 26#include <linux/kvm_host.h>
27#include <linux/kbuild.h>
27 28
28#include "vcpu.h" 29#include "vcpu.h"
29 30
30#define task_struct kvm_vcpu
31
32#define DEFINE(sym, val) \
33 asm volatile("\n->" #sym " (%0) " #val : : "i" (val))
34
35#define BLANK() asm volatile("\n->" : :)
36
37#define OFFSET(_sym, _str, _mem) \
38 DEFINE(_sym, offsetof(_str, _mem));
39
40void foo(void) 31void foo(void)
41{ 32{
42 DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu)); 33 DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu));
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index af1464f7a6ad..0f5ebd948437 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -180,7 +180,6 @@ int kvm_dev_ioctl_check_extension(long ext)
180 180
181 switch (ext) { 181 switch (ext) {
182 case KVM_CAP_IRQCHIP: 182 case KVM_CAP_IRQCHIP:
183 case KVM_CAP_USER_MEMORY:
184 case KVM_CAP_MP_STATE: 183 case KVM_CAP_MP_STATE:
185 184
186 r = 1; 185 r = 1;
@@ -439,7 +438,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
439 expires = div64_u64(itc_diff, cyc_per_usec); 438 expires = div64_u64(itc_diff, cyc_per_usec);
440 kt = ktime_set(0, 1000 * expires); 439 kt = ktime_set(0, 1000 * expires);
441 440
442 down_read(&vcpu->kvm->slots_lock);
443 vcpu->arch.ht_active = 1; 441 vcpu->arch.ht_active = 1;
444 hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS); 442 hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
445 443
@@ -452,7 +450,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
452 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) 450 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
453 vcpu->arch.mp_state = 451 vcpu->arch.mp_state =
454 KVM_MP_STATE_RUNNABLE; 452 KVM_MP_STATE_RUNNABLE;
455 up_read(&vcpu->kvm->slots_lock);
456 453
457 if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) 454 if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
458 return -EINTR; 455 return -EINTR;
@@ -476,6 +473,13 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu,
476 return 1; 473 return 1;
477} 474}
478 475
476static int handle_vcpu_debug(struct kvm_vcpu *vcpu,
477 struct kvm_run *kvm_run)
478{
479 printk("VMM: %s", vcpu->arch.log_buf);
480 return 1;
481}
482
479static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu, 483static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
480 struct kvm_run *kvm_run) = { 484 struct kvm_run *kvm_run) = {
481 [EXIT_REASON_VM_PANIC] = handle_vm_error, 485 [EXIT_REASON_VM_PANIC] = handle_vm_error,
@@ -487,6 +491,7 @@ static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
487 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, 491 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
488 [EXIT_REASON_IPI] = handle_ipi, 492 [EXIT_REASON_IPI] = handle_ipi,
489 [EXIT_REASON_PTC_G] = handle_global_purge, 493 [EXIT_REASON_PTC_G] = handle_global_purge,
494 [EXIT_REASON_DEBUG] = handle_vcpu_debug,
490 495
491}; 496};
492 497
@@ -698,27 +703,24 @@ out:
698 return r; 703 return r;
699} 704}
700 705
701/*
702 * Allocate 16M memory for every vm to hold its specific data.
703 * Its memory map is defined in kvm_host.h.
704 */
705static struct kvm *kvm_alloc_kvm(void) 706static struct kvm *kvm_alloc_kvm(void)
706{ 707{
707 708
708 struct kvm *kvm; 709 struct kvm *kvm;
709 uint64_t vm_base; 710 uint64_t vm_base;
710 711
712 BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE);
713
711 vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE)); 714 vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
712 715
713 if (!vm_base) 716 if (!vm_base)
714 return ERR_PTR(-ENOMEM); 717 return ERR_PTR(-ENOMEM);
715 printk(KERN_DEBUG"kvm: VM data's base Address:0x%lx\n", vm_base);
716 718
717 /* Zero all pages before use! */
718 memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); 719 memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
719 720 kvm = (struct kvm *)(vm_base +
720 kvm = (struct kvm *)(vm_base + KVM_VM_OFS); 721 offsetof(struct kvm_vm_data, kvm_vm_struct));
721 kvm->arch.vm_base = vm_base; 722 kvm->arch.vm_base = vm_base;
723 printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base);
722 724
723 return kvm; 725 return kvm;
724} 726}
@@ -760,21 +762,12 @@ static void kvm_build_io_pmt(struct kvm *kvm)
760 762
761static void kvm_init_vm(struct kvm *kvm) 763static void kvm_init_vm(struct kvm *kvm)
762{ 764{
763 long vm_base;
764
765 BUG_ON(!kvm); 765 BUG_ON(!kvm);
766 766
767 kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0; 767 kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
768 kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4; 768 kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
769 kvm->arch.vmm_init_rr = VMM_INIT_RR; 769 kvm->arch.vmm_init_rr = VMM_INIT_RR;
770 770
771 vm_base = kvm->arch.vm_base;
772 if (vm_base) {
773 kvm->arch.vhpt_base = vm_base + KVM_VHPT_OFS;
774 kvm->arch.vtlb_base = vm_base + KVM_VTLB_OFS;
775 kvm->arch.vpd_base = vm_base + KVM_VPD_OFS;
776 }
777
778 /* 771 /*
779 *Fill P2M entries for MMIO/IO ranges 772 *Fill P2M entries for MMIO/IO ranges
780 */ 773 */
@@ -838,9 +831,8 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
838 831
839int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 832int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
840{ 833{
841 int i;
842 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); 834 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
843 int r; 835 int i;
844 836
845 vcpu_load(vcpu); 837 vcpu_load(vcpu);
846 838
@@ -857,18 +849,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
857 849
858 vpd->vpr = regs->vpd.vpr; 850 vpd->vpr = regs->vpd.vpr;
859 851
860 r = -EFAULT; 852 memcpy(&vcpu->arch.guest, &regs->saved_guest, sizeof(union context));
861 r = copy_from_user(&vcpu->arch.guest, regs->saved_guest,
862 sizeof(union context));
863 if (r)
864 goto out;
865 r = copy_from_user(vcpu + 1, regs->saved_stack +
866 sizeof(struct kvm_vcpu),
867 IA64_STK_OFFSET - sizeof(struct kvm_vcpu));
868 if (r)
869 goto out;
870 vcpu->arch.exit_data =
871 ((struct kvm_vcpu *)(regs->saved_stack))->arch.exit_data;
872 853
873 RESTORE_REGS(mp_state); 854 RESTORE_REGS(mp_state);
874 RESTORE_REGS(vmm_rr); 855 RESTORE_REGS(vmm_rr);
@@ -902,9 +883,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
902 set_bit(KVM_REQ_RESUME, &vcpu->requests); 883 set_bit(KVM_REQ_RESUME, &vcpu->requests);
903 884
904 vcpu_put(vcpu); 885 vcpu_put(vcpu);
905 r = 0; 886
906out: 887 return 0;
907 return r;
908} 888}
909 889
910long kvm_arch_vm_ioctl(struct file *filp, 890long kvm_arch_vm_ioctl(struct file *filp,
@@ -1166,10 +1146,11 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1166 /*Set entry address for first run.*/ 1146 /*Set entry address for first run.*/
1167 regs->cr_iip = PALE_RESET_ENTRY; 1147 regs->cr_iip = PALE_RESET_ENTRY;
1168 1148
1169 /*Initilize itc offset for vcpus*/ 1149 /*Initialize itc offset for vcpus*/
1170 itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC); 1150 itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC);
1171 for (i = 0; i < MAX_VCPU_NUM; i++) { 1151 for (i = 0; i < KVM_MAX_VCPUS; i++) {
1172 v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i); 1152 v = (struct kvm_vcpu *)((char *)vcpu +
1153 sizeof(struct kvm_vcpu_data) * i);
1173 v->arch.itc_offset = itc_offset; 1154 v->arch.itc_offset = itc_offset;
1174 v->arch.last_itc = 0; 1155 v->arch.last_itc = 0;
1175 } 1156 }
@@ -1183,7 +1164,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1183 vcpu->arch.apic->vcpu = vcpu; 1164 vcpu->arch.apic->vcpu = vcpu;
1184 1165
1185 p_ctx->gr[1] = 0; 1166 p_ctx->gr[1] = 0;
1186 p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + IA64_STK_OFFSET); 1167 p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET);
1187 p_ctx->gr[13] = (unsigned long)vmm_vcpu; 1168 p_ctx->gr[13] = (unsigned long)vmm_vcpu;
1188 p_ctx->psr = 0x1008522000UL; 1169 p_ctx->psr = 0x1008522000UL;
1189 p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/ 1170 p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/
@@ -1218,12 +1199,12 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1218 vcpu->arch.hlt_timer.function = hlt_timer_fn; 1199 vcpu->arch.hlt_timer.function = hlt_timer_fn;
1219 1200
1220 vcpu->arch.last_run_cpu = -1; 1201 vcpu->arch.last_run_cpu = -1;
1221 vcpu->arch.vpd = (struct vpd *)VPD_ADDR(vcpu->vcpu_id); 1202 vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id);
1222 vcpu->arch.vsa_base = kvm_vsa_base; 1203 vcpu->arch.vsa_base = kvm_vsa_base;
1223 vcpu->arch.__gp = kvm_vmm_gp; 1204 vcpu->arch.__gp = kvm_vmm_gp;
1224 vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock); 1205 vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock);
1225 vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_ADDR(vcpu->vcpu_id); 1206 vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id);
1226 vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_ADDR(vcpu->vcpu_id); 1207 vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id);
1227 init_ptce_info(vcpu); 1208 init_ptce_info(vcpu);
1228 1209
1229 r = 0; 1210 r = 0;
@@ -1273,12 +1254,22 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1273 int r; 1254 int r;
1274 int cpu; 1255 int cpu;
1275 1256
1257 BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2);
1258
1259 r = -EINVAL;
1260 if (id >= KVM_MAX_VCPUS) {
1261 printk(KERN_ERR"kvm: Can't configure vcpus > %ld",
1262 KVM_MAX_VCPUS);
1263 goto fail;
1264 }
1265
1276 r = -ENOMEM; 1266 r = -ENOMEM;
1277 if (!vm_base) { 1267 if (!vm_base) {
1278 printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id); 1268 printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id);
1279 goto fail; 1269 goto fail;
1280 } 1270 }
1281 vcpu = (struct kvm_vcpu *)(vm_base + KVM_VCPU_OFS + VCPU_SIZE * id); 1271 vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data,
1272 vcpu_data[id].vcpu_struct));
1282 vcpu->kvm = kvm; 1273 vcpu->kvm = kvm;
1283 1274
1284 cpu = get_cpu(); 1275 cpu = get_cpu();
@@ -1374,9 +1365,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1374 1365
1375int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 1366int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1376{ 1367{
1377 int i;
1378 int r;
1379 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); 1368 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
1369 int i;
1370
1380 vcpu_load(vcpu); 1371 vcpu_load(vcpu);
1381 1372
1382 for (i = 0; i < 16; i++) { 1373 for (i = 0; i < 16; i++) {
@@ -1391,14 +1382,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1391 regs->vpd.vpsr = vpd->vpsr; 1382 regs->vpd.vpsr = vpd->vpsr;
1392 regs->vpd.vpr = vpd->vpr; 1383 regs->vpd.vpr = vpd->vpr;
1393 1384
1394 r = -EFAULT; 1385 memcpy(&regs->saved_guest, &vcpu->arch.guest, sizeof(union context));
1395 r = copy_to_user(regs->saved_guest, &vcpu->arch.guest, 1386
1396 sizeof(union context));
1397 if (r)
1398 goto out;
1399 r = copy_to_user(regs->saved_stack, (void *)vcpu, IA64_STK_OFFSET);
1400 if (r)
1401 goto out;
1402 SAVE_REGS(mp_state); 1387 SAVE_REGS(mp_state);
1403 SAVE_REGS(vmm_rr); 1388 SAVE_REGS(vmm_rr);
1404 memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS); 1389 memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS);
@@ -1426,10 +1411,9 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1426 SAVE_REGS(metaphysical_saved_rr4); 1411 SAVE_REGS(metaphysical_saved_rr4);
1427 SAVE_REGS(fp_psr); 1412 SAVE_REGS(fp_psr);
1428 SAVE_REGS(saved_gp); 1413 SAVE_REGS(saved_gp);
1414
1429 vcpu_put(vcpu); 1415 vcpu_put(vcpu);
1430 r = 0; 1416 return 0;
1431out:
1432 return r;
1433} 1417}
1434 1418
1435void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 1419void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
@@ -1457,6 +1441,9 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
1457 struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot]; 1441 struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
1458 unsigned long base_gfn = memslot->base_gfn; 1442 unsigned long base_gfn = memslot->base_gfn;
1459 1443
1444 if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
1445 return -ENOMEM;
1446
1460 for (i = 0; i < npages; i++) { 1447 for (i = 0; i < npages; i++) {
1461 pfn = gfn_to_pfn(kvm, base_gfn + i); 1448 pfn = gfn_to_pfn(kvm, base_gfn + i);
1462 if (!kvm_is_mmio_pfn(pfn)) { 1449 if (!kvm_is_mmio_pfn(pfn)) {
@@ -1631,8 +1618,8 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
1631 struct kvm_memory_slot *memslot; 1618 struct kvm_memory_slot *memslot;
1632 int r, i; 1619 int r, i;
1633 long n, base; 1620 long n, base;
1634 unsigned long *dirty_bitmap = (unsigned long *)((void *)kvm - KVM_VM_OFS 1621 unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base +
1635 + KVM_MEM_DIRTY_LOG_OFS); 1622 offsetof(struct kvm_vm_data, kvm_mem_dirty_log));
1636 1623
1637 r = -EINVAL; 1624 r = -EINVAL;
1638 if (log->slot >= KVM_MEMORY_SLOTS) 1625 if (log->slot >= KVM_MEMORY_SLOTS)
diff --git a/arch/ia64/kvm/kvm_lib.c b/arch/ia64/kvm/kvm_lib.c
new file mode 100644
index 000000000000..a85cb611ecd7
--- /dev/null
+++ b/arch/ia64/kvm/kvm_lib.c
@@ -0,0 +1,15 @@
1/*
2 * kvm_lib.c: Compile some libraries for kvm-intel module.
3 *
4 * Just include kernel's library, and disable symbols export.
5 * Copyright (C) 2008, Intel Corporation.
6 * Xiantao Zhang (xiantao.zhang@intel.com)
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 *
12 */
13#undef CONFIG_MODULES
14#include "../../../lib/vsprintf.c"
15#include "../../../lib/ctype.c"
diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h
index 2cc41d17cf99..b2bcaa2787aa 100644
--- a/arch/ia64/kvm/kvm_minstate.h
+++ b/arch/ia64/kvm/kvm_minstate.h
@@ -24,6 +24,8 @@
24#include <asm/asmmacro.h> 24#include <asm/asmmacro.h>
25#include <asm/types.h> 25#include <asm/types.h>
26#include <asm/kregs.h> 26#include <asm/kregs.h>
27#include <asm/kvm_host.h>
28
27#include "asm-offsets.h" 29#include "asm-offsets.h"
28 30
29#define KVM_MINSTATE_START_SAVE_MIN \ 31#define KVM_MINSTATE_START_SAVE_MIN \
@@ -33,7 +35,7 @@
33 addl r22 = VMM_RBS_OFFSET,r1; /* compute base of RBS */ \ 35 addl r22 = VMM_RBS_OFFSET,r1; /* compute base of RBS */ \
34 ;; \ 36 ;; \
35 lfetch.fault.excl.nt1 [r22]; \ 37 lfetch.fault.excl.nt1 [r22]; \
36 addl r1 = IA64_STK_OFFSET-VMM_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ 38 addl r1 = KVM_STK_OFFSET-VMM_PT_REGS_SIZE, r1; \
37 mov r23 = ar.bspstore; /* save ar.bspstore */ \ 39 mov r23 = ar.bspstore; /* save ar.bspstore */ \
38 ;; \ 40 ;; \
39 mov ar.bspstore = r22; /* switch to kernel RBS */\ 41 mov ar.bspstore = r22; /* switch to kernel RBS */\
diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h
index e585c4607344..dd979e00b574 100644
--- a/arch/ia64/kvm/misc.h
+++ b/arch/ia64/kvm/misc.h
@@ -27,7 +27,8 @@
27 */ 27 */
28static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm) 28static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm)
29{ 29{
30 return (uint64_t *)(kvm->arch.vm_base + KVM_P2M_OFS); 30 return (uint64_t *)(kvm->arch.vm_base +
31 offsetof(struct kvm_vm_data, kvm_p2m));
31} 32}
32 33
33static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn, 34static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn,
diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c
index 7f1a858bc69f..21f63fffc379 100644
--- a/arch/ia64/kvm/mmio.c
+++ b/arch/ia64/kvm/mmio.c
@@ -66,31 +66,25 @@ void lsapic_write(struct kvm_vcpu *v, unsigned long addr,
66 66
67 switch (addr) { 67 switch (addr) {
68 case PIB_OFST_INTA: 68 case PIB_OFST_INTA:
69 /*panic_domain(NULL, "Undefined write on PIB INTA\n");*/ 69 panic_vm(v, "Undefined write on PIB INTA\n");
70 panic_vm(v);
71 break; 70 break;
72 case PIB_OFST_XTP: 71 case PIB_OFST_XTP:
73 if (length == 1) { 72 if (length == 1) {
74 vlsapic_write_xtp(v, val); 73 vlsapic_write_xtp(v, val);
75 } else { 74 } else {
76 /*panic_domain(NULL, 75 panic_vm(v, "Undefined write on PIB XTP\n");
77 "Undefined write on PIB XTP\n");*/
78 panic_vm(v);
79 } 76 }
80 break; 77 break;
81 default: 78 default:
82 if (PIB_LOW_HALF(addr)) { 79 if (PIB_LOW_HALF(addr)) {
83 /*lower half */ 80 /*Lower half */
84 if (length != 8) 81 if (length != 8)
85 /*panic_domain(NULL, 82 panic_vm(v, "Can't LHF write with size %ld!\n",
86 "Can't LHF write with size %ld!\n", 83 length);
87 length);*/
88 panic_vm(v);
89 else 84 else
90 vlsapic_write_ipi(v, addr, val); 85 vlsapic_write_ipi(v, addr, val);
91 } else { /* upper half 86 } else { /*Upper half */
92 printk("IPI-UHF write %lx\n",addr);*/ 87 panic_vm(v, "IPI-UHF write %lx\n", addr);
93 panic_vm(v);
94 } 88 }
95 break; 89 break;
96 } 90 }
@@ -108,22 +102,18 @@ unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr,
108 if (length == 1) /* 1 byte load */ 102 if (length == 1) /* 1 byte load */
109 ; /* There is no i8259, there is no INTA access*/ 103 ; /* There is no i8259, there is no INTA access*/
110 else 104 else
111 /*panic_domain(NULL,"Undefined read on PIB INTA\n"); */ 105 panic_vm(v, "Undefined read on PIB INTA\n");
112 panic_vm(v);
113 106
114 break; 107 break;
115 case PIB_OFST_XTP: 108 case PIB_OFST_XTP:
116 if (length == 1) { 109 if (length == 1) {
117 result = VLSAPIC_XTP(v); 110 result = VLSAPIC_XTP(v);
118 /* printk("read xtp %lx\n", result); */
119 } else { 111 } else {
120 /*panic_domain(NULL, 112 panic_vm(v, "Undefined read on PIB XTP\n");
121 "Undefined read on PIB XTP\n");*/
122 panic_vm(v);
123 } 113 }
124 break; 114 break;
125 default: 115 default:
126 panic_vm(v); 116 panic_vm(v, "Undefined addr access for lsapic!\n");
127 break; 117 break;
128 } 118 }
129 return result; 119 return result;
@@ -162,7 +152,7 @@ static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest,
162 /* it's necessary to ensure zero extending */ 152 /* it's necessary to ensure zero extending */
163 *dest = p->u.ioreq.data & (~0UL >> (64-(s*8))); 153 *dest = p->u.ioreq.data & (~0UL >> (64-(s*8)));
164 } else 154 } else
165 panic_vm(vcpu); 155 panic_vm(vcpu, "Unhandled mmio access returned!\n");
166out: 156out:
167 local_irq_restore(psr); 157 local_irq_restore(psr);
168 return ; 158 return ;
@@ -324,7 +314,9 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
324 return; 314 return;
325 } else { 315 } else {
326 inst_type = -1; 316 inst_type = -1;
327 panic_vm(vcpu); 317 panic_vm(vcpu, "Unsupported MMIO access instruction! \
318 Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
319 bundle.i64[0], bundle.i64[1]);
328 } 320 }
329 321
330 size = 1 << size; 322 size = 1 << size;
@@ -335,7 +327,7 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
335 if (inst_type == SL_INTEGER) 327 if (inst_type == SL_INTEGER)
336 vcpu_set_gr(vcpu, inst.M1.r1, data, 0); 328 vcpu_set_gr(vcpu, inst.M1.r1, data, 0);
337 else 329 else
338 panic_vm(vcpu); 330 panic_vm(vcpu, "Unsupported instruction type!\n");
339 331
340 } 332 }
341 vcpu_increment_iip(vcpu); 333 vcpu_increment_iip(vcpu);
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
index 800817307b7b..552d07724207 100644
--- a/arch/ia64/kvm/process.c
+++ b/arch/ia64/kvm/process.c
@@ -527,7 +527,8 @@ void reflect_interruption(u64 ifa, u64 isr, u64 iim,
527 vector = vec2off[vec]; 527 vector = vec2off[vec];
528 528
529 if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) { 529 if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) {
530 panic_vm(vcpu); 530 panic_vm(vcpu, "Interruption with vector :0x%lx occurs "
531 "with psr.ic = 0\n", vector);
531 return; 532 return;
532 } 533 }
533 534
@@ -586,7 +587,7 @@ static void set_pal_call_result(struct kvm_vcpu *vcpu)
586 vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0); 587 vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0);
587 vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0); 588 vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0);
588 } else 589 } else
589 panic_vm(vcpu); 590 panic_vm(vcpu, "Mis-set for exit reason!\n");
590} 591}
591 592
592static void set_sal_call_data(struct kvm_vcpu *vcpu) 593static void set_sal_call_data(struct kvm_vcpu *vcpu)
@@ -614,7 +615,7 @@ static void set_sal_call_result(struct kvm_vcpu *vcpu)
614 vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0); 615 vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0);
615 vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0); 616 vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0);
616 } else 617 } else
617 panic_vm(vcpu); 618 panic_vm(vcpu, "Mis-set for exit reason!\n");
618} 619}
619 620
620void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs, 621void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
@@ -680,7 +681,7 @@ static void generate_exirq(struct kvm_vcpu *vcpu)
680 vpsr = VCPU(vcpu, vpsr); 681 vpsr = VCPU(vcpu, vpsr);
681 isr = vpsr & IA64_PSR_RI; 682 isr = vpsr & IA64_PSR_RI;
682 if (!(vpsr & IA64_PSR_IC)) 683 if (!(vpsr & IA64_PSR_IC))
683 panic_vm(vcpu); 684 panic_vm(vcpu, "Trying to inject one IRQ with psr.ic=0\n");
684 reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */ 685 reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
685} 686}
686 687
@@ -941,8 +942,20 @@ static void vcpu_do_resume(struct kvm_vcpu *vcpu)
941 ia64_set_pta(vcpu->arch.vhpt.pta.val); 942 ia64_set_pta(vcpu->arch.vhpt.pta.val);
942} 943}
943 944
945static void vmm_sanity_check(struct kvm_vcpu *vcpu)
946{
947 struct exit_ctl_data *p = &vcpu->arch.exit_data;
948
949 if (!vmm_sanity && p->exit_reason != EXIT_REASON_DEBUG) {
950 panic_vm(vcpu, "Failed to do vmm sanity check,"
951 "it maybe caused by crashed vmm!!\n\n");
952 }
953}
954
944static void kvm_do_resume_op(struct kvm_vcpu *vcpu) 955static void kvm_do_resume_op(struct kvm_vcpu *vcpu)
945{ 956{
957 vmm_sanity_check(vcpu); /*Guarantee vcpu runing on healthy vmm!*/
958
946 if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) { 959 if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) {
947 vcpu_do_resume(vcpu); 960 vcpu_do_resume(vcpu);
948 return; 961 return;
@@ -968,3 +981,11 @@ void vmm_transition(struct kvm_vcpu *vcpu)
968 1, 0, 0, 0, 0, 0); 981 1, 0, 0, 0, 0, 0);
969 kvm_do_resume_op(vcpu); 982 kvm_do_resume_op(vcpu);
970} 983}
984
985void vmm_panic_handler(u64 vec)
986{
987 struct kvm_vcpu *vcpu = current_vcpu;
988 vmm_sanity = 0;
989 panic_vm(vcpu, "Unexpected interruption occurs in VMM, vector:0x%lx\n",
990 vec2off[vec]);
991}
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
index e44027ce5667..ecd526b55323 100644
--- a/arch/ia64/kvm/vcpu.c
+++ b/arch/ia64/kvm/vcpu.c
@@ -816,8 +816,9 @@ static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
816 unsigned long vitv = VCPU(vcpu, itv); 816 unsigned long vitv = VCPU(vcpu, itv);
817 817
818 if (vcpu->vcpu_id == 0) { 818 if (vcpu->vcpu_id == 0) {
819 for (i = 0; i < MAX_VCPU_NUM; i++) { 819 for (i = 0; i < KVM_MAX_VCPUS; i++) {
820 v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i); 820 v = (struct kvm_vcpu *)((char *)vcpu +
821 sizeof(struct kvm_vcpu_data) * i);
821 VMX(v, itc_offset) = itc_offset; 822 VMX(v, itc_offset) = itc_offset;
822 VMX(v, last_itc) = 0; 823 VMX(v, last_itc) = 0;
823 } 824 }
@@ -1650,7 +1651,8 @@ void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
1650 * Otherwise panic 1651 * Otherwise panic
1651 */ 1652 */
1652 if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM)) 1653 if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
1653 panic_vm(vcpu); 1654 panic_vm(vcpu, "Only support guests with vpsr.pk =0 \
1655 & vpsr.is=0\n");
1654 1656
1655 /* 1657 /*
1656 * For those IA64_PSR bits: id/da/dd/ss/ed/ia 1658 * For those IA64_PSR bits: id/da/dd/ss/ed/ia
@@ -2103,7 +2105,7 @@ void kvm_init_all_rr(struct kvm_vcpu *vcpu)
2103 2105
2104 if (is_physical_mode(vcpu)) { 2106 if (is_physical_mode(vcpu)) {
2105 if (vcpu->arch.mode_flags & GUEST_PHY_EMUL) 2107 if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
2106 panic_vm(vcpu); 2108 panic_vm(vcpu, "Machine Status conflicts!\n");
2107 2109
2108 ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0); 2110 ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0);
2109 ia64_dv_serialize_data(); 2111 ia64_dv_serialize_data();
@@ -2152,10 +2154,70 @@ int vmm_entry(void)
2152 return 0; 2154 return 0;
2153} 2155}
2154 2156
2155void panic_vm(struct kvm_vcpu *v) 2157static void kvm_show_registers(struct kvm_pt_regs *regs)
2156{ 2158{
2159 unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
2160
2161 struct kvm_vcpu *vcpu = current_vcpu;
2162 if (vcpu != NULL)
2163 printk("vcpu 0x%p vcpu %d\n",
2164 vcpu, vcpu->vcpu_id);
2165
2166 printk("psr : %016lx ifs : %016lx ip : [<%016lx>]\n",
2167 regs->cr_ipsr, regs->cr_ifs, ip);
2168
2169 printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
2170 regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
2171 printk("rnat: %016lx bspstore: %016lx pr : %016lx\n",
2172 regs->ar_rnat, regs->ar_bspstore, regs->pr);
2173 printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
2174 regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
2175 printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd);
2176 printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0,
2177 regs->b6, regs->b7);
2178 printk("f6 : %05lx%016lx f7 : %05lx%016lx\n",
2179 regs->f6.u.bits[1], regs->f6.u.bits[0],
2180 regs->f7.u.bits[1], regs->f7.u.bits[0]);
2181 printk("f8 : %05lx%016lx f9 : %05lx%016lx\n",
2182 regs->f8.u.bits[1], regs->f8.u.bits[0],
2183 regs->f9.u.bits[1], regs->f9.u.bits[0]);
2184 printk("f10 : %05lx%016lx f11 : %05lx%016lx\n",
2185 regs->f10.u.bits[1], regs->f10.u.bits[0],
2186 regs->f11.u.bits[1], regs->f11.u.bits[0]);
2187
2188 printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1,
2189 regs->r2, regs->r3);
2190 printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8,
2191 regs->r9, regs->r10);
2192 printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11,
2193 regs->r12, regs->r13);
2194 printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14,
2195 regs->r15, regs->r16);
2196 printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17,
2197 regs->r18, regs->r19);
2198 printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20,
2199 regs->r21, regs->r22);
2200 printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23,
2201 regs->r24, regs->r25);
2202 printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26,
2203 regs->r27, regs->r28);
2204 printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29,
2205 regs->r30, regs->r31);
2206
2207}
2208
2209void panic_vm(struct kvm_vcpu *v, const char *fmt, ...)
2210{
2211 va_list args;
2212 char buf[256];
2213
2214 struct kvm_pt_regs *regs = vcpu_regs(v);
2157 struct exit_ctl_data *p = &v->arch.exit_data; 2215 struct exit_ctl_data *p = &v->arch.exit_data;
2158 2216 va_start(args, fmt);
2217 vsnprintf(buf, sizeof(buf), fmt, args);
2218 va_end(args);
2219 printk(buf);
2220 kvm_show_registers(regs);
2159 p->exit_reason = EXIT_REASON_VM_PANIC; 2221 p->exit_reason = EXIT_REASON_VM_PANIC;
2160 vmm_transition(v); 2222 vmm_transition(v);
2161 /*Never to return*/ 2223 /*Never to return*/
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
index e9b2a4e121c0..b2f12a562bdf 100644
--- a/arch/ia64/kvm/vcpu.h
+++ b/arch/ia64/kvm/vcpu.h
@@ -737,9 +737,12 @@ void kvm_init_vtlb(struct kvm_vcpu *v);
737void kvm_init_vhpt(struct kvm_vcpu *v); 737void kvm_init_vhpt(struct kvm_vcpu *v);
738void thash_init(struct thash_cb *hcb, u64 sz); 738void thash_init(struct thash_cb *hcb, u64 sz);
739 739
740void panic_vm(struct kvm_vcpu *v); 740void panic_vm(struct kvm_vcpu *v, const char *fmt, ...);
741 741
742extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3, 742extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
743 u64 arg4, u64 arg5, u64 arg6, u64 arg7); 743 u64 arg4, u64 arg5, u64 arg6, u64 arg7);
744
745extern long vmm_sanity;
746
744#endif 747#endif
745#endif /* __VCPU_H__ */ 748#endif /* __VCPU_H__ */
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c
index 2275bf4e681a..9eee5c04bacc 100644
--- a/arch/ia64/kvm/vmm.c
+++ b/arch/ia64/kvm/vmm.c
@@ -20,6 +20,7 @@
20 */ 20 */
21 21
22 22
23#include<linux/kernel.h>
23#include<linux/module.h> 24#include<linux/module.h>
24#include<asm/fpswa.h> 25#include<asm/fpswa.h>
25 26
@@ -31,6 +32,8 @@ MODULE_LICENSE("GPL");
31extern char kvm_ia64_ivt; 32extern char kvm_ia64_ivt;
32extern fpswa_interface_t *vmm_fpswa_interface; 33extern fpswa_interface_t *vmm_fpswa_interface;
33 34
35long vmm_sanity = 1;
36
34struct kvm_vmm_info vmm_info = { 37struct kvm_vmm_info vmm_info = {
35 .module = THIS_MODULE, 38 .module = THIS_MODULE,
36 .vmm_entry = vmm_entry, 39 .vmm_entry = vmm_entry,
@@ -62,5 +65,31 @@ void vmm_spin_unlock(spinlock_t *lock)
62{ 65{
63 _vmm_raw_spin_unlock(lock); 66 _vmm_raw_spin_unlock(lock);
64} 67}
68
69static void vcpu_debug_exit(struct kvm_vcpu *vcpu)
70{
71 struct exit_ctl_data *p = &vcpu->arch.exit_data;
72 long psr;
73
74 local_irq_save(psr);
75 p->exit_reason = EXIT_REASON_DEBUG;
76 vmm_transition(vcpu);
77 local_irq_restore(psr);
78}
79
80asmlinkage int printk(const char *fmt, ...)
81{
82 struct kvm_vcpu *vcpu = current_vcpu;
83 va_list args;
84 int r;
85
86 memset(vcpu->arch.log_buf, 0, VMM_LOG_LEN);
87 va_start(args, fmt);
88 r = vsnprintf(vcpu->arch.log_buf, VMM_LOG_LEN, fmt, args);
89 va_end(args);
90 vcpu_debug_exit(vcpu);
91 return r;
92}
93
65module_init(kvm_vmm_init) 94module_init(kvm_vmm_init)
66module_exit(kvm_vmm_exit) 95module_exit(kvm_vmm_exit)
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
index c1d7251a1480..3ef1a017a318 100644
--- a/arch/ia64/kvm/vmm_ivt.S
+++ b/arch/ia64/kvm/vmm_ivt.S
@@ -1,5 +1,5 @@
1/* 1/*
2 * /ia64/kvm_ivt.S 2 * arch/ia64/kvm/vmm_ivt.S
3 * 3 *
4 * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co 4 * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
5 * Stephane Eranian <eranian@hpl.hp.com> 5 * Stephane Eranian <eranian@hpl.hp.com>
@@ -70,32 +70,39 @@
70# define PSR_DEFAULT_BITS 0 70# define PSR_DEFAULT_BITS 0
71#endif 71#endif
72 72
73
74#define KVM_FAULT(n) \ 73#define KVM_FAULT(n) \
75 kvm_fault_##n:; \ 74 kvm_fault_##n:; \
76 mov r19=n;; \ 75 mov r19=n;; \
77 br.sptk.many kvm_fault_##n; \ 76 br.sptk.many kvm_vmm_panic; \
78 ;; \ 77 ;; \
79
80 78
81#define KVM_REFLECT(n) \ 79#define KVM_REFLECT(n) \
82 mov r31=pr; \ 80 mov r31=pr; \
83 mov r19=n; /* prepare to save predicates */ \ 81 mov r19=n; /* prepare to save predicates */ \
84 mov r29=cr.ipsr; \ 82 mov r29=cr.ipsr; \
85 ;; \ 83 ;; \
86 tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \ 84 tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \
87(p7)br.sptk.many kvm_dispatch_reflection; \ 85(p7) br.sptk.many kvm_dispatch_reflection; \
88 br.sptk.many kvm_panic; \ 86 br.sptk.many kvm_vmm_panic; \
89 87
90 88GLOBAL_ENTRY(kvm_vmm_panic)
91GLOBAL_ENTRY(kvm_panic) 89 KVM_SAVE_MIN_WITH_COVER_R19
92 br.sptk.many kvm_panic 90 alloc r14=ar.pfs,0,0,1,0
93 ;; 91 mov out0=r15
94END(kvm_panic) 92 adds r3=8,r2 // set up second base pointer
95 93 ;;
96 94 ssm psr.ic
97 95 ;;
98 96 srlz.i // guarantee that interruption collection is on
97 ;;
98 //(p15) ssm psr.i // restore psr.i
99 addl r14=@gprel(ia64_leave_hypervisor),gp
100 ;;
101 KVM_SAVE_REST
102 mov rp=r14
103 ;;
104 br.call.sptk.many b6=vmm_panic_handler;
105END(kvm_vmm_panic)
99 106
100 .section .text.ivt,"ax" 107 .section .text.ivt,"ax"
101 108
@@ -105,308 +112,307 @@ kvm_ia64_ivt:
105/////////////////////////////////////////////////////////////// 112///////////////////////////////////////////////////////////////
106// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) 113// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
107ENTRY(kvm_vhpt_miss) 114ENTRY(kvm_vhpt_miss)
108 KVM_FAULT(0) 115 KVM_FAULT(0)
109END(kvm_vhpt_miss) 116END(kvm_vhpt_miss)
110 117
111
112 .org kvm_ia64_ivt+0x400 118 .org kvm_ia64_ivt+0x400
113//////////////////////////////////////////////////////////////// 119////////////////////////////////////////////////////////////////
114// 0x0400 Entry 1 (size 64 bundles) ITLB (21) 120// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
115ENTRY(kvm_itlb_miss) 121ENTRY(kvm_itlb_miss)
116 mov r31 = pr 122 mov r31 = pr
117 mov r29=cr.ipsr; 123 mov r29=cr.ipsr;
118 ;; 124 ;;
119 tbit.z p6,p7=r29,IA64_PSR_VM_BIT; 125 tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
120 (p6) br.sptk kvm_alt_itlb_miss 126(p6) br.sptk kvm_alt_itlb_miss
121 mov r19 = 1 127 mov r19 = 1
122 br.sptk kvm_itlb_miss_dispatch 128 br.sptk kvm_itlb_miss_dispatch
123 KVM_FAULT(1); 129 KVM_FAULT(1);
124END(kvm_itlb_miss) 130END(kvm_itlb_miss)
125 131
126 .org kvm_ia64_ivt+0x0800 132 .org kvm_ia64_ivt+0x0800
127////////////////////////////////////////////////////////////////// 133//////////////////////////////////////////////////////////////////
128// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) 134// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
129ENTRY(kvm_dtlb_miss) 135ENTRY(kvm_dtlb_miss)
130 mov r31 = pr 136 mov r31 = pr
131 mov r29=cr.ipsr; 137 mov r29=cr.ipsr;
132 ;; 138 ;;
133 tbit.z p6,p7=r29,IA64_PSR_VM_BIT; 139 tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
134(p6)br.sptk kvm_alt_dtlb_miss 140(p6) br.sptk kvm_alt_dtlb_miss
135 br.sptk kvm_dtlb_miss_dispatch 141 br.sptk kvm_dtlb_miss_dispatch
136END(kvm_dtlb_miss) 142END(kvm_dtlb_miss)
137 143
138 .org kvm_ia64_ivt+0x0c00 144 .org kvm_ia64_ivt+0x0c00
139//////////////////////////////////////////////////////////////////// 145////////////////////////////////////////////////////////////////////
140// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) 146// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
141ENTRY(kvm_alt_itlb_miss) 147ENTRY(kvm_alt_itlb_miss)
142 mov r16=cr.ifa // get address that caused the TLB miss 148 mov r16=cr.ifa // get address that caused the TLB miss
143 ;; 149 ;;
144 movl r17=PAGE_KERNEL 150 movl r17=PAGE_KERNEL
145 mov r24=cr.ipsr 151 mov r24=cr.ipsr
146 movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) 152 movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
147 ;; 153 ;;
148 and r19=r19,r16 // clear ed, reserved bits, and PTE control bits 154 and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
149 ;; 155 ;;
150 or r19=r17,r19 // insert PTE control bits into r19 156 or r19=r17,r19 // insert PTE control bits into r19
151 ;; 157 ;;
152 movl r20=IA64_GRANULE_SHIFT<<2 158 movl r20=IA64_GRANULE_SHIFT<<2
153 ;; 159 ;;
154 mov cr.itir=r20 160 mov cr.itir=r20
155 ;; 161 ;;
156 itc.i r19 // insert the TLB entry 162 itc.i r19 // insert the TLB entry
157 mov pr=r31,-1 163 mov pr=r31,-1
158 rfi 164 rfi
159END(kvm_alt_itlb_miss) 165END(kvm_alt_itlb_miss)
160 166
161 .org kvm_ia64_ivt+0x1000 167 .org kvm_ia64_ivt+0x1000
162///////////////////////////////////////////////////////////////////// 168/////////////////////////////////////////////////////////////////////
163// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) 169// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
164ENTRY(kvm_alt_dtlb_miss) 170ENTRY(kvm_alt_dtlb_miss)
165 mov r16=cr.ifa // get address that caused the TLB miss 171 mov r16=cr.ifa // get address that caused the TLB miss
166 ;; 172 ;;
167 movl r17=PAGE_KERNEL 173 movl r17=PAGE_KERNEL
168 movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) 174 movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
169 mov r24=cr.ipsr 175 mov r24=cr.ipsr
170 ;; 176 ;;
171 and r19=r19,r16 // clear ed, reserved bits, and PTE control bits 177 and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
172 ;; 178 ;;
173 or r19=r19,r17 // insert PTE control bits into r19 179 or r19=r19,r17 // insert PTE control bits into r19
174 ;; 180 ;;
175 movl r20=IA64_GRANULE_SHIFT<<2 181 movl r20=IA64_GRANULE_SHIFT<<2
176 ;; 182 ;;
177 mov cr.itir=r20 183 mov cr.itir=r20
178 ;; 184 ;;
179 itc.d r19 // insert the TLB entry 185 itc.d r19 // insert the TLB entry
180 mov pr=r31,-1 186 mov pr=r31,-1
181 rfi 187 rfi
182END(kvm_alt_dtlb_miss) 188END(kvm_alt_dtlb_miss)
183 189
184 .org kvm_ia64_ivt+0x1400 190 .org kvm_ia64_ivt+0x1400
185////////////////////////////////////////////////////////////////////// 191//////////////////////////////////////////////////////////////////////
186// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) 192// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
187ENTRY(kvm_nested_dtlb_miss) 193ENTRY(kvm_nested_dtlb_miss)
188 KVM_FAULT(5) 194 KVM_FAULT(5)
189END(kvm_nested_dtlb_miss) 195END(kvm_nested_dtlb_miss)
190 196
191 .org kvm_ia64_ivt+0x1800 197 .org kvm_ia64_ivt+0x1800
192///////////////////////////////////////////////////////////////////// 198/////////////////////////////////////////////////////////////////////
193// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) 199// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
194ENTRY(kvm_ikey_miss) 200ENTRY(kvm_ikey_miss)
195 KVM_REFLECT(6) 201 KVM_REFLECT(6)
196END(kvm_ikey_miss) 202END(kvm_ikey_miss)
197 203
198 .org kvm_ia64_ivt+0x1c00 204 .org kvm_ia64_ivt+0x1c00
199///////////////////////////////////////////////////////////////////// 205/////////////////////////////////////////////////////////////////////
200// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) 206// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
201ENTRY(kvm_dkey_miss) 207ENTRY(kvm_dkey_miss)
202 KVM_REFLECT(7) 208 KVM_REFLECT(7)
203END(kvm_dkey_miss) 209END(kvm_dkey_miss)
204 210
205 .org kvm_ia64_ivt+0x2000 211 .org kvm_ia64_ivt+0x2000
206//////////////////////////////////////////////////////////////////// 212////////////////////////////////////////////////////////////////////
207// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) 213// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
208ENTRY(kvm_dirty_bit) 214ENTRY(kvm_dirty_bit)
209 KVM_REFLECT(8) 215 KVM_REFLECT(8)
210END(kvm_dirty_bit) 216END(kvm_dirty_bit)
211 217
212 .org kvm_ia64_ivt+0x2400 218 .org kvm_ia64_ivt+0x2400
213//////////////////////////////////////////////////////////////////// 219////////////////////////////////////////////////////////////////////
214// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) 220// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
215ENTRY(kvm_iaccess_bit) 221ENTRY(kvm_iaccess_bit)
216 KVM_REFLECT(9) 222 KVM_REFLECT(9)
217END(kvm_iaccess_bit) 223END(kvm_iaccess_bit)
218 224
219 .org kvm_ia64_ivt+0x2800 225 .org kvm_ia64_ivt+0x2800
220/////////////////////////////////////////////////////////////////// 226///////////////////////////////////////////////////////////////////
221// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) 227// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
222ENTRY(kvm_daccess_bit) 228ENTRY(kvm_daccess_bit)
223 KVM_REFLECT(10) 229 KVM_REFLECT(10)
224END(kvm_daccess_bit) 230END(kvm_daccess_bit)
225 231
226 .org kvm_ia64_ivt+0x2c00 232 .org kvm_ia64_ivt+0x2c00
227///////////////////////////////////////////////////////////////// 233/////////////////////////////////////////////////////////////////
228// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) 234// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
229ENTRY(kvm_break_fault) 235ENTRY(kvm_break_fault)
230 mov r31=pr 236 mov r31=pr
231 mov r19=11 237 mov r19=11
232 mov r29=cr.ipsr 238 mov r29=cr.ipsr
233 ;; 239 ;;
234 KVM_SAVE_MIN_WITH_COVER_R19 240 KVM_SAVE_MIN_WITH_COVER_R19
235 ;; 241 ;;
236 alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!) 242 alloc r14=ar.pfs,0,0,4,0 //(must be first in insn group!)
237 mov out0=cr.ifa 243 mov out0=cr.ifa
238 mov out2=cr.isr // FIXME: pity to make this slow access twice 244 mov out2=cr.isr // FIXME: pity to make this slow access twice
239 mov out3=cr.iim // FIXME: pity to make this slow access twice 245 mov out3=cr.iim // FIXME: pity to make this slow access twice
240 adds r3=8,r2 // set up second base pointer 246 adds r3=8,r2 // set up second base pointer
241 ;; 247 ;;
242 ssm psr.ic 248 ssm psr.ic
243 ;; 249 ;;
244 srlz.i // guarantee that interruption collection is on 250 srlz.i // guarantee that interruption collection is on
245 ;; 251 ;;
246 //(p15)ssm psr.i // restore psr.i 252 //(p15)ssm psr.i // restore psr.i
247 addl r14=@gprel(ia64_leave_hypervisor),gp 253 addl r14=@gprel(ia64_leave_hypervisor),gp
248 ;; 254 ;;
249 KVM_SAVE_REST 255 KVM_SAVE_REST
250 mov rp=r14 256 mov rp=r14
251 ;; 257 ;;
252 adds out1=16,sp 258 adds out1=16,sp
253 br.call.sptk.many b6=kvm_ia64_handle_break 259 br.call.sptk.many b6=kvm_ia64_handle_break
254 ;; 260 ;;
255END(kvm_break_fault) 261END(kvm_break_fault)
256 262
257 .org kvm_ia64_ivt+0x3000 263 .org kvm_ia64_ivt+0x3000
258///////////////////////////////////////////////////////////////// 264/////////////////////////////////////////////////////////////////
259// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) 265// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
260ENTRY(kvm_interrupt) 266ENTRY(kvm_interrupt)
261 mov r31=pr // prepare to save predicates 267 mov r31=pr // prepare to save predicates
262 mov r19=12 268 mov r19=12
263 mov r29=cr.ipsr 269 mov r29=cr.ipsr
264 ;; 270 ;;
265 tbit.z p6,p7=r29,IA64_PSR_VM_BIT 271 tbit.z p6,p7=r29,IA64_PSR_VM_BIT
266 tbit.z p0,p15=r29,IA64_PSR_I_BIT 272 tbit.z p0,p15=r29,IA64_PSR_I_BIT
267 ;; 273 ;;
268(p7) br.sptk kvm_dispatch_interrupt 274(p7) br.sptk kvm_dispatch_interrupt
269 ;; 275 ;;
270 mov r27=ar.rsc /* M */ 276 mov r27=ar.rsc /* M */
271 mov r20=r1 /* A */ 277 mov r20=r1 /* A */
272 mov r25=ar.unat /* M */ 278 mov r25=ar.unat /* M */
273 mov r26=ar.pfs /* I */ 279 mov r26=ar.pfs /* I */
274 mov r28=cr.iip /* M */ 280 mov r28=cr.iip /* M */
275 cover /* B (or nothing) */ 281 cover /* B (or nothing) */
276 ;; 282 ;;
277 mov r1=sp 283 mov r1=sp
278 ;; 284 ;;
279 invala /* M */ 285 invala /* M */
280 mov r30=cr.ifs 286 mov r30=cr.ifs
281 ;; 287 ;;
282 addl r1=-VMM_PT_REGS_SIZE,r1 288 addl r1=-VMM_PT_REGS_SIZE,r1
283 ;; 289 ;;
284 adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */ 290 adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */
285 adds r16=PT(CR_IPSR),r1 291 adds r16=PT(CR_IPSR),r1
286 ;; 292 ;;
287 lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES 293 lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES
288 st8 [r16]=r29 /* save cr.ipsr */ 294 st8 [r16]=r29 /* save cr.ipsr */
289 ;; 295 ;;
290 lfetch.fault.excl.nt1 [r17] 296 lfetch.fault.excl.nt1 [r17]
291 mov r29=b0 297 mov r29=b0
292 ;; 298 ;;
293 adds r16=PT(R8),r1 /* initialize first base pointer */ 299 adds r16=PT(R8),r1 /* initialize first base pointer */
294 adds r17=PT(R9),r1 /* initialize second base pointer */ 300 adds r17=PT(R9),r1 /* initialize second base pointer */
295 mov r18=r0 /* make sure r18 isn't NaT */ 301 mov r18=r0 /* make sure r18 isn't NaT */
296 ;; 302 ;;
297.mem.offset 0,0; st8.spill [r16]=r8,16 303.mem.offset 0,0; st8.spill [r16]=r8,16
298.mem.offset 8,0; st8.spill [r17]=r9,16 304.mem.offset 8,0; st8.spill [r17]=r9,16
299 ;; 305 ;;
300.mem.offset 0,0; st8.spill [r16]=r10,24 306.mem.offset 0,0; st8.spill [r16]=r10,24
301.mem.offset 8,0; st8.spill [r17]=r11,24 307.mem.offset 8,0; st8.spill [r17]=r11,24
302 ;; 308 ;;
303 st8 [r16]=r28,16 /* save cr.iip */ 309 st8 [r16]=r28,16 /* save cr.iip */
304 st8 [r17]=r30,16 /* save cr.ifs */ 310 st8 [r17]=r30,16 /* save cr.ifs */
305 mov r8=ar.fpsr /* M */ 311 mov r8=ar.fpsr /* M */
306 mov r9=ar.csd 312 mov r9=ar.csd
307 mov r10=ar.ssd 313 mov r10=ar.ssd
308 movl r11=FPSR_DEFAULT /* L-unit */ 314 movl r11=FPSR_DEFAULT /* L-unit */
309 ;; 315 ;;
310 st8 [r16]=r25,16 /* save ar.unat */ 316 st8 [r16]=r25,16 /* save ar.unat */
311 st8 [r17]=r26,16 /* save ar.pfs */ 317 st8 [r17]=r26,16 /* save ar.pfs */
312 shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */ 318 shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */
313 ;; 319 ;;
314 st8 [r16]=r27,16 /* save ar.rsc */ 320 st8 [r16]=r27,16 /* save ar.rsc */
315 adds r17=16,r17 /* skip over ar_rnat field */ 321 adds r17=16,r17 /* skip over ar_rnat field */
316 ;; 322 ;;
317 st8 [r17]=r31,16 /* save predicates */ 323 st8 [r17]=r31,16 /* save predicates */
318 adds r16=16,r16 /* skip over ar_bspstore field */ 324 adds r16=16,r16 /* skip over ar_bspstore field */
319 ;; 325 ;;
320 st8 [r16]=r29,16 /* save b0 */ 326 st8 [r16]=r29,16 /* save b0 */
321 st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */ 327 st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */
322 ;; 328 ;;
323.mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */ 329.mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */
324.mem.offset 8,0; st8.spill [r17]=r12,16 330.mem.offset 8,0; st8.spill [r17]=r12,16
325 adds r12=-16,r1 331 adds r12=-16,r1
326 /* switch to kernel memory stack (with 16 bytes of scratch) */ 332 /* switch to kernel memory stack (with 16 bytes of scratch) */
327 ;; 333 ;;
328.mem.offset 0,0; st8.spill [r16]=r13,16 334.mem.offset 0,0; st8.spill [r16]=r13,16
329.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */ 335.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */
330 ;; 336 ;;
331.mem.offset 0,0; st8.spill [r16]=r15,16 337.mem.offset 0,0; st8.spill [r16]=r15,16
332.mem.offset 8,0; st8.spill [r17]=r14,16 338.mem.offset 8,0; st8.spill [r17]=r14,16
333 dep r14=-1,r0,60,4 339 dep r14=-1,r0,60,4
334 ;; 340 ;;
335.mem.offset 0,0; st8.spill [r16]=r2,16 341.mem.offset 0,0; st8.spill [r16]=r2,16
336.mem.offset 8,0; st8.spill [r17]=r3,16 342.mem.offset 8,0; st8.spill [r17]=r3,16
337 adds r2=VMM_PT_REGS_R16_OFFSET,r1 343 adds r2=VMM_PT_REGS_R16_OFFSET,r1
338 adds r14 = VMM_VCPU_GP_OFFSET,r13 344 adds r14 = VMM_VCPU_GP_OFFSET,r13
339 ;; 345 ;;
340 mov r8=ar.ccv 346 mov r8=ar.ccv
341 ld8 r14 = [r14] 347 ld8 r14 = [r14]
342 ;; 348 ;;
343 mov r1=r14 /* establish kernel global pointer */ 349 mov r1=r14 /* establish kernel global pointer */
344 ;; \ 350 ;; \
345 bsw.1 351 bsw.1
346 ;; 352 ;;
347 alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group 353 alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
348 mov out0=r13 354 mov out0=r13
349 ;; 355 ;;
350 ssm psr.ic 356 ssm psr.ic
351 ;; 357 ;;
352 srlz.i 358 srlz.i
353 ;; 359 ;;
354 //(p15) ssm psr.i 360 //(p15) ssm psr.i
355 adds r3=8,r2 // set up second base pointer for SAVE_REST 361 adds r3=8,r2 // set up second base pointer for SAVE_REST
356 srlz.i // ensure everybody knows psr.ic is back on 362 srlz.i // ensure everybody knows psr.ic is back on
357 ;; 363 ;;
358.mem.offset 0,0; st8.spill [r2]=r16,16 364.mem.offset 0,0; st8.spill [r2]=r16,16
359.mem.offset 8,0; st8.spill [r3]=r17,16 365.mem.offset 8,0; st8.spill [r3]=r17,16
360 ;; 366 ;;
361.mem.offset 0,0; st8.spill [r2]=r18,16 367.mem.offset 0,0; st8.spill [r2]=r18,16
362.mem.offset 8,0; st8.spill [r3]=r19,16 368.mem.offset 8,0; st8.spill [r3]=r19,16
363 ;; 369 ;;
364.mem.offset 0,0; st8.spill [r2]=r20,16 370.mem.offset 0,0; st8.spill [r2]=r20,16
365.mem.offset 8,0; st8.spill [r3]=r21,16 371.mem.offset 8,0; st8.spill [r3]=r21,16
366 mov r18=b6 372 mov r18=b6
367 ;; 373 ;;
368.mem.offset 0,0; st8.spill [r2]=r22,16 374.mem.offset 0,0; st8.spill [r2]=r22,16
369.mem.offset 8,0; st8.spill [r3]=r23,16 375.mem.offset 8,0; st8.spill [r3]=r23,16
370 mov r19=b7 376 mov r19=b7
371 ;; 377 ;;
372.mem.offset 0,0; st8.spill [r2]=r24,16 378.mem.offset 0,0; st8.spill [r2]=r24,16
373.mem.offset 8,0; st8.spill [r3]=r25,16 379.mem.offset 8,0; st8.spill [r3]=r25,16
374 ;; 380 ;;
375.mem.offset 0,0; st8.spill [r2]=r26,16 381.mem.offset 0,0; st8.spill [r2]=r26,16
376.mem.offset 8,0; st8.spill [r3]=r27,16 382.mem.offset 8,0; st8.spill [r3]=r27,16
377 ;; 383 ;;
378.mem.offset 0,0; st8.spill [r2]=r28,16 384.mem.offset 0,0; st8.spill [r2]=r28,16
379.mem.offset 8,0; st8.spill [r3]=r29,16 385.mem.offset 8,0; st8.spill [r3]=r29,16
380 ;; 386 ;;
381.mem.offset 0,0; st8.spill [r2]=r30,16 387.mem.offset 0,0; st8.spill [r2]=r30,16
382.mem.offset 8,0; st8.spill [r3]=r31,32 388.mem.offset 8,0; st8.spill [r3]=r31,32
383 ;; 389 ;;
384 mov ar.fpsr=r11 /* M-unit */ 390 mov ar.fpsr=r11 /* M-unit */
385 st8 [r2]=r8,8 /* ar.ccv */ 391 st8 [r2]=r8,8 /* ar.ccv */
386 adds r24=PT(B6)-PT(F7),r3 392 adds r24=PT(B6)-PT(F7),r3
387 ;; 393 ;;
388 stf.spill [r2]=f6,32 394 stf.spill [r2]=f6,32
389 stf.spill [r3]=f7,32 395 stf.spill [r3]=f7,32
390 ;; 396 ;;
391 stf.spill [r2]=f8,32 397 stf.spill [r2]=f8,32
392 stf.spill [r3]=f9,32 398 stf.spill [r3]=f9,32
393 ;; 399 ;;
394 stf.spill [r2]=f10 400 stf.spill [r2]=f10
395 stf.spill [r3]=f11 401 stf.spill [r3]=f11
396 adds r25=PT(B7)-PT(F11),r3 402 adds r25=PT(B7)-PT(F11),r3
397 ;; 403 ;;
398 st8 [r24]=r18,16 /* b6 */ 404 st8 [r24]=r18,16 /* b6 */
399 st8 [r25]=r19,16 /* b7 */ 405 st8 [r25]=r19,16 /* b7 */
400 ;; 406 ;;
401 st8 [r24]=r9 /* ar.csd */ 407 st8 [r24]=r9 /* ar.csd */
402 st8 [r25]=r10 /* ar.ssd */ 408 st8 [r25]=r10 /* ar.ssd */
403 ;; 409 ;;
404 srlz.d // make sure we see the effect of cr.ivr 410 srlz.d // make sure we see the effect of cr.ivr
405 addl r14=@gprel(ia64_leave_nested),gp 411 addl r14=@gprel(ia64_leave_nested),gp
406 ;; 412 ;;
407 mov rp=r14 413 mov rp=r14
408 br.call.sptk.many b6=kvm_ia64_handle_irq 414 br.call.sptk.many b6=kvm_ia64_handle_irq
409 ;; 415 ;;
410END(kvm_interrupt) 416END(kvm_interrupt)
411 417
412 .global kvm_dispatch_vexirq 418 .global kvm_dispatch_vexirq
@@ -414,387 +420,385 @@ END(kvm_interrupt)
414////////////////////////////////////////////////////////////////////// 420//////////////////////////////////////////////////////////////////////
415// 0x3400 Entry 13 (size 64 bundles) Reserved 421// 0x3400 Entry 13 (size 64 bundles) Reserved
416ENTRY(kvm_virtual_exirq) 422ENTRY(kvm_virtual_exirq)
417 mov r31=pr 423 mov r31=pr
418 mov r19=13 424 mov r19=13
419 mov r30 =r0 425 mov r30 =r0
420 ;; 426 ;;
421kvm_dispatch_vexirq: 427kvm_dispatch_vexirq:
422 cmp.eq p6,p0 = 1,r30 428 cmp.eq p6,p0 = 1,r30
423 ;; 429 ;;
424(p6)add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21 430(p6) add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21
425 ;; 431 ;;
426(p6)ld8 r1 = [r29] 432(p6) ld8 r1 = [r29]
427 ;; 433 ;;
428 KVM_SAVE_MIN_WITH_COVER_R19 434 KVM_SAVE_MIN_WITH_COVER_R19
429 alloc r14=ar.pfs,0,0,1,0 435 alloc r14=ar.pfs,0,0,1,0
430 mov out0=r13 436 mov out0=r13
431 437
432 ssm psr.ic 438 ssm psr.ic
433 ;; 439 ;;
434 srlz.i // guarantee that interruption collection is on 440 srlz.i // guarantee that interruption collection is on
435 ;; 441 ;;
436 //(p15) ssm psr.i // restore psr.i 442 //(p15) ssm psr.i // restore psr.i
437 adds r3=8,r2 // set up second base pointer 443 adds r3=8,r2 // set up second base pointer
438 ;; 444 ;;
439 KVM_SAVE_REST 445 KVM_SAVE_REST
440 addl r14=@gprel(ia64_leave_hypervisor),gp 446 addl r14=@gprel(ia64_leave_hypervisor),gp
441 ;; 447 ;;
442 mov rp=r14 448 mov rp=r14
443 br.call.sptk.many b6=kvm_vexirq 449 br.call.sptk.many b6=kvm_vexirq
444END(kvm_virtual_exirq) 450END(kvm_virtual_exirq)
445 451
446 .org kvm_ia64_ivt+0x3800 452 .org kvm_ia64_ivt+0x3800
447///////////////////////////////////////////////////////////////////// 453/////////////////////////////////////////////////////////////////////
448// 0x3800 Entry 14 (size 64 bundles) Reserved 454// 0x3800 Entry 14 (size 64 bundles) Reserved
449 KVM_FAULT(14) 455 KVM_FAULT(14)
450 // this code segment is from 2.6.16.13 456 // this code segment is from 2.6.16.13
451
452 457
453 .org kvm_ia64_ivt+0x3c00 458 .org kvm_ia64_ivt+0x3c00
454/////////////////////////////////////////////////////////////////////// 459///////////////////////////////////////////////////////////////////////
455// 0x3c00 Entry 15 (size 64 bundles) Reserved 460// 0x3c00 Entry 15 (size 64 bundles) Reserved
456 KVM_FAULT(15) 461 KVM_FAULT(15)
457
458 462
459 .org kvm_ia64_ivt+0x4000 463 .org kvm_ia64_ivt+0x4000
460/////////////////////////////////////////////////////////////////////// 464///////////////////////////////////////////////////////////////////////
461// 0x4000 Entry 16 (size 64 bundles) Reserved 465// 0x4000 Entry 16 (size 64 bundles) Reserved
462 KVM_FAULT(16) 466 KVM_FAULT(16)
463 467
464 .org kvm_ia64_ivt+0x4400 468 .org kvm_ia64_ivt+0x4400
465////////////////////////////////////////////////////////////////////// 469//////////////////////////////////////////////////////////////////////
466// 0x4400 Entry 17 (size 64 bundles) Reserved 470// 0x4400 Entry 17 (size 64 bundles) Reserved
467 KVM_FAULT(17) 471 KVM_FAULT(17)
468 472
469 .org kvm_ia64_ivt+0x4800 473 .org kvm_ia64_ivt+0x4800
470////////////////////////////////////////////////////////////////////// 474//////////////////////////////////////////////////////////////////////
471// 0x4800 Entry 18 (size 64 bundles) Reserved 475// 0x4800 Entry 18 (size 64 bundles) Reserved
472 KVM_FAULT(18) 476 KVM_FAULT(18)
473 477
474 .org kvm_ia64_ivt+0x4c00 478 .org kvm_ia64_ivt+0x4c00
475////////////////////////////////////////////////////////////////////// 479//////////////////////////////////////////////////////////////////////
476// 0x4c00 Entry 19 (size 64 bundles) Reserved 480// 0x4c00 Entry 19 (size 64 bundles) Reserved
477 KVM_FAULT(19) 481 KVM_FAULT(19)
478 482
479 .org kvm_ia64_ivt+0x5000 483 .org kvm_ia64_ivt+0x5000
480////////////////////////////////////////////////////////////////////// 484//////////////////////////////////////////////////////////////////////
481// 0x5000 Entry 20 (size 16 bundles) Page Not Present 485// 0x5000 Entry 20 (size 16 bundles) Page Not Present
482ENTRY(kvm_page_not_present) 486ENTRY(kvm_page_not_present)
483 KVM_REFLECT(20) 487 KVM_REFLECT(20)
484END(kvm_page_not_present) 488END(kvm_page_not_present)
485 489
486 .org kvm_ia64_ivt+0x5100 490 .org kvm_ia64_ivt+0x5100
487/////////////////////////////////////////////////////////////////////// 491///////////////////////////////////////////////////////////////////////
488// 0x5100 Entry 21 (size 16 bundles) Key Permission vector 492// 0x5100 Entry 21 (size 16 bundles) Key Permission vector
489ENTRY(kvm_key_permission) 493ENTRY(kvm_key_permission)
490 KVM_REFLECT(21) 494 KVM_REFLECT(21)
491END(kvm_key_permission) 495END(kvm_key_permission)
492 496
493 .org kvm_ia64_ivt+0x5200 497 .org kvm_ia64_ivt+0x5200
494////////////////////////////////////////////////////////////////////// 498//////////////////////////////////////////////////////////////////////
495// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) 499// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
496ENTRY(kvm_iaccess_rights) 500ENTRY(kvm_iaccess_rights)
497 KVM_REFLECT(22) 501 KVM_REFLECT(22)
498END(kvm_iaccess_rights) 502END(kvm_iaccess_rights)
499 503
500 .org kvm_ia64_ivt+0x5300 504 .org kvm_ia64_ivt+0x5300
501////////////////////////////////////////////////////////////////////// 505//////////////////////////////////////////////////////////////////////
502// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) 506// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
503ENTRY(kvm_daccess_rights) 507ENTRY(kvm_daccess_rights)
504 KVM_REFLECT(23) 508 KVM_REFLECT(23)
505END(kvm_daccess_rights) 509END(kvm_daccess_rights)
506 510
507 .org kvm_ia64_ivt+0x5400 511 .org kvm_ia64_ivt+0x5400
508///////////////////////////////////////////////////////////////////// 512/////////////////////////////////////////////////////////////////////
509// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) 513// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
510ENTRY(kvm_general_exception) 514ENTRY(kvm_general_exception)
511 KVM_REFLECT(24) 515 KVM_REFLECT(24)
512 KVM_FAULT(24) 516 KVM_FAULT(24)
513END(kvm_general_exception) 517END(kvm_general_exception)
514 518
515 .org kvm_ia64_ivt+0x5500 519 .org kvm_ia64_ivt+0x5500
516////////////////////////////////////////////////////////////////////// 520//////////////////////////////////////////////////////////////////////
517// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) 521// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
518ENTRY(kvm_disabled_fp_reg) 522ENTRY(kvm_disabled_fp_reg)
519 KVM_REFLECT(25) 523 KVM_REFLECT(25)
520END(kvm_disabled_fp_reg) 524END(kvm_disabled_fp_reg)
521 525
522 .org kvm_ia64_ivt+0x5600 526 .org kvm_ia64_ivt+0x5600
523//////////////////////////////////////////////////////////////////// 527////////////////////////////////////////////////////////////////////
524// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) 528// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
525ENTRY(kvm_nat_consumption) 529ENTRY(kvm_nat_consumption)
526 KVM_REFLECT(26) 530 KVM_REFLECT(26)
527END(kvm_nat_consumption) 531END(kvm_nat_consumption)
528 532
529 .org kvm_ia64_ivt+0x5700 533 .org kvm_ia64_ivt+0x5700
530///////////////////////////////////////////////////////////////////// 534/////////////////////////////////////////////////////////////////////
531// 0x5700 Entry 27 (size 16 bundles) Speculation (40) 535// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
532ENTRY(kvm_speculation_vector) 536ENTRY(kvm_speculation_vector)
533 KVM_REFLECT(27) 537 KVM_REFLECT(27)
534END(kvm_speculation_vector) 538END(kvm_speculation_vector)
535 539
536 .org kvm_ia64_ivt+0x5800 540 .org kvm_ia64_ivt+0x5800
537///////////////////////////////////////////////////////////////////// 541/////////////////////////////////////////////////////////////////////
538// 0x5800 Entry 28 (size 16 bundles) Reserved 542// 0x5800 Entry 28 (size 16 bundles) Reserved
539 KVM_FAULT(28) 543 KVM_FAULT(28)
540 544
541 .org kvm_ia64_ivt+0x5900 545 .org kvm_ia64_ivt+0x5900
542/////////////////////////////////////////////////////////////////// 546///////////////////////////////////////////////////////////////////
543// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) 547// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
544ENTRY(kvm_debug_vector) 548ENTRY(kvm_debug_vector)
545 KVM_FAULT(29) 549 KVM_FAULT(29)
546END(kvm_debug_vector) 550END(kvm_debug_vector)
547 551
548 .org kvm_ia64_ivt+0x5a00 552 .org kvm_ia64_ivt+0x5a00
549/////////////////////////////////////////////////////////////// 553///////////////////////////////////////////////////////////////
550// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) 554// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
551ENTRY(kvm_unaligned_access) 555ENTRY(kvm_unaligned_access)
552 KVM_REFLECT(30) 556 KVM_REFLECT(30)
553END(kvm_unaligned_access) 557END(kvm_unaligned_access)
554 558
555 .org kvm_ia64_ivt+0x5b00 559 .org kvm_ia64_ivt+0x5b00
556////////////////////////////////////////////////////////////////////// 560//////////////////////////////////////////////////////////////////////
557// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) 561// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
558ENTRY(kvm_unsupported_data_reference) 562ENTRY(kvm_unsupported_data_reference)
559 KVM_REFLECT(31) 563 KVM_REFLECT(31)
560END(kvm_unsupported_data_reference) 564END(kvm_unsupported_data_reference)
561 565
562 .org kvm_ia64_ivt+0x5c00 566 .org kvm_ia64_ivt+0x5c00
563//////////////////////////////////////////////////////////////////// 567////////////////////////////////////////////////////////////////////
564// 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65) 568// 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65)
565ENTRY(kvm_floating_point_fault) 569ENTRY(kvm_floating_point_fault)
566 KVM_REFLECT(32) 570 KVM_REFLECT(32)
567END(kvm_floating_point_fault) 571END(kvm_floating_point_fault)
568 572
569 .org kvm_ia64_ivt+0x5d00 573 .org kvm_ia64_ivt+0x5d00
570///////////////////////////////////////////////////////////////////// 574/////////////////////////////////////////////////////////////////////
571// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) 575// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
572ENTRY(kvm_floating_point_trap) 576ENTRY(kvm_floating_point_trap)
573 KVM_REFLECT(33) 577 KVM_REFLECT(33)
574END(kvm_floating_point_trap) 578END(kvm_floating_point_trap)
575 579
576 .org kvm_ia64_ivt+0x5e00 580 .org kvm_ia64_ivt+0x5e00
577////////////////////////////////////////////////////////////////////// 581//////////////////////////////////////////////////////////////////////
578// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) 582// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
579ENTRY(kvm_lower_privilege_trap) 583ENTRY(kvm_lower_privilege_trap)
580 KVM_REFLECT(34) 584 KVM_REFLECT(34)
581END(kvm_lower_privilege_trap) 585END(kvm_lower_privilege_trap)
582 586
583 .org kvm_ia64_ivt+0x5f00 587 .org kvm_ia64_ivt+0x5f00
584////////////////////////////////////////////////////////////////////// 588//////////////////////////////////////////////////////////////////////
585// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) 589// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
586ENTRY(kvm_taken_branch_trap) 590ENTRY(kvm_taken_branch_trap)
587 KVM_REFLECT(35) 591 KVM_REFLECT(35)
588END(kvm_taken_branch_trap) 592END(kvm_taken_branch_trap)
589 593
590 .org kvm_ia64_ivt+0x6000 594 .org kvm_ia64_ivt+0x6000
591//////////////////////////////////////////////////////////////////// 595////////////////////////////////////////////////////////////////////
592// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) 596// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
593ENTRY(kvm_single_step_trap) 597ENTRY(kvm_single_step_trap)
594 KVM_REFLECT(36) 598 KVM_REFLECT(36)
595END(kvm_single_step_trap) 599END(kvm_single_step_trap)
596 .global kvm_virtualization_fault_back 600 .global kvm_virtualization_fault_back
597 .org kvm_ia64_ivt+0x6100 601 .org kvm_ia64_ivt+0x6100
598///////////////////////////////////////////////////////////////////// 602/////////////////////////////////////////////////////////////////////
599// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault 603// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault
600ENTRY(kvm_virtualization_fault) 604ENTRY(kvm_virtualization_fault)
601 mov r31=pr 605 mov r31=pr
602 adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 606 adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
603 ;; 607 ;;
604 st8 [r16] = r1 608 st8 [r16] = r1
605 adds r17 = VMM_VCPU_GP_OFFSET, r21 609 adds r17 = VMM_VCPU_GP_OFFSET, r21
606 ;; 610 ;;
607 ld8 r1 = [r17] 611 ld8 r1 = [r17]
608 cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24 612 cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24
609 cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24 613 cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24
610 cmp.eq p8,p0=EVENT_MOV_TO_RR,r24 614 cmp.eq p8,p0=EVENT_MOV_TO_RR,r24
611 cmp.eq p9,p0=EVENT_RSM,r24 615 cmp.eq p9,p0=EVENT_RSM,r24
612 cmp.eq p10,p0=EVENT_SSM,r24 616 cmp.eq p10,p0=EVENT_SSM,r24
613 cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24 617 cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24
614 cmp.eq p12,p0=EVENT_THASH,r24 618 cmp.eq p12,p0=EVENT_THASH,r24
615 (p6) br.dptk.many kvm_asm_mov_from_ar 619(p6) br.dptk.many kvm_asm_mov_from_ar
616 (p7) br.dptk.many kvm_asm_mov_from_rr 620(p7) br.dptk.many kvm_asm_mov_from_rr
617 (p8) br.dptk.many kvm_asm_mov_to_rr 621(p8) br.dptk.many kvm_asm_mov_to_rr
618 (p9) br.dptk.many kvm_asm_rsm 622(p9) br.dptk.many kvm_asm_rsm
619 (p10) br.dptk.many kvm_asm_ssm 623(p10) br.dptk.many kvm_asm_ssm
620 (p11) br.dptk.many kvm_asm_mov_to_psr 624(p11) br.dptk.many kvm_asm_mov_to_psr
621 (p12) br.dptk.many kvm_asm_thash 625(p12) br.dptk.many kvm_asm_thash
622 ;; 626 ;;
623kvm_virtualization_fault_back: 627kvm_virtualization_fault_back:
624 adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 628 adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
625 ;; 629 ;;
626 ld8 r1 = [r16] 630 ld8 r1 = [r16]
627 ;; 631 ;;
628 mov r19=37 632 mov r19=37
629 adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 633 adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
630 adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 634 adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
631 ;; 635 ;;
632 st8 [r16] = r24 636 st8 [r16] = r24
633 st8 [r17] = r25 637 st8 [r17] = r25
634 ;; 638 ;;
635 cmp.ne p6,p0=EVENT_RFI, r24 639 cmp.ne p6,p0=EVENT_RFI, r24
636 (p6) br.sptk kvm_dispatch_virtualization_fault 640(p6) br.sptk kvm_dispatch_virtualization_fault
637 ;; 641 ;;
638 adds r18=VMM_VPD_BASE_OFFSET,r21 642 adds r18=VMM_VPD_BASE_OFFSET,r21
639 ;; 643 ;;
640 ld8 r18=[r18] 644 ld8 r18=[r18]
641 ;; 645 ;;
642 adds r18=VMM_VPD_VIFS_OFFSET,r18 646 adds r18=VMM_VPD_VIFS_OFFSET,r18
643 ;; 647 ;;
644 ld8 r18=[r18] 648 ld8 r18=[r18]
645 ;; 649 ;;
646 tbit.z p6,p0=r18,63 650 tbit.z p6,p0=r18,63
647 (p6) br.sptk kvm_dispatch_virtualization_fault 651(p6) br.sptk kvm_dispatch_virtualization_fault
648 ;; 652 ;;
649 //if vifs.v=1 desert current register frame 653//if vifs.v=1 desert current register frame
650 alloc r18=ar.pfs,0,0,0,0 654 alloc r18=ar.pfs,0,0,0,0
651 br.sptk kvm_dispatch_virtualization_fault 655 br.sptk kvm_dispatch_virtualization_fault
652END(kvm_virtualization_fault) 656END(kvm_virtualization_fault)
653 657
654 .org kvm_ia64_ivt+0x6200 658 .org kvm_ia64_ivt+0x6200
655////////////////////////////////////////////////////////////// 659//////////////////////////////////////////////////////////////
656// 0x6200 Entry 38 (size 16 bundles) Reserved 660// 0x6200 Entry 38 (size 16 bundles) Reserved
657 KVM_FAULT(38) 661 KVM_FAULT(38)
658 662
659 .org kvm_ia64_ivt+0x6300 663 .org kvm_ia64_ivt+0x6300
660///////////////////////////////////////////////////////////////// 664/////////////////////////////////////////////////////////////////
661// 0x6300 Entry 39 (size 16 bundles) Reserved 665// 0x6300 Entry 39 (size 16 bundles) Reserved
662 KVM_FAULT(39) 666 KVM_FAULT(39)
663 667
664 .org kvm_ia64_ivt+0x6400 668 .org kvm_ia64_ivt+0x6400
665///////////////////////////////////////////////////////////////// 669/////////////////////////////////////////////////////////////////
666// 0x6400 Entry 40 (size 16 bundles) Reserved 670// 0x6400 Entry 40 (size 16 bundles) Reserved
667 KVM_FAULT(40) 671 KVM_FAULT(40)
668 672
669 .org kvm_ia64_ivt+0x6500 673 .org kvm_ia64_ivt+0x6500
670////////////////////////////////////////////////////////////////// 674//////////////////////////////////////////////////////////////////
671// 0x6500 Entry 41 (size 16 bundles) Reserved 675// 0x6500 Entry 41 (size 16 bundles) Reserved
672 KVM_FAULT(41) 676 KVM_FAULT(41)
673 677
674 .org kvm_ia64_ivt+0x6600 678 .org kvm_ia64_ivt+0x6600
675////////////////////////////////////////////////////////////////// 679//////////////////////////////////////////////////////////////////
676// 0x6600 Entry 42 (size 16 bundles) Reserved 680// 0x6600 Entry 42 (size 16 bundles) Reserved
677 KVM_FAULT(42) 681 KVM_FAULT(42)
678 682
679 .org kvm_ia64_ivt+0x6700 683 .org kvm_ia64_ivt+0x6700
680////////////////////////////////////////////////////////////////// 684//////////////////////////////////////////////////////////////////
681// 0x6700 Entry 43 (size 16 bundles) Reserved 685// 0x6700 Entry 43 (size 16 bundles) Reserved
682 KVM_FAULT(43) 686 KVM_FAULT(43)
683 687
684 .org kvm_ia64_ivt+0x6800 688 .org kvm_ia64_ivt+0x6800
685////////////////////////////////////////////////////////////////// 689//////////////////////////////////////////////////////////////////
686// 0x6800 Entry 44 (size 16 bundles) Reserved 690// 0x6800 Entry 44 (size 16 bundles) Reserved
687 KVM_FAULT(44) 691 KVM_FAULT(44)
688 692
689 .org kvm_ia64_ivt+0x6900 693 .org kvm_ia64_ivt+0x6900
690/////////////////////////////////////////////////////////////////// 694///////////////////////////////////////////////////////////////////
691// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception 695// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception
692//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) 696//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
693ENTRY(kvm_ia32_exception) 697ENTRY(kvm_ia32_exception)
694 KVM_FAULT(45) 698 KVM_FAULT(45)
695END(kvm_ia32_exception) 699END(kvm_ia32_exception)
696 700
697 .org kvm_ia64_ivt+0x6a00 701 .org kvm_ia64_ivt+0x6a00
698//////////////////////////////////////////////////////////////////// 702////////////////////////////////////////////////////////////////////
699// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) 703// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71)
700ENTRY(kvm_ia32_intercept) 704ENTRY(kvm_ia32_intercept)
701 KVM_FAULT(47) 705 KVM_FAULT(47)
702END(kvm_ia32_intercept) 706END(kvm_ia32_intercept)
703 707
704 .org kvm_ia64_ivt+0x6c00 708 .org kvm_ia64_ivt+0x6c00
705///////////////////////////////////////////////////////////////////// 709/////////////////////////////////////////////////////////////////////
706// 0x6c00 Entry 48 (size 16 bundles) Reserved 710// 0x6c00 Entry 48 (size 16 bundles) Reserved
707 KVM_FAULT(48) 711 KVM_FAULT(48)
708 712
709 .org kvm_ia64_ivt+0x6d00 713 .org kvm_ia64_ivt+0x6d00
710////////////////////////////////////////////////////////////////////// 714//////////////////////////////////////////////////////////////////////
711// 0x6d00 Entry 49 (size 16 bundles) Reserved 715// 0x6d00 Entry 49 (size 16 bundles) Reserved
712 KVM_FAULT(49) 716 KVM_FAULT(49)
713 717
714 .org kvm_ia64_ivt+0x6e00 718 .org kvm_ia64_ivt+0x6e00
715////////////////////////////////////////////////////////////////////// 719//////////////////////////////////////////////////////////////////////
716// 0x6e00 Entry 50 (size 16 bundles) Reserved 720// 0x6e00 Entry 50 (size 16 bundles) Reserved
717 KVM_FAULT(50) 721 KVM_FAULT(50)
718 722
719 .org kvm_ia64_ivt+0x6f00 723 .org kvm_ia64_ivt+0x6f00
720///////////////////////////////////////////////////////////////////// 724/////////////////////////////////////////////////////////////////////
721// 0x6f00 Entry 51 (size 16 bundles) Reserved 725// 0x6f00 Entry 51 (size 16 bundles) Reserved
722 KVM_FAULT(52) 726 KVM_FAULT(52)
723 727
724 .org kvm_ia64_ivt+0x7100 728 .org kvm_ia64_ivt+0x7100
725//////////////////////////////////////////////////////////////////// 729////////////////////////////////////////////////////////////////////
726// 0x7100 Entry 53 (size 16 bundles) Reserved 730// 0x7100 Entry 53 (size 16 bundles) Reserved
727 KVM_FAULT(53) 731 KVM_FAULT(53)
728 732
729 .org kvm_ia64_ivt+0x7200 733 .org kvm_ia64_ivt+0x7200
730///////////////////////////////////////////////////////////////////// 734/////////////////////////////////////////////////////////////////////
731// 0x7200 Entry 54 (size 16 bundles) Reserved 735// 0x7200 Entry 54 (size 16 bundles) Reserved
732 KVM_FAULT(54) 736 KVM_FAULT(54)
733 737
734 .org kvm_ia64_ivt+0x7300 738 .org kvm_ia64_ivt+0x7300
735//////////////////////////////////////////////////////////////////// 739////////////////////////////////////////////////////////////////////
736// 0x7300 Entry 55 (size 16 bundles) Reserved 740// 0x7300 Entry 55 (size 16 bundles) Reserved
737 KVM_FAULT(55) 741 KVM_FAULT(55)
738 742
739 .org kvm_ia64_ivt+0x7400 743 .org kvm_ia64_ivt+0x7400
740//////////////////////////////////////////////////////////////////// 744////////////////////////////////////////////////////////////////////
741// 0x7400 Entry 56 (size 16 bundles) Reserved 745// 0x7400 Entry 56 (size 16 bundles) Reserved
742 KVM_FAULT(56) 746 KVM_FAULT(56)
743 747
744 .org kvm_ia64_ivt+0x7500 748 .org kvm_ia64_ivt+0x7500
745///////////////////////////////////////////////////////////////////// 749/////////////////////////////////////////////////////////////////////
746// 0x7500 Entry 57 (size 16 bundles) Reserved 750// 0x7500 Entry 57 (size 16 bundles) Reserved
747 KVM_FAULT(57) 751 KVM_FAULT(57)
748 752
749 .org kvm_ia64_ivt+0x7600 753 .org kvm_ia64_ivt+0x7600
750///////////////////////////////////////////////////////////////////// 754/////////////////////////////////////////////////////////////////////
751// 0x7600 Entry 58 (size 16 bundles) Reserved 755// 0x7600 Entry 58 (size 16 bundles) Reserved
752 KVM_FAULT(58) 756 KVM_FAULT(58)
753 757
754 .org kvm_ia64_ivt+0x7700 758 .org kvm_ia64_ivt+0x7700
755//////////////////////////////////////////////////////////////////// 759////////////////////////////////////////////////////////////////////
756// 0x7700 Entry 59 (size 16 bundles) Reserved 760// 0x7700 Entry 59 (size 16 bundles) Reserved
757 KVM_FAULT(59) 761 KVM_FAULT(59)
758 762
759 .org kvm_ia64_ivt+0x7800 763 .org kvm_ia64_ivt+0x7800
760//////////////////////////////////////////////////////////////////// 764////////////////////////////////////////////////////////////////////
761// 0x7800 Entry 60 (size 16 bundles) Reserved 765// 0x7800 Entry 60 (size 16 bundles) Reserved
762 KVM_FAULT(60) 766 KVM_FAULT(60)
763 767
764 .org kvm_ia64_ivt+0x7900 768 .org kvm_ia64_ivt+0x7900
765///////////////////////////////////////////////////////////////////// 769/////////////////////////////////////////////////////////////////////
766// 0x7900 Entry 61 (size 16 bundles) Reserved 770// 0x7900 Entry 61 (size 16 bundles) Reserved
767 KVM_FAULT(61) 771 KVM_FAULT(61)
768 772
769 .org kvm_ia64_ivt+0x7a00 773 .org kvm_ia64_ivt+0x7a00
770///////////////////////////////////////////////////////////////////// 774/////////////////////////////////////////////////////////////////////
771// 0x7a00 Entry 62 (size 16 bundles) Reserved 775// 0x7a00 Entry 62 (size 16 bundles) Reserved
772 KVM_FAULT(62) 776 KVM_FAULT(62)
773 777
774 .org kvm_ia64_ivt+0x7b00 778 .org kvm_ia64_ivt+0x7b00
775///////////////////////////////////////////////////////////////////// 779/////////////////////////////////////////////////////////////////////
776// 0x7b00 Entry 63 (size 16 bundles) Reserved 780// 0x7b00 Entry 63 (size 16 bundles) Reserved
777 KVM_FAULT(63) 781 KVM_FAULT(63)
778 782
779 .org kvm_ia64_ivt+0x7c00 783 .org kvm_ia64_ivt+0x7c00
780//////////////////////////////////////////////////////////////////// 784////////////////////////////////////////////////////////////////////
781// 0x7c00 Entry 64 (size 16 bundles) Reserved 785// 0x7c00 Entry 64 (size 16 bundles) Reserved
782 KVM_FAULT(64) 786 KVM_FAULT(64)
783 787
784 .org kvm_ia64_ivt+0x7d00 788 .org kvm_ia64_ivt+0x7d00
785///////////////////////////////////////////////////////////////////// 789/////////////////////////////////////////////////////////////////////
786// 0x7d00 Entry 65 (size 16 bundles) Reserved 790// 0x7d00 Entry 65 (size 16 bundles) Reserved
787 KVM_FAULT(65) 791 KVM_FAULT(65)
788 792
789 .org kvm_ia64_ivt+0x7e00 793 .org kvm_ia64_ivt+0x7e00
790///////////////////////////////////////////////////////////////////// 794/////////////////////////////////////////////////////////////////////
791// 0x7e00 Entry 66 (size 16 bundles) Reserved 795// 0x7e00 Entry 66 (size 16 bundles) Reserved
792 KVM_FAULT(66) 796 KVM_FAULT(66)
793 797
794 .org kvm_ia64_ivt+0x7f00 798 .org kvm_ia64_ivt+0x7f00
795//////////////////////////////////////////////////////////////////// 799////////////////////////////////////////////////////////////////////
796// 0x7f00 Entry 67 (size 16 bundles) Reserved 800// 0x7f00 Entry 67 (size 16 bundles) Reserved
797 KVM_FAULT(67) 801 KVM_FAULT(67)
798 802
799 .org kvm_ia64_ivt+0x8000 803 .org kvm_ia64_ivt+0x8000
800// There is no particular reason for this code to be here, other than that 804// There is no particular reason for this code to be here, other than that
@@ -804,132 +808,128 @@ END(kvm_ia32_intercept)
804 808
805 809
806ENTRY(kvm_dtlb_miss_dispatch) 810ENTRY(kvm_dtlb_miss_dispatch)
807 mov r19 = 2 811 mov r19 = 2
808 KVM_SAVE_MIN_WITH_COVER_R19 812 KVM_SAVE_MIN_WITH_COVER_R19
809 alloc r14=ar.pfs,0,0,3,0 813 alloc r14=ar.pfs,0,0,3,0
810 mov out0=cr.ifa 814 mov out0=cr.ifa
811 mov out1=r15 815 mov out1=r15
812 adds r3=8,r2 // set up second base pointer 816 adds r3=8,r2 // set up second base pointer
813 ;; 817 ;;
814 ssm psr.ic 818 ssm psr.ic
815 ;; 819 ;;
816 srlz.i // guarantee that interruption collection is on 820 srlz.i // guarantee that interruption collection is on
817 ;; 821 ;;
818 //(p15) ssm psr.i // restore psr.i 822 //(p15) ssm psr.i // restore psr.i
819 addl r14=@gprel(ia64_leave_hypervisor_prepare),gp 823 addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
820 ;; 824 ;;
821 KVM_SAVE_REST 825 KVM_SAVE_REST
822 KVM_SAVE_EXTRA 826 KVM_SAVE_EXTRA
823 mov rp=r14 827 mov rp=r14
824 ;; 828 ;;
825 adds out2=16,r12 829 adds out2=16,r12
826 br.call.sptk.many b6=kvm_page_fault 830 br.call.sptk.many b6=kvm_page_fault
827END(kvm_dtlb_miss_dispatch) 831END(kvm_dtlb_miss_dispatch)
828 832
829ENTRY(kvm_itlb_miss_dispatch) 833ENTRY(kvm_itlb_miss_dispatch)
830 834
831 KVM_SAVE_MIN_WITH_COVER_R19 835 KVM_SAVE_MIN_WITH_COVER_R19
832 alloc r14=ar.pfs,0,0,3,0 836 alloc r14=ar.pfs,0,0,3,0
833 mov out0=cr.ifa 837 mov out0=cr.ifa
834 mov out1=r15 838 mov out1=r15
835 adds r3=8,r2 // set up second base pointer 839 adds r3=8,r2 // set up second base pointer
836 ;; 840 ;;
837 ssm psr.ic 841 ssm psr.ic
838 ;; 842 ;;
839 srlz.i // guarantee that interruption collection is on 843 srlz.i // guarantee that interruption collection is on
840 ;; 844 ;;
841 //(p15) ssm psr.i // restore psr.i 845 //(p15) ssm psr.i // restore psr.i
842 addl r14=@gprel(ia64_leave_hypervisor),gp 846 addl r14=@gprel(ia64_leave_hypervisor),gp
843 ;; 847 ;;
844 KVM_SAVE_REST 848 KVM_SAVE_REST
845 mov rp=r14 849 mov rp=r14
846 ;; 850 ;;
847 adds out2=16,r12 851 adds out2=16,r12
848 br.call.sptk.many b6=kvm_page_fault 852 br.call.sptk.many b6=kvm_page_fault
849END(kvm_itlb_miss_dispatch) 853END(kvm_itlb_miss_dispatch)
850 854
851ENTRY(kvm_dispatch_reflection) 855ENTRY(kvm_dispatch_reflection)
852 /* 856/*
853 * Input: 857 * Input:
854 * psr.ic: off 858 * psr.ic: off
855 * r19: intr type (offset into ivt, see ia64_int.h) 859 * r19: intr type (offset into ivt, see ia64_int.h)
856 * r31: contains saved predicates (pr) 860 * r31: contains saved predicates (pr)
857 */ 861 */
858 KVM_SAVE_MIN_WITH_COVER_R19 862 KVM_SAVE_MIN_WITH_COVER_R19
859 alloc r14=ar.pfs,0,0,5,0 863 alloc r14=ar.pfs,0,0,5,0
860 mov out0=cr.ifa 864 mov out0=cr.ifa
861 mov out1=cr.isr 865 mov out1=cr.isr
862 mov out2=cr.iim 866 mov out2=cr.iim
863 mov out3=r15 867 mov out3=r15
864 adds r3=8,r2 // set up second base pointer 868 adds r3=8,r2 // set up second base pointer
865 ;; 869 ;;
866 ssm psr.ic 870 ssm psr.ic
867 ;; 871 ;;
868 srlz.i // guarantee that interruption collection is on 872 srlz.i // guarantee that interruption collection is on
869 ;; 873 ;;
870 //(p15) ssm psr.i // restore psr.i 874 //(p15) ssm psr.i // restore psr.i
871 addl r14=@gprel(ia64_leave_hypervisor),gp 875 addl r14=@gprel(ia64_leave_hypervisor),gp
872 ;; 876 ;;
873 KVM_SAVE_REST 877 KVM_SAVE_REST
874 mov rp=r14 878 mov rp=r14
875 ;; 879 ;;
876 adds out4=16,r12 880 adds out4=16,r12
877 br.call.sptk.many b6=reflect_interruption 881 br.call.sptk.many b6=reflect_interruption
878END(kvm_dispatch_reflection) 882END(kvm_dispatch_reflection)
879 883
880ENTRY(kvm_dispatch_virtualization_fault) 884ENTRY(kvm_dispatch_virtualization_fault)
881 adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 885 adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
882 adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 886 adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
883 ;; 887 ;;
884 st8 [r16] = r24 888 st8 [r16] = r24
885 st8 [r17] = r25 889 st8 [r17] = r25
886 ;; 890 ;;
887 KVM_SAVE_MIN_WITH_COVER_R19 891 KVM_SAVE_MIN_WITH_COVER_R19
888 ;; 892 ;;
889 alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) 893 alloc r14=ar.pfs,0,0,2,0 // (must be first in insn group!)
890 mov out0=r13 //vcpu 894 mov out0=r13 //vcpu
891 adds r3=8,r2 // set up second base pointer 895 adds r3=8,r2 // set up second base pointer
892 ;; 896 ;;
893 ssm psr.ic 897 ssm psr.ic
894 ;; 898 ;;
895 srlz.i // guarantee that interruption collection is on 899 srlz.i // guarantee that interruption collection is on
896 ;; 900 ;;
897 //(p15) ssm psr.i // restore psr.i 901 //(p15) ssm psr.i // restore psr.i
898 addl r14=@gprel(ia64_leave_hypervisor_prepare),gp 902 addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
899 ;; 903 ;;
900 KVM_SAVE_REST 904 KVM_SAVE_REST
901 KVM_SAVE_EXTRA 905 KVM_SAVE_EXTRA
902 mov rp=r14 906 mov rp=r14
903 ;; 907 ;;
904 adds out1=16,sp //regs 908 adds out1=16,sp //regs
905 br.call.sptk.many b6=kvm_emulate 909 br.call.sptk.many b6=kvm_emulate
906END(kvm_dispatch_virtualization_fault) 910END(kvm_dispatch_virtualization_fault)
907 911
908 912
909ENTRY(kvm_dispatch_interrupt) 913ENTRY(kvm_dispatch_interrupt)
910 KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3 914 KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3
911 ;; 915 ;;
912 alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group 916 alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
913 //mov out0=cr.ivr // pass cr.ivr as first arg 917 adds r3=8,r2 // set up second base pointer for SAVE_REST
914 adds r3=8,r2 // set up second base pointer for SAVE_REST 918 ;;
915 ;; 919 ssm psr.ic
916 ssm psr.ic 920 ;;
917 ;; 921 srlz.i
918 srlz.i 922 ;;
919 ;; 923 //(p15) ssm psr.i
920 //(p15) ssm psr.i 924 addl r14=@gprel(ia64_leave_hypervisor),gp
921 addl r14=@gprel(ia64_leave_hypervisor),gp 925 ;;
922 ;; 926 KVM_SAVE_REST
923 KVM_SAVE_REST 927 mov rp=r14
924 mov rp=r14 928 ;;
925 ;; 929 mov out0=r13 // pass pointer to pt_regs as second arg
926 mov out0=r13 // pass pointer to pt_regs as second arg 930 br.call.sptk.many b6=kvm_ia64_handle_irq
927 br.call.sptk.many b6=kvm_ia64_handle_irq
928END(kvm_dispatch_interrupt) 931END(kvm_dispatch_interrupt)
929 932
930
931
932
933GLOBAL_ENTRY(ia64_leave_nested) 933GLOBAL_ENTRY(ia64_leave_nested)
934 rsm psr.i 934 rsm psr.i
935 ;; 935 ;;
@@ -1008,7 +1008,7 @@ GLOBAL_ENTRY(ia64_leave_nested)
1008 ;; 1008 ;;
1009 ldf.fill f11=[r2] 1009 ldf.fill f11=[r2]
1010// mov r18=r13 1010// mov r18=r13
1011// mov r21=r13 1011// mov r21=r13
1012 adds r16=PT(CR_IPSR)+16,r12 1012 adds r16=PT(CR_IPSR)+16,r12
1013 adds r17=PT(CR_IIP)+16,r12 1013 adds r17=PT(CR_IIP)+16,r12
1014 ;; 1014 ;;
@@ -1058,138 +1058,135 @@ GLOBAL_ENTRY(ia64_leave_nested)
1058 rfi 1058 rfi
1059END(ia64_leave_nested) 1059END(ia64_leave_nested)
1060 1060
1061
1062
1063GLOBAL_ENTRY(ia64_leave_hypervisor_prepare) 1061GLOBAL_ENTRY(ia64_leave_hypervisor_prepare)
1064 /* 1062/*
1065 * work.need_resched etc. mustn't get changed 1063 * work.need_resched etc. mustn't get changed
1066 *by this CPU before it returns to 1064 *by this CPU before it returns to
1067 ;; 1065 * user- or fsys-mode, hence we disable interrupts early on:
1068 * user- or fsys-mode, hence we disable interrupts early on: 1066 */
1069 */ 1067 adds r2 = PT(R4)+16,r12
1070 adds r2 = PT(R4)+16,r12 1068 adds r3 = PT(R5)+16,r12
1071 adds r3 = PT(R5)+16,r12 1069 adds r8 = PT(EML_UNAT)+16,r12
1072 adds r8 = PT(EML_UNAT)+16,r12 1070 ;;
1073 ;; 1071 ld8 r8 = [r8]
1074 ld8 r8 = [r8] 1072 ;;
1075 ;; 1073 mov ar.unat=r8
1076 mov ar.unat=r8 1074 ;;
1077 ;; 1075 ld8.fill r4=[r2],16 //load r4
1078 ld8.fill r4=[r2],16 //load r4 1076 ld8.fill r5=[r3],16 //load r5
1079 ld8.fill r5=[r3],16 //load r5 1077 ;;
1080 ;; 1078 ld8.fill r6=[r2] //load r6
1081 ld8.fill r6=[r2] //load r6 1079 ld8.fill r7=[r3] //load r7
1082 ld8.fill r7=[r3] //load r7 1080 ;;
1083 ;;
1084END(ia64_leave_hypervisor_prepare) 1081END(ia64_leave_hypervisor_prepare)
1085//fall through 1082//fall through
1086GLOBAL_ENTRY(ia64_leave_hypervisor) 1083GLOBAL_ENTRY(ia64_leave_hypervisor)
1087 rsm psr.i 1084 rsm psr.i
1088 ;; 1085 ;;
1089 br.call.sptk.many b0=leave_hypervisor_tail 1086 br.call.sptk.many b0=leave_hypervisor_tail
1090 ;; 1087 ;;
1091 adds r20=PT(PR)+16,r12 1088 adds r20=PT(PR)+16,r12
1092 adds r8=PT(EML_UNAT)+16,r12 1089 adds r8=PT(EML_UNAT)+16,r12
1093 ;; 1090 ;;
1094 ld8 r8=[r8] 1091 ld8 r8=[r8]
1095 ;; 1092 ;;
1096 mov ar.unat=r8 1093 mov ar.unat=r8
1097 ;; 1094 ;;
1098 lfetch [r20],PT(CR_IPSR)-PT(PR) 1095 lfetch [r20],PT(CR_IPSR)-PT(PR)
1099 adds r2 = PT(B6)+16,r12 1096 adds r2 = PT(B6)+16,r12
1100 adds r3 = PT(B7)+16,r12 1097 adds r3 = PT(B7)+16,r12
1101 ;; 1098 ;;
1102 lfetch [r20] 1099 lfetch [r20]
1103 ;; 1100 ;;
1104 ld8 r24=[r2],16 /* B6 */ 1101 ld8 r24=[r2],16 /* B6 */
1105 ld8 r25=[r3],16 /* B7 */ 1102 ld8 r25=[r3],16 /* B7 */
1106 ;; 1103 ;;
1107 ld8 r26=[r2],16 /* ar_csd */ 1104 ld8 r26=[r2],16 /* ar_csd */
1108 ld8 r27=[r3],16 /* ar_ssd */ 1105 ld8 r27=[r3],16 /* ar_ssd */
1109 mov b6 = r24 1106 mov b6 = r24
1110 ;; 1107 ;;
1111 ld8.fill r8=[r2],16 1108 ld8.fill r8=[r2],16
1112 ld8.fill r9=[r3],16 1109 ld8.fill r9=[r3],16
1113 mov b7 = r25 1110 mov b7 = r25
1114 ;; 1111 ;;
1115 mov ar.csd = r26 1112 mov ar.csd = r26
1116 mov ar.ssd = r27 1113 mov ar.ssd = r27
1117 ;; 1114 ;;
1118 ld8.fill r10=[r2],PT(R15)-PT(R10) 1115 ld8.fill r10=[r2],PT(R15)-PT(R10)
1119 ld8.fill r11=[r3],PT(R14)-PT(R11) 1116 ld8.fill r11=[r3],PT(R14)-PT(R11)
1120 ;; 1117 ;;
1121 ld8.fill r15=[r2],PT(R16)-PT(R15) 1118 ld8.fill r15=[r2],PT(R16)-PT(R15)
1122 ld8.fill r14=[r3],PT(R17)-PT(R14) 1119 ld8.fill r14=[r3],PT(R17)-PT(R14)
1123 ;; 1120 ;;
1124 ld8.fill r16=[r2],16 1121 ld8.fill r16=[r2],16
1125 ld8.fill r17=[r3],16 1122 ld8.fill r17=[r3],16
1126 ;; 1123 ;;
1127 ld8.fill r18=[r2],16 1124 ld8.fill r18=[r2],16
1128 ld8.fill r19=[r3],16 1125 ld8.fill r19=[r3],16
1129 ;; 1126 ;;
1130 ld8.fill r20=[r2],16 1127 ld8.fill r20=[r2],16
1131 ld8.fill r21=[r3],16 1128 ld8.fill r21=[r3],16
1132 ;; 1129 ;;
1133 ld8.fill r22=[r2],16 1130 ld8.fill r22=[r2],16
1134 ld8.fill r23=[r3],16 1131 ld8.fill r23=[r3],16
1135 ;; 1132 ;;
1136 ld8.fill r24=[r2],16 1133 ld8.fill r24=[r2],16
1137 ld8.fill r25=[r3],16 1134 ld8.fill r25=[r3],16
1138 ;; 1135 ;;
1139 ld8.fill r26=[r2],16 1136 ld8.fill r26=[r2],16
1140 ld8.fill r27=[r3],16 1137 ld8.fill r27=[r3],16
1141 ;; 1138 ;;
1142 ld8.fill r28=[r2],16 1139 ld8.fill r28=[r2],16
1143 ld8.fill r29=[r3],16 1140 ld8.fill r29=[r3],16
1144 ;; 1141 ;;
1145 ld8.fill r30=[r2],PT(F6)-PT(R30) 1142 ld8.fill r30=[r2],PT(F6)-PT(R30)
1146 ld8.fill r31=[r3],PT(F7)-PT(R31) 1143 ld8.fill r31=[r3],PT(F7)-PT(R31)
1147 ;; 1144 ;;
1148 rsm psr.i | psr.ic 1145 rsm psr.i | psr.ic
1149 // initiate turning off of interrupt and interruption collection 1146 // initiate turning off of interrupt and interruption collection
1150 invala // invalidate ALAT 1147 invala // invalidate ALAT
1151 ;; 1148 ;;
1152 srlz.i // ensure interruption collection is off 1149 srlz.i // ensure interruption collection is off
1153 ;; 1150 ;;
1154 bsw.0 1151 bsw.0
1155 ;; 1152 ;;
1156 adds r16 = PT(CR_IPSR)+16,r12 1153 adds r16 = PT(CR_IPSR)+16,r12
1157 adds r17 = PT(CR_IIP)+16,r12 1154 adds r17 = PT(CR_IIP)+16,r12
1158 mov r21=r13 // get current 1155 mov r21=r13 // get current
1159 ;; 1156 ;;
1160 ld8 r31=[r16],16 // load cr.ipsr 1157 ld8 r31=[r16],16 // load cr.ipsr
1161 ld8 r30=[r17],16 // load cr.iip 1158 ld8 r30=[r17],16 // load cr.iip
1162 ;; 1159 ;;
1163 ld8 r29=[r16],16 // load cr.ifs 1160 ld8 r29=[r16],16 // load cr.ifs
1164 ld8 r28=[r17],16 // load ar.unat 1161 ld8 r28=[r17],16 // load ar.unat
1165 ;; 1162 ;;
1166 ld8 r27=[r16],16 // load ar.pfs 1163 ld8 r27=[r16],16 // load ar.pfs
1167 ld8 r26=[r17],16 // load ar.rsc 1164 ld8 r26=[r17],16 // load ar.rsc
1168 ;; 1165 ;;
1169 ld8 r25=[r16],16 // load ar.rnat 1166 ld8 r25=[r16],16 // load ar.rnat
1170 ld8 r24=[r17],16 // load ar.bspstore 1167 ld8 r24=[r17],16 // load ar.bspstore
1171 ;; 1168 ;;
1172 ld8 r23=[r16],16 // load predicates 1169 ld8 r23=[r16],16 // load predicates
1173 ld8 r22=[r17],16 // load b0 1170 ld8 r22=[r17],16 // load b0
1174 ;; 1171 ;;
1175 ld8 r20=[r16],16 // load ar.rsc value for "loadrs" 1172 ld8 r20=[r16],16 // load ar.rsc value for "loadrs"
1176 ld8.fill r1=[r17],16 //load r1 1173 ld8.fill r1=[r17],16 //load r1
1177 ;; 1174 ;;
1178 ld8.fill r12=[r16],16 //load r12 1175 ld8.fill r12=[r16],16 //load r12
1179 ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13 1176 ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13
1180 ;; 1177 ;;
1181 ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr 1178 ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr
1182 ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2 1179 ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2
1183 ;; 1180 ;;
1184 ld8.fill r3=[r16] //load r3 1181 ld8.fill r3=[r16] //load r3
1185 ld8 r18=[r17] //load ar_ccv 1182 ld8 r18=[r17] //load ar_ccv
1186 ;; 1183 ;;
1187 mov ar.fpsr=r19 1184 mov ar.fpsr=r19
1188 mov ar.ccv=r18 1185 mov ar.ccv=r18
1189 shr.u r18=r20,16 1186 shr.u r18=r20,16
1190 ;; 1187 ;;
1191kvm_rbs_switch: 1188kvm_rbs_switch:
1192 mov r19=96 1189 mov r19=96
1193 1190
1194kvm_dont_preserve_current_frame: 1191kvm_dont_preserve_current_frame:
1195/* 1192/*
@@ -1201,76 +1198,76 @@ kvm_dont_preserve_current_frame:
1201# define pReturn p7 1198# define pReturn p7
1202# define Nregs 14 1199# define Nregs 14
1203 1200
1204 alloc loc0=ar.pfs,2,Nregs-2,2,0 1201 alloc loc0=ar.pfs,2,Nregs-2,2,0
1205 shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) 1202 shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8))
1206 sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize 1203 sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize
1207 ;; 1204 ;;
1208 mov ar.rsc=r20 // load ar.rsc to be used for "loadrs" 1205 mov ar.rsc=r20 // load ar.rsc to be used for "loadrs"
1209 shladd in0=loc1,3,r19 1206 shladd in0=loc1,3,r19
1210 mov in1=0 1207 mov in1=0
1211 ;; 1208 ;;
1212 TEXT_ALIGN(32) 1209 TEXT_ALIGN(32)
1213kvm_rse_clear_invalid: 1210kvm_rse_clear_invalid:
1214 alloc loc0=ar.pfs,2,Nregs-2,2,0 1211 alloc loc0=ar.pfs,2,Nregs-2,2,0
1215 cmp.lt pRecurse,p0=Nregs*8,in0 1212 cmp.lt pRecurse,p0=Nregs*8,in0
1216 // if more than Nregs regs left to clear, (re)curse 1213 // if more than Nregs regs left to clear, (re)curse
1217 add out0=-Nregs*8,in0 1214 add out0=-Nregs*8,in0
1218 add out1=1,in1 // increment recursion count 1215 add out1=1,in1 // increment recursion count
1219 mov loc1=0 1216 mov loc1=0
1220 mov loc2=0 1217 mov loc2=0
1221 ;; 1218 ;;
1222 mov loc3=0 1219 mov loc3=0
1223 mov loc4=0 1220 mov loc4=0
1224 mov loc5=0 1221 mov loc5=0
1225 mov loc6=0 1222 mov loc6=0
1226 mov loc7=0 1223 mov loc7=0
1227(pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid 1224(pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid
1228 ;; 1225 ;;
1229 mov loc8=0 1226 mov loc8=0
1230 mov loc9=0 1227 mov loc9=0
1231 cmp.ne pReturn,p0=r0,in1 1228 cmp.ne pReturn,p0=r0,in1
1232 // if recursion count != 0, we need to do a br.ret 1229 // if recursion count != 0, we need to do a br.ret
1233 mov loc10=0 1230 mov loc10=0
1234 mov loc11=0 1231 mov loc11=0
1235(pReturn) br.ret.dptk.many b0 1232(pReturn) br.ret.dptk.many b0
1236 1233
1237# undef pRecurse 1234# undef pRecurse
1238# undef pReturn 1235# undef pReturn
1239 1236
1240// loadrs has already been shifted 1237// loadrs has already been shifted
1241 alloc r16=ar.pfs,0,0,0,0 // drop current register frame 1238 alloc r16=ar.pfs,0,0,0,0 // drop current register frame
1242 ;; 1239 ;;
1243 loadrs 1240 loadrs
1244 ;; 1241 ;;
1245 mov ar.bspstore=r24 1242 mov ar.bspstore=r24
1246 ;; 1243 ;;
1247 mov ar.unat=r28 1244 mov ar.unat=r28
1248 mov ar.rnat=r25 1245 mov ar.rnat=r25
1249 mov ar.rsc=r26 1246 mov ar.rsc=r26
1250 ;; 1247 ;;
1251 mov cr.ipsr=r31 1248 mov cr.ipsr=r31
1252 mov cr.iip=r30 1249 mov cr.iip=r30
1253 mov cr.ifs=r29 1250 mov cr.ifs=r29
1254 mov ar.pfs=r27 1251 mov ar.pfs=r27
1255 adds r18=VMM_VPD_BASE_OFFSET,r21 1252 adds r18=VMM_VPD_BASE_OFFSET,r21
1256 ;; 1253 ;;
1257 ld8 r18=[r18] //vpd 1254 ld8 r18=[r18] //vpd
1258 adds r17=VMM_VCPU_ISR_OFFSET,r21 1255 adds r17=VMM_VCPU_ISR_OFFSET,r21
1259 ;; 1256 ;;
1260 ld8 r17=[r17] 1257 ld8 r17=[r17]
1261 adds r19=VMM_VPD_VPSR_OFFSET,r18 1258 adds r19=VMM_VPD_VPSR_OFFSET,r18
1262 ;; 1259 ;;
1263 ld8 r19=[r19] //vpsr 1260 ld8 r19=[r19] //vpsr
1264 mov r25=r18 1261 mov r25=r18
1265 adds r16= VMM_VCPU_GP_OFFSET,r21 1262 adds r16= VMM_VCPU_GP_OFFSET,r21
1266 ;; 1263 ;;
1267 ld8 r16= [r16] // Put gp in r24 1264 ld8 r16= [r16] // Put gp in r24
1268 movl r24=@gprel(ia64_vmm_entry) // calculate return address 1265 movl r24=@gprel(ia64_vmm_entry) // calculate return address
1269 ;; 1266 ;;
1270 add r24=r24,r16 1267 add r24=r24,r16
1271 ;; 1268 ;;
1272 br.sptk.many kvm_vps_sync_write // call the service 1269 br.sptk.many kvm_vps_sync_write // call the service
1273 ;; 1270 ;;
1274END(ia64_leave_hypervisor) 1271END(ia64_leave_hypervisor)
1275// fall through 1272// fall through
1276GLOBAL_ENTRY(ia64_vmm_entry) 1273GLOBAL_ENTRY(ia64_vmm_entry)
@@ -1283,16 +1280,14 @@ GLOBAL_ENTRY(ia64_vmm_entry)
1283 * r22:b0 1280 * r22:b0
1284 * r23:predicate 1281 * r23:predicate
1285 */ 1282 */
1286 mov r24=r22 1283 mov r24=r22
1287 mov r25=r18 1284 mov r25=r18
1288 tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic 1285 tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic
1289 (p1) br.cond.sptk.few kvm_vps_resume_normal 1286(p1) br.cond.sptk.few kvm_vps_resume_normal
1290 (p2) br.cond.sptk.many kvm_vps_resume_handler 1287(p2) br.cond.sptk.many kvm_vps_resume_handler
1291 ;; 1288 ;;
1292END(ia64_vmm_entry) 1289END(ia64_vmm_entry)
1293 1290
1294
1295
1296/* 1291/*
1297 * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, 1292 * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2,
1298 * u64 arg3, u64 arg4, u64 arg5, 1293 * u64 arg3, u64 arg4, u64 arg5,
@@ -1310,88 +1305,88 @@ psrsave = loc2
1310entry = loc3 1305entry = loc3
1311hostret = r24 1306hostret = r24
1312 1307
1313 alloc pfssave=ar.pfs,4,4,0,0 1308 alloc pfssave=ar.pfs,4,4,0,0
1314 mov rpsave=rp 1309 mov rpsave=rp
1315 adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13 1310 adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13
1316 ;; 1311 ;;
1317 ld8 entry=[entry] 1312 ld8 entry=[entry]
13181: mov hostret=ip 13131: mov hostret=ip
1319 mov r25=in1 // copy arguments 1314 mov r25=in1 // copy arguments
1320 mov r26=in2 1315 mov r26=in2
1321 mov r27=in3 1316 mov r27=in3
1322 mov psrsave=psr 1317 mov psrsave=psr
1323 ;; 1318 ;;
1324 tbit.nz p6,p0=psrsave,14 // IA64_PSR_I 1319 tbit.nz p6,p0=psrsave,14 // IA64_PSR_I
1325 tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC 1320 tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC
1326 ;; 1321 ;;
1327 add hostret=2f-1b,hostret // calculate return address 1322 add hostret=2f-1b,hostret // calculate return address
1328 add entry=entry,in0 1323 add entry=entry,in0
1329 ;; 1324 ;;
1330 rsm psr.i | psr.ic 1325 rsm psr.i | psr.ic
1331 ;; 1326 ;;
1332 srlz.i 1327 srlz.i
1333 mov b6=entry 1328 mov b6=entry
1334 br.cond.sptk b6 // call the service 1329 br.cond.sptk b6 // call the service
13352: 13302:
1336 // Architectural sequence for enabling interrupts if necessary 1331// Architectural sequence for enabling interrupts if necessary
1337(p7) ssm psr.ic 1332(p7) ssm psr.ic
1338 ;; 1333 ;;
1339(p7) srlz.i 1334(p7) srlz.i
1340 ;; 1335 ;;
1341//(p6) ssm psr.i 1336//(p6) ssm psr.i
1342 ;; 1337 ;;
1343 mov rp=rpsave 1338 mov rp=rpsave
1344 mov ar.pfs=pfssave 1339 mov ar.pfs=pfssave
1345 mov r8=r31 1340 mov r8=r31
1346 ;; 1341 ;;
1347 srlz.d 1342 srlz.d
1348 br.ret.sptk rp 1343 br.ret.sptk rp
1349 1344
1350END(ia64_call_vsa) 1345END(ia64_call_vsa)
1351 1346
1352#define INIT_BSPSTORE ((4<<30)-(12<<20)-0x100) 1347#define INIT_BSPSTORE ((4<<30)-(12<<20)-0x100)
1353 1348
1354GLOBAL_ENTRY(vmm_reset_entry) 1349GLOBAL_ENTRY(vmm_reset_entry)
1355 //set up ipsr, iip, vpd.vpsr, dcr 1350 //set up ipsr, iip, vpd.vpsr, dcr
1356 // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1 1351 // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1
1357 // For DCR: all bits 0 1352 // For DCR: all bits 0
1358 bsw.0 1353 bsw.0
1359 ;; 1354 ;;
1360 mov r21 =r13 1355 mov r21 =r13
1361 adds r14=-VMM_PT_REGS_SIZE, r12 1356 adds r14=-VMM_PT_REGS_SIZE, r12
1362 ;; 1357 ;;
1363 movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1 1358 movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1
1364 movl r10=0x8000000000000000 1359 movl r10=0x8000000000000000
1365 adds r16=PT(CR_IIP), r14 1360 adds r16=PT(CR_IIP), r14
1366 adds r20=PT(R1), r14 1361 adds r20=PT(R1), r14
1367 ;; 1362 ;;
1368 rsm psr.ic | psr.i 1363 rsm psr.ic | psr.i
1369 ;; 1364 ;;
1370 srlz.i 1365 srlz.i
1371 ;; 1366 ;;
1372 mov ar.rsc = 0 1367 mov ar.rsc = 0
1373 ;; 1368 ;;
1374 flushrs 1369 flushrs
1375 ;; 1370 ;;
1376 mov ar.bspstore = 0 1371 mov ar.bspstore = 0
1377 // clear BSPSTORE 1372 // clear BSPSTORE
1378 ;; 1373 ;;
1379 mov cr.ipsr=r6 1374 mov cr.ipsr=r6
1380 mov cr.ifs=r10 1375 mov cr.ifs=r10
1381 ld8 r4 = [r16] // Set init iip for first run. 1376 ld8 r4 = [r16] // Set init iip for first run.
1382 ld8 r1 = [r20] 1377 ld8 r1 = [r20]
1383 ;; 1378 ;;
1384 mov cr.iip=r4 1379 mov cr.iip=r4
1385 adds r16=VMM_VPD_BASE_OFFSET,r13 1380 adds r16=VMM_VPD_BASE_OFFSET,r13
1386 ;; 1381 ;;
1387 ld8 r18=[r16] 1382 ld8 r18=[r16]
1388 ;; 1383 ;;
1389 adds r19=VMM_VPD_VPSR_OFFSET,r18 1384 adds r19=VMM_VPD_VPSR_OFFSET,r18
1390 ;; 1385 ;;
1391 ld8 r19=[r19] 1386 ld8 r19=[r19]
1392 mov r17=r0 1387 mov r17=r0
1393 mov r22=r0 1388 mov r22=r0
1394 mov r23=r0 1389 mov r23=r0
1395 br.cond.sptk ia64_vmm_entry 1390 br.cond.sptk ia64_vmm_entry
1396 br.ret.sptk b0 1391 br.ret.sptk b0
1397END(vmm_reset_entry) 1392END(vmm_reset_entry)
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
index e22b93361e08..6b6307a3bd55 100644
--- a/arch/ia64/kvm/vtlb.c
+++ b/arch/ia64/kvm/vtlb.c
@@ -183,8 +183,8 @@ void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps)
183 u64 i, dirty_pages = 1; 183 u64 i, dirty_pages = 1;
184 u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT; 184 u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT;
185 spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa); 185 spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa);
186 void *dirty_bitmap = (void *)v - (KVM_VCPU_OFS + v->vcpu_id * VCPU_SIZE) 186 void *dirty_bitmap = (void *)KVM_MEM_DIRTY_LOG_BASE;
187 + KVM_MEM_DIRTY_LOG_OFS; 187
188 dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT; 188 dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT;
189 189
190 vmm_spin_lock(lock); 190 vmm_spin_lock(lock);
diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
index 0c66dbdd1d72..66fd705e82c0 100644
--- a/arch/ia64/sn/kernel/irq.c
+++ b/arch/ia64/sn/kernel/irq.c
@@ -227,14 +227,14 @@ finish_up:
227 return new_irq_info; 227 return new_irq_info;
228} 228}
229 229
230static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask) 230static void sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask)
231{ 231{
232 struct sn_irq_info *sn_irq_info, *sn_irq_info_safe; 232 struct sn_irq_info *sn_irq_info, *sn_irq_info_safe;
233 nasid_t nasid; 233 nasid_t nasid;
234 int slice; 234 int slice;
235 235
236 nasid = cpuid_to_nasid(first_cpu(mask)); 236 nasid = cpuid_to_nasid(cpumask_first(mask));
237 slice = cpuid_to_slice(first_cpu(mask)); 237 slice = cpuid_to_slice(cpumask_first(mask));
238 238
239 list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe, 239 list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe,
240 sn_irq_lh[irq], list) 240 sn_irq_lh[irq], list)
diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c
index 83f190ffe350..ca553b0429ce 100644
--- a/arch/ia64/sn/kernel/msi_sn.c
+++ b/arch/ia64/sn/kernel/msi_sn.c
@@ -151,7 +151,8 @@ int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry)
151} 151}
152 152
153#ifdef CONFIG_SMP 153#ifdef CONFIG_SMP
154static void sn_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask) 154static void sn_set_msi_irq_affinity(unsigned int irq,
155 const struct cpumask *cpu_mask)
155{ 156{
156 struct msi_msg msg; 157 struct msi_msg msg;
157 int slice; 158 int slice;
@@ -164,7 +165,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask)
164 struct sn_pcibus_provider *provider; 165 struct sn_pcibus_provider *provider;
165 unsigned int cpu; 166 unsigned int cpu;
166 167
167 cpu = first_cpu(cpu_mask); 168 cpu = cpumask_first(cpu_mask);
168 sn_irq_info = sn_msi_info[irq].sn_irq_info; 169 sn_irq_info = sn_msi_info[irq].sn_irq_info;
169 if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0) 170 if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0)
170 return; 171 return;
@@ -204,7 +205,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask)
204 msg.address_lo = (u32)(bus_addr & 0x00000000ffffffff); 205 msg.address_lo = (u32)(bus_addr & 0x00000000ffffffff);
205 206
206 write_msi_msg(irq, &msg); 207 write_msi_msg(irq, &msg);
207 irq_desc[irq].affinity = cpu_mask; 208 irq_desc[irq].affinity = *cpu_mask;
208} 209}
209#endif /* CONFIG_SMP */ 210#endif /* CONFIG_SMP */
210 211
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index 29047d5c259a..cabba332cc48 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -10,6 +10,7 @@ config M32R
10 default y 10 default y
11 select HAVE_IDE 11 select HAVE_IDE
12 select HAVE_OPROFILE 12 select HAVE_OPROFILE
13 select INIT_ALL_POSSIBLE
13 14
14config SBUS 15config SBUS
15 bool 16 bool
diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c
index 39cb6da72dcb..0f06b3722e96 100644
--- a/arch/m32r/kernel/smpboot.c
+++ b/arch/m32r/kernel/smpboot.c
@@ -73,17 +73,11 @@ static unsigned int bsp_phys_id = -1;
73/* Bitmask of physically existing CPUs */ 73/* Bitmask of physically existing CPUs */
74physid_mask_t phys_cpu_present_map; 74physid_mask_t phys_cpu_present_map;
75 75
76/* Bitmask of currently online CPUs */
77cpumask_t cpu_online_map;
78EXPORT_SYMBOL(cpu_online_map);
79
80cpumask_t cpu_bootout_map; 76cpumask_t cpu_bootout_map;
81cpumask_t cpu_bootin_map; 77cpumask_t cpu_bootin_map;
82static cpumask_t cpu_callin_map; 78static cpumask_t cpu_callin_map;
83cpumask_t cpu_callout_map; 79cpumask_t cpu_callout_map;
84EXPORT_SYMBOL(cpu_callout_map); 80EXPORT_SYMBOL(cpu_callout_map);
85cpumask_t cpu_possible_map = CPU_MASK_ALL;
86EXPORT_SYMBOL(cpu_possible_map);
87 81
88/* Per CPU bogomips and other parameters */ 82/* Per CPU bogomips and other parameters */
89struct cpuinfo_m32r cpu_data[NR_CPUS] __cacheline_aligned; 83struct cpuinfo_m32r cpu_data[NR_CPUS] __cacheline_aligned;
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 836fb66f080d..c825bde17cb3 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -280,7 +280,6 @@ config M68060
280 280
281config MMU_MOTOROLA 281config MMU_MOTOROLA
282 bool 282 bool
283 depends on MMU && !MMU_SUN3
284 283
285config MMU_SUN3 284config MMU_SUN3
286 bool 285 bool
diff --git a/arch/m68knommu/platform/coldfire/pit.c b/arch/m68knommu/platform/coldfire/pit.c
index c5b916700b22..2a12e7fa9748 100644
--- a/arch/m68knommu/platform/coldfire/pit.c
+++ b/arch/m68knommu/platform/coldfire/pit.c
@@ -156,7 +156,7 @@ void hw_timer_init(void)
156{ 156{
157 u32 imr; 157 u32 imr;
158 158
159 cf_pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); 159 cf_pit_clockevent.cpumask = cpumask_of(smp_processor_id());
160 cf_pit_clockevent.mult = div_sc(FREQ, NSEC_PER_SEC, 32); 160 cf_pit_clockevent.mult = div_sc(FREQ, NSEC_PER_SEC, 32);
161 cf_pit_clockevent.max_delta_ns = 161 cf_pit_clockevent.max_delta_ns =
162 clockevent_delta2ns(0xFFFF, &cf_pit_clockevent); 162 clockevent_delta2ns(0xFFFF, &cf_pit_clockevent);
diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
index a58f0eecc68f..abc62aa744ac 100644
--- a/arch/mips/include/asm/irq.h
+++ b/arch/mips/include/asm/irq.h
@@ -49,7 +49,8 @@ static inline void smtc_im_ack_irq(unsigned int irq)
49#ifdef CONFIG_MIPS_MT_SMTC_IRQAFF 49#ifdef CONFIG_MIPS_MT_SMTC_IRQAFF
50#include <linux/cpumask.h> 50#include <linux/cpumask.h>
51 51
52extern void plat_set_irq_affinity(unsigned int irq, cpumask_t affinity); 52extern void plat_set_irq_affinity(unsigned int irq,
53 const struct cpumask *affinity);
53extern void smtc_forward_irq(unsigned int irq); 54extern void smtc_forward_irq(unsigned int irq);
54 55
55/* 56/*
diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h
index 7785bec732f2..1fb959f98982 100644
--- a/arch/mips/include/asm/mach-ip27/topology.h
+++ b/arch/mips/include/asm/mach-ip27/topology.h
@@ -37,7 +37,6 @@ extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES];
37 37
38/* sched_domains SD_NODE_INIT for SGI IP27 machines */ 38/* sched_domains SD_NODE_INIT for SGI IP27 machines */
39#define SD_NODE_INIT (struct sched_domain) { \ 39#define SD_NODE_INIT (struct sched_domain) { \
40 .span = CPU_MASK_NONE, \
41 .parent = NULL, \ 40 .parent = NULL, \
42 .child = NULL, \ 41 .child = NULL, \
43 .groups = NULL, \ 42 .groups = NULL, \
diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h
index 0ff5b523ea77..86557b5d1b3f 100644
--- a/arch/mips/include/asm/smp.h
+++ b/arch/mips/include/asm/smp.h
@@ -38,9 +38,6 @@ extern int __cpu_logical_map[NR_CPUS];
38#define SMP_RESCHEDULE_YOURSELF 0x1 /* XXX braindead */ 38#define SMP_RESCHEDULE_YOURSELF 0x1 /* XXX braindead */
39#define SMP_CALL_FUNCTION 0x2 39#define SMP_CALL_FUNCTION 0x2
40 40
41extern cpumask_t phys_cpu_present_map;
42#define cpu_possible_map phys_cpu_present_map
43
44extern void asmlinkage smp_bootstrap(void); 41extern void asmlinkage smp_bootstrap(void);
45 42
46/* 43/*
diff --git a/arch/mips/jazz/irq.c b/arch/mips/jazz/irq.c
index d7f8a782aae4..03965cb1b252 100644
--- a/arch/mips/jazz/irq.c
+++ b/arch/mips/jazz/irq.c
@@ -146,7 +146,7 @@ void __init plat_time_init(void)
146 146
147 BUG_ON(HZ != 100); 147 BUG_ON(HZ != 100);
148 148
149 cd->cpumask = cpumask_of_cpu(cpu); 149 cd->cpumask = cpumask_of(cpu);
150 clockevents_register_device(cd); 150 clockevents_register_device(cd);
151 action->dev_id = cd; 151 action->dev_id = cd;
152 setup_irq(JAZZ_TIMER_IRQ, action); 152 setup_irq(JAZZ_TIMER_IRQ, action);
diff --git a/arch/mips/kernel/cevt-bcm1480.c b/arch/mips/kernel/cevt-bcm1480.c
index 0a57f86945f1..b820661678b0 100644
--- a/arch/mips/kernel/cevt-bcm1480.c
+++ b/arch/mips/kernel/cevt-bcm1480.c
@@ -126,7 +126,7 @@ void __cpuinit sb1480_clockevent_init(void)
126 cd->min_delta_ns = clockevent_delta2ns(2, cd); 126 cd->min_delta_ns = clockevent_delta2ns(2, cd);
127 cd->rating = 200; 127 cd->rating = 200;
128 cd->irq = irq; 128 cd->irq = irq;
129 cd->cpumask = cpumask_of_cpu(cpu); 129 cd->cpumask = cpumask_of(cpu);
130 cd->set_next_event = sibyte_next_event; 130 cd->set_next_event = sibyte_next_event;
131 cd->set_mode = sibyte_set_mode; 131 cd->set_mode = sibyte_set_mode;
132 clockevents_register_device(cd); 132 clockevents_register_device(cd);
@@ -148,6 +148,6 @@ void __cpuinit sb1480_clockevent_init(void)
148 action->name = name; 148 action->name = name;
149 action->dev_id = cd; 149 action->dev_id = cd;
150 150
151 irq_set_affinity(irq, cpumask_of_cpu(cpu)); 151 irq_set_affinity(irq, cpumask_of(cpu));
152 setup_irq(irq, action); 152 setup_irq(irq, action);
153} 153}
diff --git a/arch/mips/kernel/cevt-ds1287.c b/arch/mips/kernel/cevt-ds1287.c
index df4acb68bfb5..1ada45ea0700 100644
--- a/arch/mips/kernel/cevt-ds1287.c
+++ b/arch/mips/kernel/cevt-ds1287.c
@@ -88,7 +88,6 @@ static void ds1287_event_handler(struct clock_event_device *dev)
88static struct clock_event_device ds1287_clockevent = { 88static struct clock_event_device ds1287_clockevent = {
89 .name = "ds1287", 89 .name = "ds1287",
90 .features = CLOCK_EVT_FEAT_PERIODIC, 90 .features = CLOCK_EVT_FEAT_PERIODIC,
91 .cpumask = CPU_MASK_CPU0,
92 .set_next_event = ds1287_set_next_event, 91 .set_next_event = ds1287_set_next_event,
93 .set_mode = ds1287_set_mode, 92 .set_mode = ds1287_set_mode,
94 .event_handler = ds1287_event_handler, 93 .event_handler = ds1287_event_handler,
@@ -122,6 +121,7 @@ int __init ds1287_clockevent_init(int irq)
122 clockevent_set_clock(cd, 32768); 121 clockevent_set_clock(cd, 32768);
123 cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd); 122 cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd);
124 cd->min_delta_ns = clockevent_delta2ns(0x300, cd); 123 cd->min_delta_ns = clockevent_delta2ns(0x300, cd);
124 cd->cpumask = cpumask_of(0);
125 125
126 clockevents_register_device(&ds1287_clockevent); 126 clockevents_register_device(&ds1287_clockevent);
127 127
diff --git a/arch/mips/kernel/cevt-gt641xx.c b/arch/mips/kernel/cevt-gt641xx.c
index 6e2f58520afb..e9b787feedcb 100644
--- a/arch/mips/kernel/cevt-gt641xx.c
+++ b/arch/mips/kernel/cevt-gt641xx.c
@@ -96,7 +96,6 @@ static void gt641xx_timer0_event_handler(struct clock_event_device *dev)
96static struct clock_event_device gt641xx_timer0_clockevent = { 96static struct clock_event_device gt641xx_timer0_clockevent = {
97 .name = "gt641xx-timer0", 97 .name = "gt641xx-timer0",
98 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, 98 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
99 .cpumask = CPU_MASK_CPU0,
100 .irq = GT641XX_TIMER0_IRQ, 99 .irq = GT641XX_TIMER0_IRQ,
101 .set_next_event = gt641xx_timer0_set_next_event, 100 .set_next_event = gt641xx_timer0_set_next_event,
102 .set_mode = gt641xx_timer0_set_mode, 101 .set_mode = gt641xx_timer0_set_mode,
@@ -132,6 +131,7 @@ static int __init gt641xx_timer0_clockevent_init(void)
132 clockevent_set_clock(cd, gt641xx_base_clock); 131 clockevent_set_clock(cd, gt641xx_base_clock);
133 cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd); 132 cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd);
134 cd->min_delta_ns = clockevent_delta2ns(0x300, cd); 133 cd->min_delta_ns = clockevent_delta2ns(0x300, cd);
134 cd->cpumask = cpumask_of(0);
135 135
136 clockevents_register_device(&gt641xx_timer0_clockevent); 136 clockevents_register_device(&gt641xx_timer0_clockevent);
137 137
diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c
index 4a4c59f2737a..e1ec83b68031 100644
--- a/arch/mips/kernel/cevt-r4k.c
+++ b/arch/mips/kernel/cevt-r4k.c
@@ -195,7 +195,7 @@ int __cpuinit mips_clockevent_init(void)
195 195
196 cd->rating = 300; 196 cd->rating = 300;
197 cd->irq = irq; 197 cd->irq = irq;
198 cd->cpumask = cpumask_of_cpu(cpu); 198 cd->cpumask = cpumask_of(cpu);
199 cd->set_next_event = mips_next_event; 199 cd->set_next_event = mips_next_event;
200 cd->set_mode = mips_set_clock_mode; 200 cd->set_mode = mips_set_clock_mode;
201 cd->event_handler = mips_event_handler; 201 cd->event_handler = mips_event_handler;
diff --git a/arch/mips/kernel/cevt-sb1250.c b/arch/mips/kernel/cevt-sb1250.c
index 63ac3ad462bc..a2eebaafda52 100644
--- a/arch/mips/kernel/cevt-sb1250.c
+++ b/arch/mips/kernel/cevt-sb1250.c
@@ -125,7 +125,7 @@ void __cpuinit sb1250_clockevent_init(void)
125 cd->min_delta_ns = clockevent_delta2ns(2, cd); 125 cd->min_delta_ns = clockevent_delta2ns(2, cd);
126 cd->rating = 200; 126 cd->rating = 200;
127 cd->irq = irq; 127 cd->irq = irq;
128 cd->cpumask = cpumask_of_cpu(cpu); 128 cd->cpumask = cpumask_of(cpu);
129 cd->set_next_event = sibyte_next_event; 129 cd->set_next_event = sibyte_next_event;
130 cd->set_mode = sibyte_set_mode; 130 cd->set_mode = sibyte_set_mode;
131 clockevents_register_device(cd); 131 clockevents_register_device(cd);
@@ -147,6 +147,6 @@ void __cpuinit sb1250_clockevent_init(void)
147 action->name = name; 147 action->name = name;
148 action->dev_id = cd; 148 action->dev_id = cd;
149 149
150 irq_set_affinity(irq, cpumask_of_cpu(cpu)); 150 irq_set_affinity(irq, cpumask_of(cpu));
151 setup_irq(irq, action); 151 setup_irq(irq, action);
152} 152}
diff --git a/arch/mips/kernel/cevt-smtc.c b/arch/mips/kernel/cevt-smtc.c
index 5162fe4b5952..6d45e24db5bf 100644
--- a/arch/mips/kernel/cevt-smtc.c
+++ b/arch/mips/kernel/cevt-smtc.c
@@ -292,7 +292,7 @@ int __cpuinit mips_clockevent_init(void)
292 292
293 cd->rating = 300; 293 cd->rating = 300;
294 cd->irq = irq; 294 cd->irq = irq;
295 cd->cpumask = cpumask_of_cpu(cpu); 295 cd->cpumask = cpumask_of(cpu);
296 cd->set_next_event = mips_next_event; 296 cd->set_next_event = mips_next_event;
297 cd->set_mode = mips_set_clock_mode; 297 cd->set_mode = mips_set_clock_mode;
298 cd->event_handler = mips_event_handler; 298 cd->event_handler = mips_event_handler;
diff --git a/arch/mips/kernel/cevt-txx9.c b/arch/mips/kernel/cevt-txx9.c
index b5fc4eb412d2..eccf7d6096bd 100644
--- a/arch/mips/kernel/cevt-txx9.c
+++ b/arch/mips/kernel/cevt-txx9.c
@@ -112,7 +112,6 @@ static struct clock_event_device txx9tmr_clock_event_device = {
112 .name = "TXx9", 112 .name = "TXx9",
113 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, 113 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
114 .rating = 200, 114 .rating = 200,
115 .cpumask = CPU_MASK_CPU0,
116 .set_mode = txx9tmr_set_mode, 115 .set_mode = txx9tmr_set_mode,
117 .set_next_event = txx9tmr_set_next_event, 116 .set_next_event = txx9tmr_set_next_event,
118}; 117};
@@ -150,6 +149,7 @@ void __init txx9_clockevent_init(unsigned long baseaddr, int irq,
150 clockevent_delta2ns(0xffffffff >> (32 - TXX9_TIMER_BITS), cd); 149 clockevent_delta2ns(0xffffffff >> (32 - TXX9_TIMER_BITS), cd);
151 cd->min_delta_ns = clockevent_delta2ns(0xf, cd); 150 cd->min_delta_ns = clockevent_delta2ns(0xf, cd);
152 cd->irq = irq; 151 cd->irq = irq;
152 cd->cpumask = cpumask_of(0),
153 clockevents_register_device(cd); 153 clockevents_register_device(cd);
154 setup_irq(irq, &txx9tmr_irq); 154 setup_irq(irq, &txx9tmr_irq);
155 printk(KERN_INFO "TXx9: clockevent device at 0x%lx, irq %d\n", 155 printk(KERN_INFO "TXx9: clockevent device at 0x%lx, irq %d\n",
diff --git a/arch/mips/kernel/i8253.c b/arch/mips/kernel/i8253.c
index b6ac55162b9a..f4d187825f96 100644
--- a/arch/mips/kernel/i8253.c
+++ b/arch/mips/kernel/i8253.c
@@ -115,7 +115,7 @@ void __init setup_pit_timer(void)
115 * Start pit with the boot cpu mask and make it global after the 115 * Start pit with the boot cpu mask and make it global after the
116 * IO_APIC has been initialized. 116 * IO_APIC has been initialized.
117 */ 117 */
118 cd->cpumask = cpumask_of_cpu(cpu); 118 cd->cpumask = cpumask_of(cpu);
119 clockevent_set_clock(cd, CLOCK_TICK_RATE); 119 clockevent_set_clock(cd, CLOCK_TICK_RATE);
120 cd->max_delta_ns = clockevent_delta2ns(0x7FFF, cd); 120 cd->max_delta_ns = clockevent_delta2ns(0x7FFF, cd);
121 cd->min_delta_ns = clockevent_delta2ns(0xF, cd); 121 cd->min_delta_ns = clockevent_delta2ns(0xF, cd);
diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c
index f0a4bb19e096..494a49a317e9 100644
--- a/arch/mips/kernel/irq-gic.c
+++ b/arch/mips/kernel/irq-gic.c
@@ -155,7 +155,7 @@ static void gic_unmask_irq(unsigned int irq)
155 155
156static DEFINE_SPINLOCK(gic_lock); 156static DEFINE_SPINLOCK(gic_lock);
157 157
158static void gic_set_affinity(unsigned int irq, cpumask_t cpumask) 158static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
159{ 159{
160 cpumask_t tmp = CPU_MASK_NONE; 160 cpumask_t tmp = CPU_MASK_NONE;
161 unsigned long flags; 161 unsigned long flags;
@@ -164,7 +164,7 @@ static void gic_set_affinity(unsigned int irq, cpumask_t cpumask)
164 pr_debug(KERN_DEBUG "%s called\n", __func__); 164 pr_debug(KERN_DEBUG "%s called\n", __func__);
165 irq -= _irqbase; 165 irq -= _irqbase;
166 166
167 cpus_and(tmp, cpumask, cpu_online_map); 167 cpumask_and(&tmp, cpumask, cpu_online_mask);
168 if (cpus_empty(tmp)) 168 if (cpus_empty(tmp))
169 return; 169 return;
170 170
@@ -187,7 +187,7 @@ static void gic_set_affinity(unsigned int irq, cpumask_t cpumask)
187 set_bit(irq, pcpu_masks[first_cpu(tmp)].pcpu_mask); 187 set_bit(irq, pcpu_masks[first_cpu(tmp)].pcpu_mask);
188 188
189 } 189 }
190 irq_desc[irq].affinity = cpumask; 190 irq_desc[irq].affinity = *cpumask;
191 spin_unlock_irqrestore(&gic_lock, flags); 191 spin_unlock_irqrestore(&gic_lock, flags);
192 192
193} 193}
diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c
index ca476c4f62a5..f27beca4b26d 100644
--- a/arch/mips/kernel/smp-cmp.c
+++ b/arch/mips/kernel/smp-cmp.c
@@ -51,10 +51,10 @@ static int __init allowcpus(char *str)
51 int len; 51 int len;
52 52
53 cpus_clear(cpu_allow_map); 53 cpus_clear(cpu_allow_map);
54 if (cpulist_parse(str, cpu_allow_map) == 0) { 54 if (cpulist_parse(str, &cpu_allow_map) == 0) {
55 cpu_set(0, cpu_allow_map); 55 cpu_set(0, cpu_allow_map);
56 cpus_and(cpu_possible_map, cpu_possible_map, cpu_allow_map); 56 cpus_and(cpu_possible_map, cpu_possible_map, cpu_allow_map);
57 len = cpulist_scnprintf(buf, sizeof(buf)-1, cpu_possible_map); 57 len = cpulist_scnprintf(buf, sizeof(buf)-1, &cpu_possible_map);
58 buf[len] = '\0'; 58 buf[len] = '\0';
59 pr_debug("Allowable CPUs: %s\n", buf); 59 pr_debug("Allowable CPUs: %s\n", buf);
60 return 1; 60 return 1;
@@ -226,7 +226,7 @@ void __init cmp_smp_setup(void)
226 226
227 for (i = 1; i < NR_CPUS; i++) { 227 for (i = 1; i < NR_CPUS; i++) {
228 if (amon_cpu_avail(i)) { 228 if (amon_cpu_avail(i)) {
229 cpu_set(i, phys_cpu_present_map); 229 cpu_set(i, cpu_possible_map);
230 __cpu_number_map[i] = ++ncpu; 230 __cpu_number_map[i] = ++ncpu;
231 __cpu_logical_map[ncpu] = i; 231 __cpu_logical_map[ncpu] = i;
232 } 232 }
diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c
index 87a1816c1f45..6f7ee5ac46ee 100644
--- a/arch/mips/kernel/smp-mt.c
+++ b/arch/mips/kernel/smp-mt.c
@@ -70,7 +70,7 @@ static unsigned int __init smvp_vpe_init(unsigned int tc, unsigned int mvpconf0,
70 write_vpe_c0_vpeconf0(tmp); 70 write_vpe_c0_vpeconf0(tmp);
71 71
72 /* Record this as available CPU */ 72 /* Record this as available CPU */
73 cpu_set(tc, phys_cpu_present_map); 73 cpu_set(tc, cpu_possible_map);
74 __cpu_number_map[tc] = ++ncpu; 74 __cpu_number_map[tc] = ++ncpu;
75 __cpu_logical_map[ncpu] = tc; 75 __cpu_logical_map[ncpu] = tc;
76 } 76 }
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 8bf88faf5afd..3da94704f816 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -44,15 +44,10 @@
44#include <asm/mipsmtregs.h> 44#include <asm/mipsmtregs.h>
45#endif /* CONFIG_MIPS_MT_SMTC */ 45#endif /* CONFIG_MIPS_MT_SMTC */
46 46
47cpumask_t phys_cpu_present_map; /* Bitmask of available CPUs */
48volatile cpumask_t cpu_callin_map; /* Bitmask of started secondaries */ 47volatile cpumask_t cpu_callin_map; /* Bitmask of started secondaries */
49cpumask_t cpu_online_map; /* Bitmask of currently online CPUs */
50int __cpu_number_map[NR_CPUS]; /* Map physical to logical */ 48int __cpu_number_map[NR_CPUS]; /* Map physical to logical */
51int __cpu_logical_map[NR_CPUS]; /* Map logical to physical */ 49int __cpu_logical_map[NR_CPUS]; /* Map logical to physical */
52 50
53EXPORT_SYMBOL(phys_cpu_present_map);
54EXPORT_SYMBOL(cpu_online_map);
55
56extern void cpu_idle(void); 51extern void cpu_idle(void);
57 52
58/* Number of TCs (or siblings in Intel speak) per CPU core */ 53/* Number of TCs (or siblings in Intel speak) per CPU core */
@@ -195,7 +190,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
195/* preload SMP state for boot cpu */ 190/* preload SMP state for boot cpu */
196void __devinit smp_prepare_boot_cpu(void) 191void __devinit smp_prepare_boot_cpu(void)
197{ 192{
198 cpu_set(0, phys_cpu_present_map); 193 cpu_set(0, cpu_possible_map);
199 cpu_set(0, cpu_online_map); 194 cpu_set(0, cpu_online_map);
200 cpu_set(0, cpu_callin_map); 195 cpu_set(0, cpu_callin_map);
201} 196}
diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index 897fb2b4751c..b6cca01ff82b 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c
@@ -290,7 +290,7 @@ static void smtc_configure_tlb(void)
290 * possibly leave some TCs/VPEs as "slave" processors. 290 * possibly leave some TCs/VPEs as "slave" processors.
291 * 291 *
292 * Use c0_MVPConf0 to find out how many TCs are available, setting up 292 * Use c0_MVPConf0 to find out how many TCs are available, setting up
293 * phys_cpu_present_map and the logical/physical mappings. 293 * cpu_possible_map and the logical/physical mappings.
294 */ 294 */
295 295
296int __init smtc_build_cpu_map(int start_cpu_slot) 296int __init smtc_build_cpu_map(int start_cpu_slot)
@@ -304,7 +304,7 @@ int __init smtc_build_cpu_map(int start_cpu_slot)
304 */ 304 */
305 ntcs = ((read_c0_mvpconf0() & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1; 305 ntcs = ((read_c0_mvpconf0() & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1;
306 for (i=start_cpu_slot; i<NR_CPUS && i<ntcs; i++) { 306 for (i=start_cpu_slot; i<NR_CPUS && i<ntcs; i++) {
307 cpu_set(i, phys_cpu_present_map); 307 cpu_set(i, cpu_possible_map);
308 __cpu_number_map[i] = i; 308 __cpu_number_map[i] = i;
309 __cpu_logical_map[i] = i; 309 __cpu_logical_map[i] = i;
310 } 310 }
@@ -521,7 +521,7 @@ void smtc_prepare_cpus(int cpus)
521 * Pull any physically present but unused TCs out of circulation. 521 * Pull any physically present but unused TCs out of circulation.
522 */ 522 */
523 while (tc < (((val & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1)) { 523 while (tc < (((val & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1)) {
524 cpu_clear(tc, phys_cpu_present_map); 524 cpu_clear(tc, cpu_possible_map);
525 cpu_clear(tc, cpu_present_map); 525 cpu_clear(tc, cpu_present_map);
526 tc++; 526 tc++;
527 } 527 }
diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c
index f84a46a8ae6e..aabd7274507b 100644
--- a/arch/mips/mti-malta/malta-smtc.c
+++ b/arch/mips/mti-malta/malta-smtc.c
@@ -114,9 +114,9 @@ struct plat_smp_ops msmtc_smp_ops = {
114 */ 114 */
115 115
116 116
117void plat_set_irq_affinity(unsigned int irq, cpumask_t affinity) 117void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
118{ 118{
119 cpumask_t tmask = affinity; 119 cpumask_t tmask = *affinity;
120 int cpu = 0; 120 int cpu = 0;
121 void smtc_set_irq_affinity(unsigned int irq, cpumask_t aff); 121 void smtc_set_irq_affinity(unsigned int irq, cpumask_t aff);
122 122
@@ -139,7 +139,7 @@ void plat_set_irq_affinity(unsigned int irq, cpumask_t affinity)
139 * be made to forward to an offline "CPU". 139 * be made to forward to an offline "CPU".
140 */ 140 */
141 141
142 for_each_cpu_mask(cpu, affinity) { 142 for_each_cpu(cpu, affinity) {
143 if ((cpu_data[cpu].vpe_id != 0) || !cpu_online(cpu)) 143 if ((cpu_data[cpu].vpe_id != 0) || !cpu_online(cpu))
144 cpu_clear(cpu, tmask); 144 cpu_clear(cpu, tmask);
145 } 145 }
diff --git a/arch/mips/nxp/pnx8550/common/time.c b/arch/mips/nxp/pnx8550/common/time.c
index 62f495b57f93..cf293b279098 100644
--- a/arch/mips/nxp/pnx8550/common/time.c
+++ b/arch/mips/nxp/pnx8550/common/time.c
@@ -102,6 +102,7 @@ __init void plat_time_init(void)
102 unsigned int p; 102 unsigned int p;
103 unsigned int pow2p; 103 unsigned int pow2p;
104 104
105 pnx8xxx_clockevent.cpumask = cpu_none_mask;
105 clockevents_register_device(&pnx8xxx_clockevent); 106 clockevents_register_device(&pnx8xxx_clockevent);
106 clocksource_register(&pnx_clocksource); 107 clocksource_register(&pnx_clocksource);
107 108
diff --git a/arch/mips/pmc-sierra/yosemite/smp.c b/arch/mips/pmc-sierra/yosemite/smp.c
index 3a7df647ca77..f78c29b68d77 100644
--- a/arch/mips/pmc-sierra/yosemite/smp.c
+++ b/arch/mips/pmc-sierra/yosemite/smp.c
@@ -141,7 +141,7 @@ static void __cpuinit yos_boot_secondary(int cpu, struct task_struct *idle)
141} 141}
142 142
143/* 143/*
144 * Detect available CPUs, populate phys_cpu_present_map before smp_init 144 * Detect available CPUs, populate cpu_possible_map before smp_init
145 * 145 *
146 * We don't want to start the secondary CPU yet nor do we have a nice probing 146 * We don't want to start the secondary CPU yet nor do we have a nice probing
147 * feature in PMON so we just assume presence of the secondary core. 147 * feature in PMON so we just assume presence of the secondary core.
@@ -150,10 +150,10 @@ static void __init yos_smp_setup(void)
150{ 150{
151 int i; 151 int i;
152 152
153 cpus_clear(phys_cpu_present_map); 153 cpus_clear(cpu_possible_map);
154 154
155 for (i = 0; i < 2; i++) { 155 for (i = 0; i < 2; i++) {
156 cpu_set(i, phys_cpu_present_map); 156 cpu_set(i, cpu_possible_map);
157 __cpu_number_map[i] = i; 157 __cpu_number_map[i] = i;
158 __cpu_logical_map[i] = i; 158 __cpu_logical_map[i] = i;
159 } 159 }
diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c
index ba5cdebeaf0d..5b47d6b65275 100644
--- a/arch/mips/sgi-ip27/ip27-smp.c
+++ b/arch/mips/sgi-ip27/ip27-smp.c
@@ -76,7 +76,7 @@ static int do_cpumask(cnodeid_t cnode, nasid_t nasid, int highest)
76 /* Only let it join in if it's marked enabled */ 76 /* Only let it join in if it's marked enabled */
77 if ((acpu->cpu_info.flags & KLINFO_ENABLE) && 77 if ((acpu->cpu_info.flags & KLINFO_ENABLE) &&
78 (tot_cpus_found != NR_CPUS)) { 78 (tot_cpus_found != NR_CPUS)) {
79 cpu_set(cpuid, phys_cpu_present_map); 79 cpu_set(cpuid, cpu_possible_map);
80 alloc_cpupda(cpuid, tot_cpus_found); 80 alloc_cpupda(cpuid, tot_cpus_found);
81 cpus_found++; 81 cpus_found++;
82 tot_cpus_found++; 82 tot_cpus_found++;
diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c
index 1327c2746fb7..f024057a35f8 100644
--- a/arch/mips/sgi-ip27/ip27-timer.c
+++ b/arch/mips/sgi-ip27/ip27-timer.c
@@ -134,7 +134,7 @@ void __cpuinit hub_rt_clock_event_init(void)
134 cd->min_delta_ns = clockevent_delta2ns(0x300, cd); 134 cd->min_delta_ns = clockevent_delta2ns(0x300, cd);
135 cd->rating = 200; 135 cd->rating = 200;
136 cd->irq = irq; 136 cd->irq = irq;
137 cd->cpumask = cpumask_of_cpu(cpu); 137 cd->cpumask = cpumask_of(cpu);
138 cd->set_next_event = rt_next_event; 138 cd->set_next_event = rt_next_event;
139 cd->set_mode = rt_set_mode; 139 cd->set_mode = rt_set_mode;
140 clockevents_register_device(cd); 140 clockevents_register_device(cd);
diff --git a/arch/mips/sibyte/bcm1480/irq.c b/arch/mips/sibyte/bcm1480/irq.c
index a35818ed4263..12b465d404df 100644
--- a/arch/mips/sibyte/bcm1480/irq.c
+++ b/arch/mips/sibyte/bcm1480/irq.c
@@ -50,7 +50,7 @@ static void enable_bcm1480_irq(unsigned int irq);
50static void disable_bcm1480_irq(unsigned int irq); 50static void disable_bcm1480_irq(unsigned int irq);
51static void ack_bcm1480_irq(unsigned int irq); 51static void ack_bcm1480_irq(unsigned int irq);
52#ifdef CONFIG_SMP 52#ifdef CONFIG_SMP
53static void bcm1480_set_affinity(unsigned int irq, cpumask_t mask); 53static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask);
54#endif 54#endif
55 55
56#ifdef CONFIG_PCI 56#ifdef CONFIG_PCI
@@ -109,7 +109,7 @@ void bcm1480_unmask_irq(int cpu, int irq)
109} 109}
110 110
111#ifdef CONFIG_SMP 111#ifdef CONFIG_SMP
112static void bcm1480_set_affinity(unsigned int irq, cpumask_t mask) 112static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask)
113{ 113{
114 int i = 0, old_cpu, cpu, int_on, k; 114 int i = 0, old_cpu, cpu, int_on, k;
115 u64 cur_ints; 115 u64 cur_ints;
@@ -117,11 +117,11 @@ static void bcm1480_set_affinity(unsigned int irq, cpumask_t mask)
117 unsigned long flags; 117 unsigned long flags;
118 unsigned int irq_dirty; 118 unsigned int irq_dirty;
119 119
120 if (cpus_weight(mask) != 1) { 120 if (cpumask_weight(mask) != 1) {
121 printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq); 121 printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq);
122 return; 122 return;
123 } 123 }
124 i = first_cpu(mask); 124 i = cpumask_first(mask);
125 125
126 /* Convert logical CPU to physical CPU */ 126 /* Convert logical CPU to physical CPU */
127 cpu = cpu_logical_map(i); 127 cpu = cpu_logical_map(i);
diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c
index bd9eeb43ed0e..dddfda8e8294 100644
--- a/arch/mips/sibyte/bcm1480/smp.c
+++ b/arch/mips/sibyte/bcm1480/smp.c
@@ -136,7 +136,7 @@ static void __cpuinit bcm1480_boot_secondary(int cpu, struct task_struct *idle)
136 136
137/* 137/*
138 * Use CFE to find out how many CPUs are available, setting up 138 * Use CFE to find out how many CPUs are available, setting up
139 * phys_cpu_present_map and the logical/physical mappings. 139 * cpu_possible_map and the logical/physical mappings.
140 * XXXKW will the boot CPU ever not be physical 0? 140 * XXXKW will the boot CPU ever not be physical 0?
141 * 141 *
142 * Common setup before any secondaries are started 142 * Common setup before any secondaries are started
@@ -145,14 +145,14 @@ static void __init bcm1480_smp_setup(void)
145{ 145{
146 int i, num; 146 int i, num;
147 147
148 cpus_clear(phys_cpu_present_map); 148 cpus_clear(cpu_possible_map);
149 cpu_set(0, phys_cpu_present_map); 149 cpu_set(0, cpu_possible_map);
150 __cpu_number_map[0] = 0; 150 __cpu_number_map[0] = 0;
151 __cpu_logical_map[0] = 0; 151 __cpu_logical_map[0] = 0;
152 152
153 for (i = 1, num = 0; i < NR_CPUS; i++) { 153 for (i = 1, num = 0; i < NR_CPUS; i++) {
154 if (cfe_cpu_stop(i) == 0) { 154 if (cfe_cpu_stop(i) == 0) {
155 cpu_set(i, phys_cpu_present_map); 155 cpu_set(i, cpu_possible_map);
156 __cpu_number_map[i] = ++num; 156 __cpu_number_map[i] = ++num;
157 __cpu_logical_map[num] = i; 157 __cpu_logical_map[num] = i;
158 } 158 }
diff --git a/arch/mips/sibyte/sb1250/irq.c b/arch/mips/sibyte/sb1250/irq.c
index a5158483986e..808ac2959b8c 100644
--- a/arch/mips/sibyte/sb1250/irq.c
+++ b/arch/mips/sibyte/sb1250/irq.c
@@ -50,7 +50,7 @@ static void enable_sb1250_irq(unsigned int irq);
50static void disable_sb1250_irq(unsigned int irq); 50static void disable_sb1250_irq(unsigned int irq);
51static void ack_sb1250_irq(unsigned int irq); 51static void ack_sb1250_irq(unsigned int irq);
52#ifdef CONFIG_SMP 52#ifdef CONFIG_SMP
53static void sb1250_set_affinity(unsigned int irq, cpumask_t mask); 53static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask);
54#endif 54#endif
55 55
56#ifdef CONFIG_SIBYTE_HAS_LDT 56#ifdef CONFIG_SIBYTE_HAS_LDT
@@ -103,16 +103,16 @@ void sb1250_unmask_irq(int cpu, int irq)
103} 103}
104 104
105#ifdef CONFIG_SMP 105#ifdef CONFIG_SMP
106static void sb1250_set_affinity(unsigned int irq, cpumask_t mask) 106static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask)
107{ 107{
108 int i = 0, old_cpu, cpu, int_on; 108 int i = 0, old_cpu, cpu, int_on;
109 u64 cur_ints; 109 u64 cur_ints;
110 struct irq_desc *desc = irq_desc + irq; 110 struct irq_desc *desc = irq_desc + irq;
111 unsigned long flags; 111 unsigned long flags;
112 112
113 i = first_cpu(mask); 113 i = cpumask_first(mask);
114 114
115 if (cpus_weight(mask) > 1) { 115 if (cpumask_weight(mask) > 1) {
116 printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq); 116 printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq);
117 return; 117 return;
118 } 118 }
diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c
index 0734b933e969..5950a288a7da 100644
--- a/arch/mips/sibyte/sb1250/smp.c
+++ b/arch/mips/sibyte/sb1250/smp.c
@@ -124,7 +124,7 @@ static void __cpuinit sb1250_boot_secondary(int cpu, struct task_struct *idle)
124 124
125/* 125/*
126 * Use CFE to find out how many CPUs are available, setting up 126 * Use CFE to find out how many CPUs are available, setting up
127 * phys_cpu_present_map and the logical/physical mappings. 127 * cpu_possible_map and the logical/physical mappings.
128 * XXXKW will the boot CPU ever not be physical 0? 128 * XXXKW will the boot CPU ever not be physical 0?
129 * 129 *
130 * Common setup before any secondaries are started 130 * Common setup before any secondaries are started
@@ -133,14 +133,14 @@ static void __init sb1250_smp_setup(void)
133{ 133{
134 int i, num; 134 int i, num;
135 135
136 cpus_clear(phys_cpu_present_map); 136 cpus_clear(cpu_possible_map);
137 cpu_set(0, phys_cpu_present_map); 137 cpu_set(0, cpu_possible_map);
138 __cpu_number_map[0] = 0; 138 __cpu_number_map[0] = 0;
139 __cpu_logical_map[0] = 0; 139 __cpu_logical_map[0] = 0;
140 140
141 for (i = 1, num = 0; i < NR_CPUS; i++) { 141 for (i = 1, num = 0; i < NR_CPUS; i++) {
142 if (cfe_cpu_stop(i) == 0) { 142 if (cfe_cpu_stop(i) == 0) {
143 cpu_set(i, phys_cpu_present_map); 143 cpu_set(i, cpu_possible_map);
144 __cpu_number_map[i] = ++num; 144 __cpu_number_map[i] = ++num;
145 __cpu_logical_map[num] = i; 145 __cpu_logical_map[num] = i;
146 } 146 }
diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c
index 796e3ce28720..69f5f88711cc 100644
--- a/arch/mips/sni/time.c
+++ b/arch/mips/sni/time.c
@@ -80,7 +80,7 @@ static void __init sni_a20r_timer_setup(void)
80 struct irqaction *action = &a20r_irqaction; 80 struct irqaction *action = &a20r_irqaction;
81 unsigned int cpu = smp_processor_id(); 81 unsigned int cpu = smp_processor_id();
82 82
83 cd->cpumask = cpumask_of_cpu(cpu); 83 cd->cpumask = cpumask_of(cpu);
84 clockevents_register_device(cd); 84 clockevents_register_device(cd);
85 action->dev_id = cd; 85 action->dev_id = cd;
86 setup_irq(SNI_A20R_IRQ_TIMER, &a20r_irqaction); 86 setup_irq(SNI_A20R_IRQ_TIMER, &a20r_irqaction);
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 644a70b1b04e..aacf11d33723 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -11,6 +11,7 @@ config PARISC
11 select HAVE_OPROFILE 11 select HAVE_OPROFILE
12 select RTC_CLASS 12 select RTC_CLASS
13 select RTC_DRV_PARISC 13 select RTC_DRV_PARISC
14 select INIT_ALL_POSSIBLE
14 help 15 help
15 The PA-RISC microprocessor is designed by Hewlett-Packard and used 16 The PA-RISC microprocessor is designed by Hewlett-Packard and used
16 in many of their workstations & servers (HP9000 700 and 800 series, 17 in many of their workstations & servers (HP9000 700 and 800 series,
diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c
index 23ef950df008..4cea935e2f99 100644
--- a/arch/parisc/kernel/irq.c
+++ b/arch/parisc/kernel/irq.c
@@ -131,12 +131,12 @@ int cpu_check_affinity(unsigned int irq, cpumask_t *dest)
131 return 0; 131 return 0;
132} 132}
133 133
134static void cpu_set_affinity_irq(unsigned int irq, cpumask_t dest) 134static void cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest)
135{ 135{
136 if (cpu_check_affinity(irq, &dest)) 136 if (cpu_check_affinity(irq, dest))
137 return; 137 return;
138 138
139 irq_desc[irq].affinity = dest; 139 irq_desc[irq].affinity = *dest;
140} 140}
141#endif 141#endif
142 142
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index d47f3975c9c6..80bc000523fa 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -67,21 +67,6 @@ static volatile int cpu_now_booting __read_mostly = 0; /* track which CPU is boo
67 67
68static int parisc_max_cpus __read_mostly = 1; 68static int parisc_max_cpus __read_mostly = 1;
69 69
70/* online cpus are ones that we've managed to bring up completely
71 * possible cpus are all valid cpu
72 * present cpus are all detected cpu
73 *
74 * On startup we bring up the "possible" cpus. Since we discover
75 * CPUs later, we add them as hotplug, so the possible cpu mask is
76 * empty in the beginning.
77 */
78
79cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE; /* Bitmap of online CPUs */
80cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL; /* Bitmap of Present CPUs */
81
82EXPORT_SYMBOL(cpu_online_map);
83EXPORT_SYMBOL(cpu_possible_map);
84
85DEFINE_PER_CPU(spinlock_t, ipi_lock) = SPIN_LOCK_UNLOCKED; 70DEFINE_PER_CPU(spinlock_t, ipi_lock) = SPIN_LOCK_UNLOCKED;
86 71
87enum ipi_message_type { 72enum ipi_message_type {
diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h
new file mode 100644
index 000000000000..9b198d1b3b2b
--- /dev/null
+++ b/arch/powerpc/include/asm/disassemble.h
@@ -0,0 +1,80 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#ifndef __ASM_PPC_DISASSEMBLE_H__
21#define __ASM_PPC_DISASSEMBLE_H__
22
23#include <linux/types.h>
24
25static inline unsigned int get_op(u32 inst)
26{
27 return inst >> 26;
28}
29
30static inline unsigned int get_xop(u32 inst)
31{
32 return (inst >> 1) & 0x3ff;
33}
34
35static inline unsigned int get_sprn(u32 inst)
36{
37 return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
38}
39
40static inline unsigned int get_dcrn(u32 inst)
41{
42 return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
43}
44
45static inline unsigned int get_rt(u32 inst)
46{
47 return (inst >> 21) & 0x1f;
48}
49
50static inline unsigned int get_rs(u32 inst)
51{
52 return (inst >> 21) & 0x1f;
53}
54
55static inline unsigned int get_ra(u32 inst)
56{
57 return (inst >> 16) & 0x1f;
58}
59
60static inline unsigned int get_rb(u32 inst)
61{
62 return (inst >> 11) & 0x1f;
63}
64
65static inline unsigned int get_rc(u32 inst)
66{
67 return inst & 0x1;
68}
69
70static inline unsigned int get_ws(u32 inst)
71{
72 return (inst >> 11) & 0x1f;
73}
74
75static inline unsigned int get_d(u32 inst)
76{
77 return inst & 0xffff;
78}
79
80#endif /* __ASM_PPC_DISASSEMBLE_H__ */
diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h
new file mode 100644
index 000000000000..f49031b632ca
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_44x.h
@@ -0,0 +1,61 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#ifndef __ASM_44X_H__
21#define __ASM_44X_H__
22
23#include <linux/kvm_host.h>
24
25#define PPC44x_TLB_SIZE 64
26
27/* If the guest is expecting it, this can be as large as we like; we'd just
28 * need to find some way of advertising it. */
29#define KVM44x_GUEST_TLB_SIZE 64
30
31struct kvmppc_44x_shadow_ref {
32 struct page *page;
33 u16 gtlb_index;
34 u8 writeable;
35 u8 tid;
36};
37
38struct kvmppc_vcpu_44x {
39 /* Unmodified copy of the guest's TLB. */
40 struct kvmppc_44x_tlbe guest_tlb[KVM44x_GUEST_TLB_SIZE];
41
42 /* References to guest pages in the hardware TLB. */
43 struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE];
44
45 /* State of the shadow TLB at guest context switch time. */
46 struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE];
47 u8 shadow_tlb_mod[PPC44x_TLB_SIZE];
48
49 struct kvm_vcpu vcpu;
50};
51
52static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu)
53{
54 return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu);
55}
56
57void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid);
58void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu);
59void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu);
60
61#endif /* __ASM_44X_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 34b52b7180cd..c1e436fe7738 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -64,27 +64,58 @@ struct kvm_vcpu_stat {
64 u32 halt_wakeup; 64 u32 halt_wakeup;
65}; 65};
66 66
67struct tlbe { 67struct kvmppc_44x_tlbe {
68 u32 tid; /* Only the low 8 bits are used. */ 68 u32 tid; /* Only the low 8 bits are used. */
69 u32 word0; 69 u32 word0;
70 u32 word1; 70 u32 word1;
71 u32 word2; 71 u32 word2;
72}; 72};
73 73
74struct kvm_arch { 74enum kvm_exit_types {
75 MMIO_EXITS,
76 DCR_EXITS,
77 SIGNAL_EXITS,
78 ITLB_REAL_MISS_EXITS,
79 ITLB_VIRT_MISS_EXITS,
80 DTLB_REAL_MISS_EXITS,
81 DTLB_VIRT_MISS_EXITS,
82 SYSCALL_EXITS,
83 ISI_EXITS,
84 DSI_EXITS,
85 EMULATED_INST_EXITS,
86 EMULATED_MTMSRWE_EXITS,
87 EMULATED_WRTEE_EXITS,
88 EMULATED_MTSPR_EXITS,
89 EMULATED_MFSPR_EXITS,
90 EMULATED_MTMSR_EXITS,
91 EMULATED_MFMSR_EXITS,
92 EMULATED_TLBSX_EXITS,
93 EMULATED_TLBWE_EXITS,
94 EMULATED_RFI_EXITS,
95 DEC_EXITS,
96 EXT_INTR_EXITS,
97 HALT_WAKEUP,
98 USR_PR_INST,
99 FP_UNAVAIL,
100 DEBUG_EXITS,
101 TIMEINGUEST,
102 __NUMBER_OF_KVM_EXIT_TYPES
75}; 103};
76 104
77struct kvm_vcpu_arch { 105/* allow access to big endian 32bit upper/lower parts and 64bit var */
78 /* Unmodified copy of the guest's TLB. */ 106struct kvmppc_exit_timing {
79 struct tlbe guest_tlb[PPC44x_TLB_SIZE]; 107 union {
80 /* TLB that's actually used when the guest is running. */ 108 u64 tv64;
81 struct tlbe shadow_tlb[PPC44x_TLB_SIZE]; 109 struct {
82 /* Pages which are referenced in the shadow TLB. */ 110 u32 tbu, tbl;
83 struct page *shadow_pages[PPC44x_TLB_SIZE]; 111 } tv32;
112 };
113};
84 114
85 /* Track which TLB entries we've modified in the current exit. */ 115struct kvm_arch {
86 u8 shadow_tlb_mod[PPC44x_TLB_SIZE]; 116};
87 117
118struct kvm_vcpu_arch {
88 u32 host_stack; 119 u32 host_stack;
89 u32 host_pid; 120 u32 host_pid;
90 u32 host_dbcr0; 121 u32 host_dbcr0;
@@ -94,32 +125,32 @@ struct kvm_vcpu_arch {
94 u32 host_msr; 125 u32 host_msr;
95 126
96 u64 fpr[32]; 127 u64 fpr[32];
97 u32 gpr[32]; 128 ulong gpr[32];
98 129
99 u32 pc; 130 ulong pc;
100 u32 cr; 131 u32 cr;
101 u32 ctr; 132 ulong ctr;
102 u32 lr; 133 ulong lr;
103 u32 xer; 134 ulong xer;
104 135
105 u32 msr; 136 ulong msr;
106 u32 mmucr; 137 u32 mmucr;
107 u32 sprg0; 138 ulong sprg0;
108 u32 sprg1; 139 ulong sprg1;
109 u32 sprg2; 140 ulong sprg2;
110 u32 sprg3; 141 ulong sprg3;
111 u32 sprg4; 142 ulong sprg4;
112 u32 sprg5; 143 ulong sprg5;
113 u32 sprg6; 144 ulong sprg6;
114 u32 sprg7; 145 ulong sprg7;
115 u32 srr0; 146 ulong srr0;
116 u32 srr1; 147 ulong srr1;
117 u32 csrr0; 148 ulong csrr0;
118 u32 csrr1; 149 ulong csrr1;
119 u32 dsrr0; 150 ulong dsrr0;
120 u32 dsrr1; 151 ulong dsrr1;
121 u32 dear; 152 ulong dear;
122 u32 esr; 153 ulong esr;
123 u32 dec; 154 u32 dec;
124 u32 decar; 155 u32 decar;
125 u32 tbl; 156 u32 tbl;
@@ -127,7 +158,7 @@ struct kvm_vcpu_arch {
127 u32 tcr; 158 u32 tcr;
128 u32 tsr; 159 u32 tsr;
129 u32 ivor[16]; 160 u32 ivor[16];
130 u32 ivpr; 161 ulong ivpr;
131 u32 pir; 162 u32 pir;
132 163
133 u32 shadow_pid; 164 u32 shadow_pid;
@@ -140,9 +171,22 @@ struct kvm_vcpu_arch {
140 u32 dbcr0; 171 u32 dbcr0;
141 u32 dbcr1; 172 u32 dbcr1;
142 173
174#ifdef CONFIG_KVM_EXIT_TIMING
175 struct kvmppc_exit_timing timing_exit;
176 struct kvmppc_exit_timing timing_last_enter;
177 u32 last_exit_type;
178 u32 timing_count_type[__NUMBER_OF_KVM_EXIT_TYPES];
179 u64 timing_sum_duration[__NUMBER_OF_KVM_EXIT_TYPES];
180 u64 timing_sum_quad_duration[__NUMBER_OF_KVM_EXIT_TYPES];
181 u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES];
182 u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES];
183 u64 timing_last_exit;
184 struct dentry *debugfs_exit_timing;
185#endif
186
143 u32 last_inst; 187 u32 last_inst;
144 u32 fault_dear; 188 ulong fault_dear;
145 u32 fault_esr; 189 ulong fault_esr;
146 gpa_t paddr_accessed; 190 gpa_t paddr_accessed;
147 191
148 u8 io_gpr; /* GPR used as IO source/target */ 192 u8 io_gpr; /* GPR used as IO source/target */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index bb62ad876de3..36d2a50a8487 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -29,11 +29,6 @@
29#include <linux/kvm_types.h> 29#include <linux/kvm_types.h>
30#include <linux/kvm_host.h> 30#include <linux/kvm_host.h>
31 31
32struct kvm_tlb {
33 struct tlbe guest_tlb[PPC44x_TLB_SIZE];
34 struct tlbe shadow_tlb[PPC44x_TLB_SIZE];
35};
36
37enum emulation_result { 32enum emulation_result {
38 EMULATE_DONE, /* no further processing */ 33 EMULATE_DONE, /* no further processing */
39 EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ 34 EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */
@@ -41,9 +36,6 @@ enum emulation_result {
41 EMULATE_FAIL, /* can't emulate this instruction */ 36 EMULATE_FAIL, /* can't emulate this instruction */
42}; 37};
43 38
44extern const unsigned char exception_priority[];
45extern const unsigned char priority_exception[];
46
47extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); 39extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
48extern char kvmppc_handlers_start[]; 40extern char kvmppc_handlers_start[];
49extern unsigned long kvmppc_handler_len; 41extern unsigned long kvmppc_handler_len;
@@ -58,51 +50,44 @@ extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
58extern int kvmppc_emulate_instruction(struct kvm_run *run, 50extern int kvmppc_emulate_instruction(struct kvm_run *run,
59 struct kvm_vcpu *vcpu); 51 struct kvm_vcpu *vcpu);
60extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); 52extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
53extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
61 54
62extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, 55extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
63 u64 asid, u32 flags); 56 u64 asid, u32 flags, u32 max_bytes,
64extern void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, 57 unsigned int gtlb_idx);
65 gva_t eend, u32 asid);
66extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); 58extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
67extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); 59extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
68 60
69/* XXX Book E specific */ 61/* Core-specific hooks */
70extern void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i); 62
71 63extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm,
72extern void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu); 64 unsigned int id);
73 65extern void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu);
74static inline void kvmppc_queue_exception(struct kvm_vcpu *vcpu, int exception) 66extern int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu);
75{ 67extern int kvmppc_core_check_processor_compat(void);
76 unsigned int priority = exception_priority[exception]; 68extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
77 set_bit(priority, &vcpu->arch.pending_exceptions); 69 struct kvm_translation *tr);
78} 70
79 71extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
80static inline void kvmppc_clear_exception(struct kvm_vcpu *vcpu, int exception) 72extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
81{ 73
82 unsigned int priority = exception_priority[exception]; 74extern void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu);
83 clear_bit(priority, &vcpu->arch.pending_exceptions); 75extern void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu);
84} 76
85 77extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu);
86/* Helper function for "full" MSR writes. No need to call this if only EE is 78extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
87 * changing. */ 79extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu);
88static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) 80extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
89{ 81extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
90 if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR)) 82 struct kvm_interrupt *irq);
91 kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR); 83
92 84extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
93 vcpu->arch.msr = new_msr; 85 unsigned int op, int *advance);
94 86extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs);
95 if (vcpu->arch.msr & MSR_WE) 87extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
96 kvm_vcpu_block(vcpu); 88
97} 89extern int kvmppc_booke_init(void);
98 90extern void kvmppc_booke_exit(void);
99static inline void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid)
100{
101 if (vcpu->arch.pid != new_pid) {
102 vcpu->arch.pid = new_pid;
103 vcpu->arch.swap_pid = 1;
104 }
105}
106 91
107extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); 92extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
108 93
diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h
index 8a97cfb08b7e..27cc6fdcd3b7 100644
--- a/arch/powerpc/include/asm/mmu-44x.h
+++ b/arch/powerpc/include/asm/mmu-44x.h
@@ -56,6 +56,7 @@
56#ifndef __ASSEMBLY__ 56#ifndef __ASSEMBLY__
57 57
58extern unsigned int tlb_44x_hwater; 58extern unsigned int tlb_44x_hwater;
59extern unsigned int tlb_44x_index;
59 60
60typedef struct { 61typedef struct {
61 unsigned int id; 62 unsigned int id;
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index c32da6f97999..373fca394a54 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -48,7 +48,6 @@ static inline int pcibus_to_node(struct pci_bus *bus)
48 48
49/* sched_domains SD_NODE_INIT for PPC64 machines */ 49/* sched_domains SD_NODE_INIT for PPC64 machines */
50#define SD_NODE_INIT (struct sched_domain) { \ 50#define SD_NODE_INIT (struct sched_domain) { \
51 .span = CPU_MASK_NONE, \
52 .parent = NULL, \ 51 .parent = NULL, \
53 .child = NULL, \ 52 .child = NULL, \
54 .groups = NULL, \ 53 .groups = NULL, \
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 661d07d2146b..9937fe44555f 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -23,9 +23,6 @@
23#include <linux/mm.h> 23#include <linux/mm.h>
24#include <linux/suspend.h> 24#include <linux/suspend.h>
25#include <linux/hrtimer.h> 25#include <linux/hrtimer.h>
26#ifdef CONFIG_KVM
27#include <linux/kvm_host.h>
28#endif
29#ifdef CONFIG_PPC64 26#ifdef CONFIG_PPC64
30#include <linux/time.h> 27#include <linux/time.h>
31#include <linux/hardirq.h> 28#include <linux/hardirq.h>
@@ -51,6 +48,9 @@
51#ifdef CONFIG_PPC_ISERIES 48#ifdef CONFIG_PPC_ISERIES
52#include <asm/iseries/alpaca.h> 49#include <asm/iseries/alpaca.h>
53#endif 50#endif
51#ifdef CONFIG_KVM
52#include <asm/kvm_44x.h>
53#endif
54 54
55#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) 55#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
56#include "head_booke.h" 56#include "head_booke.h"
@@ -357,12 +357,10 @@ int main(void)
357 DEFINE(PTE_SIZE, sizeof(pte_t)); 357 DEFINE(PTE_SIZE, sizeof(pte_t));
358 358
359#ifdef CONFIG_KVM 359#ifdef CONFIG_KVM
360 DEFINE(TLBE_BYTES, sizeof(struct tlbe)); 360 DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe));
361 361
362 DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); 362 DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
363 DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); 363 DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
364 DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb));
365 DEFINE(VCPU_SHADOW_MOD, offsetof(struct kvm_vcpu, arch.shadow_tlb_mod));
366 DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); 364 DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
367 DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); 365 DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
368 DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); 366 DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
@@ -385,5 +383,16 @@ int main(void)
385 DEFINE(PTE_T_LOG2, PTE_T_LOG2); 383 DEFINE(PTE_T_LOG2, PTE_T_LOG2);
386#endif 384#endif
387 385
386#ifdef CONFIG_KVM_EXIT_TIMING
387 DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu,
388 arch.timing_exit.tv32.tbu));
389 DEFINE(VCPU_TIMING_EXIT_TBL, offsetof(struct kvm_vcpu,
390 arch.timing_exit.tv32.tbl));
391 DEFINE(VCPU_TIMING_LAST_ENTER_TBU, offsetof(struct kvm_vcpu,
392 arch.timing_last_enter.tv32.tbu));
393 DEFINE(VCPU_TIMING_LAST_ENTER_TBL, offsetof(struct kvm_vcpu,
394 arch.timing_last_enter.tv32.tbl));
395#endif
396
388 return 0; 397 return 0;
389} 398}
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index ac222d0ab12e..23b8b5e36f98 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -237,7 +237,7 @@ void fixup_irqs(cpumask_t map)
237 mask = map; 237 mask = map;
238 } 238 }
239 if (irq_desc[irq].chip->set_affinity) 239 if (irq_desc[irq].chip->set_affinity)
240 irq_desc[irq].chip->set_affinity(irq, mask); 240 irq_desc[irq].chip->set_affinity(irq, &mask);
241 else if (irq_desc[irq].action && !(warned++)) 241 else if (irq_desc[irq].action && !(warned++))
242 printk("Cannot set affinity for irq %i\n", irq); 242 printk("Cannot set affinity for irq %i\n", irq);
243 } 243 }
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 51b201ddf9a1..fb7049c054c0 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -33,6 +33,7 @@
33#include <linux/mqueue.h> 33#include <linux/mqueue.h>
34#include <linux/hardirq.h> 34#include <linux/hardirq.h>
35#include <linux/utsname.h> 35#include <linux/utsname.h>
36#include <linux/kernel_stat.h>
36 37
37#include <asm/pgtable.h> 38#include <asm/pgtable.h>
38#include <asm/uaccess.h> 39#include <asm/uaccess.h>
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 8ac3f721d235..65484b2200b3 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -59,13 +59,9 @@
59 59
60struct thread_info *secondary_ti; 60struct thread_info *secondary_ti;
61 61
62cpumask_t cpu_possible_map = CPU_MASK_NONE;
63cpumask_t cpu_online_map = CPU_MASK_NONE;
64DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE; 62DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
65DEFINE_PER_CPU(cpumask_t, cpu_core_map) = CPU_MASK_NONE; 63DEFINE_PER_CPU(cpumask_t, cpu_core_map) = CPU_MASK_NONE;
66 64
67EXPORT_SYMBOL(cpu_online_map);
68EXPORT_SYMBOL(cpu_possible_map);
69EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); 65EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
70EXPORT_PER_CPU_SYMBOL(cpu_core_map); 66EXPORT_PER_CPU_SYMBOL(cpu_core_map);
71 67
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index e1f3a5140429..c9564031a2a9 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -256,8 +256,10 @@ void account_system_vtime(struct task_struct *tsk)
256 delta += sys_time; 256 delta += sys_time;
257 get_paca()->system_time = 0; 257 get_paca()->system_time = 0;
258 } 258 }
259 account_system_time(tsk, 0, delta); 259 if (in_irq() || idle_task(smp_processor_id()) != tsk)
260 account_system_time_scaled(tsk, deltascaled); 260 account_system_time(tsk, 0, delta, deltascaled);
261 else
262 account_idle_time(delta);
261 per_cpu(cputime_last_delta, smp_processor_id()) = delta; 263 per_cpu(cputime_last_delta, smp_processor_id()) = delta;
262 per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled; 264 per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled;
263 local_irq_restore(flags); 265 local_irq_restore(flags);
@@ -275,10 +277,8 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
275 277
276 utime = get_paca()->user_time; 278 utime = get_paca()->user_time;
277 get_paca()->user_time = 0; 279 get_paca()->user_time = 0;
278 account_user_time(tsk, utime);
279
280 utimescaled = cputime_to_scaled(utime); 280 utimescaled = cputime_to_scaled(utime);
281 account_user_time_scaled(tsk, utimescaled); 281 account_user_time(tsk, utime, utimescaled);
282} 282}
283 283
284/* 284/*
@@ -338,8 +338,12 @@ void calculate_steal_time(void)
338 tb = mftb(); 338 tb = mftb();
339 purr = mfspr(SPRN_PURR); 339 purr = mfspr(SPRN_PURR);
340 stolen = (tb - pme->tb) - (purr - pme->purr); 340 stolen = (tb - pme->tb) - (purr - pme->purr);
341 if (stolen > 0) 341 if (stolen > 0) {
342 account_steal_time(current, stolen); 342 if (idle_task(smp_processor_id()) != current)
343 account_steal_time(stolen);
344 else
345 account_idle_time(stolen);
346 }
343 pme->tb = tb; 347 pme->tb = tb;
344 pme->purr = purr; 348 pme->purr = purr;
345} 349}
@@ -844,7 +848,7 @@ static void register_decrementer_clockevent(int cpu)
844 struct clock_event_device *dec = &per_cpu(decrementers, cpu).event; 848 struct clock_event_device *dec = &per_cpu(decrementers, cpu).event;
845 849
846 *dec = decrementer_clockevent; 850 *dec = decrementer_clockevent;
847 dec->cpumask = cpumask_of_cpu(cpu); 851 dec->cpumask = cpumask_of(cpu);
848 852
849 printk(KERN_DEBUG "clockevent: %s mult[%lx] shift[%d] cpu[%d]\n", 853 printk(KERN_DEBUG "clockevent: %s mult[%lx] shift[%d] cpu[%d]\n",
850 dec->name, dec->mult, dec->shift, cpu); 854 dec->name, dec->mult, dec->shift, cpu);
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
new file mode 100644
index 000000000000..a66bec57265a
--- /dev/null
+++ b/arch/powerpc/kvm/44x.c
@@ -0,0 +1,228 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#include <linux/kvm_host.h>
21#include <linux/err.h>
22
23#include <asm/reg.h>
24#include <asm/cputable.h>
25#include <asm/tlbflush.h>
26#include <asm/kvm_44x.h>
27#include <asm/kvm_ppc.h>
28
29#include "44x_tlb.h"
30
31/* Note: clearing MSR[DE] just means that the debug interrupt will not be
32 * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits.
33 * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt
34 * will be delivered as an "imprecise debug event" (which is indicated by
35 * DBSR[IDE].
36 */
37static void kvm44x_disable_debug_interrupts(void)
38{
39 mtmsr(mfmsr() & ~MSR_DE);
40}
41
42void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
43{
44 kvm44x_disable_debug_interrupts();
45
46 mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]);
47 mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]);
48 mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]);
49 mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]);
50 mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1);
51 mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2);
52 mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0);
53 mtmsr(vcpu->arch.host_msr);
54}
55
56void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
57{
58 struct kvm_guest_debug *dbg = &vcpu->guest_debug;
59 u32 dbcr0 = 0;
60
61 vcpu->arch.host_msr = mfmsr();
62 kvm44x_disable_debug_interrupts();
63
64 /* Save host debug register state. */
65 vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1);
66 vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2);
67 vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3);
68 vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4);
69 vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0);
70 vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1);
71 vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2);
72
73 /* set registers up for guest */
74
75 if (dbg->bp[0]) {
76 mtspr(SPRN_IAC1, dbg->bp[0]);
77 dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
78 }
79 if (dbg->bp[1]) {
80 mtspr(SPRN_IAC2, dbg->bp[1]);
81 dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
82 }
83 if (dbg->bp[2]) {
84 mtspr(SPRN_IAC3, dbg->bp[2]);
85 dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
86 }
87 if (dbg->bp[3]) {
88 mtspr(SPRN_IAC4, dbg->bp[3]);
89 dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
90 }
91
92 mtspr(SPRN_DBCR0, dbcr0);
93 mtspr(SPRN_DBCR1, 0);
94 mtspr(SPRN_DBCR2, 0);
95}
96
97void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
98{
99 kvmppc_44x_tlb_load(vcpu);
100}
101
102void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
103{
104 kvmppc_44x_tlb_put(vcpu);
105}
106
107int kvmppc_core_check_processor_compat(void)
108{
109 int r;
110
111 if (strcmp(cur_cpu_spec->platform, "ppc440") == 0)
112 r = 0;
113 else
114 r = -ENOTSUPP;
115
116 return r;
117}
118
119int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
120{
121 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
122 struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[0];
123 int i;
124
125 tlbe->tid = 0;
126 tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
127 tlbe->word1 = 0;
128 tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR;
129
130 tlbe++;
131 tlbe->tid = 0;
132 tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID;
133 tlbe->word1 = 0xef600000;
134 tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR
135 | PPC44x_TLB_I | PPC44x_TLB_G;
136
137 /* Since the guest can directly access the timebase, it must know the
138 * real timebase frequency. Accordingly, it must see the state of
139 * CCR1[TCS]. */
140 vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
141
142 for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++)
143 vcpu_44x->shadow_refs[i].gtlb_index = -1;
144
145 return 0;
146}
147
148/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
149int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
150 struct kvm_translation *tr)
151{
152 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
153 struct kvmppc_44x_tlbe *gtlbe;
154 int index;
155 gva_t eaddr;
156 u8 pid;
157 u8 as;
158
159 eaddr = tr->linear_address;
160 pid = (tr->linear_address >> 32) & 0xff;
161 as = (tr->linear_address >> 40) & 0x1;
162
163 index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as);
164 if (index == -1) {
165 tr->valid = 0;
166 return 0;
167 }
168
169 gtlbe = &vcpu_44x->guest_tlb[index];
170
171 tr->physical_address = tlb_xlate(gtlbe, eaddr);
172 /* XXX what does "writeable" and "usermode" even mean? */
173 tr->valid = 1;
174
175 return 0;
176}
177
178struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
179{
180 struct kvmppc_vcpu_44x *vcpu_44x;
181 struct kvm_vcpu *vcpu;
182 int err;
183
184 vcpu_44x = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
185 if (!vcpu_44x) {
186 err = -ENOMEM;
187 goto out;
188 }
189
190 vcpu = &vcpu_44x->vcpu;
191 err = kvm_vcpu_init(vcpu, kvm, id);
192 if (err)
193 goto free_vcpu;
194
195 return vcpu;
196
197free_vcpu:
198 kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
199out:
200 return ERR_PTR(err);
201}
202
203void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
204{
205 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
206
207 kvm_vcpu_uninit(vcpu);
208 kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
209}
210
211static int kvmppc_44x_init(void)
212{
213 int r;
214
215 r = kvmppc_booke_init();
216 if (r)
217 return r;
218
219 return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), THIS_MODULE);
220}
221
222static void kvmppc_44x_exit(void)
223{
224 kvmppc_booke_exit();
225}
226
227module_init(kvmppc_44x_init);
228module_exit(kvmppc_44x_exit);
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
new file mode 100644
index 000000000000..82489a743a6f
--- /dev/null
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -0,0 +1,371 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#include <asm/kvm_ppc.h>
21#include <asm/dcr.h>
22#include <asm/dcr-regs.h>
23#include <asm/disassemble.h>
24#include <asm/kvm_44x.h>
25#include "timing.h"
26
27#include "booke.h"
28#include "44x_tlb.h"
29
30#define OP_RFI 19
31
32#define XOP_RFI 50
33#define XOP_MFMSR 83
34#define XOP_WRTEE 131
35#define XOP_MTMSR 146
36#define XOP_WRTEEI 163
37#define XOP_MFDCR 323
38#define XOP_MTDCR 451
39#define XOP_TLBSX 914
40#define XOP_ICCCI 966
41#define XOP_TLBWE 978
42
43static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
44{
45 vcpu->arch.pc = vcpu->arch.srr0;
46 kvmppc_set_msr(vcpu, vcpu->arch.srr1);
47}
48
49int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
50 unsigned int inst, int *advance)
51{
52 int emulated = EMULATE_DONE;
53 int dcrn;
54 int ra;
55 int rb;
56 int rc;
57 int rs;
58 int rt;
59 int ws;
60
61 switch (get_op(inst)) {
62 case OP_RFI:
63 switch (get_xop(inst)) {
64 case XOP_RFI:
65 kvmppc_emul_rfi(vcpu);
66 kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS);
67 *advance = 0;
68 break;
69
70 default:
71 emulated = EMULATE_FAIL;
72 break;
73 }
74 break;
75
76 case 31:
77 switch (get_xop(inst)) {
78
79 case XOP_MFMSR:
80 rt = get_rt(inst);
81 vcpu->arch.gpr[rt] = vcpu->arch.msr;
82 kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
83 break;
84
85 case XOP_MTMSR:
86 rs = get_rs(inst);
87 kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
88 kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
89 break;
90
91 case XOP_WRTEE:
92 rs = get_rs(inst);
93 vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
94 | (vcpu->arch.gpr[rs] & MSR_EE);
95 kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
96 break;
97
98 case XOP_WRTEEI:
99 vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
100 | (inst & MSR_EE);
101 kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
102 break;
103
104 case XOP_MFDCR:
105 dcrn = get_dcrn(inst);
106 rt = get_rt(inst);
107
108 /* The guest may access CPR0 registers to determine the timebase
109 * frequency, and it must know the real host frequency because it
110 * can directly access the timebase registers.
111 *
112 * It would be possible to emulate those accesses in userspace,
113 * but userspace can really only figure out the end frequency.
114 * We could decompose that into the factors that compute it, but
115 * that's tricky math, and it's easier to just report the real
116 * CPR0 values.
117 */
118 switch (dcrn) {
119 case DCRN_CPR0_CONFIG_ADDR:
120 vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
121 break;
122 case DCRN_CPR0_CONFIG_DATA:
123 local_irq_disable();
124 mtdcr(DCRN_CPR0_CONFIG_ADDR,
125 vcpu->arch.cpr0_cfgaddr);
126 vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
127 local_irq_enable();
128 break;
129 default:
130 run->dcr.dcrn = dcrn;
131 run->dcr.data = 0;
132 run->dcr.is_write = 0;
133 vcpu->arch.io_gpr = rt;
134 vcpu->arch.dcr_needed = 1;
135 kvmppc_account_exit(vcpu, DCR_EXITS);
136 emulated = EMULATE_DO_DCR;
137 }
138
139 break;
140
141 case XOP_MTDCR:
142 dcrn = get_dcrn(inst);
143 rs = get_rs(inst);
144
145 /* emulate some access in kernel */
146 switch (dcrn) {
147 case DCRN_CPR0_CONFIG_ADDR:
148 vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
149 break;
150 default:
151 run->dcr.dcrn = dcrn;
152 run->dcr.data = vcpu->arch.gpr[rs];
153 run->dcr.is_write = 1;
154 vcpu->arch.dcr_needed = 1;
155 kvmppc_account_exit(vcpu, DCR_EXITS);
156 emulated = EMULATE_DO_DCR;
157 }
158
159 break;
160
161 case XOP_TLBWE:
162 ra = get_ra(inst);
163 rs = get_rs(inst);
164 ws = get_ws(inst);
165 emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws);
166 break;
167
168 case XOP_TLBSX:
169 rt = get_rt(inst);
170 ra = get_ra(inst);
171 rb = get_rb(inst);
172 rc = get_rc(inst);
173 emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc);
174 break;
175
176 case XOP_ICCCI:
177 break;
178
179 default:
180 emulated = EMULATE_FAIL;
181 }
182
183 break;
184
185 default:
186 emulated = EMULATE_FAIL;
187 }
188
189 return emulated;
190}
191
192int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
193{
194 switch (sprn) {
195 case SPRN_MMUCR:
196 vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
197 case SPRN_PID:
198 kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
199 case SPRN_CCR0:
200 vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
201 case SPRN_CCR1:
202 vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
203 case SPRN_DEAR:
204 vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
205 case SPRN_ESR:
206 vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
207 case SPRN_DBCR0:
208 vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
209 case SPRN_DBCR1:
210 vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
211 case SPRN_TSR:
212 vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
213 case SPRN_TCR:
214 vcpu->arch.tcr = vcpu->arch.gpr[rs];
215 kvmppc_emulate_dec(vcpu);
216 break;
217
218 /* Note: SPRG4-7 are user-readable. These values are
219 * loaded into the real SPRGs when resuming the
220 * guest. */
221 case SPRN_SPRG4:
222 vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
223 case SPRN_SPRG5:
224 vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
225 case SPRN_SPRG6:
226 vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
227 case SPRN_SPRG7:
228 vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
229
230 case SPRN_IVPR:
231 vcpu->arch.ivpr = vcpu->arch.gpr[rs];
232 break;
233 case SPRN_IVOR0:
234 vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs];
235 break;
236 case SPRN_IVOR1:
237 vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs];
238 break;
239 case SPRN_IVOR2:
240 vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs];
241 break;
242 case SPRN_IVOR3:
243 vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs];
244 break;
245 case SPRN_IVOR4:
246 vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs];
247 break;
248 case SPRN_IVOR5:
249 vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs];
250 break;
251 case SPRN_IVOR6:
252 vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs];
253 break;
254 case SPRN_IVOR7:
255 vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs];
256 break;
257 case SPRN_IVOR8:
258 vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs];
259 break;
260 case SPRN_IVOR9:
261 vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs];
262 break;
263 case SPRN_IVOR10:
264 vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs];
265 break;
266 case SPRN_IVOR11:
267 vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs];
268 break;
269 case SPRN_IVOR12:
270 vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs];
271 break;
272 case SPRN_IVOR13:
273 vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs];
274 break;
275 case SPRN_IVOR14:
276 vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs];
277 break;
278 case SPRN_IVOR15:
279 vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs];
280 break;
281
282 default:
283 return EMULATE_FAIL;
284 }
285
286 kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
287 return EMULATE_DONE;
288}
289
290int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
291{
292 switch (sprn) {
293 /* 440 */
294 case SPRN_MMUCR:
295 vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
296 case SPRN_CCR0:
297 vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
298 case SPRN_CCR1:
299 vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
300
301 /* Book E */
302 case SPRN_PID:
303 vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
304 case SPRN_IVPR:
305 vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
306 case SPRN_DEAR:
307 vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
308 case SPRN_ESR:
309 vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
310 case SPRN_DBCR0:
311 vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
312 case SPRN_DBCR1:
313 vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
314
315 case SPRN_IVOR0:
316 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
317 break;
318 case SPRN_IVOR1:
319 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
320 break;
321 case SPRN_IVOR2:
322 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
323 break;
324 case SPRN_IVOR3:
325 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
326 break;
327 case SPRN_IVOR4:
328 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
329 break;
330 case SPRN_IVOR5:
331 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
332 break;
333 case SPRN_IVOR6:
334 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
335 break;
336 case SPRN_IVOR7:
337 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
338 break;
339 case SPRN_IVOR8:
340 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
341 break;
342 case SPRN_IVOR9:
343 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
344 break;
345 case SPRN_IVOR10:
346 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
347 break;
348 case SPRN_IVOR11:
349 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
350 break;
351 case SPRN_IVOR12:
352 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
353 break;
354 case SPRN_IVOR13:
355 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
356 break;
357 case SPRN_IVOR14:
358 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
359 break;
360 case SPRN_IVOR15:
361 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
362 break;
363
364 default:
365 return EMULATE_FAIL;
366 }
367
368 kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
369 return EMULATE_DONE;
370}
371
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index ad72c6f9811f..9a34b8edb9e2 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -22,20 +22,103 @@
22#include <linux/kvm.h> 22#include <linux/kvm.h>
23#include <linux/kvm_host.h> 23#include <linux/kvm_host.h>
24#include <linux/highmem.h> 24#include <linux/highmem.h>
25
26#include <asm/tlbflush.h>
25#include <asm/mmu-44x.h> 27#include <asm/mmu-44x.h>
26#include <asm/kvm_ppc.h> 28#include <asm/kvm_ppc.h>
29#include <asm/kvm_44x.h>
30#include "timing.h"
27 31
28#include "44x_tlb.h" 32#include "44x_tlb.h"
29 33
34#ifndef PPC44x_TLBE_SIZE
35#define PPC44x_TLBE_SIZE PPC44x_TLB_4K
36#endif
37
38#define PAGE_SIZE_4K (1<<12)
39#define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1))
40
41#define PPC44x_TLB_UATTR_MASK \
42 (PPC44x_TLB_U0|PPC44x_TLB_U1|PPC44x_TLB_U2|PPC44x_TLB_U3)
30#define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW) 43#define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW)
31#define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW) 44#define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW)
32 45
33static unsigned int kvmppc_tlb_44x_pos; 46#ifdef DEBUG
47void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
48{
49 struct kvmppc_44x_tlbe *tlbe;
50 int i;
51
52 printk("vcpu %d TLB dump:\n", vcpu->vcpu_id);
53 printk("| %2s | %3s | %8s | %8s | %8s |\n",
54 "nr", "tid", "word0", "word1", "word2");
55
56 for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) {
57 tlbe = &vcpu_44x->guest_tlb[i];
58 if (tlbe->word0 & PPC44x_TLB_VALID)
59 printk(" G%2d | %02X | %08X | %08X | %08X |\n",
60 i, tlbe->tid, tlbe->word0, tlbe->word1,
61 tlbe->word2);
62 }
63}
64#endif
65
66static inline void kvmppc_44x_tlbie(unsigned int index)
67{
68 /* 0 <= index < 64, so the V bit is clear and we can use the index as
69 * word0. */
70 asm volatile(
71 "tlbwe %[index], %[index], 0\n"
72 :
73 : [index] "r"(index)
74 );
75}
76
77static inline void kvmppc_44x_tlbre(unsigned int index,
78 struct kvmppc_44x_tlbe *tlbe)
79{
80 asm volatile(
81 "tlbre %[word0], %[index], 0\n"
82 "mfspr %[tid], %[sprn_mmucr]\n"
83 "andi. %[tid], %[tid], 0xff\n"
84 "tlbre %[word1], %[index], 1\n"
85 "tlbre %[word2], %[index], 2\n"
86 : [word0] "=r"(tlbe->word0),
87 [word1] "=r"(tlbe->word1),
88 [word2] "=r"(tlbe->word2),
89 [tid] "=r"(tlbe->tid)
90 : [index] "r"(index),
91 [sprn_mmucr] "i"(SPRN_MMUCR)
92 : "cc"
93 );
94}
95
96static inline void kvmppc_44x_tlbwe(unsigned int index,
97 struct kvmppc_44x_tlbe *stlbe)
98{
99 unsigned long tmp;
100
101 asm volatile(
102 "mfspr %[tmp], %[sprn_mmucr]\n"
103 "rlwimi %[tmp], %[tid], 0, 0xff\n"
104 "mtspr %[sprn_mmucr], %[tmp]\n"
105 "tlbwe %[word0], %[index], 0\n"
106 "tlbwe %[word1], %[index], 1\n"
107 "tlbwe %[word2], %[index], 2\n"
108 : [tmp] "=&r"(tmp)
109 : [word0] "r"(stlbe->word0),
110 [word1] "r"(stlbe->word1),
111 [word2] "r"(stlbe->word2),
112 [tid] "r"(stlbe->tid),
113 [index] "r"(index),
114 [sprn_mmucr] "i"(SPRN_MMUCR)
115 );
116}
34 117
35static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode) 118static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
36{ 119{
37 /* Mask off reserved bits. */ 120 /* We only care about the guest's permission and user bits. */
38 attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_ATTR_MASK; 121 attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_UATTR_MASK;
39 122
40 if (!usermode) { 123 if (!usermode) {
41 /* Guest is in supervisor mode, so we need to translate guest 124 /* Guest is in supervisor mode, so we need to translate guest
@@ -47,18 +130,60 @@ static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
47 /* Make sure host can always access this memory. */ 130 /* Make sure host can always access this memory. */
48 attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW; 131 attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW;
49 132
133 /* WIMGE = 0b00100 */
134 attrib |= PPC44x_TLB_M;
135
50 return attrib; 136 return attrib;
51} 137}
52 138
139/* Load shadow TLB back into hardware. */
140void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu)
141{
142 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
143 int i;
144
145 for (i = 0; i <= tlb_44x_hwater; i++) {
146 struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i];
147
148 if (get_tlb_v(stlbe) && get_tlb_ts(stlbe))
149 kvmppc_44x_tlbwe(i, stlbe);
150 }
151}
152
153static void kvmppc_44x_tlbe_set_modified(struct kvmppc_vcpu_44x *vcpu_44x,
154 unsigned int i)
155{
156 vcpu_44x->shadow_tlb_mod[i] = 1;
157}
158
159/* Save hardware TLB to the vcpu, and invalidate all guest mappings. */
160void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu)
161{
162 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
163 int i;
164
165 for (i = 0; i <= tlb_44x_hwater; i++) {
166 struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i];
167
168 if (vcpu_44x->shadow_tlb_mod[i])
169 kvmppc_44x_tlbre(i, stlbe);
170
171 if (get_tlb_v(stlbe) && get_tlb_ts(stlbe))
172 kvmppc_44x_tlbie(i);
173 }
174}
175
176
53/* Search the guest TLB for a matching entry. */ 177/* Search the guest TLB for a matching entry. */
54int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, 178int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
55 unsigned int as) 179 unsigned int as)
56{ 180{
181 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
57 int i; 182 int i;
58 183
59 /* XXX Replace loop with fancy data structures. */ 184 /* XXX Replace loop with fancy data structures. */
60 for (i = 0; i < PPC44x_TLB_SIZE; i++) { 185 for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) {
61 struct tlbe *tlbe = &vcpu->arch.guest_tlb[i]; 186 struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[i];
62 unsigned int tid; 187 unsigned int tid;
63 188
64 if (eaddr < get_tlb_eaddr(tlbe)) 189 if (eaddr < get_tlb_eaddr(tlbe))
@@ -83,78 +208,89 @@ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
83 return -1; 208 return -1;
84} 209}
85 210
86struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr) 211int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
87{ 212{
88 unsigned int as = !!(vcpu->arch.msr & MSR_IS); 213 unsigned int as = !!(vcpu->arch.msr & MSR_IS);
89 unsigned int index;
90 214
91 index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); 215 return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
92 if (index == -1)
93 return NULL;
94 return &vcpu->arch.guest_tlb[index];
95} 216}
96 217
97struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr) 218int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
98{ 219{
99 unsigned int as = !!(vcpu->arch.msr & MSR_DS); 220 unsigned int as = !!(vcpu->arch.msr & MSR_DS);
100 unsigned int index;
101 221
102 index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); 222 return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
103 if (index == -1)
104 return NULL;
105 return &vcpu->arch.guest_tlb[index];
106} 223}
107 224
108static int kvmppc_44x_tlbe_is_writable(struct tlbe *tlbe) 225static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
226 unsigned int stlb_index)
109{ 227{
110 return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW); 228 struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[stlb_index];
111}
112 229
113static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu, 230 if (!ref->page)
114 unsigned int index) 231 return;
115{
116 struct tlbe *stlbe = &vcpu->arch.shadow_tlb[index];
117 struct page *page = vcpu->arch.shadow_pages[index];
118 232
119 if (get_tlb_v(stlbe)) { 233 /* Discard from the TLB. */
120 if (kvmppc_44x_tlbe_is_writable(stlbe)) 234 /* Note: we could actually invalidate a host mapping, if the host overwrote
121 kvm_release_page_dirty(page); 235 * this TLB entry since we inserted a guest mapping. */
122 else 236 kvmppc_44x_tlbie(stlb_index);
123 kvm_release_page_clean(page); 237
124 } 238 /* Now release the page. */
239 if (ref->writeable)
240 kvm_release_page_dirty(ref->page);
241 else
242 kvm_release_page_clean(ref->page);
243
244 ref->page = NULL;
245
246 /* XXX set tlb_44x_index to stlb_index? */
247
248 KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler);
125} 249}
126 250
127void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu) 251void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu)
128{ 252{
253 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
129 int i; 254 int i;
130 255
131 for (i = 0; i <= tlb_44x_hwater; i++) 256 for (i = 0; i <= tlb_44x_hwater; i++)
132 kvmppc_44x_shadow_release(vcpu, i); 257 kvmppc_44x_shadow_release(vcpu_44x, i);
133}
134
135void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i)
136{
137 vcpu->arch.shadow_tlb_mod[i] = 1;
138} 258}
139 259
140/* Caller must ensure that the specified guest TLB entry is safe to insert into 260/**
141 * the shadow TLB. */ 261 * kvmppc_mmu_map -- create a host mapping for guest memory
142void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, 262 *
143 u32 flags) 263 * If the guest wanted a larger page than the host supports, only the first
264 * host page is mapped here and the rest are demand faulted.
265 *
266 * If the guest wanted a smaller page than the host page size, we map only the
267 * guest-size page (i.e. not a full host page mapping).
268 *
269 * Caller must ensure that the specified guest TLB entry is safe to insert into
270 * the shadow TLB.
271 */
272void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid,
273 u32 flags, u32 max_bytes, unsigned int gtlb_index)
144{ 274{
275 struct kvmppc_44x_tlbe stlbe;
276 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
277 struct kvmppc_44x_shadow_ref *ref;
145 struct page *new_page; 278 struct page *new_page;
146 struct tlbe *stlbe;
147 hpa_t hpaddr; 279 hpa_t hpaddr;
280 gfn_t gfn;
148 unsigned int victim; 281 unsigned int victim;
149 282
150 /* Future optimization: don't overwrite the TLB entry containing the 283 /* Select TLB entry to clobber. Indirectly guard against races with the TLB
151 * current PC (or stack?). */ 284 * miss handler by disabling interrupts. */
152 victim = kvmppc_tlb_44x_pos++; 285 local_irq_disable();
153 if (kvmppc_tlb_44x_pos > tlb_44x_hwater) 286 victim = ++tlb_44x_index;
154 kvmppc_tlb_44x_pos = 0; 287 if (victim > tlb_44x_hwater)
155 stlbe = &vcpu->arch.shadow_tlb[victim]; 288 victim = 0;
289 tlb_44x_index = victim;
290 local_irq_enable();
156 291
157 /* Get reference to new page. */ 292 /* Get reference to new page. */
293 gfn = gpaddr >> PAGE_SHIFT;
158 new_page = gfn_to_page(vcpu->kvm, gfn); 294 new_page = gfn_to_page(vcpu->kvm, gfn);
159 if (is_error_page(new_page)) { 295 if (is_error_page(new_page)) {
160 printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn); 296 printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
@@ -163,10 +299,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
163 } 299 }
164 hpaddr = page_to_phys(new_page); 300 hpaddr = page_to_phys(new_page);
165 301
166 /* Drop reference to old page. */ 302 /* Invalidate any previous shadow mappings. */
167 kvmppc_44x_shadow_release(vcpu, victim); 303 kvmppc_44x_shadow_release(vcpu_44x, victim);
168
169 vcpu->arch.shadow_pages[victim] = new_page;
170 304
171 /* XXX Make sure (va, size) doesn't overlap any other 305 /* XXX Make sure (va, size) doesn't overlap any other
172 * entries. 440x6 user manual says the result would be 306 * entries. 440x6 user manual says the result would be
@@ -174,78 +308,193 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
174 308
175 /* XXX what about AS? */ 309 /* XXX what about AS? */
176 310
177 stlbe->tid = !(asid & 0xff);
178
179 /* Force TS=1 for all guest mappings. */ 311 /* Force TS=1 for all guest mappings. */
180 /* For now we hardcode 4KB mappings, but it will be important to 312 stlbe.word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS;
181 * use host large pages in the future. */ 313
182 stlbe->word0 = (gvaddr & PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS 314 if (max_bytes >= PAGE_SIZE) {
183 | PPC44x_TLB_4K; 315 /* Guest mapping is larger than or equal to host page size. We can use
184 stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); 316 * a "native" host mapping. */
185 stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags, 317 stlbe.word0 |= (gvaddr & PAGE_MASK) | PPC44x_TLBE_SIZE;
186 vcpu->arch.msr & MSR_PR); 318 } else {
187 kvmppc_tlbe_set_modified(vcpu, victim); 319 /* Guest mapping is smaller than host page size. We must restrict the
320 * size of the mapping to be at most the smaller of the two, but for
321 * simplicity we fall back to a 4K mapping (this is probably what the
322 * guest is using anyways). */
323 stlbe.word0 |= (gvaddr & PAGE_MASK_4K) | PPC44x_TLB_4K;
324
325 /* 'hpaddr' is a host page, which is larger than the mapping we're
326 * inserting here. To compensate, we must add the in-page offset to the
327 * sub-page. */
328 hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K);
329 }
188 330
189 KVMTRACE_5D(STLB_WRITE, vcpu, victim, 331 stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
190 stlbe->tid, stlbe->word0, stlbe->word1, stlbe->word2, 332 stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags,
191 handler); 333 vcpu->arch.msr & MSR_PR);
334 stlbe.tid = !(asid & 0xff);
335
336 /* Keep track of the reference so we can properly release it later. */
337 ref = &vcpu_44x->shadow_refs[victim];
338 ref->page = new_page;
339 ref->gtlb_index = gtlb_index;
340 ref->writeable = !!(stlbe.word2 & PPC44x_TLB_UW);
341 ref->tid = stlbe.tid;
342
343 /* Insert shadow mapping into hardware TLB. */
344 kvmppc_44x_tlbe_set_modified(vcpu_44x, victim);
345 kvmppc_44x_tlbwe(victim, &stlbe);
346 KVMTRACE_5D(STLB_WRITE, vcpu, victim, stlbe.tid, stlbe.word0, stlbe.word1,
347 stlbe.word2, handler);
192} 348}
193 349
194void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, 350/* For a particular guest TLB entry, invalidate the corresponding host TLB
195 gva_t eend, u32 asid) 351 * mappings and release the host pages. */
352static void kvmppc_44x_invalidate(struct kvm_vcpu *vcpu,
353 unsigned int gtlb_index)
196{ 354{
197 unsigned int pid = !(asid & 0xff); 355 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
198 int i; 356 int i;
199 357
200 /* XXX Replace loop with fancy data structures. */ 358 for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) {
201 for (i = 0; i <= tlb_44x_hwater; i++) { 359 struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i];
202 struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i]; 360 if (ref->gtlb_index == gtlb_index)
203 unsigned int tid; 361 kvmppc_44x_shadow_release(vcpu_44x, i);
362 }
363}
204 364
205 if (!get_tlb_v(stlbe)) 365void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
206 continue; 366{
367 vcpu->arch.shadow_pid = !usermode;
368}
207 369
208 if (eend < get_tlb_eaddr(stlbe)) 370void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid)
209 continue; 371{
372 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
373 int i;
210 374
211 if (eaddr > get_tlb_end(stlbe)) 375 if (unlikely(vcpu->arch.pid == new_pid))
212 continue; 376 return;
213 377
214 tid = get_tlb_tid(stlbe); 378 vcpu->arch.pid = new_pid;
215 if (tid && (tid != pid))
216 continue;
217 379
218 kvmppc_44x_shadow_release(vcpu, i); 380 /* Guest userspace runs with TID=0 mappings and PID=0, to make sure it
219 stlbe->word0 = 0; 381 * can't access guest kernel mappings (TID=1). When we switch to a new
220 kvmppc_tlbe_set_modified(vcpu, i); 382 * guest PID, which will also use host PID=0, we must discard the old guest
221 KVMTRACE_5D(STLB_INVAL, vcpu, i, 383 * userspace mappings. */
222 stlbe->tid, stlbe->word0, stlbe->word1, 384 for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) {
223 stlbe->word2, handler); 385 struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i];
386
387 if (ref->tid == 0)
388 kvmppc_44x_shadow_release(vcpu_44x, i);
224 } 389 }
225} 390}
226 391
227/* Invalidate all mappings on the privilege switch after PID has been changed. 392static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
228 * The guest always runs with PID=1, so we must clear the entire TLB when 393 const struct kvmppc_44x_tlbe *tlbe)
229 * switching address spaces. */
230void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
231{ 394{
232 int i; 395 gpa_t gpa;
233 396
234 if (vcpu->arch.swap_pid) { 397 if (!get_tlb_v(tlbe))
235 /* XXX Replace loop with fancy data structures. */ 398 return 0;
236 for (i = 0; i <= tlb_44x_hwater; i++) { 399
237 struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i]; 400 /* Does it match current guest AS? */
238 401 /* XXX what about IS != DS? */
239 /* Future optimization: clear only userspace mappings. */ 402 if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
240 kvmppc_44x_shadow_release(vcpu, i); 403 return 0;
241 stlbe->word0 = 0; 404
242 kvmppc_tlbe_set_modified(vcpu, i); 405 gpa = get_tlb_raddr(tlbe);
243 KVMTRACE_5D(STLB_INVAL, vcpu, i, 406 if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
244 stlbe->tid, stlbe->word0, stlbe->word1, 407 /* Mapping is not for RAM. */
245 stlbe->word2, handler); 408 return 0;
246 } 409
247 vcpu->arch.swap_pid = 0; 410 return 1;
411}
412
413int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
414{
415 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
416 struct kvmppc_44x_tlbe *tlbe;
417 unsigned int gtlb_index;
418
419 gtlb_index = vcpu->arch.gpr[ra];
420 if (gtlb_index > KVM44x_GUEST_TLB_SIZE) {
421 printk("%s: index %d\n", __func__, gtlb_index);
422 kvmppc_dump_vcpu(vcpu);
423 return EMULATE_FAIL;
248 } 424 }
249 425
250 vcpu->arch.shadow_pid = !usermode; 426 tlbe = &vcpu_44x->guest_tlb[gtlb_index];
427
428 /* Invalidate shadow mappings for the about-to-be-clobbered TLB entry. */
429 if (tlbe->word0 & PPC44x_TLB_VALID)
430 kvmppc_44x_invalidate(vcpu, gtlb_index);
431
432 switch (ws) {
433 case PPC44x_TLB_PAGEID:
434 tlbe->tid = get_mmucr_stid(vcpu);
435 tlbe->word0 = vcpu->arch.gpr[rs];
436 break;
437
438 case PPC44x_TLB_XLAT:
439 tlbe->word1 = vcpu->arch.gpr[rs];
440 break;
441
442 case PPC44x_TLB_ATTRIB:
443 tlbe->word2 = vcpu->arch.gpr[rs];
444 break;
445
446 default:
447 return EMULATE_FAIL;
448 }
449
450 if (tlbe_is_host_safe(vcpu, tlbe)) {
451 u64 asid;
452 gva_t eaddr;
453 gpa_t gpaddr;
454 u32 flags;
455 u32 bytes;
456
457 eaddr = get_tlb_eaddr(tlbe);
458 gpaddr = get_tlb_raddr(tlbe);
459
460 /* Use the advertised page size to mask effective and real addrs. */
461 bytes = get_tlb_bytes(tlbe);
462 eaddr &= ~(bytes - 1);
463 gpaddr &= ~(bytes - 1);
464
465 asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
466 flags = tlbe->word2 & 0xffff;
467
468 kvmppc_mmu_map(vcpu, eaddr, gpaddr, asid, flags, bytes, gtlb_index);
469 }
470
471 KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0,
472 tlbe->word1, tlbe->word2, handler);
473
474 kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
475 return EMULATE_DONE;
476}
477
478int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc)
479{
480 u32 ea;
481 int gtlb_index;
482 unsigned int as = get_mmucr_sts(vcpu);
483 unsigned int pid = get_mmucr_stid(vcpu);
484
485 ea = vcpu->arch.gpr[rb];
486 if (ra)
487 ea += vcpu->arch.gpr[ra];
488
489 gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
490 if (rc) {
491 if (gtlb_index < 0)
492 vcpu->arch.cr &= ~0x20000000;
493 else
494 vcpu->arch.cr |= 0x20000000;
495 }
496 vcpu->arch.gpr[rt] = gtlb_index;
497
498 kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
499 return EMULATE_DONE;
251} 500}
diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h
index 2ccd46b6f6b7..772191f29e62 100644
--- a/arch/powerpc/kvm/44x_tlb.h
+++ b/arch/powerpc/kvm/44x_tlb.h
@@ -25,48 +25,52 @@
25 25
26extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, 26extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr,
27 unsigned int pid, unsigned int as); 27 unsigned int pid, unsigned int as);
28extern struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr); 28extern int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
29extern struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr); 29extern int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
30
31extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb,
32 u8 rc);
33extern int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws);
30 34
31/* TLB helper functions */ 35/* TLB helper functions */
32static inline unsigned int get_tlb_size(const struct tlbe *tlbe) 36static inline unsigned int get_tlb_size(const struct kvmppc_44x_tlbe *tlbe)
33{ 37{
34 return (tlbe->word0 >> 4) & 0xf; 38 return (tlbe->word0 >> 4) & 0xf;
35} 39}
36 40
37static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe) 41static inline gva_t get_tlb_eaddr(const struct kvmppc_44x_tlbe *tlbe)
38{ 42{
39 return tlbe->word0 & 0xfffffc00; 43 return tlbe->word0 & 0xfffffc00;
40} 44}
41 45
42static inline gva_t get_tlb_bytes(const struct tlbe *tlbe) 46static inline gva_t get_tlb_bytes(const struct kvmppc_44x_tlbe *tlbe)
43{ 47{
44 unsigned int pgsize = get_tlb_size(tlbe); 48 unsigned int pgsize = get_tlb_size(tlbe);
45 return 1 << 10 << (pgsize << 1); 49 return 1 << 10 << (pgsize << 1);
46} 50}
47 51
48static inline gva_t get_tlb_end(const struct tlbe *tlbe) 52static inline gva_t get_tlb_end(const struct kvmppc_44x_tlbe *tlbe)
49{ 53{
50 return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1; 54 return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1;
51} 55}
52 56
53static inline u64 get_tlb_raddr(const struct tlbe *tlbe) 57static inline u64 get_tlb_raddr(const struct kvmppc_44x_tlbe *tlbe)
54{ 58{
55 u64 word1 = tlbe->word1; 59 u64 word1 = tlbe->word1;
56 return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00); 60 return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00);
57} 61}
58 62
59static inline unsigned int get_tlb_tid(const struct tlbe *tlbe) 63static inline unsigned int get_tlb_tid(const struct kvmppc_44x_tlbe *tlbe)
60{ 64{
61 return tlbe->tid & 0xff; 65 return tlbe->tid & 0xff;
62} 66}
63 67
64static inline unsigned int get_tlb_ts(const struct tlbe *tlbe) 68static inline unsigned int get_tlb_ts(const struct kvmppc_44x_tlbe *tlbe)
65{ 69{
66 return (tlbe->word0 >> 8) & 0x1; 70 return (tlbe->word0 >> 8) & 0x1;
67} 71}
68 72
69static inline unsigned int get_tlb_v(const struct tlbe *tlbe) 73static inline unsigned int get_tlb_v(const struct kvmppc_44x_tlbe *tlbe)
70{ 74{
71 return (tlbe->word0 >> 9) & 0x1; 75 return (tlbe->word0 >> 9) & 0x1;
72} 76}
@@ -81,7 +85,7 @@ static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu)
81 return (vcpu->arch.mmucr >> 16) & 0x1; 85 return (vcpu->arch.mmucr >> 16) & 0x1;
82} 86}
83 87
84static inline gpa_t tlb_xlate(struct tlbe *tlbe, gva_t eaddr) 88static inline gpa_t tlb_xlate(struct kvmppc_44x_tlbe *tlbe, gva_t eaddr)
85{ 89{
86 unsigned int pgmask = get_tlb_bytes(tlbe) - 1; 90 unsigned int pgmask = get_tlb_bytes(tlbe) - 1;
87 91
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 53aaa66b25e5..6dbdc4817d80 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -15,27 +15,33 @@ menuconfig VIRTUALIZATION
15if VIRTUALIZATION 15if VIRTUALIZATION
16 16
17config KVM 17config KVM
18 bool "Kernel-based Virtual Machine (KVM) support" 18 bool
19 depends on 44x && EXPERIMENTAL
20 select PREEMPT_NOTIFIERS 19 select PREEMPT_NOTIFIERS
21 select ANON_INODES 20 select ANON_INODES
22 # We can only run on Book E hosts so far 21
23 select KVM_BOOKE_HOST 22config KVM_440
23 bool "KVM support for PowerPC 440 processors"
24 depends on EXPERIMENTAL && 44x
25 select KVM
24 ---help--- 26 ---help---
25 Support hosting virtualized guest machines. You will also 27 Support running unmodified 440 guest kernels in virtual machines on
26 need to select one or more of the processor modules below. 28 440 host processors.
27 29
28 This module provides access to the hardware capabilities through 30 This module provides access to the hardware capabilities through
29 a character device node named /dev/kvm. 31 a character device node named /dev/kvm.
30 32
31 If unsure, say N. 33 If unsure, say N.
32 34
33config KVM_BOOKE_HOST 35config KVM_EXIT_TIMING
34 bool "KVM host support for Book E PowerPC processors" 36 bool "Detailed exit timing"
35 depends on KVM && 44x 37 depends on KVM
36 ---help--- 38 ---help---
37 Provides host support for KVM on Book E PowerPC processors. Currently 39 Calculate elapsed time for every exit/enter cycle. A per-vcpu
38 this works on 440 processors only. 40 report is available in debugfs kvm/vm#_vcpu#_timing.
41 The overhead is relatively small, however it is not recommended for
42 production environments.
43
44 If unsure, say N.
39 45
40config KVM_TRACE 46config KVM_TRACE
41 bool "KVM trace support" 47 bool "KVM trace support"
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 2a5d4397ac4b..df7ba59e6d53 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -8,10 +8,16 @@ common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
8 8
9common-objs-$(CONFIG_KVM_TRACE) += $(addprefix ../../../virt/kvm/, kvm_trace.o) 9common-objs-$(CONFIG_KVM_TRACE) += $(addprefix ../../../virt/kvm/, kvm_trace.o)
10 10
11kvm-objs := $(common-objs-y) powerpc.o emulate.o booke_guest.o 11kvm-objs := $(common-objs-y) powerpc.o emulate.o
12obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
12obj-$(CONFIG_KVM) += kvm.o 13obj-$(CONFIG_KVM) += kvm.o
13 14
14AFLAGS_booke_interrupts.o := -I$(obj) 15AFLAGS_booke_interrupts.o := -I$(obj)
15 16
16kvm-booke-host-objs := booke_host.o booke_interrupts.o 44x_tlb.o 17kvm-440-objs := \
17obj-$(CONFIG_KVM_BOOKE_HOST) += kvm-booke-host.o 18 booke.o \
19 booke_interrupts.o \
20 44x.o \
21 44x_tlb.o \
22 44x_emulate.o
23obj-$(CONFIG_KVM_440) += kvm-440.o
diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke.c
index 7b2591e26bae..35485dd6927e 100644
--- a/arch/powerpc/kvm/booke_guest.c
+++ b/arch/powerpc/kvm/booke.c
@@ -24,21 +24,26 @@
24#include <linux/module.h> 24#include <linux/module.h>
25#include <linux/vmalloc.h> 25#include <linux/vmalloc.h>
26#include <linux/fs.h> 26#include <linux/fs.h>
27
27#include <asm/cputable.h> 28#include <asm/cputable.h>
28#include <asm/uaccess.h> 29#include <asm/uaccess.h>
29#include <asm/kvm_ppc.h> 30#include <asm/kvm_ppc.h>
31#include "timing.h"
32#include <asm/cacheflush.h>
33#include <asm/kvm_44x.h>
30 34
35#include "booke.h"
31#include "44x_tlb.h" 36#include "44x_tlb.h"
32 37
38unsigned long kvmppc_booke_handlers;
39
33#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM 40#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
34#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU 41#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
35 42
36struct kvm_stats_debugfs_item debugfs_entries[] = { 43struct kvm_stats_debugfs_item debugfs_entries[] = {
37 { "exits", VCPU_STAT(sum_exits) },
38 { "mmio", VCPU_STAT(mmio_exits) }, 44 { "mmio", VCPU_STAT(mmio_exits) },
39 { "dcr", VCPU_STAT(dcr_exits) }, 45 { "dcr", VCPU_STAT(dcr_exits) },
40 { "sig", VCPU_STAT(signal_exits) }, 46 { "sig", VCPU_STAT(signal_exits) },
41 { "light", VCPU_STAT(light_exits) },
42 { "itlb_r", VCPU_STAT(itlb_real_miss_exits) }, 47 { "itlb_r", VCPU_STAT(itlb_real_miss_exits) },
43 { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) }, 48 { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) },
44 { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) }, 49 { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) },
@@ -53,103 +58,19 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
53 { NULL } 58 { NULL }
54}; 59};
55 60
56static const u32 interrupt_msr_mask[16] = {
57 [BOOKE_INTERRUPT_CRITICAL] = MSR_ME,
58 [BOOKE_INTERRUPT_MACHINE_CHECK] = 0,
59 [BOOKE_INTERRUPT_DATA_STORAGE] = MSR_CE|MSR_ME|MSR_DE,
60 [BOOKE_INTERRUPT_INST_STORAGE] = MSR_CE|MSR_ME|MSR_DE,
61 [BOOKE_INTERRUPT_EXTERNAL] = MSR_CE|MSR_ME|MSR_DE,
62 [BOOKE_INTERRUPT_ALIGNMENT] = MSR_CE|MSR_ME|MSR_DE,
63 [BOOKE_INTERRUPT_PROGRAM] = MSR_CE|MSR_ME|MSR_DE,
64 [BOOKE_INTERRUPT_FP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE,
65 [BOOKE_INTERRUPT_SYSCALL] = MSR_CE|MSR_ME|MSR_DE,
66 [BOOKE_INTERRUPT_AP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE,
67 [BOOKE_INTERRUPT_DECREMENTER] = MSR_CE|MSR_ME|MSR_DE,
68 [BOOKE_INTERRUPT_FIT] = MSR_CE|MSR_ME|MSR_DE,
69 [BOOKE_INTERRUPT_WATCHDOG] = MSR_ME,
70 [BOOKE_INTERRUPT_DTLB_MISS] = MSR_CE|MSR_ME|MSR_DE,
71 [BOOKE_INTERRUPT_ITLB_MISS] = MSR_CE|MSR_ME|MSR_DE,
72 [BOOKE_INTERRUPT_DEBUG] = MSR_ME,
73};
74
75const unsigned char exception_priority[] = {
76 [BOOKE_INTERRUPT_DATA_STORAGE] = 0,
77 [BOOKE_INTERRUPT_INST_STORAGE] = 1,
78 [BOOKE_INTERRUPT_ALIGNMENT] = 2,
79 [BOOKE_INTERRUPT_PROGRAM] = 3,
80 [BOOKE_INTERRUPT_FP_UNAVAIL] = 4,
81 [BOOKE_INTERRUPT_SYSCALL] = 5,
82 [BOOKE_INTERRUPT_AP_UNAVAIL] = 6,
83 [BOOKE_INTERRUPT_DTLB_MISS] = 7,
84 [BOOKE_INTERRUPT_ITLB_MISS] = 8,
85 [BOOKE_INTERRUPT_MACHINE_CHECK] = 9,
86 [BOOKE_INTERRUPT_DEBUG] = 10,
87 [BOOKE_INTERRUPT_CRITICAL] = 11,
88 [BOOKE_INTERRUPT_WATCHDOG] = 12,
89 [BOOKE_INTERRUPT_EXTERNAL] = 13,
90 [BOOKE_INTERRUPT_FIT] = 14,
91 [BOOKE_INTERRUPT_DECREMENTER] = 15,
92};
93
94const unsigned char priority_exception[] = {
95 BOOKE_INTERRUPT_DATA_STORAGE,
96 BOOKE_INTERRUPT_INST_STORAGE,
97 BOOKE_INTERRUPT_ALIGNMENT,
98 BOOKE_INTERRUPT_PROGRAM,
99 BOOKE_INTERRUPT_FP_UNAVAIL,
100 BOOKE_INTERRUPT_SYSCALL,
101 BOOKE_INTERRUPT_AP_UNAVAIL,
102 BOOKE_INTERRUPT_DTLB_MISS,
103 BOOKE_INTERRUPT_ITLB_MISS,
104 BOOKE_INTERRUPT_MACHINE_CHECK,
105 BOOKE_INTERRUPT_DEBUG,
106 BOOKE_INTERRUPT_CRITICAL,
107 BOOKE_INTERRUPT_WATCHDOG,
108 BOOKE_INTERRUPT_EXTERNAL,
109 BOOKE_INTERRUPT_FIT,
110 BOOKE_INTERRUPT_DECREMENTER,
111};
112
113
114void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
115{
116 struct tlbe *tlbe;
117 int i;
118
119 printk("vcpu %d TLB dump:\n", vcpu->vcpu_id);
120 printk("| %2s | %3s | %8s | %8s | %8s |\n",
121 "nr", "tid", "word0", "word1", "word2");
122
123 for (i = 0; i < PPC44x_TLB_SIZE; i++) {
124 tlbe = &vcpu->arch.guest_tlb[i];
125 if (tlbe->word0 & PPC44x_TLB_VALID)
126 printk(" G%2d | %02X | %08X | %08X | %08X |\n",
127 i, tlbe->tid, tlbe->word0, tlbe->word1,
128 tlbe->word2);
129 }
130
131 for (i = 0; i < PPC44x_TLB_SIZE; i++) {
132 tlbe = &vcpu->arch.shadow_tlb[i];
133 if (tlbe->word0 & PPC44x_TLB_VALID)
134 printk(" S%2d | %02X | %08X | %08X | %08X |\n",
135 i, tlbe->tid, tlbe->word0, tlbe->word1,
136 tlbe->word2);
137 }
138}
139
140/* TODO: use vcpu_printf() */ 61/* TODO: use vcpu_printf() */
141void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu) 62void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
142{ 63{
143 int i; 64 int i;
144 65
145 printk("pc: %08x msr: %08x\n", vcpu->arch.pc, vcpu->arch.msr); 66 printk("pc: %08lx msr: %08lx\n", vcpu->arch.pc, vcpu->arch.msr);
146 printk("lr: %08x ctr: %08x\n", vcpu->arch.lr, vcpu->arch.ctr); 67 printk("lr: %08lx ctr: %08lx\n", vcpu->arch.lr, vcpu->arch.ctr);
147 printk("srr0: %08x srr1: %08x\n", vcpu->arch.srr0, vcpu->arch.srr1); 68 printk("srr0: %08lx srr1: %08lx\n", vcpu->arch.srr0, vcpu->arch.srr1);
148 69
149 printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions); 70 printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions);
150 71
151 for (i = 0; i < 32; i += 4) { 72 for (i = 0; i < 32; i += 4) {
152 printk("gpr%02d: %08x %08x %08x %08x\n", i, 73 printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i,
153 vcpu->arch.gpr[i], 74 vcpu->arch.gpr[i],
154 vcpu->arch.gpr[i+1], 75 vcpu->arch.gpr[i+1],
155 vcpu->arch.gpr[i+2], 76 vcpu->arch.gpr[i+2],
@@ -157,69 +78,96 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
157 } 78 }
158} 79}
159 80
160/* Check if we are ready to deliver the interrupt */ 81static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
161static int kvmppc_can_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt) 82 unsigned int priority)
162{ 83{
163 int r; 84 set_bit(priority, &vcpu->arch.pending_exceptions);
85}
164 86
165 switch (interrupt) { 87void kvmppc_core_queue_program(struct kvm_vcpu *vcpu)
166 case BOOKE_INTERRUPT_CRITICAL: 88{
167 r = vcpu->arch.msr & MSR_CE; 89 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
168 break; 90}
169 case BOOKE_INTERRUPT_MACHINE_CHECK: 91
170 r = vcpu->arch.msr & MSR_ME; 92void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
171 break; 93{
172 case BOOKE_INTERRUPT_EXTERNAL: 94 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER);
173 r = vcpu->arch.msr & MSR_EE; 95}
96
97int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
98{
99 return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
100}
101
102void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
103 struct kvm_interrupt *irq)
104{
105 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL);
106}
107
108/* Deliver the interrupt of the corresponding priority, if possible. */
109static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
110 unsigned int priority)
111{
112 int allowed = 0;
113 ulong msr_mask;
114
115 switch (priority) {
116 case BOOKE_IRQPRIO_PROGRAM:
117 case BOOKE_IRQPRIO_DTLB_MISS:
118 case BOOKE_IRQPRIO_ITLB_MISS:
119 case BOOKE_IRQPRIO_SYSCALL:
120 case BOOKE_IRQPRIO_DATA_STORAGE:
121 case BOOKE_IRQPRIO_INST_STORAGE:
122 case BOOKE_IRQPRIO_FP_UNAVAIL:
123 case BOOKE_IRQPRIO_AP_UNAVAIL:
124 case BOOKE_IRQPRIO_ALIGNMENT:
125 allowed = 1;
126 msr_mask = MSR_CE|MSR_ME|MSR_DE;
174 break; 127 break;
175 case BOOKE_INTERRUPT_DECREMENTER: 128 case BOOKE_IRQPRIO_CRITICAL:
176 r = vcpu->arch.msr & MSR_EE; 129 case BOOKE_IRQPRIO_WATCHDOG:
130 allowed = vcpu->arch.msr & MSR_CE;
131 msr_mask = MSR_ME;
177 break; 132 break;
178 case BOOKE_INTERRUPT_FIT: 133 case BOOKE_IRQPRIO_MACHINE_CHECK:
179 r = vcpu->arch.msr & MSR_EE; 134 allowed = vcpu->arch.msr & MSR_ME;
135 msr_mask = 0;
180 break; 136 break;
181 case BOOKE_INTERRUPT_WATCHDOG: 137 case BOOKE_IRQPRIO_EXTERNAL:
182 r = vcpu->arch.msr & MSR_CE; 138 case BOOKE_IRQPRIO_DECREMENTER:
139 case BOOKE_IRQPRIO_FIT:
140 allowed = vcpu->arch.msr & MSR_EE;
141 msr_mask = MSR_CE|MSR_ME|MSR_DE;
183 break; 142 break;
184 case BOOKE_INTERRUPT_DEBUG: 143 case BOOKE_IRQPRIO_DEBUG:
185 r = vcpu->arch.msr & MSR_DE; 144 allowed = vcpu->arch.msr & MSR_DE;
145 msr_mask = MSR_ME;
186 break; 146 break;
187 default:
188 r = 1;
189 } 147 }
190 148
191 return r; 149 if (allowed) {
192} 150 vcpu->arch.srr0 = vcpu->arch.pc;
151 vcpu->arch.srr1 = vcpu->arch.msr;
152 vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
153 kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask);
193 154
194static void kvmppc_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt) 155 clear_bit(priority, &vcpu->arch.pending_exceptions);
195{
196 switch (interrupt) {
197 case BOOKE_INTERRUPT_DECREMENTER:
198 vcpu->arch.tsr |= TSR_DIS;
199 break;
200 } 156 }
201 157
202 vcpu->arch.srr0 = vcpu->arch.pc; 158 return allowed;
203 vcpu->arch.srr1 = vcpu->arch.msr;
204 vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[interrupt];
205 kvmppc_set_msr(vcpu, vcpu->arch.msr & interrupt_msr_mask[interrupt]);
206} 159}
207 160
208/* Check pending exceptions and deliver one, if possible. */ 161/* Check pending exceptions and deliver one, if possible. */
209void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu) 162void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
210{ 163{
211 unsigned long *pending = &vcpu->arch.pending_exceptions; 164 unsigned long *pending = &vcpu->arch.pending_exceptions;
212 unsigned int exception;
213 unsigned int priority; 165 unsigned int priority;
214 166
215 priority = find_first_bit(pending, BITS_PER_BYTE * sizeof(*pending)); 167 priority = __ffs(*pending);
216 while (priority <= BOOKE_MAX_INTERRUPT) { 168 while (priority <= BOOKE_MAX_INTERRUPT) {
217 exception = priority_exception[priority]; 169 if (kvmppc_booke_irqprio_deliver(vcpu, priority))
218 if (kvmppc_can_deliver_interrupt(vcpu, exception)) {
219 kvmppc_clear_exception(vcpu, exception);
220 kvmppc_deliver_interrupt(vcpu, exception);
221 break; 170 break;
222 }
223 171
224 priority = find_next_bit(pending, 172 priority = find_next_bit(pending,
225 BITS_PER_BYTE * sizeof(*pending), 173 BITS_PER_BYTE * sizeof(*pending),
@@ -238,6 +186,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
238 enum emulation_result er; 186 enum emulation_result er;
239 int r = RESUME_HOST; 187 int r = RESUME_HOST;
240 188
189 /* update before a new last_exit_type is rewritten */
190 kvmppc_update_timing_stats(vcpu);
191
241 local_irq_enable(); 192 local_irq_enable();
242 193
243 run->exit_reason = KVM_EXIT_UNKNOWN; 194 run->exit_reason = KVM_EXIT_UNKNOWN;
@@ -251,21 +202,19 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
251 break; 202 break;
252 203
253 case BOOKE_INTERRUPT_EXTERNAL: 204 case BOOKE_INTERRUPT_EXTERNAL:
205 kvmppc_account_exit(vcpu, EXT_INTR_EXITS);
206 if (need_resched())
207 cond_resched();
208 r = RESUME_GUEST;
209 break;
210
254 case BOOKE_INTERRUPT_DECREMENTER: 211 case BOOKE_INTERRUPT_DECREMENTER:
255 /* Since we switched IVPR back to the host's value, the host 212 /* Since we switched IVPR back to the host's value, the host
256 * handled this interrupt the moment we enabled interrupts. 213 * handled this interrupt the moment we enabled interrupts.
257 * Now we just offer it a chance to reschedule the guest. */ 214 * Now we just offer it a chance to reschedule the guest. */
258 215 kvmppc_account_exit(vcpu, DEC_EXITS);
259 /* XXX At this point the TLB still holds our shadow TLB, so if
260 * we do reschedule the host will fault over it. Perhaps we
261 * should politely restore the host's entries to minimize
262 * misses before ceding control. */
263 if (need_resched()) 216 if (need_resched())
264 cond_resched(); 217 cond_resched();
265 if (exit_nr == BOOKE_INTERRUPT_DECREMENTER)
266 vcpu->stat.dec_exits++;
267 else
268 vcpu->stat.ext_intr_exits++;
269 r = RESUME_GUEST; 218 r = RESUME_GUEST;
270 break; 219 break;
271 220
@@ -274,17 +223,19 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
274 /* Program traps generated by user-level software must be handled 223 /* Program traps generated by user-level software must be handled
275 * by the guest kernel. */ 224 * by the guest kernel. */
276 vcpu->arch.esr = vcpu->arch.fault_esr; 225 vcpu->arch.esr = vcpu->arch.fault_esr;
277 kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM); 226 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
278 r = RESUME_GUEST; 227 r = RESUME_GUEST;
228 kvmppc_account_exit(vcpu, USR_PR_INST);
279 break; 229 break;
280 } 230 }
281 231
282 er = kvmppc_emulate_instruction(run, vcpu); 232 er = kvmppc_emulate_instruction(run, vcpu);
283 switch (er) { 233 switch (er) {
284 case EMULATE_DONE: 234 case EMULATE_DONE:
235 /* don't overwrite subtypes, just account kvm_stats */
236 kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS);
285 /* Future optimization: only reload non-volatiles if 237 /* Future optimization: only reload non-volatiles if
286 * they were actually modified by emulation. */ 238 * they were actually modified by emulation. */
287 vcpu->stat.emulated_inst_exits++;
288 r = RESUME_GUEST_NV; 239 r = RESUME_GUEST_NV;
289 break; 240 break;
290 case EMULATE_DO_DCR: 241 case EMULATE_DO_DCR:
@@ -293,7 +244,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
293 break; 244 break;
294 case EMULATE_FAIL: 245 case EMULATE_FAIL:
295 /* XXX Deliver Program interrupt to guest. */ 246 /* XXX Deliver Program interrupt to guest. */
296 printk(KERN_CRIT "%s: emulation at %x failed (%08x)\n", 247 printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
297 __func__, vcpu->arch.pc, vcpu->arch.last_inst); 248 __func__, vcpu->arch.pc, vcpu->arch.last_inst);
298 /* For debugging, encode the failing instruction and 249 /* For debugging, encode the failing instruction and
299 * report it to userspace. */ 250 * report it to userspace. */
@@ -307,48 +258,53 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
307 break; 258 break;
308 259
309 case BOOKE_INTERRUPT_FP_UNAVAIL: 260 case BOOKE_INTERRUPT_FP_UNAVAIL:
310 kvmppc_queue_exception(vcpu, exit_nr); 261 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL);
262 kvmppc_account_exit(vcpu, FP_UNAVAIL);
311 r = RESUME_GUEST; 263 r = RESUME_GUEST;
312 break; 264 break;
313 265
314 case BOOKE_INTERRUPT_DATA_STORAGE: 266 case BOOKE_INTERRUPT_DATA_STORAGE:
315 vcpu->arch.dear = vcpu->arch.fault_dear; 267 vcpu->arch.dear = vcpu->arch.fault_dear;
316 vcpu->arch.esr = vcpu->arch.fault_esr; 268 vcpu->arch.esr = vcpu->arch.fault_esr;
317 kvmppc_queue_exception(vcpu, exit_nr); 269 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
318 vcpu->stat.dsi_exits++; 270 kvmppc_account_exit(vcpu, DSI_EXITS);
319 r = RESUME_GUEST; 271 r = RESUME_GUEST;
320 break; 272 break;
321 273
322 case BOOKE_INTERRUPT_INST_STORAGE: 274 case BOOKE_INTERRUPT_INST_STORAGE:
323 vcpu->arch.esr = vcpu->arch.fault_esr; 275 vcpu->arch.esr = vcpu->arch.fault_esr;
324 kvmppc_queue_exception(vcpu, exit_nr); 276 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
325 vcpu->stat.isi_exits++; 277 kvmppc_account_exit(vcpu, ISI_EXITS);
326 r = RESUME_GUEST; 278 r = RESUME_GUEST;
327 break; 279 break;
328 280
329 case BOOKE_INTERRUPT_SYSCALL: 281 case BOOKE_INTERRUPT_SYSCALL:
330 kvmppc_queue_exception(vcpu, exit_nr); 282 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL);
331 vcpu->stat.syscall_exits++; 283 kvmppc_account_exit(vcpu, SYSCALL_EXITS);
332 r = RESUME_GUEST; 284 r = RESUME_GUEST;
333 break; 285 break;
334 286
287 /* XXX move to a 440-specific file. */
335 case BOOKE_INTERRUPT_DTLB_MISS: { 288 case BOOKE_INTERRUPT_DTLB_MISS: {
336 struct tlbe *gtlbe; 289 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
290 struct kvmppc_44x_tlbe *gtlbe;
337 unsigned long eaddr = vcpu->arch.fault_dear; 291 unsigned long eaddr = vcpu->arch.fault_dear;
292 int gtlb_index;
338 gfn_t gfn; 293 gfn_t gfn;
339 294
340 /* Check the guest TLB. */ 295 /* Check the guest TLB. */
341 gtlbe = kvmppc_44x_dtlb_search(vcpu, eaddr); 296 gtlb_index = kvmppc_44x_dtlb_index(vcpu, eaddr);
342 if (!gtlbe) { 297 if (gtlb_index < 0) {
343 /* The guest didn't have a mapping for it. */ 298 /* The guest didn't have a mapping for it. */
344 kvmppc_queue_exception(vcpu, exit_nr); 299 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
345 vcpu->arch.dear = vcpu->arch.fault_dear; 300 vcpu->arch.dear = vcpu->arch.fault_dear;
346 vcpu->arch.esr = vcpu->arch.fault_esr; 301 vcpu->arch.esr = vcpu->arch.fault_esr;
347 vcpu->stat.dtlb_real_miss_exits++; 302 kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS);
348 r = RESUME_GUEST; 303 r = RESUME_GUEST;
349 break; 304 break;
350 } 305 }
351 306
307 gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
352 vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr); 308 vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr);
353 gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT; 309 gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT;
354 310
@@ -359,38 +315,45 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
359 * b) the guest used a large mapping which we're faking 315 * b) the guest used a large mapping which we're faking
360 * Either way, we need to satisfy the fault without 316 * Either way, we need to satisfy the fault without
361 * invoking the guest. */ 317 * invoking the guest. */
362 kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid, 318 kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid,
363 gtlbe->word2); 319 gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
364 vcpu->stat.dtlb_virt_miss_exits++; 320 kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS);
365 r = RESUME_GUEST; 321 r = RESUME_GUEST;
366 } else { 322 } else {
367 /* Guest has mapped and accessed a page which is not 323 /* Guest has mapped and accessed a page which is not
368 * actually RAM. */ 324 * actually RAM. */
369 r = kvmppc_emulate_mmio(run, vcpu); 325 r = kvmppc_emulate_mmio(run, vcpu);
326 kvmppc_account_exit(vcpu, MMIO_EXITS);
370 } 327 }
371 328
372 break; 329 break;
373 } 330 }
374 331
332 /* XXX move to a 440-specific file. */
375 case BOOKE_INTERRUPT_ITLB_MISS: { 333 case BOOKE_INTERRUPT_ITLB_MISS: {
376 struct tlbe *gtlbe; 334 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
335 struct kvmppc_44x_tlbe *gtlbe;
377 unsigned long eaddr = vcpu->arch.pc; 336 unsigned long eaddr = vcpu->arch.pc;
337 gpa_t gpaddr;
378 gfn_t gfn; 338 gfn_t gfn;
339 int gtlb_index;
379 340
380 r = RESUME_GUEST; 341 r = RESUME_GUEST;
381 342
382 /* Check the guest TLB. */ 343 /* Check the guest TLB. */
383 gtlbe = kvmppc_44x_itlb_search(vcpu, eaddr); 344 gtlb_index = kvmppc_44x_itlb_index(vcpu, eaddr);
384 if (!gtlbe) { 345 if (gtlb_index < 0) {
385 /* The guest didn't have a mapping for it. */ 346 /* The guest didn't have a mapping for it. */
386 kvmppc_queue_exception(vcpu, exit_nr); 347 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS);
387 vcpu->stat.itlb_real_miss_exits++; 348 kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS);
388 break; 349 break;
389 } 350 }
390 351
391 vcpu->stat.itlb_virt_miss_exits++; 352 kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS);
392 353
393 gfn = tlb_xlate(gtlbe, eaddr) >> PAGE_SHIFT; 354 gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
355 gpaddr = tlb_xlate(gtlbe, eaddr);
356 gfn = gpaddr >> PAGE_SHIFT;
394 357
395 if (kvm_is_visible_gfn(vcpu->kvm, gfn)) { 358 if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
396 /* The guest TLB had a mapping, but the shadow TLB 359 /* The guest TLB had a mapping, but the shadow TLB
@@ -399,12 +362,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
399 * b) the guest used a large mapping which we're faking 362 * b) the guest used a large mapping which we're faking
400 * Either way, we need to satisfy the fault without 363 * Either way, we need to satisfy the fault without
401 * invoking the guest. */ 364 * invoking the guest. */
402 kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid, 365 kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid,
403 gtlbe->word2); 366 gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
404 } else { 367 } else {
405 /* Guest mapped and leaped at non-RAM! */ 368 /* Guest mapped and leaped at non-RAM! */
406 kvmppc_queue_exception(vcpu, 369 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK);
407 BOOKE_INTERRUPT_MACHINE_CHECK);
408 } 370 }
409 371
410 break; 372 break;
@@ -421,6 +383,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
421 mtspr(SPRN_DBSR, dbsr); 383 mtspr(SPRN_DBSR, dbsr);
422 384
423 run->exit_reason = KVM_EXIT_DEBUG; 385 run->exit_reason = KVM_EXIT_DEBUG;
386 kvmppc_account_exit(vcpu, DEBUG_EXITS);
424 r = RESUME_HOST; 387 r = RESUME_HOST;
425 break; 388 break;
426 } 389 }
@@ -432,10 +395,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
432 395
433 local_irq_disable(); 396 local_irq_disable();
434 397
435 kvmppc_check_and_deliver_interrupts(vcpu); 398 kvmppc_core_deliver_interrupts(vcpu);
436 399
437 /* Do some exit accounting. */
438 vcpu->stat.sum_exits++;
439 if (!(r & RESUME_HOST)) { 400 if (!(r & RESUME_HOST)) {
440 /* To avoid clobbering exit_reason, only check for signals if 401 /* To avoid clobbering exit_reason, only check for signals if
441 * we aren't already exiting to userspace for some other 402 * we aren't already exiting to userspace for some other
@@ -443,22 +404,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
443 if (signal_pending(current)) { 404 if (signal_pending(current)) {
444 run->exit_reason = KVM_EXIT_INTR; 405 run->exit_reason = KVM_EXIT_INTR;
445 r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); 406 r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
446 407 kvmppc_account_exit(vcpu, SIGNAL_EXITS);
447 vcpu->stat.signal_exits++;
448 } else {
449 vcpu->stat.light_exits++;
450 }
451 } else {
452 switch (run->exit_reason) {
453 case KVM_EXIT_MMIO:
454 vcpu->stat.mmio_exits++;
455 break;
456 case KVM_EXIT_DCR:
457 vcpu->stat.dcr_exits++;
458 break;
459 case KVM_EXIT_INTR:
460 vcpu->stat.signal_exits++;
461 break;
462 } 408 }
463 } 409 }
464 410
@@ -468,20 +414,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
468/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ 414/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
469int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 415int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
470{ 416{
471 struct tlbe *tlbe = &vcpu->arch.guest_tlb[0];
472
473 tlbe->tid = 0;
474 tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
475 tlbe->word1 = 0;
476 tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR;
477
478 tlbe++;
479 tlbe->tid = 0;
480 tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID;
481 tlbe->word1 = 0xef600000;
482 tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR
483 | PPC44x_TLB_I | PPC44x_TLB_G;
484
485 vcpu->arch.pc = 0; 417 vcpu->arch.pc = 0;
486 vcpu->arch.msr = 0; 418 vcpu->arch.msr = 0;
487 vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */ 419 vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */
@@ -492,12 +424,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
492 * before it's programmed its own IVPR. */ 424 * before it's programmed its own IVPR. */
493 vcpu->arch.ivpr = 0x55550000; 425 vcpu->arch.ivpr = 0x55550000;
494 426
495 /* Since the guest can directly access the timebase, it must know the 427 kvmppc_init_timing_stats(vcpu);
496 * real timebase frequency. Accordingly, it must see the state of
497 * CCR1[TCS]. */
498 vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
499 428
500 return 0; 429 return kvmppc_core_vcpu_setup(vcpu);
501} 430}
502 431
503int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 432int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
@@ -536,7 +465,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
536 vcpu->arch.ctr = regs->ctr; 465 vcpu->arch.ctr = regs->ctr;
537 vcpu->arch.lr = regs->lr; 466 vcpu->arch.lr = regs->lr;
538 vcpu->arch.xer = regs->xer; 467 vcpu->arch.xer = regs->xer;
539 vcpu->arch.msr = regs->msr; 468 kvmppc_set_msr(vcpu, regs->msr);
540 vcpu->arch.srr0 = regs->srr0; 469 vcpu->arch.srr0 = regs->srr0;
541 vcpu->arch.srr1 = regs->srr1; 470 vcpu->arch.srr1 = regs->srr1;
542 vcpu->arch.sprg0 = regs->sprg0; 471 vcpu->arch.sprg0 = regs->sprg0;
@@ -575,31 +504,62 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
575 return -ENOTSUPP; 504 return -ENOTSUPP;
576} 505}
577 506
578/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
579int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 507int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
580 struct kvm_translation *tr) 508 struct kvm_translation *tr)
581{ 509{
582 struct tlbe *gtlbe; 510 return kvmppc_core_vcpu_translate(vcpu, tr);
583 int index; 511}
584 gva_t eaddr;
585 u8 pid;
586 u8 as;
587
588 eaddr = tr->linear_address;
589 pid = (tr->linear_address >> 32) & 0xff;
590 as = (tr->linear_address >> 40) & 0x1;
591
592 index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as);
593 if (index == -1) {
594 tr->valid = 0;
595 return 0;
596 }
597 512
598 gtlbe = &vcpu->arch.guest_tlb[index]; 513int kvmppc_booke_init(void)
514{
515 unsigned long ivor[16];
516 unsigned long max_ivor = 0;
517 int i;
599 518
600 tr->physical_address = tlb_xlate(gtlbe, eaddr); 519 /* We install our own exception handlers by hijacking IVPR. IVPR must
601 /* XXX what does "writeable" and "usermode" even mean? */ 520 * be 16-bit aligned, so we need a 64KB allocation. */
602 tr->valid = 1; 521 kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
522 VCPU_SIZE_ORDER);
523 if (!kvmppc_booke_handlers)
524 return -ENOMEM;
525
526 /* XXX make sure our handlers are smaller than Linux's */
527
528 /* Copy our interrupt handlers to match host IVORs. That way we don't
529 * have to swap the IVORs on every guest/host transition. */
530 ivor[0] = mfspr(SPRN_IVOR0);
531 ivor[1] = mfspr(SPRN_IVOR1);
532 ivor[2] = mfspr(SPRN_IVOR2);
533 ivor[3] = mfspr(SPRN_IVOR3);
534 ivor[4] = mfspr(SPRN_IVOR4);
535 ivor[5] = mfspr(SPRN_IVOR5);
536 ivor[6] = mfspr(SPRN_IVOR6);
537 ivor[7] = mfspr(SPRN_IVOR7);
538 ivor[8] = mfspr(SPRN_IVOR8);
539 ivor[9] = mfspr(SPRN_IVOR9);
540 ivor[10] = mfspr(SPRN_IVOR10);
541 ivor[11] = mfspr(SPRN_IVOR11);
542 ivor[12] = mfspr(SPRN_IVOR12);
543 ivor[13] = mfspr(SPRN_IVOR13);
544 ivor[14] = mfspr(SPRN_IVOR14);
545 ivor[15] = mfspr(SPRN_IVOR15);
546
547 for (i = 0; i < 16; i++) {
548 if (ivor[i] > max_ivor)
549 max_ivor = ivor[i];
550
551 memcpy((void *)kvmppc_booke_handlers + ivor[i],
552 kvmppc_handlers_start + i * kvmppc_handler_len,
553 kvmppc_handler_len);
554 }
555 flush_icache_range(kvmppc_booke_handlers,
556 kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
603 557
604 return 0; 558 return 0;
605} 559}
560
561void __exit kvmppc_booke_exit(void)
562{
563 free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
564 kvm_exit();
565}
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
new file mode 100644
index 000000000000..cf7c94ca24bf
--- /dev/null
+++ b/arch/powerpc/kvm/booke.h
@@ -0,0 +1,60 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#ifndef __KVM_BOOKE_H__
21#define __KVM_BOOKE_H__
22
23#include <linux/types.h>
24#include <linux/kvm_host.h>
25#include "timing.h"
26
27/* interrupt priortity ordering */
28#define BOOKE_IRQPRIO_DATA_STORAGE 0
29#define BOOKE_IRQPRIO_INST_STORAGE 1
30#define BOOKE_IRQPRIO_ALIGNMENT 2
31#define BOOKE_IRQPRIO_PROGRAM 3
32#define BOOKE_IRQPRIO_FP_UNAVAIL 4
33#define BOOKE_IRQPRIO_SYSCALL 5
34#define BOOKE_IRQPRIO_AP_UNAVAIL 6
35#define BOOKE_IRQPRIO_DTLB_MISS 7
36#define BOOKE_IRQPRIO_ITLB_MISS 8
37#define BOOKE_IRQPRIO_MACHINE_CHECK 9
38#define BOOKE_IRQPRIO_DEBUG 10
39#define BOOKE_IRQPRIO_CRITICAL 11
40#define BOOKE_IRQPRIO_WATCHDOG 12
41#define BOOKE_IRQPRIO_EXTERNAL 13
42#define BOOKE_IRQPRIO_FIT 14
43#define BOOKE_IRQPRIO_DECREMENTER 15
44
45/* Helper function for "full" MSR writes. No need to call this if only EE is
46 * changing. */
47static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
48{
49 if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR))
50 kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR);
51
52 vcpu->arch.msr = new_msr;
53
54 if (vcpu->arch.msr & MSR_WE) {
55 kvm_vcpu_block(vcpu);
56 kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
57 };
58}
59
60#endif /* __KVM_BOOKE_H__ */
diff --git a/arch/powerpc/kvm/booke_host.c b/arch/powerpc/kvm/booke_host.c
deleted file mode 100644
index b480341bc31e..000000000000
--- a/arch/powerpc/kvm/booke_host.c
+++ /dev/null
@@ -1,83 +0,0 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 */
19
20#include <linux/errno.h>
21#include <linux/kvm_host.h>
22#include <linux/module.h>
23#include <asm/cacheflush.h>
24#include <asm/kvm_ppc.h>
25
26unsigned long kvmppc_booke_handlers;
27
28static int kvmppc_booke_init(void)
29{
30 unsigned long ivor[16];
31 unsigned long max_ivor = 0;
32 int i;
33
34 /* We install our own exception handlers by hijacking IVPR. IVPR must
35 * be 16-bit aligned, so we need a 64KB allocation. */
36 kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
37 VCPU_SIZE_ORDER);
38 if (!kvmppc_booke_handlers)
39 return -ENOMEM;
40
41 /* XXX make sure our handlers are smaller than Linux's */
42
43 /* Copy our interrupt handlers to match host IVORs. That way we don't
44 * have to swap the IVORs on every guest/host transition. */
45 ivor[0] = mfspr(SPRN_IVOR0);
46 ivor[1] = mfspr(SPRN_IVOR1);
47 ivor[2] = mfspr(SPRN_IVOR2);
48 ivor[3] = mfspr(SPRN_IVOR3);
49 ivor[4] = mfspr(SPRN_IVOR4);
50 ivor[5] = mfspr(SPRN_IVOR5);
51 ivor[6] = mfspr(SPRN_IVOR6);
52 ivor[7] = mfspr(SPRN_IVOR7);
53 ivor[8] = mfspr(SPRN_IVOR8);
54 ivor[9] = mfspr(SPRN_IVOR9);
55 ivor[10] = mfspr(SPRN_IVOR10);
56 ivor[11] = mfspr(SPRN_IVOR11);
57 ivor[12] = mfspr(SPRN_IVOR12);
58 ivor[13] = mfspr(SPRN_IVOR13);
59 ivor[14] = mfspr(SPRN_IVOR14);
60 ivor[15] = mfspr(SPRN_IVOR15);
61
62 for (i = 0; i < 16; i++) {
63 if (ivor[i] > max_ivor)
64 max_ivor = ivor[i];
65
66 memcpy((void *)kvmppc_booke_handlers + ivor[i],
67 kvmppc_handlers_start + i * kvmppc_handler_len,
68 kvmppc_handler_len);
69 }
70 flush_icache_range(kvmppc_booke_handlers,
71 kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
72
73 return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
74}
75
76static void __exit kvmppc_booke_exit(void)
77{
78 free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
79 kvm_exit();
80}
81
82module_init(kvmppc_booke_init)
83module_exit(kvmppc_booke_exit)
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 95e165baf85f..084ebcd7dd83 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -107,6 +107,18 @@ _GLOBAL(kvmppc_resume_host)
107 li r6, 1 107 li r6, 1
108 slw r6, r6, r5 108 slw r6, r6, r5
109 109
110#ifdef CONFIG_KVM_EXIT_TIMING
111 /* save exit time */
1121:
113 mfspr r7, SPRN_TBRU
114 mfspr r8, SPRN_TBRL
115 mfspr r9, SPRN_TBRU
116 cmpw r9, r7
117 bne 1b
118 stw r8, VCPU_TIMING_EXIT_TBL(r4)
119 stw r9, VCPU_TIMING_EXIT_TBU(r4)
120#endif
121
110 /* Save the faulting instruction and all GPRs for emulation. */ 122 /* Save the faulting instruction and all GPRs for emulation. */
111 andi. r7, r6, NEED_INST_MASK 123 andi. r7, r6, NEED_INST_MASK
112 beq ..skip_inst_copy 124 beq ..skip_inst_copy
@@ -335,54 +347,6 @@ lightweight_exit:
335 lwz r3, VCPU_SHADOW_PID(r4) 347 lwz r3, VCPU_SHADOW_PID(r4)
336 mtspr SPRN_PID, r3 348 mtspr SPRN_PID, r3
337 349
338 /* Prevent all asynchronous TLB updates. */
339 mfmsr r5
340 lis r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@h
341 ori r6, r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l
342 andc r6, r5, r6
343 mtmsr r6
344
345 /* Load the guest mappings, leaving the host's "pinned" kernel mappings
346 * in place. */
347 mfspr r10, SPRN_MMUCR /* Save host MMUCR. */
348 li r5, PPC44x_TLB_SIZE
349 lis r5, tlb_44x_hwater@ha
350 lwz r5, tlb_44x_hwater@l(r5)
351 mtctr r5
352 addi r9, r4, VCPU_SHADOW_TLB
353 addi r5, r4, VCPU_SHADOW_MOD
354 li r3, 0
3551:
356 lbzx r7, r3, r5
357 cmpwi r7, 0
358 beq 3f
359
360 /* Load guest entry. */
361 mulli r11, r3, TLBE_BYTES
362 add r11, r11, r9
363 lwz r7, 0(r11)
364 mtspr SPRN_MMUCR, r7
365 lwz r7, 4(r11)
366 tlbwe r7, r3, PPC44x_TLB_PAGEID
367 lwz r7, 8(r11)
368 tlbwe r7, r3, PPC44x_TLB_XLAT
369 lwz r7, 12(r11)
370 tlbwe r7, r3, PPC44x_TLB_ATTRIB
3713:
372 addi r3, r3, 1 /* Increment index. */
373 bdnz 1b
374
375 mtspr SPRN_MMUCR, r10 /* Restore host MMUCR. */
376
377 /* Clear bitmap of modified TLB entries */
378 li r5, PPC44x_TLB_SIZE>>2
379 mtctr r5
380 addi r5, r4, VCPU_SHADOW_MOD - 4
381 li r6, 0
3821:
383 stwu r6, 4(r5)
384 bdnz 1b
385
386 iccci 0, 0 /* XXX hack */ 350 iccci 0, 0 /* XXX hack */
387 351
388 /* Load some guest volatiles. */ 352 /* Load some guest volatiles. */
@@ -423,6 +387,18 @@ lightweight_exit:
423 lwz r3, VCPU_SPRG7(r4) 387 lwz r3, VCPU_SPRG7(r4)
424 mtspr SPRN_SPRG7, r3 388 mtspr SPRN_SPRG7, r3
425 389
390#ifdef CONFIG_KVM_EXIT_TIMING
391 /* save enter time */
3921:
393 mfspr r6, SPRN_TBRU
394 mfspr r7, SPRN_TBRL
395 mfspr r8, SPRN_TBRU
396 cmpw r8, r6
397 bne 1b
398 stw r7, VCPU_TIMING_LAST_ENTER_TBL(r4)
399 stw r8, VCPU_TIMING_LAST_ENTER_TBU(r4)
400#endif
401
426 /* Finish loading guest volatiles and jump to guest. */ 402 /* Finish loading guest volatiles and jump to guest. */
427 lwz r3, VCPU_CTR(r4) 403 lwz r3, VCPU_CTR(r4)
428 mtctr r3 404 mtctr r3
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 0fce4fbdc20d..d1d38daa93fb 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -23,161 +23,14 @@
23#include <linux/string.h> 23#include <linux/string.h>
24#include <linux/kvm_host.h> 24#include <linux/kvm_host.h>
25 25
26#include <asm/dcr.h> 26#include <asm/reg.h>
27#include <asm/dcr-regs.h>
28#include <asm/time.h> 27#include <asm/time.h>
29#include <asm/byteorder.h> 28#include <asm/byteorder.h>
30#include <asm/kvm_ppc.h> 29#include <asm/kvm_ppc.h>
30#include <asm/disassemble.h>
31#include "timing.h"
31 32
32#include "44x_tlb.h" 33void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
33
34/* Instruction decoding */
35static inline unsigned int get_op(u32 inst)
36{
37 return inst >> 26;
38}
39
40static inline unsigned int get_xop(u32 inst)
41{
42 return (inst >> 1) & 0x3ff;
43}
44
45static inline unsigned int get_sprn(u32 inst)
46{
47 return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
48}
49
50static inline unsigned int get_dcrn(u32 inst)
51{
52 return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
53}
54
55static inline unsigned int get_rt(u32 inst)
56{
57 return (inst >> 21) & 0x1f;
58}
59
60static inline unsigned int get_rs(u32 inst)
61{
62 return (inst >> 21) & 0x1f;
63}
64
65static inline unsigned int get_ra(u32 inst)
66{
67 return (inst >> 16) & 0x1f;
68}
69
70static inline unsigned int get_rb(u32 inst)
71{
72 return (inst >> 11) & 0x1f;
73}
74
75static inline unsigned int get_rc(u32 inst)
76{
77 return inst & 0x1;
78}
79
80static inline unsigned int get_ws(u32 inst)
81{
82 return (inst >> 11) & 0x1f;
83}
84
85static inline unsigned int get_d(u32 inst)
86{
87 return inst & 0xffff;
88}
89
90static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
91 const struct tlbe *tlbe)
92{
93 gpa_t gpa;
94
95 if (!get_tlb_v(tlbe))
96 return 0;
97
98 /* Does it match current guest AS? */
99 /* XXX what about IS != DS? */
100 if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
101 return 0;
102
103 gpa = get_tlb_raddr(tlbe);
104 if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
105 /* Mapping is not for RAM. */
106 return 0;
107
108 return 1;
109}
110
111static int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u32 inst)
112{
113 u64 eaddr;
114 u64 raddr;
115 u64 asid;
116 u32 flags;
117 struct tlbe *tlbe;
118 unsigned int ra;
119 unsigned int rs;
120 unsigned int ws;
121 unsigned int index;
122
123 ra = get_ra(inst);
124 rs = get_rs(inst);
125 ws = get_ws(inst);
126
127 index = vcpu->arch.gpr[ra];
128 if (index > PPC44x_TLB_SIZE) {
129 printk("%s: index %d\n", __func__, index);
130 kvmppc_dump_vcpu(vcpu);
131 return EMULATE_FAIL;
132 }
133
134 tlbe = &vcpu->arch.guest_tlb[index];
135
136 /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
137 if (tlbe->word0 & PPC44x_TLB_VALID) {
138 eaddr = get_tlb_eaddr(tlbe);
139 asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
140 kvmppc_mmu_invalidate(vcpu, eaddr, get_tlb_end(tlbe), asid);
141 }
142
143 switch (ws) {
144 case PPC44x_TLB_PAGEID:
145 tlbe->tid = vcpu->arch.mmucr & 0xff;
146 tlbe->word0 = vcpu->arch.gpr[rs];
147 break;
148
149 case PPC44x_TLB_XLAT:
150 tlbe->word1 = vcpu->arch.gpr[rs];
151 break;
152
153 case PPC44x_TLB_ATTRIB:
154 tlbe->word2 = vcpu->arch.gpr[rs];
155 break;
156
157 default:
158 return EMULATE_FAIL;
159 }
160
161 if (tlbe_is_host_safe(vcpu, tlbe)) {
162 eaddr = get_tlb_eaddr(tlbe);
163 raddr = get_tlb_raddr(tlbe);
164 asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
165 flags = tlbe->word2 & 0xffff;
166
167 /* Create a 4KB mapping on the host. If the guest wanted a
168 * large page, only the first 4KB is mapped here and the rest
169 * are mapped on the fly. */
170 kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags);
171 }
172
173 KVMTRACE_5D(GTLB_WRITE, vcpu, index,
174 tlbe->tid, tlbe->word0, tlbe->word1, tlbe->word2,
175 handler);
176
177 return EMULATE_DONE;
178}
179
180static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
181{ 34{
182 if (vcpu->arch.tcr & TCR_DIE) { 35 if (vcpu->arch.tcr & TCR_DIE) {
183 /* The decrementer ticks at the same rate as the timebase, so 36 /* The decrementer ticks at the same rate as the timebase, so
@@ -193,12 +46,6 @@ static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
193 } 46 }
194} 47}
195 48
196static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
197{
198 vcpu->arch.pc = vcpu->arch.srr0;
199 kvmppc_set_msr(vcpu, vcpu->arch.srr1);
200}
201
202/* XXX to do: 49/* XXX to do:
203 * lhax 50 * lhax
204 * lhaux 51 * lhaux
@@ -213,40 +60,30 @@ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
213 * 60 *
214 * XXX is_bigendian should depend on MMU mapping or MSR[LE] 61 * XXX is_bigendian should depend on MMU mapping or MSR[LE]
215 */ 62 */
63/* XXX Should probably auto-generate instruction decoding for a particular core
64 * from opcode tables in the future. */
216int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 65int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
217{ 66{
218 u32 inst = vcpu->arch.last_inst; 67 u32 inst = vcpu->arch.last_inst;
219 u32 ea; 68 u32 ea;
220 int ra; 69 int ra;
221 int rb; 70 int rb;
222 int rc;
223 int rs; 71 int rs;
224 int rt; 72 int rt;
225 int sprn; 73 int sprn;
226 int dcrn;
227 enum emulation_result emulated = EMULATE_DONE; 74 enum emulation_result emulated = EMULATE_DONE;
228 int advance = 1; 75 int advance = 1;
229 76
77 /* this default type might be overwritten by subcategories */
78 kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
79
230 switch (get_op(inst)) { 80 switch (get_op(inst)) {
231 case 3: /* trap */ 81 case 3: /* trap */
232 printk("trap!\n"); 82 vcpu->arch.esr |= ESR_PTR;
233 kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM); 83 kvmppc_core_queue_program(vcpu);
234 advance = 0; 84 advance = 0;
235 break; 85 break;
236 86
237 case 19:
238 switch (get_xop(inst)) {
239 case 50: /* rfi */
240 kvmppc_emul_rfi(vcpu);
241 advance = 0;
242 break;
243
244 default:
245 emulated = EMULATE_FAIL;
246 break;
247 }
248 break;
249
250 case 31: 87 case 31:
251 switch (get_xop(inst)) { 88 switch (get_xop(inst)) {
252 89
@@ -255,27 +92,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
255 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); 92 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
256 break; 93 break;
257 94
258 case 83: /* mfmsr */
259 rt = get_rt(inst);
260 vcpu->arch.gpr[rt] = vcpu->arch.msr;
261 break;
262
263 case 87: /* lbzx */ 95 case 87: /* lbzx */
264 rt = get_rt(inst); 96 rt = get_rt(inst);
265 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); 97 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
266 break; 98 break;
267 99
268 case 131: /* wrtee */
269 rs = get_rs(inst);
270 vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
271 | (vcpu->arch.gpr[rs] & MSR_EE);
272 break;
273
274 case 146: /* mtmsr */
275 rs = get_rs(inst);
276 kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
277 break;
278
279 case 151: /* stwx */ 100 case 151: /* stwx */
280 rs = get_rs(inst); 101 rs = get_rs(inst);
281 emulated = kvmppc_handle_store(run, vcpu, 102 emulated = kvmppc_handle_store(run, vcpu,
@@ -283,11 +104,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
283 4, 1); 104 4, 1);
284 break; 105 break;
285 106
286 case 163: /* wrteei */
287 vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
288 | (inst & MSR_EE);
289 break;
290
291 case 215: /* stbx */ 107 case 215: /* stbx */
292 rs = get_rs(inst); 108 rs = get_rs(inst);
293 emulated = kvmppc_handle_store(run, vcpu, 109 emulated = kvmppc_handle_store(run, vcpu,
@@ -328,42 +144,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
328 vcpu->arch.gpr[ra] = ea; 144 vcpu->arch.gpr[ra] = ea;
329 break; 145 break;
330 146
331 case 323: /* mfdcr */
332 dcrn = get_dcrn(inst);
333 rt = get_rt(inst);
334
335 /* The guest may access CPR0 registers to determine the timebase
336 * frequency, and it must know the real host frequency because it
337 * can directly access the timebase registers.
338 *
339 * It would be possible to emulate those accesses in userspace,
340 * but userspace can really only figure out the end frequency.
341 * We could decompose that into the factors that compute it, but
342 * that's tricky math, and it's easier to just report the real
343 * CPR0 values.
344 */
345 switch (dcrn) {
346 case DCRN_CPR0_CONFIG_ADDR:
347 vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
348 break;
349 case DCRN_CPR0_CONFIG_DATA:
350 local_irq_disable();
351 mtdcr(DCRN_CPR0_CONFIG_ADDR,
352 vcpu->arch.cpr0_cfgaddr);
353 vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
354 local_irq_enable();
355 break;
356 default:
357 run->dcr.dcrn = dcrn;
358 run->dcr.data = 0;
359 run->dcr.is_write = 0;
360 vcpu->arch.io_gpr = rt;
361 vcpu->arch.dcr_needed = 1;
362 emulated = EMULATE_DO_DCR;
363 }
364
365 break;
366
367 case 339: /* mfspr */ 147 case 339: /* mfspr */
368 sprn = get_sprn(inst); 148 sprn = get_sprn(inst);
369 rt = get_rt(inst); 149 rt = get_rt(inst);
@@ -373,26 +153,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
373 vcpu->arch.gpr[rt] = vcpu->arch.srr0; break; 153 vcpu->arch.gpr[rt] = vcpu->arch.srr0; break;
374 case SPRN_SRR1: 154 case SPRN_SRR1:
375 vcpu->arch.gpr[rt] = vcpu->arch.srr1; break; 155 vcpu->arch.gpr[rt] = vcpu->arch.srr1; break;
376 case SPRN_MMUCR:
377 vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
378 case SPRN_PID:
379 vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
380 case SPRN_IVPR:
381 vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
382 case SPRN_CCR0:
383 vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
384 case SPRN_CCR1:
385 vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
386 case SPRN_PVR: 156 case SPRN_PVR:
387 vcpu->arch.gpr[rt] = vcpu->arch.pvr; break; 157 vcpu->arch.gpr[rt] = vcpu->arch.pvr; break;
388 case SPRN_DEAR:
389 vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
390 case SPRN_ESR:
391 vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
392 case SPRN_DBCR0:
393 vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
394 case SPRN_DBCR1:
395 vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
396 158
397 /* Note: mftb and TBRL/TBWL are user-accessible, so 159 /* Note: mftb and TBRL/TBWL are user-accessible, so
398 * the guest can always access the real TB anyways. 160 * the guest can always access the real TB anyways.
@@ -413,42 +175,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
413 /* Note: SPRG4-7 are user-readable, so we don't get 175 /* Note: SPRG4-7 are user-readable, so we don't get
414 * a trap. */ 176 * a trap. */
415 177
416 case SPRN_IVOR0:
417 vcpu->arch.gpr[rt] = vcpu->arch.ivor[0]; break;
418 case SPRN_IVOR1:
419 vcpu->arch.gpr[rt] = vcpu->arch.ivor[1]; break;
420 case SPRN_IVOR2:
421 vcpu->arch.gpr[rt] = vcpu->arch.ivor[2]; break;
422 case SPRN_IVOR3:
423 vcpu->arch.gpr[rt] = vcpu->arch.ivor[3]; break;
424 case SPRN_IVOR4:
425 vcpu->arch.gpr[rt] = vcpu->arch.ivor[4]; break;
426 case SPRN_IVOR5:
427 vcpu->arch.gpr[rt] = vcpu->arch.ivor[5]; break;
428 case SPRN_IVOR6:
429 vcpu->arch.gpr[rt] = vcpu->arch.ivor[6]; break;
430 case SPRN_IVOR7:
431 vcpu->arch.gpr[rt] = vcpu->arch.ivor[7]; break;
432 case SPRN_IVOR8:
433 vcpu->arch.gpr[rt] = vcpu->arch.ivor[8]; break;
434 case SPRN_IVOR9:
435 vcpu->arch.gpr[rt] = vcpu->arch.ivor[9]; break;
436 case SPRN_IVOR10:
437 vcpu->arch.gpr[rt] = vcpu->arch.ivor[10]; break;
438 case SPRN_IVOR11:
439 vcpu->arch.gpr[rt] = vcpu->arch.ivor[11]; break;
440 case SPRN_IVOR12:
441 vcpu->arch.gpr[rt] = vcpu->arch.ivor[12]; break;
442 case SPRN_IVOR13:
443 vcpu->arch.gpr[rt] = vcpu->arch.ivor[13]; break;
444 case SPRN_IVOR14:
445 vcpu->arch.gpr[rt] = vcpu->arch.ivor[14]; break;
446 case SPRN_IVOR15:
447 vcpu->arch.gpr[rt] = vcpu->arch.ivor[15]; break;
448
449 default: 178 default:
450 printk("mfspr: unknown spr %x\n", sprn); 179 emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt);
451 vcpu->arch.gpr[rt] = 0; 180 if (emulated == EMULATE_FAIL) {
181 printk("mfspr: unknown spr %x\n", sprn);
182 vcpu->arch.gpr[rt] = 0;
183 }
452 break; 184 break;
453 } 185 }
454 break; 186 break;
@@ -478,25 +210,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
478 vcpu->arch.gpr[ra] = ea; 210 vcpu->arch.gpr[ra] = ea;
479 break; 211 break;
480 212
481 case 451: /* mtdcr */
482 dcrn = get_dcrn(inst);
483 rs = get_rs(inst);
484
485 /* emulate some access in kernel */
486 switch (dcrn) {
487 case DCRN_CPR0_CONFIG_ADDR:
488 vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
489 break;
490 default:
491 run->dcr.dcrn = dcrn;
492 run->dcr.data = vcpu->arch.gpr[rs];
493 run->dcr.is_write = 1;
494 vcpu->arch.dcr_needed = 1;
495 emulated = EMULATE_DO_DCR;
496 }
497
498 break;
499
500 case 467: /* mtspr */ 213 case 467: /* mtspr */
501 sprn = get_sprn(inst); 214 sprn = get_sprn(inst);
502 rs = get_rs(inst); 215 rs = get_rs(inst);
@@ -505,22 +218,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
505 vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break; 218 vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break;
506 case SPRN_SRR1: 219 case SPRN_SRR1:
507 vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break; 220 vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break;
508 case SPRN_MMUCR:
509 vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
510 case SPRN_PID:
511 kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
512 case SPRN_CCR0:
513 vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
514 case SPRN_CCR1:
515 vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
516 case SPRN_DEAR:
517 vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
518 case SPRN_ESR:
519 vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
520 case SPRN_DBCR0:
521 vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
522 case SPRN_DBCR1:
523 vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
524 221
525 /* XXX We need to context-switch the timebase for 222 /* XXX We need to context-switch the timebase for
526 * watchdog and FIT. */ 223 * watchdog and FIT. */
@@ -532,14 +229,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
532 kvmppc_emulate_dec(vcpu); 229 kvmppc_emulate_dec(vcpu);
533 break; 230 break;
534 231
535 case SPRN_TSR:
536 vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
537
538 case SPRN_TCR:
539 vcpu->arch.tcr = vcpu->arch.gpr[rs];
540 kvmppc_emulate_dec(vcpu);
541 break;
542
543 case SPRN_SPRG0: 232 case SPRN_SPRG0:
544 vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break; 233 vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break;
545 case SPRN_SPRG1: 234 case SPRN_SPRG1:
@@ -549,56 +238,10 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
549 case SPRN_SPRG3: 238 case SPRN_SPRG3:
550 vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break; 239 vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break;
551 240
552 /* Note: SPRG4-7 are user-readable. These values are
553 * loaded into the real SPRGs when resuming the
554 * guest. */
555 case SPRN_SPRG4:
556 vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
557 case SPRN_SPRG5:
558 vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
559 case SPRN_SPRG6:
560 vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
561 case SPRN_SPRG7:
562 vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
563
564 case SPRN_IVPR:
565 vcpu->arch.ivpr = vcpu->arch.gpr[rs]; break;
566 case SPRN_IVOR0:
567 vcpu->arch.ivor[0] = vcpu->arch.gpr[rs]; break;
568 case SPRN_IVOR1:
569 vcpu->arch.ivor[1] = vcpu->arch.gpr[rs]; break;
570 case SPRN_IVOR2:
571 vcpu->arch.ivor[2] = vcpu->arch.gpr[rs]; break;
572 case SPRN_IVOR3:
573 vcpu->arch.ivor[3] = vcpu->arch.gpr[rs]; break;
574 case SPRN_IVOR4:
575 vcpu->arch.ivor[4] = vcpu->arch.gpr[rs]; break;
576 case SPRN_IVOR5:
577 vcpu->arch.ivor[5] = vcpu->arch.gpr[rs]; break;
578 case SPRN_IVOR6:
579 vcpu->arch.ivor[6] = vcpu->arch.gpr[rs]; break;
580 case SPRN_IVOR7:
581 vcpu->arch.ivor[7] = vcpu->arch.gpr[rs]; break;
582 case SPRN_IVOR8:
583 vcpu->arch.ivor[8] = vcpu->arch.gpr[rs]; break;
584 case SPRN_IVOR9:
585 vcpu->arch.ivor[9] = vcpu->arch.gpr[rs]; break;
586 case SPRN_IVOR10:
587 vcpu->arch.ivor[10] = vcpu->arch.gpr[rs]; break;
588 case SPRN_IVOR11:
589 vcpu->arch.ivor[11] = vcpu->arch.gpr[rs]; break;
590 case SPRN_IVOR12:
591 vcpu->arch.ivor[12] = vcpu->arch.gpr[rs]; break;
592 case SPRN_IVOR13:
593 vcpu->arch.ivor[13] = vcpu->arch.gpr[rs]; break;
594 case SPRN_IVOR14:
595 vcpu->arch.ivor[14] = vcpu->arch.gpr[rs]; break;
596 case SPRN_IVOR15:
597 vcpu->arch.ivor[15] = vcpu->arch.gpr[rs]; break;
598
599 default: 241 default:
600 printk("mtspr: unknown spr %x\n", sprn); 242 emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs);
601 emulated = EMULATE_FAIL; 243 if (emulated == EMULATE_FAIL)
244 printk("mtspr: unknown spr %x\n", sprn);
602 break; 245 break;
603 } 246 }
604 break; 247 break;
@@ -629,36 +272,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
629 4, 0); 272 4, 0);
630 break; 273 break;
631 274
632 case 978: /* tlbwe */
633 emulated = kvmppc_emul_tlbwe(vcpu, inst);
634 break;
635
636 case 914: { /* tlbsx */
637 int index;
638 unsigned int as = get_mmucr_sts(vcpu);
639 unsigned int pid = get_mmucr_stid(vcpu);
640
641 rt = get_rt(inst);
642 ra = get_ra(inst);
643 rb = get_rb(inst);
644 rc = get_rc(inst);
645
646 ea = vcpu->arch.gpr[rb];
647 if (ra)
648 ea += vcpu->arch.gpr[ra];
649
650 index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
651 if (rc) {
652 if (index < 0)
653 vcpu->arch.cr &= ~0x20000000;
654 else
655 vcpu->arch.cr |= 0x20000000;
656 }
657 vcpu->arch.gpr[rt] = index;
658
659 }
660 break;
661
662 case 790: /* lhbrx */ 275 case 790: /* lhbrx */
663 rt = get_rt(inst); 276 rt = get_rt(inst);
664 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); 277 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
@@ -674,14 +287,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
674 2, 0); 287 2, 0);
675 break; 288 break;
676 289
677 case 966: /* iccci */
678 break;
679
680 default: 290 default:
681 printk("unknown: op %d xop %d\n", get_op(inst), 291 /* Attempt core-specific emulation below. */
682 get_xop(inst));
683 emulated = EMULATE_FAIL; 292 emulated = EMULATE_FAIL;
684 break;
685 } 293 }
686 break; 294 break;
687 295
@@ -764,12 +372,19 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
764 break; 372 break;
765 373
766 default: 374 default:
767 printk("unknown op %d\n", get_op(inst));
768 emulated = EMULATE_FAIL; 375 emulated = EMULATE_FAIL;
769 break;
770 } 376 }
771 377
772 KVMTRACE_3D(PPC_INSTR, vcpu, inst, vcpu->arch.pc, emulated, entryexit); 378 if (emulated == EMULATE_FAIL) {
379 emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance);
380 if (emulated == EMULATE_FAIL) {
381 advance = 0;
382 printk(KERN_ERR "Couldn't emulate instruction 0x%08x "
383 "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst));
384 }
385 }
386
387 KVMTRACE_3D(PPC_INSTR, vcpu, inst, (int)vcpu->arch.pc, emulated, entryexit);
773 388
774 if (advance) 389 if (advance)
775 vcpu->arch.pc += 4; /* Advance past emulated instruction. */ 390 vcpu->arch.pc += 4; /* Advance past emulated instruction. */
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 8bef0efcdfe1..2822c8ccfaaf 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -28,9 +28,9 @@
28#include <asm/uaccess.h> 28#include <asm/uaccess.h>
29#include <asm/kvm_ppc.h> 29#include <asm/kvm_ppc.h>
30#include <asm/tlbflush.h> 30#include <asm/tlbflush.h>
31#include "timing.h"
31#include "../mm/mmu_decl.h" 32#include "../mm/mmu_decl.h"
32 33
33
34gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) 34gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
35{ 35{
36 return gfn; 36 return gfn;
@@ -99,14 +99,7 @@ void kvm_arch_hardware_unsetup(void)
99 99
100void kvm_arch_check_processor_compat(void *rtn) 100void kvm_arch_check_processor_compat(void *rtn)
101{ 101{
102 int r; 102 *(int *)rtn = kvmppc_core_check_processor_compat();
103
104 if (strcmp(cur_cpu_spec->platform, "ppc440") == 0)
105 r = 0;
106 else
107 r = -ENOTSUPP;
108
109 *(int *)rtn = r;
110} 103}
111 104
112struct kvm *kvm_arch_create_vm(void) 105struct kvm *kvm_arch_create_vm(void)
@@ -144,9 +137,6 @@ int kvm_dev_ioctl_check_extension(long ext)
144 int r; 137 int r;
145 138
146 switch (ext) { 139 switch (ext) {
147 case KVM_CAP_USER_MEMORY:
148 r = 1;
149 break;
150 case KVM_CAP_COALESCED_MMIO: 140 case KVM_CAP_COALESCED_MMIO:
151 r = KVM_COALESCED_MMIO_PAGE_OFFSET; 141 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
152 break; 142 break;
@@ -179,30 +169,15 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
179struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) 169struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
180{ 170{
181 struct kvm_vcpu *vcpu; 171 struct kvm_vcpu *vcpu;
182 int err; 172 vcpu = kvmppc_core_vcpu_create(kvm, id);
183 173 kvmppc_create_vcpu_debugfs(vcpu, id);
184 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
185 if (!vcpu) {
186 err = -ENOMEM;
187 goto out;
188 }
189
190 err = kvm_vcpu_init(vcpu, kvm, id);
191 if (err)
192 goto free_vcpu;
193
194 return vcpu; 174 return vcpu;
195
196free_vcpu:
197 kmem_cache_free(kvm_vcpu_cache, vcpu);
198out:
199 return ERR_PTR(err);
200} 175}
201 176
202void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) 177void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
203{ 178{
204 kvm_vcpu_uninit(vcpu); 179 kvmppc_remove_vcpu_debugfs(vcpu);
205 kmem_cache_free(kvm_vcpu_cache, vcpu); 180 kvmppc_core_vcpu_free(vcpu);
206} 181}
207 182
208void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 183void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -212,16 +187,14 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
212 187
213int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) 188int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
214{ 189{
215 unsigned int priority = exception_priority[BOOKE_INTERRUPT_DECREMENTER]; 190 return kvmppc_core_pending_dec(vcpu);
216
217 return test_bit(priority, &vcpu->arch.pending_exceptions);
218} 191}
219 192
220static void kvmppc_decrementer_func(unsigned long data) 193static void kvmppc_decrementer_func(unsigned long data)
221{ 194{
222 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; 195 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
223 196
224 kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER); 197 kvmppc_core_queue_dec(vcpu);
225 198
226 if (waitqueue_active(&vcpu->wq)) { 199 if (waitqueue_active(&vcpu->wq)) {
227 wake_up_interruptible(&vcpu->wq); 200 wake_up_interruptible(&vcpu->wq);
@@ -242,96 +215,25 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
242 kvmppc_core_destroy_mmu(vcpu); 215 kvmppc_core_destroy_mmu(vcpu);
243} 216}
244 217
245/* Note: clearing MSR[DE] just means that the debug interrupt will not be
246 * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits.
247 * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt
248 * will be delivered as an "imprecise debug event" (which is indicated by
249 * DBSR[IDE].
250 */
251static void kvmppc_disable_debug_interrupts(void)
252{
253 mtmsr(mfmsr() & ~MSR_DE);
254}
255
256static void kvmppc_restore_host_debug_state(struct kvm_vcpu *vcpu)
257{
258 kvmppc_disable_debug_interrupts();
259
260 mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]);
261 mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]);
262 mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]);
263 mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]);
264 mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1);
265 mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2);
266 mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0);
267 mtmsr(vcpu->arch.host_msr);
268}
269
270static void kvmppc_load_guest_debug_registers(struct kvm_vcpu *vcpu)
271{
272 struct kvm_guest_debug *dbg = &vcpu->guest_debug;
273 u32 dbcr0 = 0;
274
275 vcpu->arch.host_msr = mfmsr();
276 kvmppc_disable_debug_interrupts();
277
278 /* Save host debug register state. */
279 vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1);
280 vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2);
281 vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3);
282 vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4);
283 vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0);
284 vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1);
285 vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2);
286
287 /* set registers up for guest */
288
289 if (dbg->bp[0]) {
290 mtspr(SPRN_IAC1, dbg->bp[0]);
291 dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
292 }
293 if (dbg->bp[1]) {
294 mtspr(SPRN_IAC2, dbg->bp[1]);
295 dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
296 }
297 if (dbg->bp[2]) {
298 mtspr(SPRN_IAC3, dbg->bp[2]);
299 dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
300 }
301 if (dbg->bp[3]) {
302 mtspr(SPRN_IAC4, dbg->bp[3]);
303 dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
304 }
305
306 mtspr(SPRN_DBCR0, dbcr0);
307 mtspr(SPRN_DBCR1, 0);
308 mtspr(SPRN_DBCR2, 0);
309}
310
311void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 218void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
312{ 219{
313 int i;
314
315 if (vcpu->guest_debug.enabled) 220 if (vcpu->guest_debug.enabled)
316 kvmppc_load_guest_debug_registers(vcpu); 221 kvmppc_core_load_guest_debugstate(vcpu);
317 222
318 /* Mark every guest entry in the shadow TLB entry modified, so that they 223 kvmppc_core_vcpu_load(vcpu, cpu);
319 * will all be reloaded on the next vcpu run (instead of being
320 * demand-faulted). */
321 for (i = 0; i <= tlb_44x_hwater; i++)
322 kvmppc_tlbe_set_modified(vcpu, i);
323} 224}
324 225
325void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 226void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
326{ 227{
327 if (vcpu->guest_debug.enabled) 228 if (vcpu->guest_debug.enabled)
328 kvmppc_restore_host_debug_state(vcpu); 229 kvmppc_core_load_host_debugstate(vcpu);
329 230
330 /* Don't leave guest TLB entries resident when being de-scheduled. */ 231 /* Don't leave guest TLB entries resident when being de-scheduled. */
331 /* XXX It would be nice to differentiate between heavyweight exit and 232 /* XXX It would be nice to differentiate between heavyweight exit and
332 * sched_out here, since we could avoid the TLB flush for heavyweight 233 * sched_out here, since we could avoid the TLB flush for heavyweight
333 * exits. */ 234 * exits. */
334 _tlbil_all(); 235 _tlbil_all();
236 kvmppc_core_vcpu_put(vcpu);
335} 237}
336 238
337int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, 239int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
@@ -355,14 +257,14 @@ int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
355static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, 257static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
356 struct kvm_run *run) 258 struct kvm_run *run)
357{ 259{
358 u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; 260 ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
359 *gpr = run->dcr.data; 261 *gpr = run->dcr.data;
360} 262}
361 263
362static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, 264static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
363 struct kvm_run *run) 265 struct kvm_run *run)
364{ 266{
365 u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; 267 ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
366 268
367 if (run->mmio.len > sizeof(*gpr)) { 269 if (run->mmio.len > sizeof(*gpr)) {
368 printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); 270 printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len);
@@ -460,7 +362,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
460 vcpu->arch.dcr_needed = 0; 362 vcpu->arch.dcr_needed = 0;
461 } 363 }
462 364
463 kvmppc_check_and_deliver_interrupts(vcpu); 365 kvmppc_core_deliver_interrupts(vcpu);
464 366
465 local_irq_disable(); 367 local_irq_disable();
466 kvm_guest_enter(); 368 kvm_guest_enter();
@@ -478,7 +380,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
478 380
479int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) 381int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
480{ 382{
481 kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL); 383 kvmppc_core_queue_external(vcpu, irq);
482 384
483 if (waitqueue_active(&vcpu->wq)) { 385 if (waitqueue_active(&vcpu->wq)) {
484 wake_up_interruptible(&vcpu->wq); 386 wake_up_interruptible(&vcpu->wq);
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
new file mode 100644
index 000000000000..47ee603f558e
--- /dev/null
+++ b/arch/powerpc/kvm/timing.c
@@ -0,0 +1,239 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
19 */
20
21#include <linux/kvm_host.h>
22#include <linux/fs.h>
23#include <linux/seq_file.h>
24#include <linux/debugfs.h>
25#include <linux/uaccess.h>
26
27#include <asm/time.h>
28#include <asm-generic/div64.h>
29
30#include "timing.h"
31
32void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu)
33{
34 int i;
35
36 /* pause guest execution to avoid concurrent updates */
37 local_irq_disable();
38 mutex_lock(&vcpu->mutex);
39
40 vcpu->arch.last_exit_type = 0xDEAD;
41 for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
42 vcpu->arch.timing_count_type[i] = 0;
43 vcpu->arch.timing_max_duration[i] = 0;
44 vcpu->arch.timing_min_duration[i] = 0xFFFFFFFF;
45 vcpu->arch.timing_sum_duration[i] = 0;
46 vcpu->arch.timing_sum_quad_duration[i] = 0;
47 }
48 vcpu->arch.timing_last_exit = 0;
49 vcpu->arch.timing_exit.tv64 = 0;
50 vcpu->arch.timing_last_enter.tv64 = 0;
51
52 mutex_unlock(&vcpu->mutex);
53 local_irq_enable();
54}
55
56static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
57{
58 u64 old;
59
60 do_div(duration, tb_ticks_per_usec);
61 if (unlikely(duration > 0xFFFFFFFF)) {
62 printk(KERN_ERR"%s - duration too big -> overflow"
63 " duration %lld type %d exit #%d\n",
64 __func__, duration, type,
65 vcpu->arch.timing_count_type[type]);
66 return;
67 }
68
69 vcpu->arch.timing_count_type[type]++;
70
71 /* sum */
72 old = vcpu->arch.timing_sum_duration[type];
73 vcpu->arch.timing_sum_duration[type] += duration;
74 if (unlikely(old > vcpu->arch.timing_sum_duration[type])) {
75 printk(KERN_ERR"%s - wrap adding sum of durations"
76 " old %lld new %lld type %d exit # of type %d\n",
77 __func__, old, vcpu->arch.timing_sum_duration[type],
78 type, vcpu->arch.timing_count_type[type]);
79 }
80
81 /* square sum */
82 old = vcpu->arch.timing_sum_quad_duration[type];
83 vcpu->arch.timing_sum_quad_duration[type] += (duration*duration);
84 if (unlikely(old > vcpu->arch.timing_sum_quad_duration[type])) {
85 printk(KERN_ERR"%s - wrap adding sum of squared durations"
86 " old %lld new %lld type %d exit # of type %d\n",
87 __func__, old,
88 vcpu->arch.timing_sum_quad_duration[type],
89 type, vcpu->arch.timing_count_type[type]);
90 }
91
92 /* set min/max */
93 if (unlikely(duration < vcpu->arch.timing_min_duration[type]))
94 vcpu->arch.timing_min_duration[type] = duration;
95 if (unlikely(duration > vcpu->arch.timing_max_duration[type]))
96 vcpu->arch.timing_max_duration[type] = duration;
97}
98
99void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu)
100{
101 u64 exit = vcpu->arch.timing_last_exit;
102 u64 enter = vcpu->arch.timing_last_enter.tv64;
103
104 /* save exit time, used next exit when the reenter time is known */
105 vcpu->arch.timing_last_exit = vcpu->arch.timing_exit.tv64;
106
107 if (unlikely(vcpu->arch.last_exit_type == 0xDEAD || exit == 0))
108 return; /* skip incomplete cycle (e.g. after reset) */
109
110 /* update statistics for average and standard deviation */
111 add_exit_timing(vcpu, (enter - exit), vcpu->arch.last_exit_type);
112 /* enter -> timing_last_exit is time spent in guest - log this too */
113 add_exit_timing(vcpu, (vcpu->arch.timing_last_exit - enter),
114 TIMEINGUEST);
115}
116
117static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = {
118 [MMIO_EXITS] = "MMIO",
119 [DCR_EXITS] = "DCR",
120 [SIGNAL_EXITS] = "SIGNAL",
121 [ITLB_REAL_MISS_EXITS] = "ITLBREAL",
122 [ITLB_VIRT_MISS_EXITS] = "ITLBVIRT",
123 [DTLB_REAL_MISS_EXITS] = "DTLBREAL",
124 [DTLB_VIRT_MISS_EXITS] = "DTLBVIRT",
125 [SYSCALL_EXITS] = "SYSCALL",
126 [ISI_EXITS] = "ISI",
127 [DSI_EXITS] = "DSI",
128 [EMULATED_INST_EXITS] = "EMULINST",
129 [EMULATED_MTMSRWE_EXITS] = "EMUL_WAIT",
130 [EMULATED_WRTEE_EXITS] = "EMUL_WRTEE",
131 [EMULATED_MTSPR_EXITS] = "EMUL_MTSPR",
132 [EMULATED_MFSPR_EXITS] = "EMUL_MFSPR",
133 [EMULATED_MTMSR_EXITS] = "EMUL_MTMSR",
134 [EMULATED_MFMSR_EXITS] = "EMUL_MFMSR",
135 [EMULATED_TLBSX_EXITS] = "EMUL_TLBSX",
136 [EMULATED_TLBWE_EXITS] = "EMUL_TLBWE",
137 [EMULATED_RFI_EXITS] = "EMUL_RFI",
138 [DEC_EXITS] = "DEC",
139 [EXT_INTR_EXITS] = "EXTINT",
140 [HALT_WAKEUP] = "HALT",
141 [USR_PR_INST] = "USR_PR_INST",
142 [FP_UNAVAIL] = "FP_UNAVAIL",
143 [DEBUG_EXITS] = "DEBUG",
144 [TIMEINGUEST] = "TIMEINGUEST"
145};
146
147static int kvmppc_exit_timing_show(struct seq_file *m, void *private)
148{
149 struct kvm_vcpu *vcpu = m->private;
150 int i;
151
152 seq_printf(m, "%s", "type count min max sum sum_squared\n");
153
154 for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
155 seq_printf(m, "%12s %10d %10lld %10lld %20lld %20lld\n",
156 kvm_exit_names[i],
157 vcpu->arch.timing_count_type[i],
158 vcpu->arch.timing_min_duration[i],
159 vcpu->arch.timing_max_duration[i],
160 vcpu->arch.timing_sum_duration[i],
161 vcpu->arch.timing_sum_quad_duration[i]);
162 }
163 return 0;
164}
165
166/* Write 'c' to clear the timing statistics. */
167static ssize_t kvmppc_exit_timing_write(struct file *file,
168 const char __user *user_buf,
169 size_t count, loff_t *ppos)
170{
171 int err = -EINVAL;
172 char c;
173
174 if (count > 1) {
175 goto done;
176 }
177
178 if (get_user(c, user_buf)) {
179 err = -EFAULT;
180 goto done;
181 }
182
183 if (c == 'c') {
184 struct seq_file *seqf = (struct seq_file *)file->private_data;
185 struct kvm_vcpu *vcpu = seqf->private;
186 /* Write does not affect our buffers previously generated with
187 * show. seq_file is locked here to prevent races of init with
188 * a show call */
189 mutex_lock(&seqf->lock);
190 kvmppc_init_timing_stats(vcpu);
191 mutex_unlock(&seqf->lock);
192 err = count;
193 }
194
195done:
196 return err;
197}
198
199static int kvmppc_exit_timing_open(struct inode *inode, struct file *file)
200{
201 return single_open(file, kvmppc_exit_timing_show, inode->i_private);
202}
203
204static struct file_operations kvmppc_exit_timing_fops = {
205 .owner = THIS_MODULE,
206 .open = kvmppc_exit_timing_open,
207 .read = seq_read,
208 .write = kvmppc_exit_timing_write,
209 .llseek = seq_lseek,
210 .release = single_release,
211};
212
213void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id)
214{
215 static char dbg_fname[50];
216 struct dentry *debugfs_file;
217
218 snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing",
219 current->pid, id);
220 debugfs_file = debugfs_create_file(dbg_fname, 0666,
221 kvm_debugfs_dir, vcpu,
222 &kvmppc_exit_timing_fops);
223
224 if (!debugfs_file) {
225 printk(KERN_ERR"%s: error creating debugfs file %s\n",
226 __func__, dbg_fname);
227 return;
228 }
229
230 vcpu->arch.debugfs_exit_timing = debugfs_file;
231}
232
233void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu)
234{
235 if (vcpu->arch.debugfs_exit_timing) {
236 debugfs_remove(vcpu->arch.debugfs_exit_timing);
237 vcpu->arch.debugfs_exit_timing = NULL;
238 }
239}
diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
new file mode 100644
index 000000000000..bb13b1f3cd5a
--- /dev/null
+++ b/arch/powerpc/kvm/timing.h
@@ -0,0 +1,102 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright IBM Corp. 2008
16 *
17 * Authors: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
18 */
19
20#ifndef __POWERPC_KVM_EXITTIMING_H__
21#define __POWERPC_KVM_EXITTIMING_H__
22
23#include <linux/kvm_host.h>
24#include <asm/kvm_host.h>
25
26#ifdef CONFIG_KVM_EXIT_TIMING
27void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu);
28void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu);
29void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id);
30void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu);
31
32static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type)
33{
34 vcpu->arch.last_exit_type = type;
35}
36
37#else
38/* if exit timing is not configured there is no need to build the c file */
39static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {}
40static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {}
41static inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
42 unsigned int id) {}
43static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
44static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {}
45#endif /* CONFIG_KVM_EXIT_TIMING */
46
47/* account the exit in kvm_stats */
48static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type)
49{
50 /* type has to be known at build time for optimization */
51 BUILD_BUG_ON(__builtin_constant_p(type));
52 switch (type) {
53 case EXT_INTR_EXITS:
54 vcpu->stat.ext_intr_exits++;
55 break;
56 case DEC_EXITS:
57 vcpu->stat.dec_exits++;
58 break;
59 case EMULATED_INST_EXITS:
60 vcpu->stat.emulated_inst_exits++;
61 break;
62 case DCR_EXITS:
63 vcpu->stat.dcr_exits++;
64 break;
65 case DSI_EXITS:
66 vcpu->stat.dsi_exits++;
67 break;
68 case ISI_EXITS:
69 vcpu->stat.isi_exits++;
70 break;
71 case SYSCALL_EXITS:
72 vcpu->stat.syscall_exits++;
73 break;
74 case DTLB_REAL_MISS_EXITS:
75 vcpu->stat.dtlb_real_miss_exits++;
76 break;
77 case DTLB_VIRT_MISS_EXITS:
78 vcpu->stat.dtlb_virt_miss_exits++;
79 break;
80 case MMIO_EXITS:
81 vcpu->stat.mmio_exits++;
82 break;
83 case ITLB_REAL_MISS_EXITS:
84 vcpu->stat.itlb_real_miss_exits++;
85 break;
86 case ITLB_VIRT_MISS_EXITS:
87 vcpu->stat.itlb_virt_miss_exits++;
88 break;
89 case SIGNAL_EXITS:
90 vcpu->stat.signal_exits++;
91 break;
92 }
93}
94
95/* wrapper to set exit time and account for it in kvm_stats */
96static inline void kvmppc_account_exit(struct kvm_vcpu *vcpu, int type)
97{
98 kvmppc_set_exit_type(vcpu, type);
99 kvmppc_account_exit_stat(vcpu, type);
100}
101
102#endif /* __POWERPC_KVM_EXITTIMING_H__ */
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index f7a69021b7bf..84e058f1e1cc 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -332,7 +332,7 @@ static void xics_eoi_lpar(unsigned int virq)
332 lpar_xirr_info_set((0xff << 24) | irq); 332 lpar_xirr_info_set((0xff << 24) | irq);
333} 333}
334 334
335static void xics_set_affinity(unsigned int virq, cpumask_t cpumask) 335static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
336{ 336{
337 unsigned int irq; 337 unsigned int irq;
338 int status; 338 int status;
@@ -870,7 +870,7 @@ void xics_migrate_irqs_away(void)
870 870
871 /* Reset affinity to all cpus */ 871 /* Reset affinity to all cpus */
872 irq_desc[virq].affinity = CPU_MASK_ALL; 872 irq_desc[virq].affinity = CPU_MASK_ALL;
873 desc->chip->set_affinity(virq, CPU_MASK_ALL); 873 desc->chip->set_affinity(virq, cpu_all_mask);
874unlock: 874unlock:
875 spin_unlock_irqrestore(&desc->lock, flags); 875 spin_unlock_irqrestore(&desc->lock, flags);
876 } 876 }
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index c82babb70074..3e0d89dcdba2 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -806,7 +806,7 @@ static void mpic_end_ipi(unsigned int irq)
806 806
807#endif /* CONFIG_SMP */ 807#endif /* CONFIG_SMP */
808 808
809void mpic_set_affinity(unsigned int irq, cpumask_t cpumask) 809void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
810{ 810{
811 struct mpic *mpic = mpic_from_irq(irq); 811 struct mpic *mpic = mpic_from_irq(irq);
812 unsigned int src = mpic_irq_to_hw(irq); 812 unsigned int src = mpic_irq_to_hw(irq);
@@ -818,7 +818,7 @@ void mpic_set_affinity(unsigned int irq, cpumask_t cpumask)
818 } else { 818 } else {
819 cpumask_t tmp; 819 cpumask_t tmp;
820 820
821 cpus_and(tmp, cpumask, cpu_online_map); 821 cpumask_and(&tmp, cpumask, cpu_online_mask);
822 822
823 mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION), 823 mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION),
824 mpic_physmask(cpus_addr(tmp)[0])); 824 mpic_physmask(cpus_addr(tmp)[0]));
diff --git a/arch/powerpc/sysdev/mpic.h b/arch/powerpc/sysdev/mpic.h
index 6209c62a426d..3cef2af10f42 100644
--- a/arch/powerpc/sysdev/mpic.h
+++ b/arch/powerpc/sysdev/mpic.h
@@ -36,6 +36,6 @@ static inline int mpic_pasemi_msi_init(struct mpic *mpic)
36 36
37extern int mpic_set_irq_type(unsigned int virq, unsigned int flow_type); 37extern int mpic_set_irq_type(unsigned int virq, unsigned int flow_type);
38extern void mpic_set_vector(unsigned int virq, unsigned int vector); 38extern void mpic_set_vector(unsigned int virq, unsigned int vector);
39extern void mpic_set_affinity(unsigned int irq, cpumask_t cpumask); 39extern void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask);
40 40
41#endif /* _POWERPC_SYSDEV_MPIC_H */ 41#endif /* _POWERPC_SYSDEV_MPIC_H */
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 8152fefc97b9..19577aeffd7b 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -83,6 +83,7 @@ config S390
83 select HAVE_KRETPROBES 83 select HAVE_KRETPROBES
84 select HAVE_KVM if 64BIT 84 select HAVE_KVM if 64BIT
85 select HAVE_ARCH_TRACEHOOK 85 select HAVE_ARCH_TRACEHOOK
86 select INIT_ALL_POSSIBLE
86 87
87source "init/Kconfig" 88source "init/Kconfig"
88 89
diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h
index e5a6a9ba3adf..d60a2eefb17b 100644
--- a/arch/s390/include/asm/cpu.h
+++ b/arch/s390/include/asm/cpu.h
@@ -14,7 +14,6 @@
14 14
15struct s390_idle_data { 15struct s390_idle_data {
16 spinlock_t lock; 16 spinlock_t lock;
17 unsigned int in_idle;
18 unsigned long long idle_count; 17 unsigned long long idle_count;
19 unsigned long long idle_enter; 18 unsigned long long idle_enter;
20 unsigned long long idle_time; 19 unsigned long long idle_time;
@@ -22,12 +21,12 @@ struct s390_idle_data {
22 21
23DECLARE_PER_CPU(struct s390_idle_data, s390_idle); 22DECLARE_PER_CPU(struct s390_idle_data, s390_idle);
24 23
25void s390_idle_leave(void); 24void vtime_start_cpu(void);
26 25
27static inline void s390_idle_check(void) 26static inline void s390_idle_check(void)
28{ 27{
29 if ((&__get_cpu_var(s390_idle))->in_idle) 28 if ((&__get_cpu_var(s390_idle))->idle_enter != 0ULL)
30 s390_idle_leave(); 29 vtime_start_cpu();
31} 30}
32 31
33#endif /* _ASM_S390_CPU_H_ */ 32#endif /* _ASM_S390_CPU_H_ */
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index 133ce054fc89..521726430afa 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -11,7 +11,7 @@
11 11
12#include <asm/div64.h> 12#include <asm/div64.h>
13 13
14/* We want to use micro-second resolution. */ 14/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
15 15
16typedef unsigned long long cputime_t; 16typedef unsigned long long cputime_t;
17typedef unsigned long long cputime64_t; 17typedef unsigned long long cputime64_t;
@@ -53,9 +53,9 @@ __div(unsigned long long n, unsigned int base)
53#define cputime_ge(__a, __b) ((__a) >= (__b)) 53#define cputime_ge(__a, __b) ((__a) >= (__b))
54#define cputime_lt(__a, __b) ((__a) < (__b)) 54#define cputime_lt(__a, __b) ((__a) < (__b))
55#define cputime_le(__a, __b) ((__a) <= (__b)) 55#define cputime_le(__a, __b) ((__a) <= (__b))
56#define cputime_to_jiffies(__ct) (__div((__ct), 1000000 / HZ)) 56#define cputime_to_jiffies(__ct) (__div((__ct), 4096000000ULL / HZ))
57#define cputime_to_scaled(__ct) (__ct) 57#define cputime_to_scaled(__ct) (__ct)
58#define jiffies_to_cputime(__hz) ((cputime_t)(__hz) * (1000000 / HZ)) 58#define jiffies_to_cputime(__hz) ((cputime_t)(__hz) * (4096000000ULL / HZ))
59 59
60#define cputime64_zero (0ULL) 60#define cputime64_zero (0ULL)
61#define cputime64_add(__a, __b) ((__a) + (__b)) 61#define cputime64_add(__a, __b) ((__a) + (__b))
@@ -64,7 +64,7 @@ __div(unsigned long long n, unsigned int base)
64static inline u64 64static inline u64
65cputime64_to_jiffies64(cputime64_t cputime) 65cputime64_to_jiffies64(cputime64_t cputime)
66{ 66{
67 do_div(cputime, 1000000 / HZ); 67 do_div(cputime, 4096000000ULL / HZ);
68 return cputime; 68 return cputime;
69} 69}
70 70
@@ -74,13 +74,13 @@ cputime64_to_jiffies64(cputime64_t cputime)
74static inline unsigned int 74static inline unsigned int
75cputime_to_msecs(const cputime_t cputime) 75cputime_to_msecs(const cputime_t cputime)
76{ 76{
77 return __div(cputime, 1000); 77 return __div(cputime, 4096000);
78} 78}
79 79
80static inline cputime_t 80static inline cputime_t
81msecs_to_cputime(const unsigned int m) 81msecs_to_cputime(const unsigned int m)
82{ 82{
83 return (cputime_t) m * 1000; 83 return (cputime_t) m * 4096000;
84} 84}
85 85
86/* 86/*
@@ -89,13 +89,13 @@ msecs_to_cputime(const unsigned int m)
89static inline unsigned int 89static inline unsigned int
90cputime_to_secs(const cputime_t cputime) 90cputime_to_secs(const cputime_t cputime)
91{ 91{
92 return __div(cputime, 1000000); 92 return __div(cputime, 2048000000) >> 1;
93} 93}
94 94
95static inline cputime_t 95static inline cputime_t
96secs_to_cputime(const unsigned int s) 96secs_to_cputime(const unsigned int s)
97{ 97{
98 return (cputime_t) s * 1000000; 98 return (cputime_t) s * 4096000000ULL;
99} 99}
100 100
101/* 101/*
@@ -104,7 +104,7 @@ secs_to_cputime(const unsigned int s)
104static inline cputime_t 104static inline cputime_t
105timespec_to_cputime(const struct timespec *value) 105timespec_to_cputime(const struct timespec *value)
106{ 106{
107 return value->tv_nsec / 1000 + (u64) value->tv_sec * 1000000; 107 return value->tv_nsec * 4096 / 1000 + (u64) value->tv_sec * 4096000000ULL;
108} 108}
109 109
110static inline void 110static inline void
@@ -114,12 +114,12 @@ cputime_to_timespec(const cputime_t cputime, struct timespec *value)
114 register_pair rp; 114 register_pair rp;
115 115
116 rp.pair = cputime >> 1; 116 rp.pair = cputime >> 1;
117 asm ("dr %0,%1" : "+d" (rp) : "d" (1000000 >> 1)); 117 asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
118 value->tv_nsec = rp.subreg.even * 1000; 118 value->tv_nsec = rp.subreg.even * 1000 / 4096;
119 value->tv_sec = rp.subreg.odd; 119 value->tv_sec = rp.subreg.odd;
120#else 120#else
121 value->tv_nsec = (cputime % 1000000) * 1000; 121 value->tv_nsec = (cputime % 4096000000ULL) * 1000 / 4096;
122 value->tv_sec = cputime / 1000000; 122 value->tv_sec = cputime / 4096000000ULL;
123#endif 123#endif
124} 124}
125 125
@@ -131,7 +131,7 @@ cputime_to_timespec(const cputime_t cputime, struct timespec *value)
131static inline cputime_t 131static inline cputime_t
132timeval_to_cputime(const struct timeval *value) 132timeval_to_cputime(const struct timeval *value)
133{ 133{
134 return value->tv_usec + (u64) value->tv_sec * 1000000; 134 return value->tv_usec * 4096 + (u64) value->tv_sec * 4096000000ULL;
135} 135}
136 136
137static inline void 137static inline void
@@ -141,12 +141,12 @@ cputime_to_timeval(const cputime_t cputime, struct timeval *value)
141 register_pair rp; 141 register_pair rp;
142 142
143 rp.pair = cputime >> 1; 143 rp.pair = cputime >> 1;
144 asm ("dr %0,%1" : "+d" (rp) : "d" (1000000 >> 1)); 144 asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
145 value->tv_usec = rp.subreg.even; 145 value->tv_usec = rp.subreg.even / 4096;
146 value->tv_sec = rp.subreg.odd; 146 value->tv_sec = rp.subreg.odd;
147#else 147#else
148 value->tv_usec = cputime % 1000000; 148 value->tv_usec = cputime % 4096000000ULL;
149 value->tv_sec = cputime / 1000000; 149 value->tv_sec = cputime / 4096000000ULL;
150#endif 150#endif
151} 151}
152 152
@@ -156,13 +156,13 @@ cputime_to_timeval(const cputime_t cputime, struct timeval *value)
156static inline clock_t 156static inline clock_t
157cputime_to_clock_t(cputime_t cputime) 157cputime_to_clock_t(cputime_t cputime)
158{ 158{
159 return __div(cputime, 1000000 / USER_HZ); 159 return __div(cputime, 4096000000ULL / USER_HZ);
160} 160}
161 161
162static inline cputime_t 162static inline cputime_t
163clock_t_to_cputime(unsigned long x) 163clock_t_to_cputime(unsigned long x)
164{ 164{
165 return (cputime_t) x * (1000000 / USER_HZ); 165 return (cputime_t) x * (4096000000ULL / USER_HZ);
166} 166}
167 167
168/* 168/*
@@ -171,7 +171,7 @@ clock_t_to_cputime(unsigned long x)
171static inline clock_t 171static inline clock_t
172cputime64_to_clock_t(cputime64_t cputime) 172cputime64_to_clock_t(cputime64_t cputime)
173{ 173{
174 return __div(cputime, 1000000 / USER_HZ); 174 return __div(cputime, 4096000000ULL / USER_HZ);
175} 175}
176 176
177#endif /* _S390_CPUTIME_H */ 177#endif /* _S390_CPUTIME_H */
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 0bc51d52a899..ffdef5fe8587 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -67,11 +67,11 @@
67#define __LC_SYNC_ENTER_TIMER 0x248 67#define __LC_SYNC_ENTER_TIMER 0x248
68#define __LC_ASYNC_ENTER_TIMER 0x250 68#define __LC_ASYNC_ENTER_TIMER 0x250
69#define __LC_EXIT_TIMER 0x258 69#define __LC_EXIT_TIMER 0x258
70#define __LC_LAST_UPDATE_TIMER 0x260 70#define __LC_USER_TIMER 0x260
71#define __LC_USER_TIMER 0x268 71#define __LC_SYSTEM_TIMER 0x268
72#define __LC_SYSTEM_TIMER 0x270 72#define __LC_STEAL_TIMER 0x270
73#define __LC_LAST_UPDATE_CLOCK 0x278 73#define __LC_LAST_UPDATE_TIMER 0x278
74#define __LC_STEAL_CLOCK 0x280 74#define __LC_LAST_UPDATE_CLOCK 0x280
75#define __LC_RETURN_MCCK_PSW 0x288 75#define __LC_RETURN_MCCK_PSW 0x288
76#define __LC_KERNEL_STACK 0xC40 76#define __LC_KERNEL_STACK 0xC40
77#define __LC_THREAD_INFO 0xC44 77#define __LC_THREAD_INFO 0xC44
@@ -89,11 +89,11 @@
89#define __LC_SYNC_ENTER_TIMER 0x250 89#define __LC_SYNC_ENTER_TIMER 0x250
90#define __LC_ASYNC_ENTER_TIMER 0x258 90#define __LC_ASYNC_ENTER_TIMER 0x258
91#define __LC_EXIT_TIMER 0x260 91#define __LC_EXIT_TIMER 0x260
92#define __LC_LAST_UPDATE_TIMER 0x268 92#define __LC_USER_TIMER 0x268
93#define __LC_USER_TIMER 0x270 93#define __LC_SYSTEM_TIMER 0x270
94#define __LC_SYSTEM_TIMER 0x278 94#define __LC_STEAL_TIMER 0x278
95#define __LC_LAST_UPDATE_CLOCK 0x280 95#define __LC_LAST_UPDATE_TIMER 0x280
96#define __LC_STEAL_CLOCK 0x288 96#define __LC_LAST_UPDATE_CLOCK 0x288
97#define __LC_RETURN_MCCK_PSW 0x290 97#define __LC_RETURN_MCCK_PSW 0x290
98#define __LC_KERNEL_STACK 0xD40 98#define __LC_KERNEL_STACK 0xD40
99#define __LC_THREAD_INFO 0xD48 99#define __LC_THREAD_INFO 0xD48
@@ -106,8 +106,10 @@
106#define __LC_IPLDEV 0xDB8 106#define __LC_IPLDEV 0xDB8
107#define __LC_CURRENT 0xDD8 107#define __LC_CURRENT 0xDD8
108#define __LC_INT_CLOCK 0xDE8 108#define __LC_INT_CLOCK 0xDE8
109#define __LC_VDSO_PER_CPU 0xE38
109#endif /* __s390x__ */ 110#endif /* __s390x__ */
110 111
112#define __LC_PASTE 0xE40
111 113
112#define __LC_PANIC_MAGIC 0xE00 114#define __LC_PANIC_MAGIC 0xE00
113#ifndef __s390x__ 115#ifndef __s390x__
@@ -252,11 +254,11 @@ struct _lowcore
252 __u64 sync_enter_timer; /* 0x248 */ 254 __u64 sync_enter_timer; /* 0x248 */
253 __u64 async_enter_timer; /* 0x250 */ 255 __u64 async_enter_timer; /* 0x250 */
254 __u64 exit_timer; /* 0x258 */ 256 __u64 exit_timer; /* 0x258 */
255 __u64 last_update_timer; /* 0x260 */ 257 __u64 user_timer; /* 0x260 */
256 __u64 user_timer; /* 0x268 */ 258 __u64 system_timer; /* 0x268 */
257 __u64 system_timer; /* 0x270 */ 259 __u64 steal_timer; /* 0x270 */
258 __u64 last_update_clock; /* 0x278 */ 260 __u64 last_update_timer; /* 0x278 */
259 __u64 steal_clock; /* 0x280 */ 261 __u64 last_update_clock; /* 0x280 */
260 psw_t return_mcck_psw; /* 0x288 */ 262 psw_t return_mcck_psw; /* 0x288 */
261 __u8 pad8[0xc00-0x290]; /* 0x290 */ 263 __u8 pad8[0xc00-0x290]; /* 0x290 */
262 264
@@ -343,11 +345,11 @@ struct _lowcore
343 __u64 sync_enter_timer; /* 0x250 */ 345 __u64 sync_enter_timer; /* 0x250 */
344 __u64 async_enter_timer; /* 0x258 */ 346 __u64 async_enter_timer; /* 0x258 */
345 __u64 exit_timer; /* 0x260 */ 347 __u64 exit_timer; /* 0x260 */
346 __u64 last_update_timer; /* 0x268 */ 348 __u64 user_timer; /* 0x268 */
347 __u64 user_timer; /* 0x270 */ 349 __u64 system_timer; /* 0x270 */
348 __u64 system_timer; /* 0x278 */ 350 __u64 steal_timer; /* 0x278 */
349 __u64 last_update_clock; /* 0x280 */ 351 __u64 last_update_timer; /* 0x280 */
350 __u64 steal_clock; /* 0x288 */ 352 __u64 last_update_clock; /* 0x288 */
351 psw_t return_mcck_psw; /* 0x290 */ 353 psw_t return_mcck_psw; /* 0x290 */
352 __u8 pad8[0xc00-0x2a0]; /* 0x2a0 */ 354 __u8 pad8[0xc00-0x2a0]; /* 0x2a0 */
353 /* System info area */ 355 /* System info area */
@@ -381,7 +383,12 @@ struct _lowcore
381 /* whether the kernel died with panic() or not */ 383 /* whether the kernel died with panic() or not */
382 __u32 panic_magic; /* 0xe00 */ 384 __u32 panic_magic; /* 0xe00 */
383 385
384 __u8 pad13[0x11b8-0xe04]; /* 0xe04 */ 386 /* Per cpu primary space access list */
387 __u8 pad_0xe04[0xe3c-0xe04]; /* 0xe04 */
388 __u32 vdso_per_cpu_data; /* 0xe3c */
389 __u32 paste[16]; /* 0xe40 */
390
391 __u8 pad13[0x11b8-0xe80]; /* 0xe80 */
385 392
386 /* 64 bit extparam used for pfault, diag 250 etc */ 393 /* 64 bit extparam used for pfault, diag 250 etc */
387 __u64 ext_params2; /* 0x11B8 */ 394 __u64 ext_params2; /* 0x11B8 */
diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h
index 024ef42ed6d7..3a8b26eb1f2e 100644
--- a/arch/s390/include/asm/system.h
+++ b/arch/s390/include/asm/system.h
@@ -99,7 +99,7 @@ static inline void restore_access_regs(unsigned int *acrs)
99 prev = __switch_to(prev,next); \ 99 prev = __switch_to(prev,next); \
100} while (0) 100} while (0)
101 101
102extern void account_vtime(struct task_struct *); 102extern void account_vtime(struct task_struct *, struct task_struct *);
103extern void account_tick_vtime(struct task_struct *); 103extern void account_tick_vtime(struct task_struct *);
104extern void account_system_vtime(struct task_struct *); 104extern void account_system_vtime(struct task_struct *);
105 105
@@ -121,7 +121,7 @@ static inline void cmma_init(void) { }
121 121
122#define finish_arch_switch(prev) do { \ 122#define finish_arch_switch(prev) do { \
123 set_fs(current->thread.mm_segment); \ 123 set_fs(current->thread.mm_segment); \
124 account_vtime(prev); \ 124 account_vtime(prev, current); \
125} while (0) 125} while (0)
126 126
127#define nop() asm volatile("nop") 127#define nop() asm volatile("nop")
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index c1eaf9604da7..c544aa524535 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -47,6 +47,8 @@ struct thread_info {
47 unsigned int cpu; /* current CPU */ 47 unsigned int cpu; /* current CPU */
48 int preempt_count; /* 0 => preemptable, <0 => BUG */ 48 int preempt_count; /* 0 => preemptable, <0 => BUG */
49 struct restart_block restart_block; 49 struct restart_block restart_block;
50 __u64 user_timer;
51 __u64 system_timer;
50}; 52};
51 53
52/* 54/*
diff --git a/arch/s390/include/asm/timer.h b/arch/s390/include/asm/timer.h
index 61705d60f995..e4bcab739c19 100644
--- a/arch/s390/include/asm/timer.h
+++ b/arch/s390/include/asm/timer.h
@@ -23,20 +23,18 @@ struct vtimer_list {
23 __u64 expires; 23 __u64 expires;
24 __u64 interval; 24 __u64 interval;
25 25
26 spinlock_t lock;
27 unsigned long magic;
28
29 void (*function)(unsigned long); 26 void (*function)(unsigned long);
30 unsigned long data; 27 unsigned long data;
31}; 28};
32 29
33/* the offset value will wrap after ca. 71 years */ 30/* the vtimer value will wrap after ca. 71 years */
34struct vtimer_queue { 31struct vtimer_queue {
35 struct list_head list; 32 struct list_head list;
36 spinlock_t lock; 33 spinlock_t lock;
37 __u64 to_expire; /* current event expire time */ 34 __u64 timer; /* last programmed timer */
38 __u64 offset; /* list offset to zero */ 35 __u64 elapsed; /* elapsed time of timer expire values */
39 __u64 idle; /* temp var for idle */ 36 __u64 idle; /* temp var for idle */
37 int do_spt; /* =1: reprogram cpu timer in idle */
40}; 38};
41 39
42extern void init_virt_timer(struct vtimer_list *timer); 40extern void init_virt_timer(struct vtimer_list *timer);
@@ -48,8 +46,8 @@ extern int del_virt_timer(struct vtimer_list *timer);
48extern void init_cpu_vtimer(void); 46extern void init_cpu_vtimer(void);
49extern void vtime_init(void); 47extern void vtime_init(void);
50 48
51extern void vtime_start_cpu_timer(void); 49extern void vtime_stop_cpu(void);
52extern void vtime_stop_cpu_timer(void); 50extern void vtime_start_leave(void);
53 51
54#endif /* __KERNEL__ */ 52#endif /* __KERNEL__ */
55 53
diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
index a44f4fe16a35..7bdd7c8ebc91 100644
--- a/arch/s390/include/asm/vdso.h
+++ b/arch/s390/include/asm/vdso.h
@@ -12,9 +12,9 @@
12#ifndef __ASSEMBLY__ 12#ifndef __ASSEMBLY__
13 13
14/* 14/*
15 * Note about this structure: 15 * Note about the vdso_data and vdso_per_cpu_data structures:
16 * 16 *
17 * NEVER USE THIS IN USERSPACE CODE DIRECTLY. The layout of this 17 * NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the
18 * structure is supposed to be known only to the function in the vdso 18 * structure is supposed to be known only to the function in the vdso
19 * itself and may change without notice. 19 * itself and may change without notice.
20 */ 20 */
@@ -28,10 +28,21 @@ struct vdso_data {
28 __u64 wtom_clock_nsec; /* 0x28 */ 28 __u64 wtom_clock_nsec; /* 0x28 */
29 __u32 tz_minuteswest; /* Minutes west of Greenwich 0x30 */ 29 __u32 tz_minuteswest; /* Minutes west of Greenwich 0x30 */
30 __u32 tz_dsttime; /* Type of dst correction 0x34 */ 30 __u32 tz_dsttime; /* Type of dst correction 0x34 */
31 __u32 ectg_available;
32};
33
34struct vdso_per_cpu_data {
35 __u64 ectg_timer_base;
36 __u64 ectg_user_time;
31}; 37};
32 38
33extern struct vdso_data *vdso_data; 39extern struct vdso_data *vdso_data;
34 40
41#ifdef CONFIG_64BIT
42int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore);
43void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore);
44#endif
45
35#endif /* __ASSEMBLY__ */ 46#endif /* __ASSEMBLY__ */
36 47
37#endif /* __KERNEL__ */ 48#endif /* __KERNEL__ */
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index e641f60bac99..67a60016babb 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -48,6 +48,11 @@ int main(void)
48 DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec)); 48 DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec));
49 DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); 49 DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
50 DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest)); 50 DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest));
51 DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available));
52 DEFINE(__VDSO_ECTG_BASE,
53 offsetof(struct vdso_per_cpu_data, ectg_timer_base));
54 DEFINE(__VDSO_ECTG_USER,
55 offsetof(struct vdso_per_cpu_data, ectg_user_time));
51 /* constants used by the vdso */ 56 /* constants used by the vdso */
52 DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); 57 DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
53 DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); 58 DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 55de521aef77..1268aa2991bf 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -583,8 +583,8 @@ kernel_per:
583 583
584 .globl io_int_handler 584 .globl io_int_handler
585io_int_handler: 585io_int_handler:
586 stpt __LC_ASYNC_ENTER_TIMER
587 stck __LC_INT_CLOCK 586 stck __LC_INT_CLOCK
587 stpt __LC_ASYNC_ENTER_TIMER
588 SAVE_ALL_BASE __LC_SAVE_AREA+16 588 SAVE_ALL_BASE __LC_SAVE_AREA+16
589 SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+16 589 SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+16
590 CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+16 590 CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+16
@@ -723,8 +723,8 @@ io_notify_resume:
723 723
724 .globl ext_int_handler 724 .globl ext_int_handler
725ext_int_handler: 725ext_int_handler:
726 stpt __LC_ASYNC_ENTER_TIMER
727 stck __LC_INT_CLOCK 726 stck __LC_INT_CLOCK
727 stpt __LC_ASYNC_ENTER_TIMER
728 SAVE_ALL_BASE __LC_SAVE_AREA+16 728 SAVE_ALL_BASE __LC_SAVE_AREA+16
729 SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16 729 SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16
730 CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16 730 CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16
@@ -750,6 +750,7 @@ __critical_end:
750 750
751 .globl mcck_int_handler 751 .globl mcck_int_handler
752mcck_int_handler: 752mcck_int_handler:
753 stck __LC_INT_CLOCK
753 spt __LC_CPU_TIMER_SAVE_AREA # revalidate cpu timer 754 spt __LC_CPU_TIMER_SAVE_AREA # revalidate cpu timer
754 lm %r0,%r15,__LC_GPREGS_SAVE_AREA # revalidate gprs 755 lm %r0,%r15,__LC_GPREGS_SAVE_AREA # revalidate gprs
755 SAVE_ALL_BASE __LC_SAVE_AREA+32 756 SAVE_ALL_BASE __LC_SAVE_AREA+32
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 16bb4fd1a403..c6fbde13971a 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -177,8 +177,11 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
177 .if !\sync 177 .if !\sync
178 ni \psworg+1,0xfd # clear wait state bit 178 ni \psworg+1,0xfd # clear wait state bit
179 .endif 179 .endif
180 lmg %r0,%r15,SP_R0(%r15) # load gprs 0-15 of user 180 lg %r14,__LC_VDSO_PER_CPU
181 lmg %r0,%r13,SP_R0(%r15) # load gprs 0-13 of user
181 stpt __LC_EXIT_TIMER 182 stpt __LC_EXIT_TIMER
183 mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
184 lmg %r14,%r15,SP_R14(%r15) # load grps 14-15 of user
182 lpswe \psworg # back to caller 185 lpswe \psworg # back to caller
183 .endm 186 .endm
184 187
@@ -559,8 +562,8 @@ kernel_per:
559 */ 562 */
560 .globl io_int_handler 563 .globl io_int_handler
561io_int_handler: 564io_int_handler:
562 stpt __LC_ASYNC_ENTER_TIMER
563 stck __LC_INT_CLOCK 565 stck __LC_INT_CLOCK
566 stpt __LC_ASYNC_ENTER_TIMER
564 SAVE_ALL_BASE __LC_SAVE_AREA+32 567 SAVE_ALL_BASE __LC_SAVE_AREA+32
565 SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+32 568 SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+32
566 CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+32 569 CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+32
@@ -721,8 +724,8 @@ io_notify_resume:
721 */ 724 */
722 .globl ext_int_handler 725 .globl ext_int_handler
723ext_int_handler: 726ext_int_handler:
724 stpt __LC_ASYNC_ENTER_TIMER
725 stck __LC_INT_CLOCK 727 stck __LC_INT_CLOCK
728 stpt __LC_ASYNC_ENTER_TIMER
726 SAVE_ALL_BASE __LC_SAVE_AREA+32 729 SAVE_ALL_BASE __LC_SAVE_AREA+32
727 SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32 730 SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32
728 CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32 731 CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32
@@ -746,6 +749,7 @@ __critical_end:
746 */ 749 */
747 .globl mcck_int_handler 750 .globl mcck_int_handler
748mcck_int_handler: 751mcck_int_handler:
752 stck __LC_INT_CLOCK
749 la %r1,4095 # revalidate r1 753 la %r1,4095 # revalidate r1
750 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer 754 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer
751 lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs 755 lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs
@@ -979,23 +983,23 @@ cleanup_sysc_return:
979 983
980cleanup_sysc_leave: 984cleanup_sysc_leave:
981 clc 8(8,%r12),BASED(cleanup_sysc_leave_insn) 985 clc 8(8,%r12),BASED(cleanup_sysc_leave_insn)
982 je 2f 986 je 3f
983 mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
984 clc 8(8,%r12),BASED(cleanup_sysc_leave_insn+8) 987 clc 8(8,%r12),BASED(cleanup_sysc_leave_insn+8)
985 je 2f 988 jhe 0f
986 mvc __LC_RETURN_PSW(16),SP_PSW(%r15) 989 mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
9900: mvc __LC_RETURN_PSW(16),SP_PSW(%r15)
987 cghi %r12,__LC_MCK_OLD_PSW 991 cghi %r12,__LC_MCK_OLD_PSW
988 jne 0f 992 jne 1f
989 mvc __LC_SAVE_AREA+64(32),SP_R12(%r15) 993 mvc __LC_SAVE_AREA+64(32),SP_R12(%r15)
990 j 1f 994 j 2f
9910: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) 9951: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15)
9921: lmg %r0,%r11,SP_R0(%r15) 9962: lmg %r0,%r11,SP_R0(%r15)
993 lg %r15,SP_R15(%r15) 997 lg %r15,SP_R15(%r15)
9942: la %r12,__LC_RETURN_PSW 9983: la %r12,__LC_RETURN_PSW
995 br %r14 999 br %r14
996cleanup_sysc_leave_insn: 1000cleanup_sysc_leave_insn:
997 .quad sysc_done - 4 1001 .quad sysc_done - 4
998 .quad sysc_done - 8 1002 .quad sysc_done - 16
999 1003
1000cleanup_io_return: 1004cleanup_io_return:
1001 mvc __LC_RETURN_PSW(8),0(%r12) 1005 mvc __LC_RETURN_PSW(8),0(%r12)
@@ -1005,23 +1009,23 @@ cleanup_io_return:
1005 1009
1006cleanup_io_leave: 1010cleanup_io_leave:
1007 clc 8(8,%r12),BASED(cleanup_io_leave_insn) 1011 clc 8(8,%r12),BASED(cleanup_io_leave_insn)
1008 je 2f 1012 je 3f
1009 mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
1010 clc 8(8,%r12),BASED(cleanup_io_leave_insn+8) 1013 clc 8(8,%r12),BASED(cleanup_io_leave_insn+8)
1011 je 2f 1014 jhe 0f
1012 mvc __LC_RETURN_PSW(16),SP_PSW(%r15) 1015 mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
10160: mvc __LC_RETURN_PSW(16),SP_PSW(%r15)
1013 cghi %r12,__LC_MCK_OLD_PSW 1017 cghi %r12,__LC_MCK_OLD_PSW
1014 jne 0f 1018 jne 1f
1015 mvc __LC_SAVE_AREA+64(32),SP_R12(%r15) 1019 mvc __LC_SAVE_AREA+64(32),SP_R12(%r15)
1016 j 1f 1020 j 2f
10170: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) 10211: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15)
10181: lmg %r0,%r11,SP_R0(%r15) 10222: lmg %r0,%r11,SP_R0(%r15)
1019 lg %r15,SP_R15(%r15) 1023 lg %r15,SP_R15(%r15)
10202: la %r12,__LC_RETURN_PSW 10243: la %r12,__LC_RETURN_PSW
1021 br %r14 1025 br %r14
1022cleanup_io_leave_insn: 1026cleanup_io_leave_insn:
1023 .quad io_done - 4 1027 .quad io_done - 4
1024 .quad io_done - 8 1028 .quad io_done - 16
1025 1029
1026/* 1030/*
1027 * Integer constants 1031 * Integer constants
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 3ccd36b24b8f..f9f70aa15244 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -87,6 +87,8 @@ startup_continue:
87 lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area 87 lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area
88 # move IPL device to lowcore 88 # move IPL device to lowcore
89 mvc __LC_IPLDEV(4),IPL_DEVICE+4-PARMAREA(%r12) 89 mvc __LC_IPLDEV(4),IPL_DEVICE+4-PARMAREA(%r12)
90 lghi %r0,__LC_PASTE
91 stg %r0,__LC_VDSO_PER_CPU
90# 92#
91# Setup stack 93# Setup stack
92# 94#
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 04f8c67a6101..b6110bdf8dc2 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -38,6 +38,7 @@
38#include <linux/utsname.h> 38#include <linux/utsname.h>
39#include <linux/tick.h> 39#include <linux/tick.h>
40#include <linux/elfcore.h> 40#include <linux/elfcore.h>
41#include <linux/kernel_stat.h>
41#include <asm/uaccess.h> 42#include <asm/uaccess.h>
42#include <asm/pgtable.h> 43#include <asm/pgtable.h>
43#include <asm/system.h> 44#include <asm/system.h>
@@ -45,7 +46,6 @@
45#include <asm/processor.h> 46#include <asm/processor.h>
46#include <asm/irq.h> 47#include <asm/irq.h>
47#include <asm/timer.h> 48#include <asm/timer.h>
48#include <asm/cpu.h>
49#include "entry.h" 49#include "entry.h"
50 50
51asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); 51asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
@@ -75,36 +75,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
75 return sf->gprs[8]; 75 return sf->gprs[8];
76} 76}
77 77
78DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = {
79 .lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock)
80};
81
82static int s390_idle_enter(void)
83{
84 struct s390_idle_data *idle;
85
86 idle = &__get_cpu_var(s390_idle);
87 spin_lock(&idle->lock);
88 idle->idle_count++;
89 idle->in_idle = 1;
90 idle->idle_enter = get_clock();
91 spin_unlock(&idle->lock);
92 vtime_stop_cpu_timer();
93 return NOTIFY_OK;
94}
95
96void s390_idle_leave(void)
97{
98 struct s390_idle_data *idle;
99
100 vtime_start_cpu_timer();
101 idle = &__get_cpu_var(s390_idle);
102 spin_lock(&idle->lock);
103 idle->idle_time += get_clock() - idle->idle_enter;
104 idle->in_idle = 0;
105 spin_unlock(&idle->lock);
106}
107
108extern void s390_handle_mcck(void); 78extern void s390_handle_mcck(void);
109/* 79/*
110 * The idle loop on a S390... 80 * The idle loop on a S390...
@@ -117,10 +87,6 @@ static void default_idle(void)
117 local_irq_enable(); 87 local_irq_enable();
118 return; 88 return;
119 } 89 }
120 if (s390_idle_enter() == NOTIFY_BAD) {
121 local_irq_enable();
122 return;
123 }
124#ifdef CONFIG_HOTPLUG_CPU 90#ifdef CONFIG_HOTPLUG_CPU
125 if (cpu_is_offline(smp_processor_id())) { 91 if (cpu_is_offline(smp_processor_id())) {
126 preempt_enable_no_resched(); 92 preempt_enable_no_resched();
@@ -130,7 +96,6 @@ static void default_idle(void)
130 local_mcck_disable(); 96 local_mcck_disable();
131 if (test_thread_flag(TIF_MCCK_PENDING)) { 97 if (test_thread_flag(TIF_MCCK_PENDING)) {
132 local_mcck_enable(); 98 local_mcck_enable();
133 s390_idle_leave();
134 local_irq_enable(); 99 local_irq_enable();
135 s390_handle_mcck(); 100 s390_handle_mcck();
136 return; 101 return;
@@ -138,9 +103,9 @@ static void default_idle(void)
138 trace_hardirqs_on(); 103 trace_hardirqs_on();
139 /* Don't trace preempt off for idle. */ 104 /* Don't trace preempt off for idle. */
140 stop_critical_timings(); 105 stop_critical_timings();
141 /* Wait for external, I/O or machine check interrupt. */ 106 /* Stop virtual timer and halt the cpu. */
142 __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | 107 vtime_stop_cpu();
143 PSW_MASK_IO | PSW_MASK_EXT); 108 /* Reenable preemption tracer. */
144 start_critical_timings(); 109 start_critical_timings();
145} 110}
146 111
diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c
index e019b419efc6..a0d2d55d7fb3 100644
--- a/arch/s390/kernel/s390_ext.c
+++ b/arch/s390/kernel/s390_ext.c
@@ -119,8 +119,8 @@ void do_extint(struct pt_regs *regs, unsigned short code)
119 struct pt_regs *old_regs; 119 struct pt_regs *old_regs;
120 120
121 old_regs = set_irq_regs(regs); 121 old_regs = set_irq_regs(regs);
122 irq_enter();
123 s390_idle_check(); 122 s390_idle_check();
123 irq_enter();
124 if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) 124 if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
125 /* Serve timer interrupts first. */ 125 /* Serve timer interrupts first. */
126 clock_comparator_work(); 126 clock_comparator_work();
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index b7a1efd5522c..d825f4950e4e 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -427,6 +427,8 @@ setup_lowcore(void)
427 /* enable extended save area */ 427 /* enable extended save area */
428 __ctl_set_bit(14, 29); 428 __ctl_set_bit(14, 29);
429 } 429 }
430#else
431 lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0];
430#endif 432#endif
431 set_prefix((u32)(unsigned long) lc); 433 set_prefix((u32)(unsigned long) lc);
432} 434}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 6fc78541dc57..9c0ccb532a45 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -47,6 +47,7 @@
47#include <asm/lowcore.h> 47#include <asm/lowcore.h>
48#include <asm/sclp.h> 48#include <asm/sclp.h>
49#include <asm/cpu.h> 49#include <asm/cpu.h>
50#include <asm/vdso.h>
50#include "entry.h" 51#include "entry.h"
51 52
52/* 53/*
@@ -55,12 +56,6 @@
55struct _lowcore *lowcore_ptr[NR_CPUS]; 56struct _lowcore *lowcore_ptr[NR_CPUS];
56EXPORT_SYMBOL(lowcore_ptr); 57EXPORT_SYMBOL(lowcore_ptr);
57 58
58cpumask_t cpu_online_map = CPU_MASK_NONE;
59EXPORT_SYMBOL(cpu_online_map);
60
61cpumask_t cpu_possible_map = CPU_MASK_ALL;
62EXPORT_SYMBOL(cpu_possible_map);
63
64static struct task_struct *current_set[NR_CPUS]; 59static struct task_struct *current_set[NR_CPUS];
65 60
66static u8 smp_cpu_type; 61static u8 smp_cpu_type;
@@ -506,6 +501,9 @@ static int __cpuinit smp_alloc_lowcore(int cpu)
506 goto out; 501 goto out;
507 lowcore->extended_save_area_addr = (u32) save_area; 502 lowcore->extended_save_area_addr = (u32) save_area;
508 } 503 }
504#else
505 if (vdso_alloc_per_cpu(cpu, lowcore))
506 goto out;
509#endif 507#endif
510 lowcore_ptr[cpu] = lowcore; 508 lowcore_ptr[cpu] = lowcore;
511 return 0; 509 return 0;
@@ -528,6 +526,8 @@ static void smp_free_lowcore(int cpu)
528#ifndef CONFIG_64BIT 526#ifndef CONFIG_64BIT
529 if (MACHINE_HAS_IEEE) 527 if (MACHINE_HAS_IEEE)
530 free_page((unsigned long) lowcore->extended_save_area_addr); 528 free_page((unsigned long) lowcore->extended_save_area_addr);
529#else
530 vdso_free_per_cpu(cpu, lowcore);
531#endif 531#endif
532 free_page(lowcore->panic_stack - PAGE_SIZE); 532 free_page(lowcore->panic_stack - PAGE_SIZE);
533 free_pages(lowcore->async_stack - ASYNC_SIZE, ASYNC_ORDER); 533 free_pages(lowcore->async_stack - ASYNC_SIZE, ASYNC_ORDER);
@@ -670,6 +670,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
670 lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, lc_order); 670 lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, lc_order);
671 panic_stack = __get_free_page(GFP_KERNEL); 671 panic_stack = __get_free_page(GFP_KERNEL);
672 async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); 672 async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
673 BUG_ON(!lowcore || !panic_stack || !async_stack);
673#ifndef CONFIG_64BIT 674#ifndef CONFIG_64BIT
674 if (MACHINE_HAS_IEEE) 675 if (MACHINE_HAS_IEEE)
675 save_area = get_zeroed_page(GFP_KERNEL); 676 save_area = get_zeroed_page(GFP_KERNEL);
@@ -683,6 +684,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
683#ifndef CONFIG_64BIT 684#ifndef CONFIG_64BIT
684 if (MACHINE_HAS_IEEE) 685 if (MACHINE_HAS_IEEE)
685 lowcore->extended_save_area_addr = (u32) save_area; 686 lowcore->extended_save_area_addr = (u32) save_area;
687#else
688 BUG_ON(vdso_alloc_per_cpu(smp_processor_id(), lowcore));
686#endif 689#endif
687 set_prefix((u32)(unsigned long) lowcore); 690 set_prefix((u32)(unsigned long) lowcore);
688 local_mcck_enable(); 691 local_mcck_enable();
@@ -851,9 +854,11 @@ static ssize_t show_idle_count(struct sys_device *dev,
851 unsigned long long idle_count; 854 unsigned long long idle_count;
852 855
853 idle = &per_cpu(s390_idle, dev->id); 856 idle = &per_cpu(s390_idle, dev->id);
854 spin_lock_irq(&idle->lock); 857 spin_lock(&idle->lock);
855 idle_count = idle->idle_count; 858 idle_count = idle->idle_count;
856 spin_unlock_irq(&idle->lock); 859 if (idle->idle_enter)
860 idle_count++;
861 spin_unlock(&idle->lock);
857 return sprintf(buf, "%llu\n", idle_count); 862 return sprintf(buf, "%llu\n", idle_count);
858} 863}
859static SYSDEV_ATTR(idle_count, 0444, show_idle_count, NULL); 864static SYSDEV_ATTR(idle_count, 0444, show_idle_count, NULL);
@@ -862,18 +867,17 @@ static ssize_t show_idle_time(struct sys_device *dev,
862 struct sysdev_attribute *attr, char *buf) 867 struct sysdev_attribute *attr, char *buf)
863{ 868{
864 struct s390_idle_data *idle; 869 struct s390_idle_data *idle;
865 unsigned long long new_time; 870 unsigned long long now, idle_time, idle_enter;
866 871
867 idle = &per_cpu(s390_idle, dev->id); 872 idle = &per_cpu(s390_idle, dev->id);
868 spin_lock_irq(&idle->lock); 873 spin_lock(&idle->lock);
869 if (idle->in_idle) { 874 now = get_clock();
870 new_time = get_clock(); 875 idle_time = idle->idle_time;
871 idle->idle_time += new_time - idle->idle_enter; 876 idle_enter = idle->idle_enter;
872 idle->idle_enter = new_time; 877 if (idle_enter != 0ULL && idle_enter < now)
873 } 878 idle_time += now - idle_enter;
874 new_time = idle->idle_time; 879 spin_unlock(&idle->lock);
875 spin_unlock_irq(&idle->lock); 880 return sprintf(buf, "%llu\n", idle_time >> 12);
876 return sprintf(buf, "%llu\n", new_time >> 12);
877} 881}
878static SYSDEV_ATTR(idle_time_us, 0444, show_idle_time, NULL); 882static SYSDEV_ATTR(idle_time_us, 0444, show_idle_time, NULL);
879 883
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 5be981a36c3e..d649600df5b9 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -160,7 +160,7 @@ void init_cpu_timer(void)
160 cd->min_delta_ns = 1; 160 cd->min_delta_ns = 1;
161 cd->max_delta_ns = LONG_MAX; 161 cd->max_delta_ns = LONG_MAX;
162 cd->rating = 400; 162 cd->rating = 400;
163 cd->cpumask = cpumask_of_cpu(cpu); 163 cd->cpumask = cpumask_of(cpu);
164 cd->set_next_event = s390_next_event; 164 cd->set_next_event = s390_next_event;
165 cd->set_mode = s390_set_mode; 165 cd->set_mode = s390_set_mode;
166 166
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 10a6ccef4412..25a6a82f1c02 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -31,9 +31,6 @@
31#include <asm/sections.h> 31#include <asm/sections.h>
32#include <asm/vdso.h> 32#include <asm/vdso.h>
33 33
34/* Max supported size for symbol names */
35#define MAX_SYMNAME 64
36
37#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) 34#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
38extern char vdso32_start, vdso32_end; 35extern char vdso32_start, vdso32_end;
39static void *vdso32_kbase = &vdso32_start; 36static void *vdso32_kbase = &vdso32_start;
@@ -71,6 +68,119 @@ static union {
71struct vdso_data *vdso_data = &vdso_data_store.data; 68struct vdso_data *vdso_data = &vdso_data_store.data;
72 69
73/* 70/*
71 * Setup vdso data page.
72 */
73static void vdso_init_data(struct vdso_data *vd)
74{
75 unsigned int facility_list;
76
77 facility_list = stfl();
78 vd->ectg_available = switch_amode && (facility_list & 1);
79}
80
81#ifdef CONFIG_64BIT
82/*
83 * Setup per cpu vdso data page.
84 */
85static void vdso_init_per_cpu_data(int cpu, struct vdso_per_cpu_data *vpcd)
86{
87}
88
89/*
90 * Allocate/free per cpu vdso data.
91 */
92#ifdef CONFIG_64BIT
93#define SEGMENT_ORDER 2
94#else
95#define SEGMENT_ORDER 1
96#endif
97
98int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore)
99{
100 unsigned long segment_table, page_table, page_frame;
101 u32 *psal, *aste;
102 int i;
103
104 lowcore->vdso_per_cpu_data = __LC_PASTE;
105
106 if (!switch_amode || !vdso_enabled)
107 return 0;
108
109 segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER);
110 page_table = get_zeroed_page(GFP_KERNEL | GFP_DMA);
111 page_frame = get_zeroed_page(GFP_KERNEL);
112 if (!segment_table || !page_table || !page_frame)
113 goto out;
114
115 clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY,
116 PAGE_SIZE << SEGMENT_ORDER);
117 clear_table((unsigned long *) page_table, _PAGE_TYPE_EMPTY,
118 256*sizeof(unsigned long));
119
120 *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table;
121 *(unsigned long *) page_table = _PAGE_RO + page_frame;
122
123 psal = (u32 *) (page_table + 256*sizeof(unsigned long));
124 aste = psal + 32;
125
126 for (i = 4; i < 32; i += 4)
127 psal[i] = 0x80000000;
128
129 lowcore->paste[4] = (u32)(addr_t) psal;
130 psal[0] = 0x20000000;
131 psal[2] = (u32)(addr_t) aste;
132 *(unsigned long *) (aste + 2) = segment_table +
133 _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT;
134 aste[4] = (u32)(addr_t) psal;
135 lowcore->vdso_per_cpu_data = page_frame;
136
137 vdso_init_per_cpu_data(cpu, (struct vdso_per_cpu_data *) page_frame);
138 return 0;
139
140out:
141 free_page(page_frame);
142 free_page(page_table);
143 free_pages(segment_table, SEGMENT_ORDER);
144 return -ENOMEM;
145}
146
147#ifdef CONFIG_HOTPLUG_CPU
148void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore)
149{
150 unsigned long segment_table, page_table, page_frame;
151 u32 *psal, *aste;
152
153 if (!switch_amode || !vdso_enabled)
154 return;
155
156 psal = (u32 *)(addr_t) lowcore->paste[4];
157 aste = (u32 *)(addr_t) psal[2];
158 segment_table = *(unsigned long *)(aste + 2) & PAGE_MASK;
159 page_table = *(unsigned long *) segment_table;
160 page_frame = *(unsigned long *) page_table;
161
162 free_page(page_frame);
163 free_page(page_table);
164 free_pages(segment_table, SEGMENT_ORDER);
165}
166#endif /* CONFIG_HOTPLUG_CPU */
167
168static void __vdso_init_cr5(void *dummy)
169{
170 unsigned long cr5;
171
172 cr5 = offsetof(struct _lowcore, paste);
173 __ctl_load(cr5, 5, 5);
174}
175
176static void vdso_init_cr5(void)
177{
178 if (switch_amode && vdso_enabled)
179 on_each_cpu(__vdso_init_cr5, NULL, 1);
180}
181#endif /* CONFIG_64BIT */
182
183/*
74 * This is called from binfmt_elf, we create the special vma for the 184 * This is called from binfmt_elf, we create the special vma for the
75 * vDSO and insert it into the mm struct tree 185 * vDSO and insert it into the mm struct tree
76 */ 186 */
@@ -172,6 +282,9 @@ static int __init vdso_init(void)
172{ 282{
173 int i; 283 int i;
174 284
285 if (!vdso_enabled)
286 return 0;
287 vdso_init_data(vdso_data);
175#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) 288#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
176 /* Calculate the size of the 32 bit vDSO */ 289 /* Calculate the size of the 32 bit vDSO */
177 vdso32_pages = ((&vdso32_end - &vdso32_start 290 vdso32_pages = ((&vdso32_end - &vdso32_start
@@ -208,6 +321,10 @@ static int __init vdso_init(void)
208 } 321 }
209 vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data); 322 vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data);
210 vdso64_pagelist[vdso64_pages] = NULL; 323 vdso64_pagelist[vdso64_pages] = NULL;
324#ifndef CONFIG_SMP
325 BUG_ON(vdso_alloc_per_cpu(0, S390_lowcore));
326#endif
327 vdso_init_cr5();
211#endif /* CONFIG_64BIT */ 328#endif /* CONFIG_64BIT */
212 329
213 get_page(virt_to_page(vdso_data)); 330 get_page(virt_to_page(vdso_data));
diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S
index 488e31a3c0e7..9ce8caafdb4e 100644
--- a/arch/s390/kernel/vdso64/clock_getres.S
+++ b/arch/s390/kernel/vdso64/clock_getres.S
@@ -22,7 +22,12 @@ __kernel_clock_getres:
22 cghi %r2,CLOCK_REALTIME 22 cghi %r2,CLOCK_REALTIME
23 je 0f 23 je 0f
24 cghi %r2,CLOCK_MONOTONIC 24 cghi %r2,CLOCK_MONOTONIC
25 je 0f
26 cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */
25 jne 2f 27 jne 2f
28 larl %r5,_vdso_data
29 icm %r0,15,__LC_ECTG_OK(%r5)
30 jz 2f
260: ltgr %r3,%r3 310: ltgr %r3,%r3
27 jz 1f /* res == NULL */ 32 jz 1f /* res == NULL */
28 larl %r1,3f 33 larl %r1,3f
diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S
index 738a410b7eb2..79dbfee831ec 100644
--- a/arch/s390/kernel/vdso64/clock_gettime.S
+++ b/arch/s390/kernel/vdso64/clock_gettime.S
@@ -22,8 +22,10 @@ __kernel_clock_gettime:
22 larl %r5,_vdso_data 22 larl %r5,_vdso_data
23 cghi %r2,CLOCK_REALTIME 23 cghi %r2,CLOCK_REALTIME
24 je 4f 24 je 4f
25 cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */
26 je 9f
25 cghi %r2,CLOCK_MONOTONIC 27 cghi %r2,CLOCK_MONOTONIC
26 jne 9f 28 jne 12f
27 29
28 /* CLOCK_MONOTONIC */ 30 /* CLOCK_MONOTONIC */
29 ltgr %r3,%r3 31 ltgr %r3,%r3
@@ -42,7 +44,7 @@ __kernel_clock_gettime:
42 alg %r0,__VDSO_WTOM_SEC(%r5) 44 alg %r0,__VDSO_WTOM_SEC(%r5)
43 clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ 45 clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
44 jne 0b 46 jne 0b
45 larl %r5,10f 47 larl %r5,13f
461: clg %r1,0(%r5) 481: clg %r1,0(%r5)
47 jl 2f 49 jl 2f
48 slg %r1,0(%r5) 50 slg %r1,0(%r5)
@@ -68,7 +70,7 @@ __kernel_clock_gettime:
68 lg %r0,__VDSO_XTIME_SEC(%r5) 70 lg %r0,__VDSO_XTIME_SEC(%r5)
69 clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ 71 clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
70 jne 5b 72 jne 5b
71 larl %r5,10f 73 larl %r5,13f
726: clg %r1,0(%r5) 746: clg %r1,0(%r5)
73 jl 7f 75 jl 7f
74 slg %r1,0(%r5) 76 slg %r1,0(%r5)
@@ -79,11 +81,38 @@ __kernel_clock_gettime:
798: lghi %r2,0 818: lghi %r2,0
80 br %r14 82 br %r14
81 83
84 /* CLOCK_THREAD_CPUTIME_ID for this thread */
859: icm %r0,15,__VDSO_ECTG_OK(%r5)
86 jz 12f
87 ear %r2,%a4
88 llilh %r4,0x0100
89 sar %a4,%r4
90 lghi %r4,0
91 sacf 512 /* Magic ectg instruction */
92 .insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4
93 sacf 0
94 sar %a4,%r2
95 algr %r1,%r0 /* r1 = cputime as TOD value */
96 mghi %r1,1000 /* convert to nanoseconds */
97 srlg %r1,%r1,12 /* r1 = cputime in nanosec */
98 lgr %r4,%r1
99 larl %r5,13f
100 srlg %r1,%r1,9 /* divide by 1000000000 */
101 mlg %r0,8(%r5)
102 srlg %r0,%r0,11 /* r0 = tv_sec */
103 stg %r0,0(%r3)
104 msg %r0,0(%r5) /* calculate tv_nsec */
105 slgr %r4,%r0 /* r4 = tv_nsec */
106 stg %r4,8(%r3)
107 lghi %r2,0
108 br %r14
109
82 /* Fallback to system call */ 110 /* Fallback to system call */
839: lghi %r1,__NR_clock_gettime 11112: lghi %r1,__NR_clock_gettime
84 svc 0 112 svc 0
85 br %r14 113 br %r14
86 114
8710: .quad 1000000000 11513: .quad 1000000000
11614: .quad 19342813113834067
88 .cfi_endproc 117 .cfi_endproc
89 .size __kernel_clock_gettime,.-__kernel_clock_gettime 118 .size __kernel_clock_gettime,.-__kernel_clock_gettime
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 75a6e62ea973..2fb36e462194 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -23,19 +23,43 @@
23#include <asm/s390_ext.h> 23#include <asm/s390_ext.h>
24#include <asm/timer.h> 24#include <asm/timer.h>
25#include <asm/irq_regs.h> 25#include <asm/irq_regs.h>
26#include <asm/cpu.h>
26 27
27static ext_int_info_t ext_int_info_timer; 28static ext_int_info_t ext_int_info_timer;
29
28static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer); 30static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer);
29 31
32DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = {
33 .lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock)
34};
35
36static inline __u64 get_vtimer(void)
37{
38 __u64 timer;
39
40 asm volatile("STPT %0" : "=m" (timer));
41 return timer;
42}
43
44static inline void set_vtimer(__u64 expires)
45{
46 __u64 timer;
47
48 asm volatile (" STPT %0\n" /* Store current cpu timer value */
49 " SPT %1" /* Set new value immediatly afterwards */
50 : "=m" (timer) : "m" (expires) );
51 S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
52 S390_lowcore.last_update_timer = expires;
53}
54
30/* 55/*
31 * Update process times based on virtual cpu times stored by entry.S 56 * Update process times based on virtual cpu times stored by entry.S
32 * to the lowcore fields user_timer, system_timer & steal_clock. 57 * to the lowcore fields user_timer, system_timer & steal_clock.
33 */ 58 */
34void account_process_tick(struct task_struct *tsk, int user_tick) 59static void do_account_vtime(struct task_struct *tsk, int hardirq_offset)
35{ 60{
36 cputime_t cputime; 61 struct thread_info *ti = task_thread_info(tsk);
37 __u64 timer, clock; 62 __u64 timer, clock, user, system, steal;
38 int rcu_user_flag;
39 63
40 timer = S390_lowcore.last_update_timer; 64 timer = S390_lowcore.last_update_timer;
41 clock = S390_lowcore.last_update_clock; 65 clock = S390_lowcore.last_update_clock;
@@ -44,50 +68,41 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
44 : "=m" (S390_lowcore.last_update_timer), 68 : "=m" (S390_lowcore.last_update_timer),
45 "=m" (S390_lowcore.last_update_clock) ); 69 "=m" (S390_lowcore.last_update_clock) );
46 S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; 70 S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
47 S390_lowcore.steal_clock += S390_lowcore.last_update_clock - clock; 71 S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock;
48 72
49 cputime = S390_lowcore.user_timer >> 12; 73 user = S390_lowcore.user_timer - ti->user_timer;
50 rcu_user_flag = cputime != 0; 74 S390_lowcore.steal_timer -= user;
51 S390_lowcore.user_timer -= cputime << 12; 75 ti->user_timer = S390_lowcore.user_timer;
52 S390_lowcore.steal_clock -= cputime << 12; 76 account_user_time(tsk, user, user);
53 account_user_time(tsk, cputime); 77
54 78 system = S390_lowcore.system_timer - ti->system_timer;
55 cputime = S390_lowcore.system_timer >> 12; 79 S390_lowcore.steal_timer -= system;
56 S390_lowcore.system_timer -= cputime << 12; 80 ti->system_timer = S390_lowcore.system_timer;
57 S390_lowcore.steal_clock -= cputime << 12; 81 account_system_time(tsk, hardirq_offset, system, system);
58 account_system_time(tsk, HARDIRQ_OFFSET, cputime); 82
59 83 steal = S390_lowcore.steal_timer;
60 cputime = S390_lowcore.steal_clock; 84 if ((s64) steal > 0) {
61 if ((__s64) cputime > 0) { 85 S390_lowcore.steal_timer = 0;
62 cputime >>= 12; 86 account_steal_time(steal);
63 S390_lowcore.steal_clock -= cputime << 12;
64 account_steal_time(tsk, cputime);
65 } 87 }
66} 88}
67 89
68/* 90void account_vtime(struct task_struct *prev, struct task_struct *next)
69 * Update process times based on virtual cpu times stored by entry.S
70 * to the lowcore fields user_timer, system_timer & steal_clock.
71 */
72void account_vtime(struct task_struct *tsk)
73{ 91{
74 cputime_t cputime; 92 struct thread_info *ti;
75 __u64 timer; 93
76 94 do_account_vtime(prev, 0);
77 timer = S390_lowcore.last_update_timer; 95 ti = task_thread_info(prev);
78 asm volatile (" STPT %0" /* Store current cpu timer value */ 96 ti->user_timer = S390_lowcore.user_timer;
79 : "=m" (S390_lowcore.last_update_timer) ); 97 ti->system_timer = S390_lowcore.system_timer;
80 S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; 98 ti = task_thread_info(next);
81 99 S390_lowcore.user_timer = ti->user_timer;
82 cputime = S390_lowcore.user_timer >> 12; 100 S390_lowcore.system_timer = ti->system_timer;
83 S390_lowcore.user_timer -= cputime << 12; 101}
84 S390_lowcore.steal_clock -= cputime << 12;
85 account_user_time(tsk, cputime);
86 102
87 cputime = S390_lowcore.system_timer >> 12; 103void account_process_tick(struct task_struct *tsk, int user_tick)
88 S390_lowcore.system_timer -= cputime << 12; 104{
89 S390_lowcore.steal_clock -= cputime << 12; 105 do_account_vtime(tsk, HARDIRQ_OFFSET);
90 account_system_time(tsk, 0, cputime);
91} 106}
92 107
93/* 108/*
@@ -96,80 +111,131 @@ void account_vtime(struct task_struct *tsk)
96 */ 111 */
97void account_system_vtime(struct task_struct *tsk) 112void account_system_vtime(struct task_struct *tsk)
98{ 113{
99 cputime_t cputime; 114 struct thread_info *ti = task_thread_info(tsk);
100 __u64 timer; 115 __u64 timer, system;
101 116
102 timer = S390_lowcore.last_update_timer; 117 timer = S390_lowcore.last_update_timer;
103 asm volatile (" STPT %0" /* Store current cpu timer value */ 118 S390_lowcore.last_update_timer = get_vtimer();
104 : "=m" (S390_lowcore.last_update_timer) );
105 S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; 119 S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
106 120
107 cputime = S390_lowcore.system_timer >> 12; 121 system = S390_lowcore.system_timer - ti->system_timer;
108 S390_lowcore.system_timer -= cputime << 12; 122 S390_lowcore.steal_timer -= system;
109 S390_lowcore.steal_clock -= cputime << 12; 123 ti->system_timer = S390_lowcore.system_timer;
110 account_system_time(tsk, 0, cputime); 124 account_system_time(tsk, 0, system, system);
111} 125}
112EXPORT_SYMBOL_GPL(account_system_vtime); 126EXPORT_SYMBOL_GPL(account_system_vtime);
113 127
114static inline void set_vtimer(__u64 expires) 128void vtime_start_cpu(void)
115{
116 __u64 timer;
117
118 asm volatile (" STPT %0\n" /* Store current cpu timer value */
119 " SPT %1" /* Set new value immediatly afterwards */
120 : "=m" (timer) : "m" (expires) );
121 S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
122 S390_lowcore.last_update_timer = expires;
123
124 /* store expire time for this CPU timer */
125 __get_cpu_var(virt_cpu_timer).to_expire = expires;
126}
127
128void vtime_start_cpu_timer(void)
129{ 129{
130 struct vtimer_queue *vt_list; 130 struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
131 131 struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer);
132 vt_list = &__get_cpu_var(virt_cpu_timer); 132 __u64 idle_time, expires;
133 133
134 /* CPU timer interrupt is pending, don't reprogramm it */ 134 /* Account time spent with enabled wait psw loaded as idle time. */
135 if (vt_list->idle & 1LL<<63) 135 idle_time = S390_lowcore.int_clock - idle->idle_enter;
136 return; 136 account_idle_time(idle_time);
137 S390_lowcore.last_update_clock = S390_lowcore.int_clock;
138
139 /* Account system time spent going idle. */
140 S390_lowcore.system_timer += S390_lowcore.last_update_timer - vq->idle;
141 S390_lowcore.last_update_timer = S390_lowcore.async_enter_timer;
142
143 /* Restart vtime CPU timer */
144 if (vq->do_spt) {
145 /* Program old expire value but first save progress. */
146 expires = vq->idle - S390_lowcore.async_enter_timer;
147 expires += get_vtimer();
148 set_vtimer(expires);
149 } else {
150 /* Don't account the CPU timer delta while the cpu was idle. */
151 vq->elapsed -= vq->idle - S390_lowcore.async_enter_timer;
152 }
137 153
138 if (!list_empty(&vt_list->list)) 154 spin_lock(&idle->lock);
139 set_vtimer(vt_list->idle); 155 idle->idle_time += idle_time;
156 idle->idle_enter = 0ULL;
157 idle->idle_count++;
158 spin_unlock(&idle->lock);
140} 159}
141 160
142void vtime_stop_cpu_timer(void) 161void vtime_stop_cpu(void)
143{ 162{
144 struct vtimer_queue *vt_list; 163 struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
145 164 struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer);
146 vt_list = &__get_cpu_var(virt_cpu_timer); 165 psw_t psw;
147 166
148 /* nothing to do */ 167 /* Wait for external, I/O or machine check interrupt. */
149 if (list_empty(&vt_list->list)) { 168 psw.mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT;
150 vt_list->idle = VTIMER_MAX_SLICE; 169
151 goto fire; 170 /* Check if the CPU timer needs to be reprogrammed. */
171 if (vq->do_spt) {
172 __u64 vmax = VTIMER_MAX_SLICE;
173 /*
174 * The inline assembly is equivalent to
175 * vq->idle = get_cpu_timer();
176 * set_cpu_timer(VTIMER_MAX_SLICE);
177 * idle->idle_enter = get_clock();
178 * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
179 * PSW_MASK_IO | PSW_MASK_EXT);
180 * The difference is that the inline assembly makes sure that
181 * the last three instruction are stpt, stck and lpsw in that
182 * order. This is done to increase the precision.
183 */
184 asm volatile(
185#ifndef CONFIG_64BIT
186 " basr 1,0\n"
187 "0: ahi 1,1f-0b\n"
188 " st 1,4(%2)\n"
189#else /* CONFIG_64BIT */
190 " larl 1,1f\n"
191 " stg 1,8(%2)\n"
192#endif /* CONFIG_64BIT */
193 " stpt 0(%4)\n"
194 " spt 0(%5)\n"
195 " stck 0(%3)\n"
196#ifndef CONFIG_64BIT
197 " lpsw 0(%2)\n"
198#else /* CONFIG_64BIT */
199 " lpswe 0(%2)\n"
200#endif /* CONFIG_64BIT */
201 "1:"
202 : "=m" (idle->idle_enter), "=m" (vq->idle)
203 : "a" (&psw), "a" (&idle->idle_enter),
204 "a" (&vq->idle), "a" (&vmax), "m" (vmax), "m" (psw)
205 : "memory", "cc", "1");
206 } else {
207 /*
208 * The inline assembly is equivalent to
209 * vq->idle = get_cpu_timer();
210 * idle->idle_enter = get_clock();
211 * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
212 * PSW_MASK_IO | PSW_MASK_EXT);
213 * The difference is that the inline assembly makes sure that
214 * the last three instruction are stpt, stck and lpsw in that
215 * order. This is done to increase the precision.
216 */
217 asm volatile(
218#ifndef CONFIG_64BIT
219 " basr 1,0\n"
220 "0: ahi 1,1f-0b\n"
221 " st 1,4(%2)\n"
222#else /* CONFIG_64BIT */
223 " larl 1,1f\n"
224 " stg 1,8(%2)\n"
225#endif /* CONFIG_64BIT */
226 " stpt 0(%4)\n"
227 " stck 0(%3)\n"
228#ifndef CONFIG_64BIT
229 " lpsw 0(%2)\n"
230#else /* CONFIG_64BIT */
231 " lpswe 0(%2)\n"
232#endif /* CONFIG_64BIT */
233 "1:"
234 : "=m" (idle->idle_enter), "=m" (vq->idle)
235 : "a" (&psw), "a" (&idle->idle_enter),
236 "a" (&vq->idle), "m" (psw)
237 : "memory", "cc", "1");
152 } 238 }
153
154 /* store the actual expire value */
155 asm volatile ("STPT %0" : "=m" (vt_list->idle));
156
157 /*
158 * If the CPU timer is negative we don't reprogramm
159 * it because we will get instantly an interrupt.
160 */
161 if (vt_list->idle & 1LL<<63)
162 return;
163
164 vt_list->offset += vt_list->to_expire - vt_list->idle;
165
166 /*
167 * We cannot halt the CPU timer, we just write a value that
168 * nearly never expires (only after 71 years) and re-write
169 * the stored expire value if we continue the timer
170 */
171 fire:
172 set_vtimer(VTIMER_MAX_SLICE);
173} 239}
174 240
175/* 241/*
@@ -195,30 +261,23 @@ static void list_add_sorted(struct vtimer_list *timer, struct list_head *head)
195 */ 261 */
196static void do_callbacks(struct list_head *cb_list) 262static void do_callbacks(struct list_head *cb_list)
197{ 263{
198 struct vtimer_queue *vt_list; 264 struct vtimer_queue *vq;
199 struct vtimer_list *event, *tmp; 265 struct vtimer_list *event, *tmp;
200 void (*fn)(unsigned long);
201 unsigned long data;
202 266
203 if (list_empty(cb_list)) 267 if (list_empty(cb_list))
204 return; 268 return;
205 269
206 vt_list = &__get_cpu_var(virt_cpu_timer); 270 vq = &__get_cpu_var(virt_cpu_timer);
207 271
208 list_for_each_entry_safe(event, tmp, cb_list, entry) { 272 list_for_each_entry_safe(event, tmp, cb_list, entry) {
209 fn = event->function; 273 list_del_init(&event->entry);
210 data = event->data; 274 (event->function)(event->data);
211 fn(data); 275 if (event->interval) {
212 276 /* Recharge interval timer */
213 if (!event->interval) 277 event->expires = event->interval + vq->elapsed;
214 /* delete one shot timer */ 278 spin_lock(&vq->lock);
215 list_del_init(&event->entry); 279 list_add_sorted(event, &vq->list);
216 else { 280 spin_unlock(&vq->lock);
217 /* move interval timer back to list */
218 spin_lock(&vt_list->lock);
219 list_del_init(&event->entry);
220 list_add_sorted(event, &vt_list->list);
221 spin_unlock(&vt_list->lock);
222 } 281 }
223 } 282 }
224} 283}
@@ -228,64 +287,57 @@ static void do_callbacks(struct list_head *cb_list)
228 */ 287 */
229static void do_cpu_timer_interrupt(__u16 error_code) 288static void do_cpu_timer_interrupt(__u16 error_code)
230{ 289{
231 __u64 next, delta; 290 struct vtimer_queue *vq;
232 struct vtimer_queue *vt_list;
233 struct vtimer_list *event, *tmp; 291 struct vtimer_list *event, *tmp;
234 struct list_head *ptr; 292 struct list_head cb_list; /* the callback queue */
235 /* the callback queue */ 293 __u64 elapsed, next;
236 struct list_head cb_list;
237 294
238 INIT_LIST_HEAD(&cb_list); 295 INIT_LIST_HEAD(&cb_list);
239 vt_list = &__get_cpu_var(virt_cpu_timer); 296 vq = &__get_cpu_var(virt_cpu_timer);
240 297
241 /* walk timer list, fire all expired events */ 298 /* walk timer list, fire all expired events */
242 spin_lock(&vt_list->lock); 299 spin_lock(&vq->lock);
243 300
244 if (vt_list->to_expire < VTIMER_MAX_SLICE) 301 elapsed = vq->elapsed + (vq->timer - S390_lowcore.async_enter_timer);
245 vt_list->offset += vt_list->to_expire; 302 BUG_ON((s64) elapsed < 0);
246 303 vq->elapsed = 0;
247 list_for_each_entry_safe(event, tmp, &vt_list->list, entry) { 304 list_for_each_entry_safe(event, tmp, &vq->list, entry) {
248 if (event->expires > vt_list->offset) 305 if (event->expires < elapsed)
249 /* found first unexpired event, leave */ 306 /* move expired timer to the callback queue */
250 break; 307 list_move_tail(&event->entry, &cb_list);
251 308 else
252 /* re-charge interval timer, we have to add the offset */ 309 event->expires -= elapsed;
253 if (event->interval)
254 event->expires = event->interval + vt_list->offset;
255
256 /* move expired timer to the callback queue */
257 list_move_tail(&event->entry, &cb_list);
258 } 310 }
259 spin_unlock(&vt_list->lock); 311 spin_unlock(&vq->lock);
312
313 vq->do_spt = list_empty(&cb_list);
260 do_callbacks(&cb_list); 314 do_callbacks(&cb_list);
261 315
262 /* next event is first in list */ 316 /* next event is first in list */
263 spin_lock(&vt_list->lock); 317 next = VTIMER_MAX_SLICE;
264 if (!list_empty(&vt_list->list)) { 318 spin_lock(&vq->lock);
265 ptr = vt_list->list.next; 319 if (!list_empty(&vq->list)) {
266 event = list_entry(ptr, struct vtimer_list, entry); 320 event = list_first_entry(&vq->list, struct vtimer_list, entry);
267 next = event->expires - vt_list->offset; 321 next = event->expires;
268 322 } else
269 /* add the expired time from this interrupt handler 323 vq->do_spt = 0;
270 * and the callback functions 324 spin_unlock(&vq->lock);
271 */ 325 /*
272 asm volatile ("STPT %0" : "=m" (delta)); 326 * To improve precision add the time spent by the
273 delta = 0xffffffffffffffffLL - delta + 1; 327 * interrupt handler to the elapsed time.
274 vt_list->offset += delta; 328 * Note: CPU timer counts down and we got an interrupt,
275 next -= delta; 329 * the current content is negative
276 } else { 330 */
277 vt_list->offset = 0; 331 elapsed = S390_lowcore.async_enter_timer - get_vtimer();
278 next = VTIMER_MAX_SLICE; 332 set_vtimer(next - elapsed);
279 } 333 vq->timer = next - elapsed;
280 spin_unlock(&vt_list->lock); 334 vq->elapsed = elapsed;
281 set_vtimer(next);
282} 335}
283 336
284void init_virt_timer(struct vtimer_list *timer) 337void init_virt_timer(struct vtimer_list *timer)
285{ 338{
286 timer->function = NULL; 339 timer->function = NULL;
287 INIT_LIST_HEAD(&timer->entry); 340 INIT_LIST_HEAD(&timer->entry);
288 spin_lock_init(&timer->lock);
289} 341}
290EXPORT_SYMBOL(init_virt_timer); 342EXPORT_SYMBOL(init_virt_timer);
291 343
@@ -299,44 +351,40 @@ static inline int vtimer_pending(struct vtimer_list *timer)
299 */ 351 */
300static void internal_add_vtimer(struct vtimer_list *timer) 352static void internal_add_vtimer(struct vtimer_list *timer)
301{ 353{
354 struct vtimer_queue *vq;
302 unsigned long flags; 355 unsigned long flags;
303 __u64 done; 356 __u64 left, expires;
304 struct vtimer_list *event;
305 struct vtimer_queue *vt_list;
306 357
307 vt_list = &per_cpu(virt_cpu_timer, timer->cpu); 358 vq = &per_cpu(virt_cpu_timer, timer->cpu);
308 spin_lock_irqsave(&vt_list->lock, flags); 359 spin_lock_irqsave(&vq->lock, flags);
309 360
310 BUG_ON(timer->cpu != smp_processor_id()); 361 BUG_ON(timer->cpu != smp_processor_id());
311 362
312 /* if list is empty we only have to set the timer */ 363 if (list_empty(&vq->list)) {
313 if (list_empty(&vt_list->list)) { 364 /* First timer on this cpu, just program it. */
314 /* reset the offset, this may happen if the last timer was 365 list_add(&timer->entry, &vq->list);
315 * just deleted by mod_virt_timer and the interrupt 366 set_vtimer(timer->expires);
316 * didn't happen until here 367 vq->timer = timer->expires;
317 */ 368 vq->elapsed = 0;
318 vt_list->offset = 0; 369 } else {
319 goto fire; 370 /* Check progress of old timers. */
371 expires = timer->expires;
372 left = get_vtimer();
373 if (likely((s64) expires < (s64) left)) {
374 /* The new timer expires before the current timer. */
375 set_vtimer(expires);
376 vq->elapsed += vq->timer - left;
377 vq->timer = expires;
378 } else {
379 vq->elapsed += vq->timer - left;
380 vq->timer = left;
381 }
382 /* Insert new timer into per cpu list. */
383 timer->expires += vq->elapsed;
384 list_add_sorted(timer, &vq->list);
320 } 385 }
321 386
322 /* save progress */ 387 spin_unlock_irqrestore(&vq->lock, flags);
323 asm volatile ("STPT %0" : "=m" (done));
324
325 /* calculate completed work */
326 done = vt_list->to_expire - done + vt_list->offset;
327 vt_list->offset = 0;
328
329 list_for_each_entry(event, &vt_list->list, entry)
330 event->expires -= done;
331
332 fire:
333 list_add_sorted(timer, &vt_list->list);
334
335 /* get first element, which is the next vtimer slice */
336 event = list_entry(vt_list->list.next, struct vtimer_list, entry);
337
338 set_vtimer(event->expires);
339 spin_unlock_irqrestore(&vt_list->lock, flags);
340 /* release CPU acquired in prepare_vtimer or mod_virt_timer() */ 388 /* release CPU acquired in prepare_vtimer or mod_virt_timer() */
341 put_cpu(); 389 put_cpu();
342} 390}
@@ -381,14 +429,15 @@ EXPORT_SYMBOL(add_virt_timer_periodic);
381 * If we change a pending timer the function must be called on the CPU 429 * If we change a pending timer the function must be called on the CPU
382 * where the timer is running on, e.g. by smp_call_function_single() 430 * where the timer is running on, e.g. by smp_call_function_single()
383 * 431 *
384 * The original mod_timer adds the timer if it is not pending. For compatibility 432 * The original mod_timer adds the timer if it is not pending. For
385 * we do the same. The timer will be added on the current CPU as a oneshot timer. 433 * compatibility we do the same. The timer will be added on the current
434 * CPU as a oneshot timer.
386 * 435 *
387 * returns whether it has modified a pending timer (1) or not (0) 436 * returns whether it has modified a pending timer (1) or not (0)
388 */ 437 */
389int mod_virt_timer(struct vtimer_list *timer, __u64 expires) 438int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
390{ 439{
391 struct vtimer_queue *vt_list; 440 struct vtimer_queue *vq;
392 unsigned long flags; 441 unsigned long flags;
393 int cpu; 442 int cpu;
394 443
@@ -404,17 +453,17 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
404 return 1; 453 return 1;
405 454
406 cpu = get_cpu(); 455 cpu = get_cpu();
407 vt_list = &per_cpu(virt_cpu_timer, cpu); 456 vq = &per_cpu(virt_cpu_timer, cpu);
408 457
409 /* check if we run on the right CPU */ 458 /* check if we run on the right CPU */
410 BUG_ON(timer->cpu != cpu); 459 BUG_ON(timer->cpu != cpu);
411 460
412 /* disable interrupts before test if timer is pending */ 461 /* disable interrupts before test if timer is pending */
413 spin_lock_irqsave(&vt_list->lock, flags); 462 spin_lock_irqsave(&vq->lock, flags);
414 463
415 /* if timer isn't pending add it on the current CPU */ 464 /* if timer isn't pending add it on the current CPU */
416 if (!vtimer_pending(timer)) { 465 if (!vtimer_pending(timer)) {
417 spin_unlock_irqrestore(&vt_list->lock, flags); 466 spin_unlock_irqrestore(&vq->lock, flags);
418 /* we do not activate an interval timer with mod_virt_timer */ 467 /* we do not activate an interval timer with mod_virt_timer */
419 timer->interval = 0; 468 timer->interval = 0;
420 timer->expires = expires; 469 timer->expires = expires;
@@ -431,7 +480,7 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
431 timer->interval = expires; 480 timer->interval = expires;
432 481
433 /* the timer can't expire anymore so we can release the lock */ 482 /* the timer can't expire anymore so we can release the lock */
434 spin_unlock_irqrestore(&vt_list->lock, flags); 483 spin_unlock_irqrestore(&vq->lock, flags);
435 internal_add_vtimer(timer); 484 internal_add_vtimer(timer);
436 return 1; 485 return 1;
437} 486}
@@ -445,25 +494,19 @@ EXPORT_SYMBOL(mod_virt_timer);
445int del_virt_timer(struct vtimer_list *timer) 494int del_virt_timer(struct vtimer_list *timer)
446{ 495{
447 unsigned long flags; 496 unsigned long flags;
448 struct vtimer_queue *vt_list; 497 struct vtimer_queue *vq;
449 498
450 /* check if timer is pending */ 499 /* check if timer is pending */
451 if (!vtimer_pending(timer)) 500 if (!vtimer_pending(timer))
452 return 0; 501 return 0;
453 502
454 vt_list = &per_cpu(virt_cpu_timer, timer->cpu); 503 vq = &per_cpu(virt_cpu_timer, timer->cpu);
455 spin_lock_irqsave(&vt_list->lock, flags); 504 spin_lock_irqsave(&vq->lock, flags);
456 505
457 /* we don't interrupt a running timer, just let it expire! */ 506 /* we don't interrupt a running timer, just let it expire! */
458 list_del_init(&timer->entry); 507 list_del_init(&timer->entry);
459 508
460 /* last timer removed */ 509 spin_unlock_irqrestore(&vq->lock, flags);
461 if (list_empty(&vt_list->list)) {
462 vt_list->to_expire = 0;
463 vt_list->offset = 0;
464 }
465
466 spin_unlock_irqrestore(&vt_list->lock, flags);
467 return 1; 510 return 1;
468} 511}
469EXPORT_SYMBOL(del_virt_timer); 512EXPORT_SYMBOL(del_virt_timer);
@@ -473,24 +516,19 @@ EXPORT_SYMBOL(del_virt_timer);
473 */ 516 */
474void init_cpu_vtimer(void) 517void init_cpu_vtimer(void)
475{ 518{
476 struct vtimer_queue *vt_list; 519 struct vtimer_queue *vq;
477 520
478 /* kick the virtual timer */ 521 /* kick the virtual timer */
479 S390_lowcore.exit_timer = VTIMER_MAX_SLICE;
480 S390_lowcore.last_update_timer = VTIMER_MAX_SLICE;
481 asm volatile ("SPT %0" : : "m" (S390_lowcore.last_update_timer));
482 asm volatile ("STCK %0" : "=m" (S390_lowcore.last_update_clock)); 522 asm volatile ("STCK %0" : "=m" (S390_lowcore.last_update_clock));
523 asm volatile ("STPT %0" : "=m" (S390_lowcore.last_update_timer));
524
525 /* initialize per cpu vtimer structure */
526 vq = &__get_cpu_var(virt_cpu_timer);
527 INIT_LIST_HEAD(&vq->list);
528 spin_lock_init(&vq->lock);
483 529
484 /* enable cpu timer interrupts */ 530 /* enable cpu timer interrupts */
485 __ctl_set_bit(0,10); 531 __ctl_set_bit(0,10);
486
487 vt_list = &__get_cpu_var(virt_cpu_timer);
488 INIT_LIST_HEAD(&vt_list->list);
489 spin_lock_init(&vt_list->lock);
490 vt_list->to_expire = 0;
491 vt_list->offset = 0;
492 vt_list->idle = 0;
493
494} 532}
495 533
496void __init vtime_init(void) 534void __init vtime_init(void)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 8b00eb2ddf57..be8497186b96 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -113,8 +113,6 @@ long kvm_arch_dev_ioctl(struct file *filp,
113int kvm_dev_ioctl_check_extension(long ext) 113int kvm_dev_ioctl_check_extension(long ext)
114{ 114{
115 switch (ext) { 115 switch (ext) {
116 case KVM_CAP_USER_MEMORY:
117 return 1;
118 default: 116 default:
119 return 0; 117 return 0;
120 } 118 }
@@ -185,8 +183,6 @@ struct kvm *kvm_arch_create_vm(void)
185 debug_register_view(kvm->arch.dbf, &debug_sprintf_view); 183 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
186 VM_EVENT(kvm, 3, "%s", "vm created"); 184 VM_EVENT(kvm, 3, "%s", "vm created");
187 185
188 try_module_get(THIS_MODULE);
189
190 return kvm; 186 return kvm;
191out_nodbf: 187out_nodbf:
192 free_page((unsigned long)(kvm->arch.sca)); 188 free_page((unsigned long)(kvm->arch.sca));
@@ -196,13 +192,33 @@ out_nokvm:
196 return ERR_PTR(rc); 192 return ERR_PTR(rc);
197} 193}
198 194
195void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
196{
197 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
198 free_page((unsigned long)(vcpu->arch.sie_block));
199 kvm_vcpu_uninit(vcpu);
200 kfree(vcpu);
201}
202
203static void kvm_free_vcpus(struct kvm *kvm)
204{
205 unsigned int i;
206
207 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
208 if (kvm->vcpus[i]) {
209 kvm_arch_vcpu_destroy(kvm->vcpus[i]);
210 kvm->vcpus[i] = NULL;
211 }
212 }
213}
214
199void kvm_arch_destroy_vm(struct kvm *kvm) 215void kvm_arch_destroy_vm(struct kvm *kvm)
200{ 216{
201 debug_unregister(kvm->arch.dbf); 217 kvm_free_vcpus(kvm);
202 kvm_free_physmem(kvm); 218 kvm_free_physmem(kvm);
203 free_page((unsigned long)(kvm->arch.sca)); 219 free_page((unsigned long)(kvm->arch.sca));
220 debug_unregister(kvm->arch.dbf);
204 kfree(kvm); 221 kfree(kvm);
205 module_put(THIS_MODULE);
206} 222}
207 223
208/* Section: vcpu related */ 224/* Section: vcpu related */
@@ -213,8 +229,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
213 229
214void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 230void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
215{ 231{
216 /* kvm common code refers to this, but does'nt call it */ 232 /* Nothing todo */
217 BUG();
218} 233}
219 234
220void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 235void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -308,8 +323,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
308 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, 323 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
309 vcpu->arch.sie_block); 324 vcpu->arch.sie_block);
310 325
311 try_module_get(THIS_MODULE);
312
313 return vcpu; 326 return vcpu;
314out_free_cpu: 327out_free_cpu:
315 kfree(vcpu); 328 kfree(vcpu);
@@ -317,14 +330,6 @@ out_nomem:
317 return ERR_PTR(rc); 330 return ERR_PTR(rc);
318} 331}
319 332
320void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
321{
322 VCPU_EVENT(vcpu, 3, "%s", "destroy cpu");
323 free_page((unsigned long)(vcpu->arch.sie_block));
324 kfree(vcpu);
325 module_put(THIS_MODULE);
326}
327
328int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 333int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
329{ 334{
330 /* kvm common code refers to this, but never calls it */ 335 /* kvm common code refers to this, but never calls it */
diff --git a/arch/sh/include/asm/smp.h b/arch/sh/include/asm/smp.h
index 85b660c17eb0..c24e9c6a1736 100644
--- a/arch/sh/include/asm/smp.h
+++ b/arch/sh/include/asm/smp.h
@@ -31,7 +31,7 @@ enum {
31}; 31};
32 32
33void smp_message_recv(unsigned int msg); 33void smp_message_recv(unsigned int msg);
34void smp_timer_broadcast(cpumask_t mask); 34void smp_timer_broadcast(const struct cpumask *mask);
35 35
36void local_timer_interrupt(void); 36void local_timer_interrupt(void);
37void local_timer_setup(unsigned int cpu); 37void local_timer_setup(unsigned int cpu);
diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h
index 95f0085e098a..279d9cc4a007 100644
--- a/arch/sh/include/asm/topology.h
+++ b/arch/sh/include/asm/topology.h
@@ -5,7 +5,6 @@
5 5
6/* sched_domains SD_NODE_INIT for sh machines */ 6/* sched_domains SD_NODE_INIT for sh machines */
7#define SD_NODE_INIT (struct sched_domain) { \ 7#define SD_NODE_INIT (struct sched_domain) { \
8 .span = CPU_MASK_NONE, \
9 .parent = NULL, \ 8 .parent = NULL, \
10 .child = NULL, \ 9 .child = NULL, \
11 .groups = NULL, \ 10 .groups = NULL, \
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index 3c5ad1660bbc..8f4027412614 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -31,12 +31,6 @@
31int __cpu_number_map[NR_CPUS]; /* Map physical to logical */ 31int __cpu_number_map[NR_CPUS]; /* Map physical to logical */
32int __cpu_logical_map[NR_CPUS]; /* Map logical to physical */ 32int __cpu_logical_map[NR_CPUS]; /* Map logical to physical */
33 33
34cpumask_t cpu_possible_map;
35EXPORT_SYMBOL(cpu_possible_map);
36
37cpumask_t cpu_online_map;
38EXPORT_SYMBOL(cpu_online_map);
39
40static inline void __init smp_store_cpu_info(unsigned int cpu) 34static inline void __init smp_store_cpu_info(unsigned int cpu)
41{ 35{
42 struct sh_cpuinfo *c = cpu_data + cpu; 36 struct sh_cpuinfo *c = cpu_data + cpu;
@@ -190,11 +184,11 @@ void arch_send_call_function_single_ipi(int cpu)
190 plat_send_ipi(cpu, SMP_MSG_FUNCTION_SINGLE); 184 plat_send_ipi(cpu, SMP_MSG_FUNCTION_SINGLE);
191} 185}
192 186
193void smp_timer_broadcast(cpumask_t mask) 187void smp_timer_broadcast(const struct cpumask *mask)
194{ 188{
195 int cpu; 189 int cpu;
196 190
197 for_each_cpu_mask(cpu, mask) 191 for_each_cpu(cpu, mask)
198 plat_send_ipi(cpu, SMP_MSG_TIMER); 192 plat_send_ipi(cpu, SMP_MSG_TIMER);
199} 193}
200 194
diff --git a/arch/sh/kernel/timers/timer-broadcast.c b/arch/sh/kernel/timers/timer-broadcast.c
index c2317635230f..96e8eaea1e62 100644
--- a/arch/sh/kernel/timers/timer-broadcast.c
+++ b/arch/sh/kernel/timers/timer-broadcast.c
@@ -51,7 +51,7 @@ void __cpuinit local_timer_setup(unsigned int cpu)
51 clk->mult = 1; 51 clk->mult = 1;
52 clk->set_mode = dummy_timer_set_mode; 52 clk->set_mode = dummy_timer_set_mode;
53 clk->broadcast = smp_timer_broadcast; 53 clk->broadcast = smp_timer_broadcast;
54 clk->cpumask = cpumask_of_cpu(cpu); 54 clk->cpumask = cpumask_of(cpu);
55 55
56 clockevents_register_device(clk); 56 clockevents_register_device(clk);
57} 57}
diff --git a/arch/sh/kernel/timers/timer-tmu.c b/arch/sh/kernel/timers/timer-tmu.c
index 3c61ddd4d43e..0db3f9510336 100644
--- a/arch/sh/kernel/timers/timer-tmu.c
+++ b/arch/sh/kernel/timers/timer-tmu.c
@@ -263,7 +263,7 @@ static int tmu_timer_init(void)
263 tmu0_clockevent.min_delta_ns = 263 tmu0_clockevent.min_delta_ns =
264 clockevent_delta2ns(1, &tmu0_clockevent); 264 clockevent_delta2ns(1, &tmu0_clockevent);
265 265
266 tmu0_clockevent.cpumask = cpumask_of_cpu(0); 266 tmu0_clockevent.cpumask = cpumask_of(0);
267 267
268 clockevents_register_device(&tmu0_clockevent); 268 clockevents_register_device(&tmu0_clockevent);
269 269
diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h
index a8180e546a48..8408d9d2a662 100644
--- a/arch/sparc/include/asm/smp_32.h
+++ b/arch/sparc/include/asm/smp_32.h
@@ -29,8 +29,6 @@
29 */ 29 */
30 30
31extern unsigned char boot_cpu_id; 31extern unsigned char boot_cpu_id;
32extern cpumask_t phys_cpu_present_map;
33#define cpu_possible_map phys_cpu_present_map
34 32
35typedef void (*smpfunc_t)(unsigned long, unsigned long, unsigned long, 33typedef void (*smpfunc_t)(unsigned long, unsigned long, unsigned long,
36 unsigned long, unsigned long); 34 unsigned long, unsigned long);
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index a3ea2bcb95de..cab8e0286871 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -312,7 +312,8 @@ static void sun4u_irq_enable(unsigned int virt_irq)
312 } 312 }
313} 313}
314 314
315static void sun4u_set_affinity(unsigned int virt_irq, cpumask_t mask) 315static void sun4u_set_affinity(unsigned int virt_irq,
316 const struct cpumask *mask)
316{ 317{
317 sun4u_irq_enable(virt_irq); 318 sun4u_irq_enable(virt_irq);
318} 319}
@@ -362,7 +363,8 @@ static void sun4v_irq_enable(unsigned int virt_irq)
362 ino, err); 363 ino, err);
363} 364}
364 365
365static void sun4v_set_affinity(unsigned int virt_irq, cpumask_t mask) 366static void sun4v_set_affinity(unsigned int virt_irq,
367 const struct cpumask *mask)
366{ 368{
367 unsigned int ino = virt_irq_table[virt_irq].dev_ino; 369 unsigned int ino = virt_irq_table[virt_irq].dev_ino;
368 unsigned long cpuid = irq_choose_cpu(virt_irq); 370 unsigned long cpuid = irq_choose_cpu(virt_irq);
@@ -429,7 +431,8 @@ static void sun4v_virq_enable(unsigned int virt_irq)
429 dev_handle, dev_ino, err); 431 dev_handle, dev_ino, err);
430} 432}
431 433
432static void sun4v_virt_set_affinity(unsigned int virt_irq, cpumask_t mask) 434static void sun4v_virt_set_affinity(unsigned int virt_irq,
435 const struct cpumask *mask)
433{ 436{
434 unsigned long cpuid, dev_handle, dev_ino; 437 unsigned long cpuid, dev_handle, dev_ino;
435 int err; 438 int err;
@@ -851,7 +854,7 @@ void fixup_irqs(void)
851 !(irq_desc[irq].status & IRQ_PER_CPU)) { 854 !(irq_desc[irq].status & IRQ_PER_CPU)) {
852 if (irq_desc[irq].chip->set_affinity) 855 if (irq_desc[irq].chip->set_affinity)
853 irq_desc[irq].chip->set_affinity(irq, 856 irq_desc[irq].chip->set_affinity(irq,
854 irq_desc[irq].affinity); 857 &irq_desc[irq].affinity);
855 } 858 }
856 spin_unlock_irqrestore(&irq_desc[irq].lock, flags); 859 spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
857 } 860 }
diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c
index 46e231f7c5ce..322046cdf85f 100644
--- a/arch/sparc/kernel/of_device_64.c
+++ b/arch/sparc/kernel/of_device_64.c
@@ -780,7 +780,7 @@ out:
780 if (nid != -1) { 780 if (nid != -1) {
781 cpumask_t numa_mask = node_to_cpumask(nid); 781 cpumask_t numa_mask = node_to_cpumask(nid);
782 782
783 irq_set_affinity(irq, numa_mask); 783 irq_set_affinity(irq, &numa_mask);
784 } 784 }
785 785
786 return irq; 786 return irq;
diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c
index 2e680f34f727..0d0cd815e83e 100644
--- a/arch/sparc/kernel/pci_msi.c
+++ b/arch/sparc/kernel/pci_msi.c
@@ -288,7 +288,7 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm,
288 if (nid != -1) { 288 if (nid != -1) {
289 cpumask_t numa_mask = node_to_cpumask(nid); 289 cpumask_t numa_mask = node_to_cpumask(nid);
290 290
291 irq_set_affinity(irq, numa_mask); 291 irq_set_affinity(irq, &numa_mask);
292 } 292 }
293 err = request_irq(irq, sparc64_msiq_interrupt, 0, 293 err = request_irq(irq, sparc64_msiq_interrupt, 0,
294 "MSIQ", 294 "MSIQ",
diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c
index e396c1f17a92..1e5ac4e282e1 100644
--- a/arch/sparc/kernel/smp_32.c
+++ b/arch/sparc/kernel/smp_32.c
@@ -39,8 +39,6 @@ volatile unsigned long cpu_callin_map[NR_CPUS] __cpuinitdata = {0,};
39unsigned char boot_cpu_id = 0; 39unsigned char boot_cpu_id = 0;
40unsigned char boot_cpu_id4 = 0; /* boot_cpu_id << 2 */ 40unsigned char boot_cpu_id4 = 0; /* boot_cpu_id << 2 */
41 41
42cpumask_t cpu_online_map = CPU_MASK_NONE;
43cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
44cpumask_t smp_commenced_mask = CPU_MASK_NONE; 42cpumask_t smp_commenced_mask = CPU_MASK_NONE;
45 43
46/* The only guaranteed locking primitive available on all Sparc 44/* The only guaranteed locking primitive available on all Sparc
@@ -334,7 +332,7 @@ void __init smp_setup_cpu_possible_map(void)
334 instance = 0; 332 instance = 0;
335 while (!cpu_find_by_instance(instance, NULL, &mid)) { 333 while (!cpu_find_by_instance(instance, NULL, &mid)) {
336 if (mid < NR_CPUS) { 334 if (mid < NR_CPUS) {
337 cpu_set(mid, phys_cpu_present_map); 335 cpu_set(mid, cpu_possible_map);
338 cpu_set(mid, cpu_present_map); 336 cpu_set(mid, cpu_present_map);
339 } 337 }
340 instance++; 338 instance++;
@@ -354,7 +352,7 @@ void __init smp_prepare_boot_cpu(void)
354 352
355 current_thread_info()->cpu = cpuid; 353 current_thread_info()->cpu = cpuid;
356 cpu_set(cpuid, cpu_online_map); 354 cpu_set(cpuid, cpu_online_map);
357 cpu_set(cpuid, phys_cpu_present_map); 355 cpu_set(cpuid, cpu_possible_map);
358} 356}
359 357
360int __cpuinit __cpu_up(unsigned int cpu) 358int __cpuinit __cpu_up(unsigned int cpu)
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index bfe99d82d458..46329799f346 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -49,14 +49,10 @@
49 49
50int sparc64_multi_core __read_mostly; 50int sparc64_multi_core __read_mostly;
51 51
52cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE;
53cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE;
54DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE; 52DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
55cpumask_t cpu_core_map[NR_CPUS] __read_mostly = 53cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
56 { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; 54 { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
57 55
58EXPORT_SYMBOL(cpu_possible_map);
59EXPORT_SYMBOL(cpu_online_map);
60EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); 56EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
61EXPORT_SYMBOL(cpu_core_map); 57EXPORT_SYMBOL(cpu_core_map);
62 58
diff --git a/arch/sparc/kernel/sparc_ksyms_32.c b/arch/sparc/kernel/sparc_ksyms_32.c
index a4d45fc29b21..e1e97639231b 100644
--- a/arch/sparc/kernel/sparc_ksyms_32.c
+++ b/arch/sparc/kernel/sparc_ksyms_32.c
@@ -112,10 +112,6 @@ EXPORT_PER_CPU_SYMBOL(__cpu_data);
112#ifdef CONFIG_SMP 112#ifdef CONFIG_SMP
113/* IRQ implementation. */ 113/* IRQ implementation. */
114EXPORT_SYMBOL(synchronize_irq); 114EXPORT_SYMBOL(synchronize_irq);
115
116/* CPU online map and active count. */
117EXPORT_SYMBOL(cpu_online_map);
118EXPORT_SYMBOL(phys_cpu_present_map);
119#endif 115#endif
120 116
121EXPORT_SYMBOL(__udelay); 117EXPORT_SYMBOL(__udelay);
diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c
index 141da3759091..9df8f095a8b1 100644
--- a/arch/sparc/kernel/time_64.c
+++ b/arch/sparc/kernel/time_64.c
@@ -763,7 +763,7 @@ void __devinit setup_sparc64_timer(void)
763 sevt = &__get_cpu_var(sparc64_events); 763 sevt = &__get_cpu_var(sparc64_events);
764 764
765 memcpy(sevt, &sparc64_clockevent, sizeof(*sevt)); 765 memcpy(sevt, &sparc64_clockevent, sizeof(*sevt));
766 sevt->cpumask = cpumask_of_cpu(smp_processor_id()); 766 sevt->cpumask = cpumask_of(smp_processor_id());
767 767
768 clockevents_register_device(sevt); 768 clockevents_register_device(sevt);
769} 769}
diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
index 045772142844..98351c78bc81 100644
--- a/arch/um/kernel/smp.c
+++ b/arch/um/kernel/smp.c
@@ -25,13 +25,6 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
25#include "irq_user.h" 25#include "irq_user.h"
26#include "os.h" 26#include "os.h"
27 27
28/* CPU online map, set by smp_boot_cpus */
29cpumask_t cpu_online_map = CPU_MASK_NONE;
30cpumask_t cpu_possible_map = CPU_MASK_NONE;
31
32EXPORT_SYMBOL(cpu_online_map);
33EXPORT_SYMBOL(cpu_possible_map);
34
35/* Per CPU bogomips and other parameters 28/* Per CPU bogomips and other parameters
36 * The only piece used here is the ipi pipe, which is set before SMP is 29 * The only piece used here is the ipi pipe, which is set before SMP is
37 * started and never changed. 30 * started and never changed.
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 47f04f4a3464..b13a87a3ec95 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -50,7 +50,7 @@ static int itimer_next_event(unsigned long delta,
50static struct clock_event_device itimer_clockevent = { 50static struct clock_event_device itimer_clockevent = {
51 .name = "itimer", 51 .name = "itimer",
52 .rating = 250, 52 .rating = 250,
53 .cpumask = CPU_MASK_ALL, 53 .cpumask = cpu_all_mask,
54 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, 54 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
55 .set_mode = itimer_set_mode, 55 .set_mode = itimer_set_mode,
56 .set_next_event = itimer_next_event, 56 .set_next_event = itimer_next_event,
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0f44add3e0b7..249d1e0824b5 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -601,19 +601,20 @@ config IOMMU_HELPER
601 601
602config MAXSMP 602config MAXSMP
603 bool "Configure Maximum number of SMP Processors and NUMA Nodes" 603 bool "Configure Maximum number of SMP Processors and NUMA Nodes"
604 depends on X86_64 && SMP && BROKEN 604 depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL
605 select CPUMASK_OFFSTACK
605 default n 606 default n
606 help 607 help
607 Configure maximum number of CPUS and NUMA Nodes for this architecture. 608 Configure maximum number of CPUS and NUMA Nodes for this architecture.
608 If unsure, say N. 609 If unsure, say N.
609 610
610config NR_CPUS 611config NR_CPUS
611 int "Maximum number of CPUs (2-512)" if !MAXSMP 612 int "Maximum number of CPUs" if SMP && !MAXSMP
612 range 2 512 613 range 2 512 if SMP && !MAXSMP
613 depends on SMP 614 default "1" if !SMP
614 default "4096" if MAXSMP 615 default "4096" if MAXSMP
615 default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000 616 default "32" if SMP && (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000)
616 default "8" 617 default "8" if SMP
617 help 618 help
618 This allows you to specify the maximum number of CPUs which this 619 This allows you to specify the maximum number of CPUs which this
619 kernel will support. The maximum supported value is 512 and the 620 kernel will support. The maximum supported value is 512 and the
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index b195f85526e3..9dabd00e9805 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -24,15 +24,14 @@
24#include <asm/ucontext.h> 24#include <asm/ucontext.h>
25#include <asm/uaccess.h> 25#include <asm/uaccess.h>
26#include <asm/i387.h> 26#include <asm/i387.h>
27#include <asm/ia32.h>
28#include <asm/ptrace.h> 27#include <asm/ptrace.h>
29#include <asm/ia32_unistd.h> 28#include <asm/ia32_unistd.h>
30#include <asm/user32.h> 29#include <asm/user32.h>
31#include <asm/sigcontext32.h> 30#include <asm/sigcontext32.h>
32#include <asm/proto.h> 31#include <asm/proto.h>
33#include <asm/vdso.h> 32#include <asm/vdso.h>
34
35#include <asm/sigframe.h> 33#include <asm/sigframe.h>
34#include <asm/sys_ia32.h>
36 35
37#define DEBUG_SIG 0 36#define DEBUG_SIG 0
38 37
diff --git a/arch/x86/ia32/ipc32.c b/arch/x86/ia32/ipc32.c
index d21991ce606c..29cdcd02ead3 100644
--- a/arch/x86/ia32/ipc32.c
+++ b/arch/x86/ia32/ipc32.c
@@ -8,6 +8,7 @@
8#include <linux/shm.h> 8#include <linux/shm.h>
9#include <linux/ipc.h> 9#include <linux/ipc.h>
10#include <linux/compat.h> 10#include <linux/compat.h>
11#include <asm/sys_ia32.h>
11 12
12asmlinkage long sys32_ipc(u32 call, int first, int second, int third, 13asmlinkage long sys32_ipc(u32 call, int first, int second, int third,
13 compat_uptr_t ptr, u32 fifth) 14 compat_uptr_t ptr, u32 fifth)
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 2e09dcd3c0a6..6c0d7f6231af 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -44,8 +44,8 @@
44#include <asm/types.h> 44#include <asm/types.h>
45#include <asm/uaccess.h> 45#include <asm/uaccess.h>
46#include <asm/atomic.h> 46#include <asm/atomic.h>
47#include <asm/ia32.h>
48#include <asm/vgtod.h> 47#include <asm/vgtod.h>
48#include <asm/sys_ia32.h>
49 49
50#define AA(__x) ((unsigned long)(__x)) 50#define AA(__x) ((unsigned long)(__x))
51 51
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 25caa0738af5..ab1d51a8855e 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -54,7 +54,6 @@ extern int disable_apic;
54extern int is_vsmp_box(void); 54extern int is_vsmp_box(void);
55extern void xapic_wait_icr_idle(void); 55extern void xapic_wait_icr_idle(void);
56extern u32 safe_xapic_wait_icr_idle(void); 56extern u32 safe_xapic_wait_icr_idle(void);
57extern u64 xapic_icr_read(void);
58extern void xapic_icr_write(u32, u32); 57extern void xapic_icr_write(u32, u32);
59extern int setup_profiling_timer(unsigned int); 58extern int setup_profiling_timer(unsigned int);
60 59
@@ -93,7 +92,7 @@ static inline u32 native_apic_msr_read(u32 reg)
93} 92}
94 93
95#ifndef CONFIG_X86_32 94#ifndef CONFIG_X86_32
96extern int x2apic, x2apic_preenabled; 95extern int x2apic;
97extern void check_x2apic(void); 96extern void check_x2apic(void);
98extern void enable_x2apic(void); 97extern void enable_x2apic(void);
99extern void enable_IR_x2apic(void); 98extern void enable_IR_x2apic(void);
diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h
index ce547f24a1cd..d8dd9f537911 100644
--- a/arch/x86/include/asm/bigsmp/apic.h
+++ b/arch/x86/include/asm/bigsmp/apic.h
@@ -9,12 +9,12 @@ static inline int apic_id_registered(void)
9 return (1); 9 return (1);
10} 10}
11 11
12static inline cpumask_t target_cpus(void) 12static inline const cpumask_t *target_cpus(void)
13{ 13{
14#ifdef CONFIG_SMP 14#ifdef CONFIG_SMP
15 return cpu_online_map; 15 return &cpu_online_map;
16#else 16#else
17 return cpumask_of_cpu(0); 17 return &cpumask_of_cpu(0);
18#endif 18#endif
19} 19}
20 20
@@ -79,7 +79,7 @@ static inline int apicid_to_node(int logical_apicid)
79 79
80static inline int cpu_present_to_apicid(int mps_cpu) 80static inline int cpu_present_to_apicid(int mps_cpu)
81{ 81{
82 if (mps_cpu < NR_CPUS) 82 if (mps_cpu < nr_cpu_ids)
83 return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); 83 return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
84 84
85 return BAD_APICID; 85 return BAD_APICID;
@@ -94,7 +94,7 @@ extern u8 cpu_2_logical_apicid[];
94/* Mapping from cpu number to logical apicid */ 94/* Mapping from cpu number to logical apicid */
95static inline int cpu_to_logical_apicid(int cpu) 95static inline int cpu_to_logical_apicid(int cpu)
96{ 96{
97 if (cpu >= NR_CPUS) 97 if (cpu >= nr_cpu_ids)
98 return BAD_APICID; 98 return BAD_APICID;
99 return cpu_physical_id(cpu); 99 return cpu_physical_id(cpu);
100} 100}
@@ -119,16 +119,34 @@ static inline int check_phys_apicid_present(int boot_cpu_physical_apicid)
119} 119}
120 120
121/* As we are using single CPU as destination, pick only one CPU here */ 121/* As we are using single CPU as destination, pick only one CPU here */
122static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) 122static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
123{ 123{
124 int cpu; 124 int cpu;
125 int apicid; 125 int apicid;
126 126
127 cpu = first_cpu(cpumask); 127 cpu = first_cpu(*cpumask);
128 apicid = cpu_to_logical_apicid(cpu); 128 apicid = cpu_to_logical_apicid(cpu);
129 return apicid; 129 return apicid;
130} 130}
131 131
132static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
133 const struct cpumask *andmask)
134{
135 int cpu;
136
137 /*
138 * We're using fixed IRQ delivery, can only return one phys APIC ID.
139 * May as well be the first.
140 */
141 for_each_cpu_and(cpu, cpumask, andmask)
142 if (cpumask_test_cpu(cpu, cpu_online_mask))
143 break;
144 if (cpu < nr_cpu_ids)
145 return cpu_to_logical_apicid(cpu);
146
147 return BAD_APICID;
148}
149
132static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) 150static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
133{ 151{
134 return cpuid_apic >> index_msb; 152 return cpuid_apic >> index_msb;
diff --git a/arch/x86/include/asm/bigsmp/ipi.h b/arch/x86/include/asm/bigsmp/ipi.h
index 9404c535b7ec..27fcd01b3ae6 100644
--- a/arch/x86/include/asm/bigsmp/ipi.h
+++ b/arch/x86/include/asm/bigsmp/ipi.h
@@ -1,25 +1,22 @@
1#ifndef __ASM_MACH_IPI_H 1#ifndef __ASM_MACH_IPI_H
2#define __ASM_MACH_IPI_H 2#define __ASM_MACH_IPI_H
3 3
4void send_IPI_mask_sequence(cpumask_t mask, int vector); 4void send_IPI_mask_sequence(const struct cpumask *mask, int vector);
5void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
5 6
6static inline void send_IPI_mask(cpumask_t mask, int vector) 7static inline void send_IPI_mask(const struct cpumask *mask, int vector)
7{ 8{
8 send_IPI_mask_sequence(mask, vector); 9 send_IPI_mask_sequence(mask, vector);
9} 10}
10 11
11static inline void send_IPI_allbutself(int vector) 12static inline void send_IPI_allbutself(int vector)
12{ 13{
13 cpumask_t mask = cpu_online_map; 14 send_IPI_mask_allbutself(cpu_online_mask, vector);
14 cpu_clear(smp_processor_id(), mask);
15
16 if (!cpus_empty(mask))
17 send_IPI_mask(mask, vector);
18} 15}
19 16
20static inline void send_IPI_all(int vector) 17static inline void send_IPI_all(int vector)
21{ 18{
22 send_IPI_mask(cpu_online_map, vector); 19 send_IPI_mask(cpu_online_mask, vector);
23} 20}
24 21
25#endif /* __ASM_MACH_IPI_H */ 22#endif /* __ASM_MACH_IPI_H */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index e6b82b17b072..dc27705f5443 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -320,16 +320,14 @@ static inline void set_intr_gate(unsigned int n, void *addr)
320 _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); 320 _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS);
321} 321}
322 322
323#define SYS_VECTOR_FREE 0
324#define SYS_VECTOR_ALLOCED 1
325
326extern int first_system_vector; 323extern int first_system_vector;
327extern char system_vectors[]; 324/* used_vectors is BITMAP for irq is not managed by percpu vector_irq */
325extern unsigned long used_vectors[];
328 326
329static inline void alloc_system_vector(int vector) 327static inline void alloc_system_vector(int vector)
330{ 328{
331 if (system_vectors[vector] == SYS_VECTOR_FREE) { 329 if (!test_bit(vector, used_vectors)) {
332 system_vectors[vector] = SYS_VECTOR_ALLOCED; 330 set_bit(vector, used_vectors);
333 if (first_system_vector > vector) 331 if (first_system_vector > vector)
334 first_system_vector = vector; 332 first_system_vector = vector;
335 } else 333 } else
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index a2e545c91c35..ca5ffb2856b6 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -90,6 +90,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size);
90 90
91#endif /* CONFIG_X86_32 */ 91#endif /* CONFIG_X86_32 */
92 92
93extern int add_efi_memmap;
93extern void efi_reserve_early(void); 94extern void efi_reserve_early(void);
94extern void efi_call_phys_prelog(void); 95extern void efi_call_phys_prelog(void);
95extern void efi_call_phys_epilog(void); 96extern void efi_call_phys_epilog(void);
diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h
index e24ef876915f..51ac1230294e 100644
--- a/arch/x86/include/asm/es7000/apic.h
+++ b/arch/x86/include/asm/es7000/apic.h
@@ -9,14 +9,14 @@ static inline int apic_id_registered(void)
9 return (1); 9 return (1);
10} 10}
11 11
12static inline cpumask_t target_cpus_cluster(void) 12static inline const cpumask_t *target_cpus_cluster(void)
13{ 13{
14 return CPU_MASK_ALL; 14 return &CPU_MASK_ALL;
15} 15}
16 16
17static inline cpumask_t target_cpus(void) 17static inline const cpumask_t *target_cpus(void)
18{ 18{
19 return cpumask_of_cpu(smp_processor_id()); 19 return &cpumask_of_cpu(smp_processor_id());
20} 20}
21 21
22#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) 22#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER)
@@ -80,9 +80,10 @@ extern int apic_version [MAX_APICS];
80static inline void setup_apic_routing(void) 80static inline void setup_apic_routing(void)
81{ 81{
82 int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); 82 int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
83 printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", 83 printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
84 (apic_version[apic] == 0x14) ? 84 (apic_version[apic] == 0x14) ?
85 "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(target_cpus())[0]); 85 "Physical Cluster" : "Logical Cluster",
86 nr_ioapics, cpus_addr(*target_cpus())[0]);
86} 87}
87 88
88static inline int multi_timer_check(int apic, int irq) 89static inline int multi_timer_check(int apic, int irq)
@@ -100,7 +101,7 @@ static inline int cpu_present_to_apicid(int mps_cpu)
100{ 101{
101 if (!mps_cpu) 102 if (!mps_cpu)
102 return boot_cpu_physical_apicid; 103 return boot_cpu_physical_apicid;
103 else if (mps_cpu < NR_CPUS) 104 else if (mps_cpu < nr_cpu_ids)
104 return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); 105 return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
105 else 106 else
106 return BAD_APICID; 107 return BAD_APICID;
@@ -120,9 +121,9 @@ extern u8 cpu_2_logical_apicid[];
120static inline int cpu_to_logical_apicid(int cpu) 121static inline int cpu_to_logical_apicid(int cpu)
121{ 122{
122#ifdef CONFIG_SMP 123#ifdef CONFIG_SMP
123 if (cpu >= NR_CPUS) 124 if (cpu >= nr_cpu_ids)
124 return BAD_APICID; 125 return BAD_APICID;
125 return (int)cpu_2_logical_apicid[cpu]; 126 return (int)cpu_2_logical_apicid[cpu];
126#else 127#else
127 return logical_smp_processor_id(); 128 return logical_smp_processor_id();
128#endif 129#endif
@@ -146,14 +147,15 @@ static inline int check_phys_apicid_present(int cpu_physical_apicid)
146 return (1); 147 return (1);
147} 148}
148 149
149static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask) 150static inline unsigned int
151cpu_mask_to_apicid_cluster(const struct cpumask *cpumask)
150{ 152{
151 int num_bits_set; 153 int num_bits_set;
152 int cpus_found = 0; 154 int cpus_found = 0;
153 int cpu; 155 int cpu;
154 int apicid; 156 int apicid;
155 157
156 num_bits_set = cpus_weight(cpumask); 158 num_bits_set = cpumask_weight(cpumask);
157 /* Return id to all */ 159 /* Return id to all */
158 if (num_bits_set == NR_CPUS) 160 if (num_bits_set == NR_CPUS)
159 return 0xFF; 161 return 0xFF;
@@ -161,10 +163,10 @@ static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask)
161 * The cpus in the mask must all be on the apic cluster. If are not 163 * The cpus in the mask must all be on the apic cluster. If are not
162 * on the same apicid cluster return default value of TARGET_CPUS. 164 * on the same apicid cluster return default value of TARGET_CPUS.
163 */ 165 */
164 cpu = first_cpu(cpumask); 166 cpu = cpumask_first(cpumask);
165 apicid = cpu_to_logical_apicid(cpu); 167 apicid = cpu_to_logical_apicid(cpu);
166 while (cpus_found < num_bits_set) { 168 while (cpus_found < num_bits_set) {
167 if (cpu_isset(cpu, cpumask)) { 169 if (cpumask_test_cpu(cpu, cpumask)) {
168 int new_apicid = cpu_to_logical_apicid(cpu); 170 int new_apicid = cpu_to_logical_apicid(cpu);
169 if (apicid_cluster(apicid) != 171 if (apicid_cluster(apicid) !=
170 apicid_cluster(new_apicid)){ 172 apicid_cluster(new_apicid)){
@@ -179,14 +181,14 @@ static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask)
179 return apicid; 181 return apicid;
180} 182}
181 183
182static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) 184static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
183{ 185{
184 int num_bits_set; 186 int num_bits_set;
185 int cpus_found = 0; 187 int cpus_found = 0;
186 int cpu; 188 int cpu;
187 int apicid; 189 int apicid;
188 190
189 num_bits_set = cpus_weight(cpumask); 191 num_bits_set = cpus_weight(*cpumask);
190 /* Return id to all */ 192 /* Return id to all */
191 if (num_bits_set == NR_CPUS) 193 if (num_bits_set == NR_CPUS)
192 return cpu_to_logical_apicid(0); 194 return cpu_to_logical_apicid(0);
@@ -194,10 +196,52 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
194 * The cpus in the mask must all be on the apic cluster. If are not 196 * The cpus in the mask must all be on the apic cluster. If are not
195 * on the same apicid cluster return default value of TARGET_CPUS. 197 * on the same apicid cluster return default value of TARGET_CPUS.
196 */ 198 */
197 cpu = first_cpu(cpumask); 199 cpu = first_cpu(*cpumask);
200 apicid = cpu_to_logical_apicid(cpu);
201 while (cpus_found < num_bits_set) {
202 if (cpu_isset(cpu, *cpumask)) {
203 int new_apicid = cpu_to_logical_apicid(cpu);
204 if (apicid_cluster(apicid) !=
205 apicid_cluster(new_apicid)){
206 printk ("%s: Not a valid mask!\n", __func__);
207 return cpu_to_logical_apicid(0);
208 }
209 apicid = new_apicid;
210 cpus_found++;
211 }
212 cpu++;
213 }
214 return apicid;
215}
216
217
218static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask,
219 const struct cpumask *andmask)
220{
221 int num_bits_set;
222 int cpus_found = 0;
223 int cpu;
224 int apicid = cpu_to_logical_apicid(0);
225 cpumask_var_t cpumask;
226
227 if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
228 return apicid;
229
230 cpumask_and(cpumask, inmask, andmask);
231 cpumask_and(cpumask, cpumask, cpu_online_mask);
232
233 num_bits_set = cpumask_weight(cpumask);
234 /* Return id to all */
235 if (num_bits_set == NR_CPUS)
236 goto exit;
237 /*
238 * The cpus in the mask must all be on the apic cluster. If are not
239 * on the same apicid cluster return default value of TARGET_CPUS.
240 */
241 cpu = cpumask_first(cpumask);
198 apicid = cpu_to_logical_apicid(cpu); 242 apicid = cpu_to_logical_apicid(cpu);
199 while (cpus_found < num_bits_set) { 243 while (cpus_found < num_bits_set) {
200 if (cpu_isset(cpu, cpumask)) { 244 if (cpumask_test_cpu(cpu, cpumask)) {
201 int new_apicid = cpu_to_logical_apicid(cpu); 245 int new_apicid = cpu_to_logical_apicid(cpu);
202 if (apicid_cluster(apicid) != 246 if (apicid_cluster(apicid) !=
203 apicid_cluster(new_apicid)){ 247 apicid_cluster(new_apicid)){
@@ -209,6 +253,8 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
209 } 253 }
210 cpu++; 254 cpu++;
211 } 255 }
256exit:
257 free_cpumask_var(cpumask);
212 return apicid; 258 return apicid;
213} 259}
214 260
diff --git a/arch/x86/include/asm/es7000/ipi.h b/arch/x86/include/asm/es7000/ipi.h
index 632a955fcc0a..7e8ed24d4b8a 100644
--- a/arch/x86/include/asm/es7000/ipi.h
+++ b/arch/x86/include/asm/es7000/ipi.h
@@ -1,24 +1,22 @@
1#ifndef __ASM_ES7000_IPI_H 1#ifndef __ASM_ES7000_IPI_H
2#define __ASM_ES7000_IPI_H 2#define __ASM_ES7000_IPI_H
3 3
4void send_IPI_mask_sequence(cpumask_t mask, int vector); 4void send_IPI_mask_sequence(const struct cpumask *mask, int vector);
5void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
5 6
6static inline void send_IPI_mask(cpumask_t mask, int vector) 7static inline void send_IPI_mask(const struct cpumask *mask, int vector)
7{ 8{
8 send_IPI_mask_sequence(mask, vector); 9 send_IPI_mask_sequence(mask, vector);
9} 10}
10 11
11static inline void send_IPI_allbutself(int vector) 12static inline void send_IPI_allbutself(int vector)
12{ 13{
13 cpumask_t mask = cpu_online_map; 14 send_IPI_mask_allbutself(cpu_online_mask, vector);
14 cpu_clear(smp_processor_id(), mask);
15 if (!cpus_empty(mask))
16 send_IPI_mask(mask, vector);
17} 15}
18 16
19static inline void send_IPI_all(int vector) 17static inline void send_IPI_all(int vector)
20{ 18{
21 send_IPI_mask(cpu_online_map, vector); 19 send_IPI_mask(cpu_online_mask, vector);
22} 20}
23 21
24#endif /* __ASM_ES7000_IPI_H */ 22#endif /* __ASM_ES7000_IPI_H */
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index 0ac17d33a8c7..746f37a7963a 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -24,7 +24,7 @@ struct genapic {
24 int (*probe)(void); 24 int (*probe)(void);
25 25
26 int (*apic_id_registered)(void); 26 int (*apic_id_registered)(void);
27 cpumask_t (*target_cpus)(void); 27 const struct cpumask *(*target_cpus)(void);
28 int int_delivery_mode; 28 int int_delivery_mode;
29 int int_dest_mode; 29 int int_dest_mode;
30 int ESR_DISABLE; 30 int ESR_DISABLE;
@@ -57,12 +57,16 @@ struct genapic {
57 57
58 unsigned (*get_apic_id)(unsigned long x); 58 unsigned (*get_apic_id)(unsigned long x);
59 unsigned long apic_id_mask; 59 unsigned long apic_id_mask;
60 unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); 60 unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask);
61 cpumask_t (*vector_allocation_domain)(int cpu); 61 unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
62 const struct cpumask *andmask);
63 void (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
62 64
63#ifdef CONFIG_SMP 65#ifdef CONFIG_SMP
64 /* ipi */ 66 /* ipi */
65 void (*send_IPI_mask)(cpumask_t mask, int vector); 67 void (*send_IPI_mask)(const struct cpumask *mask, int vector);
68 void (*send_IPI_mask_allbutself)(const struct cpumask *mask,
69 int vector);
66 void (*send_IPI_allbutself)(int vector); 70 void (*send_IPI_allbutself)(int vector);
67 void (*send_IPI_all)(int vector); 71 void (*send_IPI_all)(int vector);
68#endif 72#endif
@@ -114,6 +118,7 @@ struct genapic {
114 APICFUNC(get_apic_id) \ 118 APICFUNC(get_apic_id) \
115 .apic_id_mask = APIC_ID_MASK, \ 119 .apic_id_mask = APIC_ID_MASK, \
116 APICFUNC(cpu_mask_to_apicid) \ 120 APICFUNC(cpu_mask_to_apicid) \
121 APICFUNC(cpu_mask_to_apicid_and) \
117 APICFUNC(vector_allocation_domain) \ 122 APICFUNC(vector_allocation_domain) \
118 APICFUNC(acpi_madt_oem_check) \ 123 APICFUNC(acpi_madt_oem_check) \
119 IPIFUNC(send_IPI_mask) \ 124 IPIFUNC(send_IPI_mask) \
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h
index 2cae011668b7..adf32fb56aa6 100644
--- a/arch/x86/include/asm/genapic_64.h
+++ b/arch/x86/include/asm/genapic_64.h
@@ -1,6 +1,8 @@
1#ifndef _ASM_X86_GENAPIC_64_H 1#ifndef _ASM_X86_GENAPIC_64_H
2#define _ASM_X86_GENAPIC_64_H 2#define _ASM_X86_GENAPIC_64_H
3 3
4#include <linux/cpumask.h>
5
4/* 6/*
5 * Copyright 2004 James Cleverdon, IBM. 7 * Copyright 2004 James Cleverdon, IBM.
6 * Subject to the GNU Public License, v.2 8 * Subject to the GNU Public License, v.2
@@ -18,16 +20,20 @@ struct genapic {
18 u32 int_delivery_mode; 20 u32 int_delivery_mode;
19 u32 int_dest_mode; 21 u32 int_dest_mode;
20 int (*apic_id_registered)(void); 22 int (*apic_id_registered)(void);
21 cpumask_t (*target_cpus)(void); 23 const struct cpumask *(*target_cpus)(void);
22 cpumask_t (*vector_allocation_domain)(int cpu); 24 void (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
23 void (*init_apic_ldr)(void); 25 void (*init_apic_ldr)(void);
24 /* ipi */ 26 /* ipi */
25 void (*send_IPI_mask)(cpumask_t mask, int vector); 27 void (*send_IPI_mask)(const struct cpumask *mask, int vector);
28 void (*send_IPI_mask_allbutself)(const struct cpumask *mask,
29 int vector);
26 void (*send_IPI_allbutself)(int vector); 30 void (*send_IPI_allbutself)(int vector);
27 void (*send_IPI_all)(int vector); 31 void (*send_IPI_all)(int vector);
28 void (*send_IPI_self)(int vector); 32 void (*send_IPI_self)(int vector);
29 /* */ 33 /* */
30 unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); 34 unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask);
35 unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
36 const struct cpumask *andmask);
31 unsigned int (*phys_pkg_id)(int index_msb); 37 unsigned int (*phys_pkg_id)(int index_msb);
32 unsigned int (*get_apic_id)(unsigned long x); 38 unsigned int (*get_apic_id)(unsigned long x);
33 unsigned long (*set_apic_id)(unsigned int id); 39 unsigned long (*set_apic_id)(unsigned int id);
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index f89dffb28aa9..c745a306f7d3 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -117,7 +117,8 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector,
117 native_apic_mem_write(APIC_ICR, cfg); 117 native_apic_mem_write(APIC_ICR, cfg);
118} 118}
119 119
120static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) 120static inline void send_IPI_mask_sequence(const struct cpumask *mask,
121 int vector)
121{ 122{
122 unsigned long flags; 123 unsigned long flags;
123 unsigned long query_cpu; 124 unsigned long query_cpu;
@@ -128,11 +129,29 @@ static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
128 * - mbligh 129 * - mbligh
129 */ 130 */
130 local_irq_save(flags); 131 local_irq_save(flags);
131 for_each_cpu_mask_nr(query_cpu, mask) { 132 for_each_cpu(query_cpu, mask) {
132 __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu), 133 __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu),
133 vector, APIC_DEST_PHYSICAL); 134 vector, APIC_DEST_PHYSICAL);
134 } 135 }
135 local_irq_restore(flags); 136 local_irq_restore(flags);
136} 137}
137 138
139static inline void send_IPI_mask_allbutself(const struct cpumask *mask,
140 int vector)
141{
142 unsigned long flags;
143 unsigned int query_cpu;
144 unsigned int this_cpu = smp_processor_id();
145
146 /* See Hack comment above */
147
148 local_irq_save(flags);
149 for_each_cpu(query_cpu, mask)
150 if (query_cpu != this_cpu)
151 __send_IPI_dest_field(
152 per_cpu(x86_cpu_to_apicid, query_cpu),
153 vector, APIC_DEST_PHYSICAL);
154 local_irq_restore(flags);
155}
156
138#endif /* _ASM_X86_IPI_H */ 157#endif /* _ASM_X86_IPI_H */
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 28e409fc73f3..592688ed04d3 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -33,7 +33,7 @@ static inline int irq_canonicalize(int irq)
33 33
34#ifdef CONFIG_HOTPLUG_CPU 34#ifdef CONFIG_HOTPLUG_CPU
35#include <linux/cpumask.h> 35#include <linux/cpumask.h>
36extern void fixup_irqs(cpumask_t map); 36extern void fixup_irqs(void);
37#endif 37#endif
38 38
39extern unsigned int do_IRQ(struct pt_regs *regs); 39extern unsigned int do_IRQ(struct pt_regs *regs);
@@ -42,5 +42,6 @@ extern void native_init_IRQ(void);
42 42
43/* Interrupt vector management */ 43/* Interrupt vector management */
44extern DECLARE_BITMAP(used_vectors, NR_VECTORS); 44extern DECLARE_BITMAP(used_vectors, NR_VECTORS);
45extern int vector_used_by_percpu_irq(unsigned int vector);
45 46
46#endif /* _ASM_X86_IRQ_H */ 47#endif /* _ASM_X86_IRQ_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8346be87cfa1..97215a458e5f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -21,6 +21,7 @@
21 21
22#include <asm/pvclock-abi.h> 22#include <asm/pvclock-abi.h>
23#include <asm/desc.h> 23#include <asm/desc.h>
24#include <asm/mtrr.h>
24 25
25#define KVM_MAX_VCPUS 16 26#define KVM_MAX_VCPUS 16
26#define KVM_MEMORY_SLOTS 32 27#define KVM_MEMORY_SLOTS 32
@@ -86,6 +87,7 @@
86#define KVM_MIN_FREE_MMU_PAGES 5 87#define KVM_MIN_FREE_MMU_PAGES 5
87#define KVM_REFILL_PAGES 25 88#define KVM_REFILL_PAGES 25
88#define KVM_MAX_CPUID_ENTRIES 40 89#define KVM_MAX_CPUID_ENTRIES 40
90#define KVM_NR_FIXED_MTRR_REGION 88
89#define KVM_NR_VAR_MTRR 8 91#define KVM_NR_VAR_MTRR 8
90 92
91extern spinlock_t kvm_lock; 93extern spinlock_t kvm_lock;
@@ -180,6 +182,8 @@ struct kvm_mmu_page {
180 struct list_head link; 182 struct list_head link;
181 struct hlist_node hash_link; 183 struct hlist_node hash_link;
182 184
185 struct list_head oos_link;
186
183 /* 187 /*
184 * The following two entries are used to key the shadow page in the 188 * The following two entries are used to key the shadow page in the
185 * hash table. 189 * hash table.
@@ -190,13 +194,16 @@ struct kvm_mmu_page {
190 u64 *spt; 194 u64 *spt;
191 /* hold the gfn of each spte inside spt */ 195 /* hold the gfn of each spte inside spt */
192 gfn_t *gfns; 196 gfn_t *gfns;
193 unsigned long slot_bitmap; /* One bit set per slot which has memory 197 /*
194 * in this shadow page. 198 * One bit set per slot which has memory
195 */ 199 * in this shadow page.
200 */
201 DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
196 int multimapped; /* More than one parent_pte? */ 202 int multimapped; /* More than one parent_pte? */
197 int root_count; /* Currently serving as active root */ 203 int root_count; /* Currently serving as active root */
198 bool unsync; 204 bool unsync;
199 bool unsync_children; 205 bool global;
206 unsigned int unsync_children;
200 union { 207 union {
201 u64 *parent_pte; /* !multimapped */ 208 u64 *parent_pte; /* !multimapped */
202 struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */ 209 struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
@@ -327,8 +334,10 @@ struct kvm_vcpu_arch {
327 334
328 bool nmi_pending; 335 bool nmi_pending;
329 bool nmi_injected; 336 bool nmi_injected;
337 bool nmi_window_open;
330 338
331 u64 mtrr[0x100]; 339 struct mtrr_state_type mtrr_state;
340 u32 pat;
332}; 341};
333 342
334struct kvm_mem_alias { 343struct kvm_mem_alias {
@@ -350,11 +359,13 @@ struct kvm_arch{
350 */ 359 */
351 struct list_head active_mmu_pages; 360 struct list_head active_mmu_pages;
352 struct list_head assigned_dev_head; 361 struct list_head assigned_dev_head;
362 struct list_head oos_global_pages;
353 struct dmar_domain *intel_iommu_domain; 363 struct dmar_domain *intel_iommu_domain;
354 struct kvm_pic *vpic; 364 struct kvm_pic *vpic;
355 struct kvm_ioapic *vioapic; 365 struct kvm_ioapic *vioapic;
356 struct kvm_pit *vpit; 366 struct kvm_pit *vpit;
357 struct hlist_head irq_ack_notifier_list; 367 struct hlist_head irq_ack_notifier_list;
368 int vapics_in_nmi_mode;
358 369
359 int round_robin_prev_vcpu; 370 int round_robin_prev_vcpu;
360 unsigned int tss_addr; 371 unsigned int tss_addr;
@@ -378,6 +389,7 @@ struct kvm_vm_stat {
378 u32 mmu_recycled; 389 u32 mmu_recycled;
379 u32 mmu_cache_miss; 390 u32 mmu_cache_miss;
380 u32 mmu_unsync; 391 u32 mmu_unsync;
392 u32 mmu_unsync_global;
381 u32 remote_tlb_flush; 393 u32 remote_tlb_flush;
382 u32 lpages; 394 u32 lpages;
383}; 395};
@@ -397,6 +409,7 @@ struct kvm_vcpu_stat {
397 u32 halt_exits; 409 u32 halt_exits;
398 u32 halt_wakeup; 410 u32 halt_wakeup;
399 u32 request_irq_exits; 411 u32 request_irq_exits;
412 u32 request_nmi_exits;
400 u32 irq_exits; 413 u32 irq_exits;
401 u32 host_state_reload; 414 u32 host_state_reload;
402 u32 efer_reload; 415 u32 efer_reload;
@@ -405,6 +418,7 @@ struct kvm_vcpu_stat {
405 u32 insn_emulation_fail; 418 u32 insn_emulation_fail;
406 u32 hypercalls; 419 u32 hypercalls;
407 u32 irq_injections; 420 u32 irq_injections;
421 u32 nmi_injections;
408}; 422};
409 423
410struct descriptor_table { 424struct descriptor_table {
@@ -477,6 +491,7 @@ struct kvm_x86_ops {
477 491
478 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); 492 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
479 int (*get_tdp_level)(void); 493 int (*get_tdp_level)(void);
494 int (*get_mt_mask_shift)(void);
480}; 495};
481 496
482extern struct kvm_x86_ops *kvm_x86_ops; 497extern struct kvm_x86_ops *kvm_x86_ops;
@@ -490,7 +505,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu);
490void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); 505void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
491void kvm_mmu_set_base_ptes(u64 base_pte); 506void kvm_mmu_set_base_ptes(u64 base_pte);
492void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, 507void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
493 u64 dirty_mask, u64 nx_mask, u64 x_mask); 508 u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask);
494 509
495int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); 510int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
496void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); 511void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
@@ -587,12 +602,14 @@ unsigned long segment_base(u16 selector);
587 602
588void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); 603void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
589void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, 604void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
590 const u8 *new, int bytes); 605 const u8 *new, int bytes,
606 bool guest_initiated);
591int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); 607int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
592void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); 608void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
593int kvm_mmu_load(struct kvm_vcpu *vcpu); 609int kvm_mmu_load(struct kvm_vcpu *vcpu);
594void kvm_mmu_unload(struct kvm_vcpu *vcpu); 610void kvm_mmu_unload(struct kvm_vcpu *vcpu);
595void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); 611void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
612void kvm_mmu_sync_global(struct kvm_vcpu *vcpu);
596 613
597int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); 614int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
598 615
@@ -607,6 +624,8 @@ void kvm_disable_tdp(void);
607int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); 624int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
608int complete_pio(struct kvm_vcpu *vcpu); 625int complete_pio(struct kvm_vcpu *vcpu);
609 626
627struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn);
628
610static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) 629static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
611{ 630{
612 struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); 631 struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
@@ -702,18 +721,6 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
702 kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); 721 kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
703} 722}
704 723
705#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30"
706#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2"
707#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3"
708#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30"
709#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0"
710#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0"
711#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4"
712#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4"
713#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30"
714#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
715#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
716
717#define MSR_IA32_TIME_STAMP_COUNTER 0x010 724#define MSR_IA32_TIME_STAMP_COUNTER 0x010
718 725
719#define TSS_IOPB_BASE_OFFSET 0x66 726#define TSS_IOPB_BASE_OFFSET 0x66
diff --git a/arch/x86/include/asm/kvm_x86_emulate.h b/arch/x86/include/asm/kvm_x86_emulate.h
index 25179a29f208..6a159732881a 100644
--- a/arch/x86/include/asm/kvm_x86_emulate.h
+++ b/arch/x86/include/asm/kvm_x86_emulate.h
@@ -123,6 +123,7 @@ struct decode_cache {
123 u8 ad_bytes; 123 u8 ad_bytes;
124 u8 rex_prefix; 124 u8 rex_prefix;
125 struct operand src; 125 struct operand src;
126 struct operand src2;
126 struct operand dst; 127 struct operand dst;
127 bool has_seg_override; 128 bool has_seg_override;
128 u8 seg_override; 129 u8 seg_override;
@@ -146,22 +147,18 @@ struct x86_emulate_ctxt {
146 /* Register state before/after emulation. */ 147 /* Register state before/after emulation. */
147 struct kvm_vcpu *vcpu; 148 struct kvm_vcpu *vcpu;
148 149
149 /* Linear faulting address (if emulating a page-faulting instruction) */
150 unsigned long eflags; 150 unsigned long eflags;
151
152 /* Emulated execution mode, represented by an X86EMUL_MODE value. */ 151 /* Emulated execution mode, represented by an X86EMUL_MODE value. */
153 int mode; 152 int mode;
154
155 u32 cs_base; 153 u32 cs_base;
156 154
157 /* decode cache */ 155 /* decode cache */
158
159 struct decode_cache decode; 156 struct decode_cache decode;
160}; 157};
161 158
162/* Repeat String Operation Prefix */ 159/* Repeat String Operation Prefix */
163#define REPE_PREFIX 1 160#define REPE_PREFIX 1
164#define REPNE_PREFIX 2 161#define REPNE_PREFIX 2
165 162
166/* Execution mode, passed to the emulator. */ 163/* Execution mode, passed to the emulator. */
167#define X86EMUL_MODE_REAL 0 /* Real mode. */ 164#define X86EMUL_MODE_REAL 0 /* Real mode. */
@@ -170,7 +167,7 @@ struct x86_emulate_ctxt {
170#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ 167#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */
171 168
172/* Host execution mode. */ 169/* Host execution mode. */
173#if defined(__i386__) 170#if defined(CONFIG_X86_32)
174#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 171#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32
175#elif defined(CONFIG_X86_64) 172#elif defined(CONFIG_X86_64)
176#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 173#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h
index 6cb3a467e067..cc09cbbee27e 100644
--- a/arch/x86/include/asm/mach-default/mach_apic.h
+++ b/arch/x86/include/asm/mach-default/mach_apic.h
@@ -8,12 +8,12 @@
8 8
9#define APIC_DFR_VALUE (APIC_DFR_FLAT) 9#define APIC_DFR_VALUE (APIC_DFR_FLAT)
10 10
11static inline cpumask_t target_cpus(void) 11static inline const struct cpumask *target_cpus(void)
12{ 12{
13#ifdef CONFIG_SMP 13#ifdef CONFIG_SMP
14 return cpu_online_map; 14 return cpu_online_mask;
15#else 15#else
16 return cpumask_of_cpu(0); 16 return cpumask_of(0);
17#endif 17#endif
18} 18}
19 19
@@ -28,6 +28,7 @@ static inline cpumask_t target_cpus(void)
28#define apic_id_registered (genapic->apic_id_registered) 28#define apic_id_registered (genapic->apic_id_registered)
29#define init_apic_ldr (genapic->init_apic_ldr) 29#define init_apic_ldr (genapic->init_apic_ldr)
30#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) 30#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
31#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and)
31#define phys_pkg_id (genapic->phys_pkg_id) 32#define phys_pkg_id (genapic->phys_pkg_id)
32#define vector_allocation_domain (genapic->vector_allocation_domain) 33#define vector_allocation_domain (genapic->vector_allocation_domain)
33#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) 34#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID)))
@@ -61,9 +62,19 @@ static inline int apic_id_registered(void)
61 return physid_isset(read_apic_id(), phys_cpu_present_map); 62 return physid_isset(read_apic_id(), phys_cpu_present_map);
62} 63}
63 64
64static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) 65static inline unsigned int cpu_mask_to_apicid(const struct cpumask *cpumask)
65{ 66{
66 return cpus_addr(cpumask)[0]; 67 return cpumask_bits(cpumask)[0];
68}
69
70static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
71 const struct cpumask *andmask)
72{
73 unsigned long mask1 = cpumask_bits(cpumask)[0];
74 unsigned long mask2 = cpumask_bits(andmask)[0];
75 unsigned long mask3 = cpumask_bits(cpu_online_mask)[0];
76
77 return (unsigned int)(mask1 & mask2 & mask3);
67} 78}
68 79
69static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) 80static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
@@ -88,7 +99,7 @@ static inline int apicid_to_node(int logical_apicid)
88#endif 99#endif
89} 100}
90 101
91static inline cpumask_t vector_allocation_domain(int cpu) 102static inline void vector_allocation_domain(int cpu, struct cpumask *retmask)
92{ 103{
93 /* Careful. Some cpus do not strictly honor the set of cpus 104 /* Careful. Some cpus do not strictly honor the set of cpus
94 * specified in the interrupt destination when using lowest 105 * specified in the interrupt destination when using lowest
@@ -98,8 +109,7 @@ static inline cpumask_t vector_allocation_domain(int cpu)
98 * deliver interrupts to the wrong hyperthread when only one 109 * deliver interrupts to the wrong hyperthread when only one
99 * hyperthread was specified in the interrupt desitination. 110 * hyperthread was specified in the interrupt desitination.
100 */ 111 */
101 cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; 112 *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } };
102 return domain;
103} 113}
104#endif 114#endif
105 115
@@ -131,7 +141,7 @@ static inline int cpu_to_logical_apicid(int cpu)
131 141
132static inline int cpu_present_to_apicid(int mps_cpu) 142static inline int cpu_present_to_apicid(int mps_cpu)
133{ 143{
134 if (mps_cpu < NR_CPUS && cpu_present(mps_cpu)) 144 if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu))
135 return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); 145 return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
136 else 146 else
137 return BAD_APICID; 147 return BAD_APICID;
diff --git a/arch/x86/include/asm/mach-default/mach_ipi.h b/arch/x86/include/asm/mach-default/mach_ipi.h
index fabca01ebacf..191312d155da 100644
--- a/arch/x86/include/asm/mach-default/mach_ipi.h
+++ b/arch/x86/include/asm/mach-default/mach_ipi.h
@@ -4,7 +4,8 @@
4/* Avoid include hell */ 4/* Avoid include hell */
5#define NMI_VECTOR 0x02 5#define NMI_VECTOR 0x02
6 6
7void send_IPI_mask_bitmask(cpumask_t mask, int vector); 7void send_IPI_mask_bitmask(const struct cpumask *mask, int vector);
8void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
8void __send_IPI_shortcut(unsigned int shortcut, int vector); 9void __send_IPI_shortcut(unsigned int shortcut, int vector);
9 10
10extern int no_broadcast; 11extern int no_broadcast;
@@ -12,28 +13,27 @@ extern int no_broadcast;
12#ifdef CONFIG_X86_64 13#ifdef CONFIG_X86_64
13#include <asm/genapic.h> 14#include <asm/genapic.h>
14#define send_IPI_mask (genapic->send_IPI_mask) 15#define send_IPI_mask (genapic->send_IPI_mask)
16#define send_IPI_mask_allbutself (genapic->send_IPI_mask_allbutself)
15#else 17#else
16static inline void send_IPI_mask(cpumask_t mask, int vector) 18static inline void send_IPI_mask(const struct cpumask *mask, int vector)
17{ 19{
18 send_IPI_mask_bitmask(mask, vector); 20 send_IPI_mask_bitmask(mask, vector);
19} 21}
22void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
20#endif 23#endif
21 24
22static inline void __local_send_IPI_allbutself(int vector) 25static inline void __local_send_IPI_allbutself(int vector)
23{ 26{
24 if (no_broadcast || vector == NMI_VECTOR) { 27 if (no_broadcast || vector == NMI_VECTOR)
25 cpumask_t mask = cpu_online_map; 28 send_IPI_mask_allbutself(cpu_online_mask, vector);
26 29 else
27 cpu_clear(smp_processor_id(), mask);
28 send_IPI_mask(mask, vector);
29 } else
30 __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); 30 __send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
31} 31}
32 32
33static inline void __local_send_IPI_all(int vector) 33static inline void __local_send_IPI_all(int vector)
34{ 34{
35 if (no_broadcast || vector == NMI_VECTOR) 35 if (no_broadcast || vector == NMI_VECTOR)
36 send_IPI_mask(cpu_online_map, vector); 36 send_IPI_mask(cpu_online_mask, vector);
37 else 37 else
38 __send_IPI_shortcut(APIC_DEST_ALLINC, vector); 38 __send_IPI_shortcut(APIC_DEST_ALLINC, vector);
39} 39}
diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h
index e430f47df667..48553e958ad5 100644
--- a/arch/x86/include/asm/mach-generic/mach_apic.h
+++ b/arch/x86/include/asm/mach-generic/mach_apic.h
@@ -24,6 +24,7 @@
24#define check_phys_apicid_present (genapic->check_phys_apicid_present) 24#define check_phys_apicid_present (genapic->check_phys_apicid_present)
25#define check_apicid_used (genapic->check_apicid_used) 25#define check_apicid_used (genapic->check_apicid_used)
26#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) 26#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
27#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and)
27#define vector_allocation_domain (genapic->vector_allocation_domain) 28#define vector_allocation_domain (genapic->vector_allocation_domain)
28#define enable_apic_mode (genapic->enable_apic_mode) 29#define enable_apic_mode (genapic->enable_apic_mode)
29#define phys_pkg_id (genapic->phys_pkg_id) 30#define phys_pkg_id (genapic->phys_pkg_id)
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 91885c28f66b..62d14ce3cd00 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -6,13 +6,13 @@
6#include <asm/mpspec_def.h> 6#include <asm/mpspec_def.h>
7 7
8extern int apic_version[MAX_APICS]; 8extern int apic_version[MAX_APICS];
9extern int pic_mode;
9 10
10#ifdef CONFIG_X86_32 11#ifdef CONFIG_X86_32
11#include <mach_mpspec.h> 12#include <mach_mpspec.h>
12 13
13extern unsigned int def_to_bigsmp; 14extern unsigned int def_to_bigsmp;
14extern u8 apicid_2_node[]; 15extern u8 apicid_2_node[];
15extern int pic_mode;
16 16
17#ifdef CONFIG_X86_NUMAQ 17#ifdef CONFIG_X86_NUMAQ
18extern int mp_bus_id_to_node[MAX_MP_BUSSES]; 18extern int mp_bus_id_to_node[MAX_MP_BUSSES];
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index 7c1e4258b31e..cb988aab716d 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -57,6 +57,31 @@ struct mtrr_gentry {
57}; 57};
58#endif /* !__i386__ */ 58#endif /* !__i386__ */
59 59
60struct mtrr_var_range {
61 u32 base_lo;
62 u32 base_hi;
63 u32 mask_lo;
64 u32 mask_hi;
65};
66
67/* In the Intel processor's MTRR interface, the MTRR type is always held in
68 an 8 bit field: */
69typedef u8 mtrr_type;
70
71#define MTRR_NUM_FIXED_RANGES 88
72#define MTRR_MAX_VAR_RANGES 256
73
74struct mtrr_state_type {
75 struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES];
76 mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES];
77 unsigned char enabled;
78 unsigned char have_fixed;
79 mtrr_type def_type;
80};
81
82#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
83#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
84
60/* These are the various ioctls */ 85/* These are the various ioctls */
61#define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry) 86#define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry)
62#define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry) 87#define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry)
diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h
index 0bf2a06b7a4e..c80f00d29965 100644
--- a/arch/x86/include/asm/numaq/apic.h
+++ b/arch/x86/include/asm/numaq/apic.h
@@ -7,9 +7,9 @@
7 7
8#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) 8#define APIC_DFR_VALUE (APIC_DFR_CLUSTER)
9 9
10static inline cpumask_t target_cpus(void) 10static inline const cpumask_t *target_cpus(void)
11{ 11{
12 return CPU_MASK_ALL; 12 return &CPU_MASK_ALL;
13} 13}
14 14
15#define NO_BALANCE_IRQ (1) 15#define NO_BALANCE_IRQ (1)
@@ -122,7 +122,13 @@ static inline void enable_apic_mode(void)
122 * We use physical apicids here, not logical, so just return the default 122 * We use physical apicids here, not logical, so just return the default
123 * physical broadcast to stop people from breaking us 123 * physical broadcast to stop people from breaking us
124 */ 124 */
125static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) 125static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
126{
127 return (int) 0xF;
128}
129
130static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
131 const struct cpumask *andmask)
126{ 132{
127 return (int) 0xF; 133 return (int) 0xF;
128} 134}
diff --git a/arch/x86/include/asm/numaq/ipi.h b/arch/x86/include/asm/numaq/ipi.h
index 935588d286cf..a8374c652778 100644
--- a/arch/x86/include/asm/numaq/ipi.h
+++ b/arch/x86/include/asm/numaq/ipi.h
@@ -1,25 +1,22 @@
1#ifndef __ASM_NUMAQ_IPI_H 1#ifndef __ASM_NUMAQ_IPI_H
2#define __ASM_NUMAQ_IPI_H 2#define __ASM_NUMAQ_IPI_H
3 3
4void send_IPI_mask_sequence(cpumask_t, int vector); 4void send_IPI_mask_sequence(const struct cpumask *mask, int vector);
5void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
5 6
6static inline void send_IPI_mask(cpumask_t mask, int vector) 7static inline void send_IPI_mask(const struct cpumask *mask, int vector)
7{ 8{
8 send_IPI_mask_sequence(mask, vector); 9 send_IPI_mask_sequence(mask, vector);
9} 10}
10 11
11static inline void send_IPI_allbutself(int vector) 12static inline void send_IPI_allbutself(int vector)
12{ 13{
13 cpumask_t mask = cpu_online_map; 14 send_IPI_mask_allbutself(cpu_online_mask, vector);
14 cpu_clear(smp_processor_id(), mask);
15
16 if (!cpus_empty(mask))
17 send_IPI_mask(mask, vector);
18} 15}
19 16
20static inline void send_IPI_all(int vector) 17static inline void send_IPI_all(int vector)
21{ 18{
22 send_IPI_mask(cpu_online_map, vector); 19 send_IPI_mask(cpu_online_mask, vector);
23} 20}
24 21
25#endif /* __ASM_NUMAQ_IPI_H */ 22#endif /* __ASM_NUMAQ_IPI_H */
diff --git a/arch/x86/pci/pci.h b/arch/x86/include/asm/pci_x86.h
index 1959018aac02..e60fd3e14bdf 100644
--- a/arch/x86/pci/pci.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -57,7 +57,8 @@ extern struct pci_ops pci_root_ops;
57struct irq_info { 57struct irq_info {
58 u8 bus, devfn; /* Bus, device and function */ 58 u8 bus, devfn; /* Bus, device and function */
59 struct { 59 struct {
60 u8 link; /* IRQ line ID, chipset dependent, 0=not routed */ 60 u8 link; /* IRQ line ID, chipset dependent,
61 0 = not routed */
61 u16 bitmap; /* Available IRQs */ 62 u16 bitmap; /* Available IRQs */
62 } __attribute__((packed)) irq[4]; 63 } __attribute__((packed)) irq[4];
63 u8 slot; /* Slot number, 0=onboard */ 64 u8 slot; /* Slot number, 0=onboard */
@@ -69,11 +70,13 @@ struct irq_routing_table {
69 u16 version; /* PIRQ_VERSION */ 70 u16 version; /* PIRQ_VERSION */
70 u16 size; /* Table size in bytes */ 71 u16 size; /* Table size in bytes */
71 u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */ 72 u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */
72 u16 exclusive_irqs; /* IRQs devoted exclusively to PCI usage */ 73 u16 exclusive_irqs; /* IRQs devoted exclusively to
73 u16 rtr_vendor, rtr_device; /* Vendor and device ID of interrupt router */ 74 PCI usage */
75 u16 rtr_vendor, rtr_device; /* Vendor and device ID of
76 interrupt router */
74 u32 miniport_data; /* Crap */ 77 u32 miniport_data; /* Crap */
75 u8 rfu[11]; 78 u8 rfu[11];
76 u8 checksum; /* Modulo 256 checksum must give zero */ 79 u8 checksum; /* Modulo 256 checksum must give 0 */
77 struct irq_info slots[0]; 80 struct irq_info slots[0];
78} __attribute__((packed)); 81} __attribute__((packed));
79 82
@@ -148,15 +151,15 @@ static inline unsigned int mmio_config_readl(void __iomem *pos)
148 151
149static inline void mmio_config_writeb(void __iomem *pos, u8 val) 152static inline void mmio_config_writeb(void __iomem *pos, u8 val)
150{ 153{
151 asm volatile("movb %%al,(%1)" :: "a" (val), "r" (pos) : "memory"); 154 asm volatile("movb %%al,(%1)" : : "a" (val), "r" (pos) : "memory");
152} 155}
153 156
154static inline void mmio_config_writew(void __iomem *pos, u16 val) 157static inline void mmio_config_writew(void __iomem *pos, u16 val)
155{ 158{
156 asm volatile("movw %%ax,(%1)" :: "a" (val), "r" (pos) : "memory"); 159 asm volatile("movw %%ax,(%1)" : : "a" (val), "r" (pos) : "memory");
157} 160}
158 161
159static inline void mmio_config_writel(void __iomem *pos, u32 val) 162static inline void mmio_config_writel(void __iomem *pos, u32 val)
160{ 163{
161 asm volatile("movl %%eax,(%1)" :: "a" (val), "r" (pos) : "memory"); 164 asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory");
162} 165}
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index d12811ce51d9..830b9fcb6427 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -60,7 +60,7 @@ struct smp_ops {
60 void (*cpu_die)(unsigned int cpu); 60 void (*cpu_die)(unsigned int cpu);
61 void (*play_dead)(void); 61 void (*play_dead)(void);
62 62
63 void (*send_call_func_ipi)(cpumask_t mask); 63 void (*send_call_func_ipi)(const struct cpumask *mask);
64 void (*send_call_func_single_ipi)(int cpu); 64 void (*send_call_func_single_ipi)(int cpu);
65}; 65};
66 66
@@ -125,7 +125,7 @@ static inline void arch_send_call_function_single_ipi(int cpu)
125 125
126static inline void arch_send_call_function_ipi(cpumask_t mask) 126static inline void arch_send_call_function_ipi(cpumask_t mask)
127{ 127{
128 smp_ops.send_call_func_ipi(mask); 128 smp_ops.send_call_func_ipi(&mask);
129} 129}
130 130
131void cpu_disable_common(void); 131void cpu_disable_common(void);
@@ -138,7 +138,7 @@ void native_cpu_die(unsigned int cpu);
138void native_play_dead(void); 138void native_play_dead(void);
139void play_dead_common(void); 139void play_dead_common(void);
140 140
141void native_send_call_func_ipi(cpumask_t mask); 141void native_send_call_func_ipi(const struct cpumask *mask);
142void native_send_call_func_single_ipi(int cpu); 142void native_send_call_func_single_ipi(int cpu);
143 143
144extern void prefill_possible_map(void); 144extern void prefill_possible_map(void);
diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h
index 9b3070f1c2ac..99327d1be49f 100644
--- a/arch/x86/include/asm/summit/apic.h
+++ b/arch/x86/include/asm/summit/apic.h
@@ -14,13 +14,13 @@
14 14
15#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) 15#define APIC_DFR_VALUE (APIC_DFR_CLUSTER)
16 16
17static inline cpumask_t target_cpus(void) 17static inline const cpumask_t *target_cpus(void)
18{ 18{
19 /* CPU_MASK_ALL (0xff) has undefined behaviour with 19 /* CPU_MASK_ALL (0xff) has undefined behaviour with
20 * dest_LowestPrio mode logical clustered apic interrupt routing 20 * dest_LowestPrio mode logical clustered apic interrupt routing
21 * Just start on cpu 0. IRQ balancing will spread load 21 * Just start on cpu 0. IRQ balancing will spread load
22 */ 22 */
23 return cpumask_of_cpu(0); 23 return &cpumask_of_cpu(0);
24} 24}
25 25
26#define INT_DELIVERY_MODE (dest_LowestPrio) 26#define INT_DELIVERY_MODE (dest_LowestPrio)
@@ -137,14 +137,14 @@ static inline void enable_apic_mode(void)
137{ 137{
138} 138}
139 139
140static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) 140static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
141{ 141{
142 int num_bits_set; 142 int num_bits_set;
143 int cpus_found = 0; 143 int cpus_found = 0;
144 int cpu; 144 int cpu;
145 int apicid; 145 int apicid;
146 146
147 num_bits_set = cpus_weight(cpumask); 147 num_bits_set = cpus_weight(*cpumask);
148 /* Return id to all */ 148 /* Return id to all */
149 if (num_bits_set == NR_CPUS) 149 if (num_bits_set == NR_CPUS)
150 return (int) 0xFF; 150 return (int) 0xFF;
@@ -152,10 +152,10 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
152 * The cpus in the mask must all be on the apic cluster. If are not 152 * The cpus in the mask must all be on the apic cluster. If are not
153 * on the same apicid cluster return default value of TARGET_CPUS. 153 * on the same apicid cluster return default value of TARGET_CPUS.
154 */ 154 */
155 cpu = first_cpu(cpumask); 155 cpu = first_cpu(*cpumask);
156 apicid = cpu_to_logical_apicid(cpu); 156 apicid = cpu_to_logical_apicid(cpu);
157 while (cpus_found < num_bits_set) { 157 while (cpus_found < num_bits_set) {
158 if (cpu_isset(cpu, cpumask)) { 158 if (cpu_isset(cpu, *cpumask)) {
159 int new_apicid = cpu_to_logical_apicid(cpu); 159 int new_apicid = cpu_to_logical_apicid(cpu);
160 if (apicid_cluster(apicid) != 160 if (apicid_cluster(apicid) !=
161 apicid_cluster(new_apicid)){ 161 apicid_cluster(new_apicid)){
@@ -170,6 +170,49 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
170 return apicid; 170 return apicid;
171} 171}
172 172
173static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask,
174 const struct cpumask *andmask)
175{
176 int num_bits_set;
177 int cpus_found = 0;
178 int cpu;
179 int apicid = 0xFF;
180 cpumask_var_t cpumask;
181
182 if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
183 return (int) 0xFF;
184
185 cpumask_and(cpumask, inmask, andmask);
186 cpumask_and(cpumask, cpumask, cpu_online_mask);
187
188 num_bits_set = cpumask_weight(cpumask);
189 /* Return id to all */
190 if (num_bits_set == nr_cpu_ids)
191 goto exit;
192 /*
193 * The cpus in the mask must all be on the apic cluster. If are not
194 * on the same apicid cluster return default value of TARGET_CPUS.
195 */
196 cpu = cpumask_first(cpumask);
197 apicid = cpu_to_logical_apicid(cpu);
198 while (cpus_found < num_bits_set) {
199 if (cpumask_test_cpu(cpu, cpumask)) {
200 int new_apicid = cpu_to_logical_apicid(cpu);
201 if (apicid_cluster(apicid) !=
202 apicid_cluster(new_apicid)){
203 printk ("%s: Not a valid mask!\n", __func__);
204 return 0xFF;
205 }
206 apicid = apicid | new_apicid;
207 cpus_found++;
208 }
209 cpu++;
210 }
211exit:
212 free_cpumask_var(cpumask);
213 return apicid;
214}
215
173/* cpuid returns the value latched in the HW at reset, not the APIC ID 216/* cpuid returns the value latched in the HW at reset, not the APIC ID
174 * register's value. For any box whose BIOS changes APIC IDs, like 217 * register's value. For any box whose BIOS changes APIC IDs, like
175 * clustered APIC systems, we must use hard_smp_processor_id. 218 * clustered APIC systems, we must use hard_smp_processor_id.
diff --git a/arch/x86/include/asm/summit/ipi.h b/arch/x86/include/asm/summit/ipi.h
index 53bd1e7bd7b4..a8a2c24f50cc 100644
--- a/arch/x86/include/asm/summit/ipi.h
+++ b/arch/x86/include/asm/summit/ipi.h
@@ -1,9 +1,10 @@
1#ifndef __ASM_SUMMIT_IPI_H 1#ifndef __ASM_SUMMIT_IPI_H
2#define __ASM_SUMMIT_IPI_H 2#define __ASM_SUMMIT_IPI_H
3 3
4void send_IPI_mask_sequence(cpumask_t mask, int vector); 4void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
5void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
5 6
6static inline void send_IPI_mask(cpumask_t mask, int vector) 7static inline void send_IPI_mask(const cpumask_t *mask, int vector)
7{ 8{
8 send_IPI_mask_sequence(mask, vector); 9 send_IPI_mask_sequence(mask, vector);
9} 10}
@@ -14,12 +15,12 @@ static inline void send_IPI_allbutself(int vector)
14 cpu_clear(smp_processor_id(), mask); 15 cpu_clear(smp_processor_id(), mask);
15 16
16 if (!cpus_empty(mask)) 17 if (!cpus_empty(mask))
17 send_IPI_mask(mask, vector); 18 send_IPI_mask(&mask, vector);
18} 19}
19 20
20static inline void send_IPI_all(int vector) 21static inline void send_IPI_all(int vector)
21{ 22{
22 send_IPI_mask(cpu_online_map, vector); 23 send_IPI_mask(&cpu_online_map, vector);
23} 24}
24 25
25#endif /* __ASM_SUMMIT_IPI_H */ 26#endif /* __ASM_SUMMIT_IPI_H */
diff --git a/arch/x86/kvm/svm.h b/arch/x86/include/asm/svm.h
index 1b8afa78e869..1b8afa78e869 100644
--- a/arch/x86/kvm/svm.h
+++ b/arch/x86/include/asm/svm.h
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
new file mode 100644
index 000000000000..ffb08be2a530
--- /dev/null
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -0,0 +1,101 @@
1/*
2 * sys_ia32.h - Linux ia32 syscall interfaces
3 *
4 * Copyright (c) 2008 Jaswinder Singh Rajput
5 *
6 * This file is released under the GPLv2.
7 * See the file COPYING for more details.
8 */
9
10#ifndef _ASM_X86_SYS_IA32_H
11#define _ASM_X86_SYS_IA32_H
12
13#include <linux/compiler.h>
14#include <linux/linkage.h>
15#include <linux/types.h>
16#include <linux/signal.h>
17#include <asm/compat.h>
18#include <asm/ia32.h>
19
20/* ia32/sys_ia32.c */
21asmlinkage long sys32_truncate64(char __user *, unsigned long, unsigned long);
22asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long);
23
24asmlinkage long sys32_stat64(char __user *, struct stat64 __user *);
25asmlinkage long sys32_lstat64(char __user *, struct stat64 __user *);
26asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *);
27asmlinkage long sys32_fstatat(unsigned int, char __user *,
28 struct stat64 __user *, int);
29struct mmap_arg_struct;
30asmlinkage long sys32_mmap(struct mmap_arg_struct __user *);
31asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long);
32
33asmlinkage long sys32_pipe(int __user *);
34struct sigaction32;
35struct old_sigaction32;
36asmlinkage long sys32_rt_sigaction(int, struct sigaction32 __user *,
37 struct sigaction32 __user *, unsigned int);
38asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *,
39 struct old_sigaction32 __user *);
40asmlinkage long sys32_rt_sigprocmask(int, compat_sigset_t __user *,
41 compat_sigset_t __user *, unsigned int);
42asmlinkage long sys32_alarm(unsigned int);
43
44struct sel_arg_struct;
45asmlinkage long sys32_old_select(struct sel_arg_struct __user *);
46asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int);
47asmlinkage long sys32_sysfs(int, u32, u32);
48
49asmlinkage long sys32_sched_rr_get_interval(compat_pid_t,
50 struct compat_timespec __user *);
51asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t);
52asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *);
53
54#ifdef CONFIG_SYSCTL_SYSCALL
55struct sysctl_ia32;
56asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *);
57#endif
58
59asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32);
60asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32);
61
62asmlinkage long sys32_personality(unsigned long);
63asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32);
64
65asmlinkage long sys32_mmap2(unsigned long, unsigned long, unsigned long,
66 unsigned long, unsigned long, unsigned long);
67
68struct oldold_utsname;
69struct old_utsname;
70asmlinkage long sys32_olduname(struct oldold_utsname __user *);
71long sys32_uname(struct old_utsname __user *);
72
73long sys32_ustat(unsigned, struct ustat32 __user *);
74
75asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *,
76 compat_uptr_t __user *, struct pt_regs *);
77asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *);
78
79long sys32_lseek(unsigned int, int, unsigned int);
80long sys32_kill(int, int);
81long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int);
82long sys32_vm86_warning(void);
83long sys32_lookup_dcookie(u32, u32, char __user *, size_t);
84
85asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t);
86asmlinkage long sys32_sync_file_range(int, unsigned, unsigned,
87 unsigned, unsigned, int);
88asmlinkage long sys32_fadvise64(int, unsigned, unsigned, size_t, int);
89asmlinkage long sys32_fallocate(int, int, unsigned,
90 unsigned, unsigned, unsigned);
91
92/* ia32/ia32_signal.c */
93asmlinkage long sys32_sigsuspend(int, int, old_sigset_t);
94asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *,
95 stack_ia32_t __user *, struct pt_regs *);
96asmlinkage long sys32_sigreturn(struct pt_regs *);
97asmlinkage long sys32_rt_sigreturn(struct pt_regs *);
98
99/* ia32/ipc32.c */
100asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32);
101#endif /* _ASM_X86_SYS_IA32_H */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index ff386ff50ed7..79e31e9dcdda 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -226,6 +226,8 @@ extern cpumask_t cpu_coregroup_map(int cpu);
226#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) 226#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id)
227#define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) 227#define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu))
228#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) 228#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu))
229#define topology_core_cpumask(cpu) (&per_cpu(cpu_core_map, cpu))
230#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu))
229 231
230/* indicates that pointers to the topology cpumask_t maps are valid */ 232/* indicates that pointers to the topology cpumask_t maps are valid */
231#define arch_provides_topology_pointers yes 233#define arch_provides_topology_pointers yes
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index e2363253bbbf..50423c7b56b2 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -133,61 +133,61 @@ struct bau_msg_payload {
133 * see table 4.2.3.0.1 in broacast_assist spec. 133 * see table 4.2.3.0.1 in broacast_assist spec.
134 */ 134 */
135struct bau_msg_header { 135struct bau_msg_header {
136 int dest_subnodeid:6; /* must be zero */ 136 unsigned int dest_subnodeid:6; /* must be zero */
137 /* bits 5:0 */ 137 /* bits 5:0 */
138 int base_dest_nodeid:15; /* nasid>>1 (pnode) of first bit in node_map */ 138 unsigned int base_dest_nodeid:15; /* nasid>>1 (pnode) of */
139 /* bits 20:6 */ 139 /* bits 20:6 */ /* first bit in node_map */
140 int command:8; /* message type */ 140 unsigned int command:8; /* message type */
141 /* bits 28:21 */ 141 /* bits 28:21 */
142 /* 0x38: SN3net EndPoint Message */ 142 /* 0x38: SN3net EndPoint Message */
143 int rsvd_1:3; /* must be zero */ 143 unsigned int rsvd_1:3; /* must be zero */
144 /* bits 31:29 */ 144 /* bits 31:29 */
145 /* int will align on 32 bits */ 145 /* int will align on 32 bits */
146 int rsvd_2:9; /* must be zero */ 146 unsigned int rsvd_2:9; /* must be zero */
147 /* bits 40:32 */ 147 /* bits 40:32 */
148 /* Suppl_A is 56-41 */ 148 /* Suppl_A is 56-41 */
149 int payload_2a:8; /* becomes byte 16 of msg */ 149 unsigned int payload_2a:8;/* becomes byte 16 of msg */
150 /* bits 48:41 */ /* not currently using */ 150 /* bits 48:41 */ /* not currently using */
151 int payload_2b:8; /* becomes byte 17 of msg */ 151 unsigned int payload_2b:8;/* becomes byte 17 of msg */
152 /* bits 56:49 */ /* not currently using */ 152 /* bits 56:49 */ /* not currently using */
153 /* Address field (96:57) is never used as an 153 /* Address field (96:57) is never used as an
154 address (these are address bits 42:3) */ 154 address (these are address bits 42:3) */
155 int rsvd_3:1; /* must be zero */ 155 unsigned int rsvd_3:1; /* must be zero */
156 /* bit 57 */ 156 /* bit 57 */
157 /* address bits 27:4 are payload */ 157 /* address bits 27:4 are payload */
158 /* these 24 bits become bytes 12-14 of msg */ 158 /* these 24 bits become bytes 12-14 of msg */
159 int replied_to:1; /* sent as 0 by the source to byte 12 */ 159 unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */
160 /* bit 58 */ 160 /* bit 58 */
161 161
162 int payload_1a:5; /* not currently used */ 162 unsigned int payload_1a:5;/* not currently used */
163 /* bits 63:59 */ 163 /* bits 63:59 */
164 int payload_1b:8; /* not currently used */ 164 unsigned int payload_1b:8;/* not currently used */
165 /* bits 71:64 */ 165 /* bits 71:64 */
166 int payload_1c:8; /* not currently used */ 166 unsigned int payload_1c:8;/* not currently used */
167 /* bits 79:72 */ 167 /* bits 79:72 */
168 int payload_1d:2; /* not currently used */ 168 unsigned int payload_1d:2;/* not currently used */
169 /* bits 81:80 */ 169 /* bits 81:80 */
170 170
171 int rsvd_4:7; /* must be zero */ 171 unsigned int rsvd_4:7; /* must be zero */
172 /* bits 88:82 */ 172 /* bits 88:82 */
173 int sw_ack_flag:1; /* software acknowledge flag */ 173 unsigned int sw_ack_flag:1;/* software acknowledge flag */
174 /* bit 89 */ 174 /* bit 89 */
175 /* INTD trasactions at destination are to 175 /* INTD trasactions at destination are to
176 wait for software acknowledge */ 176 wait for software acknowledge */
177 int rsvd_5:6; /* must be zero */ 177 unsigned int rsvd_5:6; /* must be zero */
178 /* bits 95:90 */ 178 /* bits 95:90 */
179 int rsvd_6:5; /* must be zero */ 179 unsigned int rsvd_6:5; /* must be zero */
180 /* bits 100:96 */ 180 /* bits 100:96 */
181 int int_both:1; /* if 1, interrupt both sockets on the blade */ 181 unsigned int int_both:1;/* if 1, interrupt both sockets on the blade */
182 /* bit 101*/ 182 /* bit 101*/
183 int fairness:3; /* usually zero */ 183 unsigned int fairness:3;/* usually zero */
184 /* bits 104:102 */ 184 /* bits 104:102 */
185 int multilevel:1; /* multi-level multicast format */ 185 unsigned int multilevel:1; /* multi-level multicast format */
186 /* bit 105 */ 186 /* bit 105 */
187 /* 0 for TLB: endpoint multi-unicast messages */ 187 /* 0 for TLB: endpoint multi-unicast messages */
188 int chaining:1; /* next descriptor is part of this activation*/ 188 unsigned int chaining:1;/* next descriptor is part of this activation*/
189 /* bit 106 */ 189 /* bit 106 */
190 int rsvd_7:21; /* must be zero */ 190 unsigned int rsvd_7:21; /* must be zero */
191 /* bits 127:107 */ 191 /* bits 127:107 */
192}; 192};
193 193
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
new file mode 100644
index 000000000000..593636275238
--- /dev/null
+++ b/arch/x86/include/asm/virtext.h
@@ -0,0 +1,132 @@
1/* CPU virtualization extensions handling
2 *
3 * This should carry the code for handling CPU virtualization extensions
4 * that needs to live in the kernel core.
5 *
6 * Author: Eduardo Habkost <ehabkost@redhat.com>
7 *
8 * Copyright (C) 2008, Red Hat Inc.
9 *
10 * Contains code from KVM, Copyright (C) 2006 Qumranet, Inc.
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2. See
13 * the COPYING file in the top-level directory.
14 */
15#ifndef _ASM_X86_VIRTEX_H
16#define _ASM_X86_VIRTEX_H
17
18#include <asm/processor.h>
19#include <asm/system.h>
20
21#include <asm/vmx.h>
22#include <asm/svm.h>
23
24/*
25 * VMX functions:
26 */
27
28static inline int cpu_has_vmx(void)
29{
30 unsigned long ecx = cpuid_ecx(1);
31 return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
32}
33
34
35/** Disable VMX on the current CPU
36 *
37 * vmxoff causes a undefined-opcode exception if vmxon was not run
38 * on the CPU previously. Only call this function if you know VMX
39 * is enabled.
40 */
41static inline void cpu_vmxoff(void)
42{
43 asm volatile (ASM_VMX_VMXOFF : : : "cc");
44 write_cr4(read_cr4() & ~X86_CR4_VMXE);
45}
46
47static inline int cpu_vmx_enabled(void)
48{
49 return read_cr4() & X86_CR4_VMXE;
50}
51
52/** Disable VMX if it is enabled on the current CPU
53 *
54 * You shouldn't call this if cpu_has_vmx() returns 0.
55 */
56static inline void __cpu_emergency_vmxoff(void)
57{
58 if (cpu_vmx_enabled())
59 cpu_vmxoff();
60}
61
62/** Disable VMX if it is supported and enabled on the current CPU
63 */
64static inline void cpu_emergency_vmxoff(void)
65{
66 if (cpu_has_vmx())
67 __cpu_emergency_vmxoff();
68}
69
70
71
72
73/*
74 * SVM functions:
75 */
76
77/** Check if the CPU has SVM support
78 *
79 * You can use the 'msg' arg to get a message describing the problem,
80 * if the function returns zero. Simply pass NULL if you are not interested
81 * on the messages; gcc should take care of not generating code for
82 * the messages on this case.
83 */
84static inline int cpu_has_svm(const char **msg)
85{
86 uint32_t eax, ebx, ecx, edx;
87
88 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
89 if (msg)
90 *msg = "not amd";
91 return 0;
92 }
93
94 cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
95 if (eax < SVM_CPUID_FUNC) {
96 if (msg)
97 *msg = "can't execute cpuid_8000000a";
98 return 0;
99 }
100
101 cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
102 if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) {
103 if (msg)
104 *msg = "svm not available";
105 return 0;
106 }
107 return 1;
108}
109
110
111/** Disable SVM on the current CPU
112 *
113 * You should call this only if cpu_has_svm() returned true.
114 */
115static inline void cpu_svm_disable(void)
116{
117 uint64_t efer;
118
119 wrmsrl(MSR_VM_HSAVE_PA, 0);
120 rdmsrl(MSR_EFER, efer);
121 wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
122}
123
124/** Makes sure SVM is disabled, if it is supported on the CPU
125 */
126static inline void cpu_emergency_svm_disable(void)
127{
128 if (cpu_has_svm(NULL))
129 cpu_svm_disable();
130}
131
132#endif /* _ASM_X86_VIRTEX_H */
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/include/asm/vmx.h
index ec5edc339da6..d0238e6151d8 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -63,10 +63,13 @@
63 63
64#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 64#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200
65#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 65#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000
66#define VM_EXIT_SAVE_IA32_PAT 0x00040000
67#define VM_EXIT_LOAD_IA32_PAT 0x00080000
66 68
67#define VM_ENTRY_IA32E_MODE 0x00000200 69#define VM_ENTRY_IA32E_MODE 0x00000200
68#define VM_ENTRY_SMM 0x00000400 70#define VM_ENTRY_SMM 0x00000400
69#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 71#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800
72#define VM_ENTRY_LOAD_IA32_PAT 0x00004000
70 73
71/* VMCS Encodings */ 74/* VMCS Encodings */
72enum vmcs_field { 75enum vmcs_field {
@@ -112,6 +115,8 @@ enum vmcs_field {
112 VMCS_LINK_POINTER_HIGH = 0x00002801, 115 VMCS_LINK_POINTER_HIGH = 0x00002801,
113 GUEST_IA32_DEBUGCTL = 0x00002802, 116 GUEST_IA32_DEBUGCTL = 0x00002802,
114 GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, 117 GUEST_IA32_DEBUGCTL_HIGH = 0x00002803,
118 GUEST_IA32_PAT = 0x00002804,
119 GUEST_IA32_PAT_HIGH = 0x00002805,
115 GUEST_PDPTR0 = 0x0000280a, 120 GUEST_PDPTR0 = 0x0000280a,
116 GUEST_PDPTR0_HIGH = 0x0000280b, 121 GUEST_PDPTR0_HIGH = 0x0000280b,
117 GUEST_PDPTR1 = 0x0000280c, 122 GUEST_PDPTR1 = 0x0000280c,
@@ -120,6 +125,8 @@ enum vmcs_field {
120 GUEST_PDPTR2_HIGH = 0x0000280f, 125 GUEST_PDPTR2_HIGH = 0x0000280f,
121 GUEST_PDPTR3 = 0x00002810, 126 GUEST_PDPTR3 = 0x00002810,
122 GUEST_PDPTR3_HIGH = 0x00002811, 127 GUEST_PDPTR3_HIGH = 0x00002811,
128 HOST_IA32_PAT = 0x00002c00,
129 HOST_IA32_PAT_HIGH = 0x00002c01,
123 PIN_BASED_VM_EXEC_CONTROL = 0x00004000, 130 PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
124 CPU_BASED_VM_EXEC_CONTROL = 0x00004002, 131 CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
125 EXCEPTION_BITMAP = 0x00004004, 132 EXCEPTION_BITMAP = 0x00004004,
@@ -331,8 +338,9 @@ enum vmcs_field {
331 338
332#define AR_RESERVD_MASK 0xfffe0f00 339#define AR_RESERVD_MASK 0xfffe0f00
333 340
334#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 341#define TSS_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 0)
335#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10 342#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 1)
343#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 2)
336 344
337#define VMX_NR_VPIDS (1 << 16) 345#define VMX_NR_VPIDS (1 << 16)
338#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 346#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1
@@ -356,4 +364,19 @@ enum vmcs_field {
356 364
357#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul 365#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
358 366
367
368#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30"
369#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2"
370#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3"
371#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30"
372#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0"
373#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0"
374#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4"
375#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4"
376#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30"
377#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
378#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
379
380
381
359#endif 382#endif
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 2e2da717b350..658e29e0f49b 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1296,7 +1296,7 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask)
1296 * we don't need to preallocate the protection domains anymore. 1296 * we don't need to preallocate the protection domains anymore.
1297 * For now we have to. 1297 * For now we have to.
1298 */ 1298 */
1299void prealloc_protection_domains(void) 1299static void prealloc_protection_domains(void)
1300{ 1300{
1301 struct pci_dev *dev = NULL; 1301 struct pci_dev *dev = NULL;
1302 struct dma_ops_domain *dma_dom; 1302 struct dma_ops_domain *dma_dom;
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index c625800c55ca..fb85e8d466cc 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -243,7 +243,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
243} 243}
244 244
245/* Function to enable the hardware */ 245/* Function to enable the hardware */
246void __init iommu_enable(struct amd_iommu *iommu) 246static void __init iommu_enable(struct amd_iommu *iommu)
247{ 247{
248 printk(KERN_INFO "AMD IOMMU: Enabling IOMMU " 248 printk(KERN_INFO "AMD IOMMU: Enabling IOMMU "
249 "at %02x:%02x.%x cap 0x%hx\n", 249 "at %02x:%02x.%x cap 0x%hx\n",
@@ -256,7 +256,7 @@ void __init iommu_enable(struct amd_iommu *iommu)
256} 256}
257 257
258/* Function to enable IOMMU event logging and event interrupts */ 258/* Function to enable IOMMU event logging and event interrupts */
259void __init iommu_enable_event_logging(struct amd_iommu *iommu) 259static void __init iommu_enable_event_logging(struct amd_iommu *iommu)
260{ 260{
261 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); 261 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
262 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); 262 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index b5229affb953..d652515e2855 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -98,8 +98,8 @@ __setup("apicpmtimer", setup_apicpmtimer);
98#ifdef HAVE_X2APIC 98#ifdef HAVE_X2APIC
99int x2apic; 99int x2apic;
100/* x2apic enabled before OS handover */ 100/* x2apic enabled before OS handover */
101int x2apic_preenabled; 101static int x2apic_preenabled;
102int disable_x2apic; 102static int disable_x2apic;
103static __init int setup_nox2apic(char *str) 103static __init int setup_nox2apic(char *str)
104{ 104{
105 disable_x2apic = 1; 105 disable_x2apic = 1;
@@ -119,8 +119,6 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
119 119
120int first_system_vector = 0xfe; 120int first_system_vector = 0xfe;
121 121
122char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
123
124/* 122/*
125 * Debug level, exported for io_apic.c 123 * Debug level, exported for io_apic.c
126 */ 124 */
@@ -142,7 +140,7 @@ static int lapic_next_event(unsigned long delta,
142 struct clock_event_device *evt); 140 struct clock_event_device *evt);
143static void lapic_timer_setup(enum clock_event_mode mode, 141static void lapic_timer_setup(enum clock_event_mode mode,
144 struct clock_event_device *evt); 142 struct clock_event_device *evt);
145static void lapic_timer_broadcast(cpumask_t mask); 143static void lapic_timer_broadcast(const cpumask_t *mask);
146static void apic_pm_activate(void); 144static void apic_pm_activate(void);
147 145
148/* 146/*
@@ -228,7 +226,7 @@ void xapic_icr_write(u32 low, u32 id)
228 apic_write(APIC_ICR, low); 226 apic_write(APIC_ICR, low);
229} 227}
230 228
231u64 xapic_icr_read(void) 229static u64 xapic_icr_read(void)
232{ 230{
233 u32 icr1, icr2; 231 u32 icr1, icr2;
234 232
@@ -268,7 +266,7 @@ void x2apic_icr_write(u32 low, u32 id)
268 wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); 266 wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
269} 267}
270 268
271u64 x2apic_icr_read(void) 269static u64 x2apic_icr_read(void)
272{ 270{
273 unsigned long val; 271 unsigned long val;
274 272
@@ -455,7 +453,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
455/* 453/*
456 * Local APIC timer broadcast function 454 * Local APIC timer broadcast function
457 */ 455 */
458static void lapic_timer_broadcast(cpumask_t mask) 456static void lapic_timer_broadcast(const cpumask_t *mask)
459{ 457{
460#ifdef CONFIG_SMP 458#ifdef CONFIG_SMP
461 send_IPI_mask(mask, LOCAL_TIMER_VECTOR); 459 send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
@@ -471,7 +469,7 @@ static void __cpuinit setup_APIC_timer(void)
471 struct clock_event_device *levt = &__get_cpu_var(lapic_events); 469 struct clock_event_device *levt = &__get_cpu_var(lapic_events);
472 470
473 memcpy(levt, &lapic_clockevent, sizeof(*levt)); 471 memcpy(levt, &lapic_clockevent, sizeof(*levt));
474 levt->cpumask = cpumask_of_cpu(smp_processor_id()); 472 levt->cpumask = cpumask_of(smp_processor_id());
475 473
476 clockevents_register_device(levt); 474 clockevents_register_device(levt);
477} 475}
@@ -1807,28 +1805,32 @@ void disconnect_bsp_APIC(int virt_wire_setup)
1807void __cpuinit generic_processor_info(int apicid, int version) 1805void __cpuinit generic_processor_info(int apicid, int version)
1808{ 1806{
1809 int cpu; 1807 int cpu;
1810 cpumask_t tmp_map;
1811 1808
1812 /* 1809 /*
1813 * Validate version 1810 * Validate version
1814 */ 1811 */
1815 if (version == 0x0) { 1812 if (version == 0x0) {
1816 pr_warning("BIOS bug, APIC version is 0 for CPU#%d! " 1813 pr_warning("BIOS bug, APIC version is 0 for CPU#%d! "
1817 "fixing up to 0x10. (tell your hw vendor)\n", 1814 "fixing up to 0x10. (tell your hw vendor)\n",
1818 version); 1815 version);
1819 version = 0x10; 1816 version = 0x10;
1820 } 1817 }
1821 apic_version[apicid] = version; 1818 apic_version[apicid] = version;
1822 1819
1823 if (num_processors >= NR_CPUS) { 1820 if (num_processors >= nr_cpu_ids) {
1824 pr_warning("WARNING: NR_CPUS limit of %i reached." 1821 int max = nr_cpu_ids;
1825 " Processor ignored.\n", NR_CPUS); 1822 int thiscpu = max + disabled_cpus;
1823
1824 pr_warning(
1825 "ACPI: NR_CPUS/possible_cpus limit of %i reached."
1826 " Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
1827
1828 disabled_cpus++;
1826 return; 1829 return;
1827 } 1830 }
1828 1831
1829 num_processors++; 1832 num_processors++;
1830 cpus_complement(tmp_map, cpu_present_map); 1833 cpu = cpumask_next_zero(-1, cpu_present_mask);
1831 cpu = first_cpu(tmp_map);
1832 1834
1833 physid_set(apicid, phys_cpu_present_map); 1835 physid_set(apicid, phys_cpu_present_map);
1834 if (apicid == boot_cpu_physical_apicid) { 1836 if (apicid == boot_cpu_physical_apicid) {
@@ -1878,8 +1880,8 @@ void __cpuinit generic_processor_info(int apicid, int version)
1878 } 1880 }
1879#endif 1881#endif
1880 1882
1881 cpu_set(cpu, cpu_possible_map); 1883 set_cpu_possible(cpu, true);
1882 cpu_set(cpu, cpu_present_map); 1884 set_cpu_present(cpu, true);
1883} 1885}
1884 1886
1885#ifdef CONFIG_X86_64 1887#ifdef CONFIG_X86_64
@@ -2081,7 +2083,7 @@ __cpuinit int apic_is_clustered_box(void)
2081 bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); 2083 bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
2082 bitmap_zero(clustermap, NUM_APIC_CLUSTERS); 2084 bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
2083 2085
2084 for (i = 0; i < NR_CPUS; i++) { 2086 for (i = 0; i < nr_cpu_ids; i++) {
2085 /* are we being called early in kernel startup? */ 2087 /* are we being called early in kernel startup? */
2086 if (bios_cpu_apicid) { 2088 if (bios_cpu_apicid) {
2087 id = bios_cpu_apicid[i]; 2089 id = bios_cpu_apicid[i];
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index 2a0a2a3cac26..f63882728d91 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -25,7 +25,7 @@
25#include <asm/uv/bios.h> 25#include <asm/uv/bios.h>
26#include <asm/uv/uv_hub.h> 26#include <asm/uv/uv_hub.h>
27 27
28struct uv_systab uv_systab; 28static struct uv_systab uv_systab;
29 29
30s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) 30s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
31{ 31{
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 68b5d8681cbb..c6ecda64f5f1 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -534,31 +534,16 @@ static void __cpuinit free_cache_attributes(unsigned int cpu)
534 per_cpu(cpuid4_info, cpu) = NULL; 534 per_cpu(cpuid4_info, cpu) = NULL;
535} 535}
536 536
537static int __cpuinit detect_cache_attributes(unsigned int cpu) 537static void get_cpu_leaves(void *_retval)
538{ 538{
539 struct _cpuid4_info *this_leaf; 539 int j, *retval = _retval, cpu = smp_processor_id();
540 unsigned long j;
541 int retval;
542 cpumask_t oldmask;
543
544 if (num_cache_leaves == 0)
545 return -ENOENT;
546
547 per_cpu(cpuid4_info, cpu) = kzalloc(
548 sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
549 if (per_cpu(cpuid4_info, cpu) == NULL)
550 return -ENOMEM;
551
552 oldmask = current->cpus_allowed;
553 retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
554 if (retval)
555 goto out;
556 540
557 /* Do cpuid and store the results */ 541 /* Do cpuid and store the results */
558 for (j = 0; j < num_cache_leaves; j++) { 542 for (j = 0; j < num_cache_leaves; j++) {
543 struct _cpuid4_info *this_leaf;
559 this_leaf = CPUID4_INFO_IDX(cpu, j); 544 this_leaf = CPUID4_INFO_IDX(cpu, j);
560 retval = cpuid4_cache_lookup(j, this_leaf); 545 *retval = cpuid4_cache_lookup(j, this_leaf);
561 if (unlikely(retval < 0)) { 546 if (unlikely(*retval < 0)) {
562 int i; 547 int i;
563 548
564 for (i = 0; i < j; i++) 549 for (i = 0; i < j; i++)
@@ -567,9 +552,21 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
567 } 552 }
568 cache_shared_cpu_map_setup(cpu, j); 553 cache_shared_cpu_map_setup(cpu, j);
569 } 554 }
570 set_cpus_allowed_ptr(current, &oldmask); 555}
556
557static int __cpuinit detect_cache_attributes(unsigned int cpu)
558{
559 int retval;
560
561 if (num_cache_leaves == 0)
562 return -ENOENT;
563
564 per_cpu(cpuid4_info, cpu) = kzalloc(
565 sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
566 if (per_cpu(cpuid4_info, cpu) == NULL)
567 return -ENOMEM;
571 568
572out: 569 smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
573 if (retval) { 570 if (retval) {
574 kfree(per_cpu(cpuid4_info, cpu)); 571 kfree(per_cpu(cpuid4_info, cpu));
575 per_cpu(cpuid4_info, cpu) = NULL; 572 per_cpu(cpuid4_info, cpu) = NULL;
@@ -626,8 +623,8 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
626 cpumask_t *mask = &this_leaf->shared_cpu_map; 623 cpumask_t *mask = &this_leaf->shared_cpu_map;
627 624
628 n = type? 625 n = type?
629 cpulist_scnprintf(buf, len-2, *mask): 626 cpulist_scnprintf(buf, len-2, mask) :
630 cpumask_scnprintf(buf, len-2, *mask); 627 cpumask_scnprintf(buf, len-2, mask);
631 buf[n++] = '\n'; 628 buf[n++] = '\n';
632 buf[n] = '\0'; 629 buf[n] = '\0';
633 } 630 }
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 748c8f9e7a05..a5a5e0530370 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -83,34 +83,41 @@ static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
83 * CPU Initialization 83 * CPU Initialization
84 */ 84 */
85 85
86struct thresh_restart {
87 struct threshold_block *b;
88 int reset;
89 u16 old_limit;
90};
91
86/* must be called with correct cpu affinity */ 92/* must be called with correct cpu affinity */
87static void threshold_restart_bank(struct threshold_block *b, 93static long threshold_restart_bank(void *_tr)
88 int reset, u16 old_limit)
89{ 94{
95 struct thresh_restart *tr = _tr;
90 u32 mci_misc_hi, mci_misc_lo; 96 u32 mci_misc_hi, mci_misc_lo;
91 97
92 rdmsr(b->address, mci_misc_lo, mci_misc_hi); 98 rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
93 99
94 if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) 100 if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
95 reset = 1; /* limit cannot be lower than err count */ 101 tr->reset = 1; /* limit cannot be lower than err count */
96 102
97 if (reset) { /* reset err count and overflow bit */ 103 if (tr->reset) { /* reset err count and overflow bit */
98 mci_misc_hi = 104 mci_misc_hi =
99 (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | 105 (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
100 (THRESHOLD_MAX - b->threshold_limit); 106 (THRESHOLD_MAX - tr->b->threshold_limit);
101 } else if (old_limit) { /* change limit w/o reset */ 107 } else if (tr->old_limit) { /* change limit w/o reset */
102 int new_count = (mci_misc_hi & THRESHOLD_MAX) + 108 int new_count = (mci_misc_hi & THRESHOLD_MAX) +
103 (old_limit - b->threshold_limit); 109 (tr->old_limit - tr->b->threshold_limit);
104 mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | 110 mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
105 (new_count & THRESHOLD_MAX); 111 (new_count & THRESHOLD_MAX);
106 } 112 }
107 113
108 b->interrupt_enable ? 114 tr->b->interrupt_enable ?
109 (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : 115 (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
110 (mci_misc_hi &= ~MASK_INT_TYPE_HI); 116 (mci_misc_hi &= ~MASK_INT_TYPE_HI);
111 117
112 mci_misc_hi |= MASK_COUNT_EN_HI; 118 mci_misc_hi |= MASK_COUNT_EN_HI;
113 wrmsr(b->address, mci_misc_lo, mci_misc_hi); 119 wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
120 return 0;
114} 121}
115 122
116/* cpu init entry point, called from mce.c with preempt off */ 123/* cpu init entry point, called from mce.c with preempt off */
@@ -120,6 +127,7 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
120 unsigned int cpu = smp_processor_id(); 127 unsigned int cpu = smp_processor_id();
121 u8 lvt_off; 128 u8 lvt_off;
122 u32 low = 0, high = 0, address = 0; 129 u32 low = 0, high = 0, address = 0;
130 struct thresh_restart tr;
123 131
124 for (bank = 0; bank < NR_BANKS; ++bank) { 132 for (bank = 0; bank < NR_BANKS; ++bank) {
125 for (block = 0; block < NR_BLOCKS; ++block) { 133 for (block = 0; block < NR_BLOCKS; ++block) {
@@ -162,7 +170,10 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
162 wrmsr(address, low, high); 170 wrmsr(address, low, high);
163 171
164 threshold_defaults.address = address; 172 threshold_defaults.address = address;
165 threshold_restart_bank(&threshold_defaults, 0, 0); 173 tr.b = &threshold_defaults;
174 tr.reset = 0;
175 tr.old_limit = 0;
176 threshold_restart_bank(&tr);
166 } 177 }
167 } 178 }
168} 179}
@@ -251,20 +262,6 @@ struct threshold_attr {
251 ssize_t(*store) (struct threshold_block *, const char *, size_t count); 262 ssize_t(*store) (struct threshold_block *, const char *, size_t count);
252}; 263};
253 264
254static void affinity_set(unsigned int cpu, cpumask_t *oldmask,
255 cpumask_t *newmask)
256{
257 *oldmask = current->cpus_allowed;
258 cpus_clear(*newmask);
259 cpu_set(cpu, *newmask);
260 set_cpus_allowed_ptr(current, newmask);
261}
262
263static void affinity_restore(const cpumask_t *oldmask)
264{
265 set_cpus_allowed_ptr(current, oldmask);
266}
267
268#define SHOW_FIELDS(name) \ 265#define SHOW_FIELDS(name) \
269static ssize_t show_ ## name(struct threshold_block * b, char *buf) \ 266static ssize_t show_ ## name(struct threshold_block * b, char *buf) \
270{ \ 267{ \
@@ -277,15 +274,16 @@ static ssize_t store_interrupt_enable(struct threshold_block *b,
277 const char *buf, size_t count) 274 const char *buf, size_t count)
278{ 275{
279 char *end; 276 char *end;
280 cpumask_t oldmask, newmask; 277 struct thresh_restart tr;
281 unsigned long new = simple_strtoul(buf, &end, 0); 278 unsigned long new = simple_strtoul(buf, &end, 0);
282 if (end == buf) 279 if (end == buf)
283 return -EINVAL; 280 return -EINVAL;
284 b->interrupt_enable = !!new; 281 b->interrupt_enable = !!new;
285 282
286 affinity_set(b->cpu, &oldmask, &newmask); 283 tr.b = b;
287 threshold_restart_bank(b, 0, 0); 284 tr.reset = 0;
288 affinity_restore(&oldmask); 285 tr.old_limit = 0;
286 work_on_cpu(b->cpu, threshold_restart_bank, &tr);
289 287
290 return end - buf; 288 return end - buf;
291} 289}
@@ -294,8 +292,7 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
294 const char *buf, size_t count) 292 const char *buf, size_t count)
295{ 293{
296 char *end; 294 char *end;
297 cpumask_t oldmask, newmask; 295 struct thresh_restart tr;
298 u16 old;
299 unsigned long new = simple_strtoul(buf, &end, 0); 296 unsigned long new = simple_strtoul(buf, &end, 0);
300 if (end == buf) 297 if (end == buf)
301 return -EINVAL; 298 return -EINVAL;
@@ -303,34 +300,36 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
303 new = THRESHOLD_MAX; 300 new = THRESHOLD_MAX;
304 if (new < 1) 301 if (new < 1)
305 new = 1; 302 new = 1;
306 old = b->threshold_limit; 303 tr.old_limit = b->threshold_limit;
307 b->threshold_limit = new; 304 b->threshold_limit = new;
305 tr.b = b;
306 tr.reset = 0;
308 307
309 affinity_set(b->cpu, &oldmask, &newmask); 308 work_on_cpu(b->cpu, threshold_restart_bank, &tr);
310 threshold_restart_bank(b, 0, old);
311 affinity_restore(&oldmask);
312 309
313 return end - buf; 310 return end - buf;
314} 311}
315 312
316static ssize_t show_error_count(struct threshold_block *b, char *buf) 313static long local_error_count(void *_b)
317{ 314{
318 u32 high, low; 315 struct threshold_block *b = _b;
319 cpumask_t oldmask, newmask; 316 u32 low, high;
320 affinity_set(b->cpu, &oldmask, &newmask); 317
321 rdmsr(b->address, low, high); 318 rdmsr(b->address, low, high);
322 affinity_restore(&oldmask); 319 return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit);
323 return sprintf(buf, "%x\n", 320}
324 (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit)); 321
322static ssize_t show_error_count(struct threshold_block *b, char *buf)
323{
324 return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b));
325} 325}
326 326
327static ssize_t store_error_count(struct threshold_block *b, 327static ssize_t store_error_count(struct threshold_block *b,
328 const char *buf, size_t count) 328 const char *buf, size_t count)
329{ 329{
330 cpumask_t oldmask, newmask; 330 struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 };
331 affinity_set(b->cpu, &oldmask, &newmask); 331
332 threshold_restart_bank(b, 1, 0); 332 work_on_cpu(b->cpu, threshold_restart_bank, &tr);
333 affinity_restore(&oldmask);
334 return 1; 333 return 1;
335} 334}
336 335
@@ -463,12 +462,19 @@ out_free:
463 return err; 462 return err;
464} 463}
465 464
465static long local_allocate_threshold_blocks(void *_bank)
466{
467 unsigned int *bank = _bank;
468
469 return allocate_threshold_blocks(smp_processor_id(), *bank, 0,
470 MSR_IA32_MC0_MISC + *bank * 4);
471}
472
466/* symlinks sibling shared banks to first core. first core owns dir/files. */ 473/* symlinks sibling shared banks to first core. first core owns dir/files. */
467static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) 474static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
468{ 475{
469 int i, err = 0; 476 int i, err = 0;
470 struct threshold_bank *b = NULL; 477 struct threshold_bank *b = NULL;
471 cpumask_t oldmask, newmask;
472 char name[32]; 478 char name[32];
473 479
474 sprintf(name, "threshold_bank%i", bank); 480 sprintf(name, "threshold_bank%i", bank);
@@ -519,11 +525,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
519 525
520 per_cpu(threshold_banks, cpu)[bank] = b; 526 per_cpu(threshold_banks, cpu)[bank] = b;
521 527
522 affinity_set(cpu, &oldmask, &newmask); 528 err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank);
523 err = allocate_threshold_blocks(cpu, bank, 0,
524 MSR_IA32_MC0_MISC + bank * 4);
525 affinity_restore(&oldmask);
526
527 if (err) 529 if (err)
528 goto out_free; 530 goto out_free;
529 531
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 4e8d77f01eeb..b59ddcc88cd8 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -14,14 +14,6 @@
14#include <asm/pat.h> 14#include <asm/pat.h>
15#include "mtrr.h" 15#include "mtrr.h"
16 16
17struct mtrr_state {
18 struct mtrr_var_range var_ranges[MAX_VAR_RANGES];
19 mtrr_type fixed_ranges[NUM_FIXED_RANGES];
20 unsigned char enabled;
21 unsigned char have_fixed;
22 mtrr_type def_type;
23};
24
25struct fixed_range_block { 17struct fixed_range_block {
26 int base_msr; /* start address of an MTRR block */ 18 int base_msr; /* start address of an MTRR block */
27 int ranges; /* number of MTRRs in this block */ 19 int ranges; /* number of MTRRs in this block */
@@ -35,10 +27,12 @@ static struct fixed_range_block fixed_range_blocks[] = {
35}; 27};
36 28
37static unsigned long smp_changes_mask; 29static unsigned long smp_changes_mask;
38static struct mtrr_state mtrr_state = {};
39static int mtrr_state_set; 30static int mtrr_state_set;
40u64 mtrr_tom2; 31u64 mtrr_tom2;
41 32
33struct mtrr_state_type mtrr_state = {};
34EXPORT_SYMBOL_GPL(mtrr_state);
35
42#undef MODULE_PARAM_PREFIX 36#undef MODULE_PARAM_PREFIX
43#define MODULE_PARAM_PREFIX "mtrr." 37#define MODULE_PARAM_PREFIX "mtrr."
44 38
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 1159e269e596..d259e5d2e054 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -49,7 +49,7 @@
49 49
50u32 num_var_ranges = 0; 50u32 num_var_ranges = 0;
51 51
52unsigned int mtrr_usage_table[MAX_VAR_RANGES]; 52unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
53static DEFINE_MUTEX(mtrr_mutex); 53static DEFINE_MUTEX(mtrr_mutex);
54 54
55u64 size_or_mask, size_and_mask; 55u64 size_or_mask, size_and_mask;
@@ -574,7 +574,7 @@ struct mtrr_value {
574 unsigned long lsize; 574 unsigned long lsize;
575}; 575};
576 576
577static struct mtrr_value mtrr_state[MAX_VAR_RANGES]; 577static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES];
578 578
579static int mtrr_save(struct sys_device * sysdev, pm_message_t state) 579static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
580{ 580{
@@ -824,16 +824,14 @@ static int enable_mtrr_cleanup __initdata =
824 824
825static int __init disable_mtrr_cleanup_setup(char *str) 825static int __init disable_mtrr_cleanup_setup(char *str)
826{ 826{
827 if (enable_mtrr_cleanup != -1) 827 enable_mtrr_cleanup = 0;
828 enable_mtrr_cleanup = 0;
829 return 0; 828 return 0;
830} 829}
831early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); 830early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
832 831
833static int __init enable_mtrr_cleanup_setup(char *str) 832static int __init enable_mtrr_cleanup_setup(char *str)
834{ 833{
835 if (enable_mtrr_cleanup != -1) 834 enable_mtrr_cleanup = 1;
836 enable_mtrr_cleanup = 1;
837 return 0; 835 return 0;
838} 836}
839early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); 837early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 2dc4ec656b23..ffd60409cc6d 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -8,11 +8,6 @@
8#define MTRRcap_MSR 0x0fe 8#define MTRRcap_MSR 0x0fe
9#define MTRRdefType_MSR 0x2ff 9#define MTRRdefType_MSR 0x2ff
10 10
11#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
12#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
13
14#define NUM_FIXED_RANGES 88
15#define MAX_VAR_RANGES 256
16#define MTRRfix64K_00000_MSR 0x250 11#define MTRRfix64K_00000_MSR 0x250
17#define MTRRfix16K_80000_MSR 0x258 12#define MTRRfix16K_80000_MSR 0x258
18#define MTRRfix16K_A0000_MSR 0x259 13#define MTRRfix16K_A0000_MSR 0x259
@@ -29,11 +24,7 @@
29#define MTRR_CHANGE_MASK_VARIABLE 0x02 24#define MTRR_CHANGE_MASK_VARIABLE 0x02
30#define MTRR_CHANGE_MASK_DEFTYPE 0x04 25#define MTRR_CHANGE_MASK_DEFTYPE 0x04
31 26
32/* In the Intel processor's MTRR interface, the MTRR type is always held in 27extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
33 an 8 bit field: */
34typedef u8 mtrr_type;
35
36extern unsigned int mtrr_usage_table[MAX_VAR_RANGES];
37 28
38struct mtrr_ops { 29struct mtrr_ops {
39 u32 vendor; 30 u32 vendor;
@@ -70,13 +61,6 @@ struct set_mtrr_context {
70 u32 ccr3; 61 u32 ccr3;
71}; 62};
72 63
73struct mtrr_var_range {
74 u32 base_lo;
75 u32 base_hi;
76 u32 mask_lo;
77 u32 mask_hi;
78};
79
80void set_mtrr_done(struct set_mtrr_context *ctxt); 64void set_mtrr_done(struct set_mtrr_context *ctxt);
81void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); 65void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
82void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); 66void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 72cefd1e649b..85d28d53f5d3 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -39,10 +39,10 @@
39#include <linux/device.h> 39#include <linux/device.h>
40#include <linux/cpu.h> 40#include <linux/cpu.h>
41#include <linux/notifier.h> 41#include <linux/notifier.h>
42#include <linux/uaccess.h>
42 43
43#include <asm/processor.h> 44#include <asm/processor.h>
44#include <asm/msr.h> 45#include <asm/msr.h>
45#include <asm/uaccess.h>
46#include <asm/system.h> 46#include <asm/system.h>
47 47
48static struct class *cpuid_class; 48static struct class *cpuid_class;
@@ -82,7 +82,7 @@ static loff_t cpuid_seek(struct file *file, loff_t offset, int orig)
82} 82}
83 83
84static ssize_t cpuid_read(struct file *file, char __user *buf, 84static ssize_t cpuid_read(struct file *file, char __user *buf,
85 size_t count, loff_t * ppos) 85 size_t count, loff_t *ppos)
86{ 86{
87 char __user *tmp = buf; 87 char __user *tmp = buf;
88 struct cpuid_regs cmd; 88 struct cpuid_regs cmd;
@@ -117,7 +117,7 @@ static int cpuid_open(struct inode *inode, struct file *file)
117 unsigned int cpu; 117 unsigned int cpu;
118 struct cpuinfo_x86 *c; 118 struct cpuinfo_x86 *c;
119 int ret = 0; 119 int ret = 0;
120 120
121 lock_kernel(); 121 lock_kernel();
122 122
123 cpu = iminor(file->f_path.dentry->d_inode); 123 cpu = iminor(file->f_path.dentry->d_inode);
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index d84a852e4cd7..c689d19e35ab 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -26,6 +26,7 @@
26#include <linux/kdebug.h> 26#include <linux/kdebug.h>
27#include <asm/smp.h> 27#include <asm/smp.h>
28#include <asm/reboot.h> 28#include <asm/reboot.h>
29#include <asm/virtext.h>
29 30
30#include <mach_ipi.h> 31#include <mach_ipi.h>
31 32
@@ -49,6 +50,15 @@ static void kdump_nmi_callback(int cpu, struct die_args *args)
49#endif 50#endif
50 crash_save_cpu(regs, cpu); 51 crash_save_cpu(regs, cpu);
51 52
53 /* Disable VMX or SVM if needed.
54 *
55 * We need to disable virtualization on all CPUs.
56 * Having VMX or SVM enabled on any CPU may break rebooting
57 * after the kdump kernel has finished its task.
58 */
59 cpu_emergency_vmxoff();
60 cpu_emergency_svm_disable();
61
52 disable_local_APIC(); 62 disable_local_APIC();
53} 63}
54 64
@@ -80,6 +90,14 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
80 local_irq_disable(); 90 local_irq_disable();
81 91
82 kdump_nmi_shootdown_cpus(); 92 kdump_nmi_shootdown_cpus();
93
94 /* Booting kdump kernel with VMX or SVM enabled won't work,
95 * because (among other limitations) we can't disable paging
96 * with the virt flags.
97 */
98 cpu_emergency_vmxoff();
99 cpu_emergency_svm_disable();
100
83 lapic_shutdown(); 101 lapic_shutdown();
84#if defined(CONFIG_X86_IO_APIC) 102#if defined(CONFIG_X86_IO_APIC)
85 disable_IO_APIC(); 103 disable_IO_APIC();
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 23b138e31e9c..504ad198e4ad 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -886,7 +886,7 @@ asmlinkage void early_printk(const char *fmt, ...)
886 va_list ap; 886 va_list ap;
887 887
888 va_start(ap, fmt); 888 va_start(ap, fmt);
889 n = vscnprintf(buf, 512, fmt, ap); 889 n = vscnprintf(buf, sizeof(buf), fmt, ap);
890 early_console->write(early_console, buf, n); 890 early_console->write(early_console, buf, n);
891 va_end(ap); 891 va_end(ap);
892} 892}
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index c0262791bda4..34185488e4fb 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -30,12 +30,12 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
30 return 1; 30 return 1;
31} 31}
32 32
33static cpumask_t flat_target_cpus(void) 33static const struct cpumask *flat_target_cpus(void)
34{ 34{
35 return cpu_online_map; 35 return cpu_online_mask;
36} 36}
37 37
38static cpumask_t flat_vector_allocation_domain(int cpu) 38static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
39{ 39{
40 /* Careful. Some cpus do not strictly honor the set of cpus 40 /* Careful. Some cpus do not strictly honor the set of cpus
41 * specified in the interrupt destination when using lowest 41 * specified in the interrupt destination when using lowest
@@ -45,8 +45,8 @@ static cpumask_t flat_vector_allocation_domain(int cpu)
45 * deliver interrupts to the wrong hyperthread when only one 45 * deliver interrupts to the wrong hyperthread when only one
46 * hyperthread was specified in the interrupt desitination. 46 * hyperthread was specified in the interrupt desitination.
47 */ 47 */
48 cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; 48 cpumask_clear(retmask);
49 return domain; 49 cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
50} 50}
51 51
52/* 52/*
@@ -69,9 +69,8 @@ static void flat_init_apic_ldr(void)
69 apic_write(APIC_LDR, val); 69 apic_write(APIC_LDR, val);
70} 70}
71 71
72static void flat_send_IPI_mask(cpumask_t cpumask, int vector) 72static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
73{ 73{
74 unsigned long mask = cpus_addr(cpumask)[0];
75 unsigned long flags; 74 unsigned long flags;
76 75
77 local_irq_save(flags); 76 local_irq_save(flags);
@@ -79,20 +78,41 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
79 local_irq_restore(flags); 78 local_irq_restore(flags);
80} 79}
81 80
81static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
82{
83 unsigned long mask = cpumask_bits(cpumask)[0];
84
85 _flat_send_IPI_mask(mask, vector);
86}
87
88static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
89 int vector)
90{
91 unsigned long mask = cpumask_bits(cpumask)[0];
92 int cpu = smp_processor_id();
93
94 if (cpu < BITS_PER_LONG)
95 clear_bit(cpu, &mask);
96 _flat_send_IPI_mask(mask, vector);
97}
98
82static void flat_send_IPI_allbutself(int vector) 99static void flat_send_IPI_allbutself(int vector)
83{ 100{
101 int cpu = smp_processor_id();
84#ifdef CONFIG_HOTPLUG_CPU 102#ifdef CONFIG_HOTPLUG_CPU
85 int hotplug = 1; 103 int hotplug = 1;
86#else 104#else
87 int hotplug = 0; 105 int hotplug = 0;
88#endif 106#endif
89 if (hotplug || vector == NMI_VECTOR) { 107 if (hotplug || vector == NMI_VECTOR) {
90 cpumask_t allbutme = cpu_online_map; 108 if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) {
109 unsigned long mask = cpumask_bits(cpu_online_mask)[0];
91 110
92 cpu_clear(smp_processor_id(), allbutme); 111 if (cpu < BITS_PER_LONG)
112 clear_bit(cpu, &mask);
93 113
94 if (!cpus_empty(allbutme)) 114 _flat_send_IPI_mask(mask, vector);
95 flat_send_IPI_mask(allbutme, vector); 115 }
96 } else if (num_online_cpus() > 1) { 116 } else if (num_online_cpus() > 1) {
97 __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); 117 __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
98 } 118 }
@@ -101,7 +121,7 @@ static void flat_send_IPI_allbutself(int vector)
101static void flat_send_IPI_all(int vector) 121static void flat_send_IPI_all(int vector)
102{ 122{
103 if (vector == NMI_VECTOR) 123 if (vector == NMI_VECTOR)
104 flat_send_IPI_mask(cpu_online_map, vector); 124 flat_send_IPI_mask(cpu_online_mask, vector);
105 else 125 else
106 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); 126 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
107} 127}
@@ -135,9 +155,18 @@ static int flat_apic_id_registered(void)
135 return physid_isset(read_xapic_id(), phys_cpu_present_map); 155 return physid_isset(read_xapic_id(), phys_cpu_present_map);
136} 156}
137 157
138static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) 158static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask)
159{
160 return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
161}
162
163static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
164 const struct cpumask *andmask)
139{ 165{
140 return cpus_addr(cpumask)[0] & APIC_ALL_CPUS; 166 unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
167 unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS;
168
169 return mask1 & mask2;
141} 170}
142 171
143static unsigned int phys_pkg_id(int index_msb) 172static unsigned int phys_pkg_id(int index_msb)
@@ -157,8 +186,10 @@ struct genapic apic_flat = {
157 .send_IPI_all = flat_send_IPI_all, 186 .send_IPI_all = flat_send_IPI_all,
158 .send_IPI_allbutself = flat_send_IPI_allbutself, 187 .send_IPI_allbutself = flat_send_IPI_allbutself,
159 .send_IPI_mask = flat_send_IPI_mask, 188 .send_IPI_mask = flat_send_IPI_mask,
189 .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
160 .send_IPI_self = apic_send_IPI_self, 190 .send_IPI_self = apic_send_IPI_self,
161 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, 191 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
192 .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
162 .phys_pkg_id = phys_pkg_id, 193 .phys_pkg_id = phys_pkg_id,
163 .get_apic_id = get_apic_id, 194 .get_apic_id = get_apic_id,
164 .set_apic_id = set_apic_id, 195 .set_apic_id = set_apic_id,
@@ -188,35 +219,39 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
188 return 0; 219 return 0;
189} 220}
190 221
191static cpumask_t physflat_target_cpus(void) 222static const struct cpumask *physflat_target_cpus(void)
192{ 223{
193 return cpu_online_map; 224 return cpu_online_mask;
194} 225}
195 226
196static cpumask_t physflat_vector_allocation_domain(int cpu) 227static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask)
197{ 228{
198 return cpumask_of_cpu(cpu); 229 cpumask_clear(retmask);
230 cpumask_set_cpu(cpu, retmask);
199} 231}
200 232
201static void physflat_send_IPI_mask(cpumask_t cpumask, int vector) 233static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)
202{ 234{
203 send_IPI_mask_sequence(cpumask, vector); 235 send_IPI_mask_sequence(cpumask, vector);
204} 236}
205 237
206static void physflat_send_IPI_allbutself(int vector) 238static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
239 int vector)
207{ 240{
208 cpumask_t allbutme = cpu_online_map; 241 send_IPI_mask_allbutself(cpumask, vector);
242}
209 243
210 cpu_clear(smp_processor_id(), allbutme); 244static void physflat_send_IPI_allbutself(int vector)
211 physflat_send_IPI_mask(allbutme, vector); 245{
246 send_IPI_mask_allbutself(cpu_online_mask, vector);
212} 247}
213 248
214static void physflat_send_IPI_all(int vector) 249static void physflat_send_IPI_all(int vector)
215{ 250{
216 physflat_send_IPI_mask(cpu_online_map, vector); 251 physflat_send_IPI_mask(cpu_online_mask, vector);
217} 252}
218 253
219static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) 254static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask)
220{ 255{
221 int cpu; 256 int cpu;
222 257
@@ -224,13 +259,31 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
224 * We're using fixed IRQ delivery, can only return one phys APIC ID. 259 * We're using fixed IRQ delivery, can only return one phys APIC ID.
225 * May as well be the first. 260 * May as well be the first.
226 */ 261 */
227 cpu = first_cpu(cpumask); 262 cpu = cpumask_first(cpumask);
228 if ((unsigned)cpu < nr_cpu_ids) 263 if ((unsigned)cpu < nr_cpu_ids)
229 return per_cpu(x86_cpu_to_apicid, cpu); 264 return per_cpu(x86_cpu_to_apicid, cpu);
230 else 265 else
231 return BAD_APICID; 266 return BAD_APICID;
232} 267}
233 268
269static unsigned int
270physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
271 const struct cpumask *andmask)
272{
273 int cpu;
274
275 /*
276 * We're using fixed IRQ delivery, can only return one phys APIC ID.
277 * May as well be the first.
278 */
279 for_each_cpu_and(cpu, cpumask, andmask)
280 if (cpumask_test_cpu(cpu, cpu_online_mask))
281 break;
282 if (cpu < nr_cpu_ids)
283 return per_cpu(x86_cpu_to_apicid, cpu);
284 return BAD_APICID;
285}
286
234struct genapic apic_physflat = { 287struct genapic apic_physflat = {
235 .name = "physical flat", 288 .name = "physical flat",
236 .acpi_madt_oem_check = physflat_acpi_madt_oem_check, 289 .acpi_madt_oem_check = physflat_acpi_madt_oem_check,
@@ -243,8 +296,10 @@ struct genapic apic_physflat = {
243 .send_IPI_all = physflat_send_IPI_all, 296 .send_IPI_all = physflat_send_IPI_all,
244 .send_IPI_allbutself = physflat_send_IPI_allbutself, 297 .send_IPI_allbutself = physflat_send_IPI_allbutself,
245 .send_IPI_mask = physflat_send_IPI_mask, 298 .send_IPI_mask = physflat_send_IPI_mask,
299 .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself,
246 .send_IPI_self = apic_send_IPI_self, 300 .send_IPI_self = apic_send_IPI_self,
247 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, 301 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
302 .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and,
248 .phys_pkg_id = phys_pkg_id, 303 .phys_pkg_id = phys_pkg_id,
249 .get_apic_id = get_apic_id, 304 .get_apic_id = get_apic_id,
250 .set_apic_id = set_apic_id, 305 .set_apic_id = set_apic_id,
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
index f6a2c8eb48a6..6ce497cc372d 100644
--- a/arch/x86/kernel/genx2apic_cluster.c
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -22,19 +22,18 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
22 22
23/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 23/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
24 24
25static cpumask_t x2apic_target_cpus(void) 25static const struct cpumask *x2apic_target_cpus(void)
26{ 26{
27 return cpumask_of_cpu(0); 27 return cpumask_of(0);
28} 28}
29 29
30/* 30/*
31 * for now each logical cpu is in its own vector allocation domain. 31 * for now each logical cpu is in its own vector allocation domain.
32 */ 32 */
33static cpumask_t x2apic_vector_allocation_domain(int cpu) 33static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
34{ 34{
35 cpumask_t domain = CPU_MASK_NONE; 35 cpumask_clear(retmask);
36 cpu_set(cpu, domain); 36 cpumask_set_cpu(cpu, retmask);
37 return domain;
38} 37}
39 38
40static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, 39static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
@@ -56,32 +55,53 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
56 * at once. We have 16 cpu's in a cluster. This will minimize IPI register 55 * at once. We have 16 cpu's in a cluster. This will minimize IPI register
57 * writes. 56 * writes.
58 */ 57 */
59static void x2apic_send_IPI_mask(cpumask_t mask, int vector) 58static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
60{ 59{
61 unsigned long flags; 60 unsigned long flags;
62 unsigned long query_cpu; 61 unsigned long query_cpu;
63 62
64 local_irq_save(flags); 63 local_irq_save(flags);
65 for_each_cpu_mask(query_cpu, mask) { 64 for_each_cpu(query_cpu, mask)
66 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu), 65 __x2apic_send_IPI_dest(
67 vector, APIC_DEST_LOGICAL); 66 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
68 } 67 vector, APIC_DEST_LOGICAL);
69 local_irq_restore(flags); 68 local_irq_restore(flags);
70} 69}
71 70
72static void x2apic_send_IPI_allbutself(int vector) 71static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
72 int vector)
73{ 73{
74 cpumask_t mask = cpu_online_map; 74 unsigned long flags;
75 unsigned long query_cpu;
76 unsigned long this_cpu = smp_processor_id();
75 77
76 cpu_clear(smp_processor_id(), mask); 78 local_irq_save(flags);
79 for_each_cpu(query_cpu, mask)
80 if (query_cpu != this_cpu)
81 __x2apic_send_IPI_dest(
82 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
83 vector, APIC_DEST_LOGICAL);
84 local_irq_restore(flags);
85}
86
87static void x2apic_send_IPI_allbutself(int vector)
88{
89 unsigned long flags;
90 unsigned long query_cpu;
91 unsigned long this_cpu = smp_processor_id();
77 92
78 if (!cpus_empty(mask)) 93 local_irq_save(flags);
79 x2apic_send_IPI_mask(mask, vector); 94 for_each_online_cpu(query_cpu)
95 if (query_cpu != this_cpu)
96 __x2apic_send_IPI_dest(
97 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
98 vector, APIC_DEST_LOGICAL);
99 local_irq_restore(flags);
80} 100}
81 101
82static void x2apic_send_IPI_all(int vector) 102static void x2apic_send_IPI_all(int vector)
83{ 103{
84 x2apic_send_IPI_mask(cpu_online_map, vector); 104 x2apic_send_IPI_mask(cpu_online_mask, vector);
85} 105}
86 106
87static int x2apic_apic_id_registered(void) 107static int x2apic_apic_id_registered(void)
@@ -89,21 +109,38 @@ static int x2apic_apic_id_registered(void)
89 return 1; 109 return 1;
90} 110}
91 111
92static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) 112static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
93{ 113{
94 int cpu; 114 int cpu;
95 115
96 /* 116 /*
97 * We're using fixed IRQ delivery, can only return one phys APIC ID. 117 * We're using fixed IRQ delivery, can only return one logical APIC ID.
98 * May as well be the first. 118 * May as well be the first.
99 */ 119 */
100 cpu = first_cpu(cpumask); 120 cpu = cpumask_first(cpumask);
101 if ((unsigned)cpu < NR_CPUS) 121 if ((unsigned)cpu < nr_cpu_ids)
102 return per_cpu(x86_cpu_to_logical_apicid, cpu); 122 return per_cpu(x86_cpu_to_logical_apicid, cpu);
103 else 123 else
104 return BAD_APICID; 124 return BAD_APICID;
105} 125}
106 126
127static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
128 const struct cpumask *andmask)
129{
130 int cpu;
131
132 /*
133 * We're using fixed IRQ delivery, can only return one logical APIC ID.
134 * May as well be the first.
135 */
136 for_each_cpu_and(cpu, cpumask, andmask)
137 if (cpumask_test_cpu(cpu, cpu_online_mask))
138 break;
139 if (cpu < nr_cpu_ids)
140 return per_cpu(x86_cpu_to_logical_apicid, cpu);
141 return BAD_APICID;
142}
143
107static unsigned int get_apic_id(unsigned long x) 144static unsigned int get_apic_id(unsigned long x)
108{ 145{
109 unsigned int id; 146 unsigned int id;
@@ -150,8 +187,10 @@ struct genapic apic_x2apic_cluster = {
150 .send_IPI_all = x2apic_send_IPI_all, 187 .send_IPI_all = x2apic_send_IPI_all,
151 .send_IPI_allbutself = x2apic_send_IPI_allbutself, 188 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
152 .send_IPI_mask = x2apic_send_IPI_mask, 189 .send_IPI_mask = x2apic_send_IPI_mask,
190 .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
153 .send_IPI_self = x2apic_send_IPI_self, 191 .send_IPI_self = x2apic_send_IPI_self,
154 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, 192 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
193 .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
155 .phys_pkg_id = phys_pkg_id, 194 .phys_pkg_id = phys_pkg_id,
156 .get_apic_id = get_apic_id, 195 .get_apic_id = get_apic_id,
157 .set_apic_id = set_apic_id, 196 .set_apic_id = set_apic_id,
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
index d042211768b7..21bcc0e098ba 100644
--- a/arch/x86/kernel/genx2apic_phys.c
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -29,16 +29,15 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
29 29
30/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 30/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
31 31
32static cpumask_t x2apic_target_cpus(void) 32static const struct cpumask *x2apic_target_cpus(void)
33{ 33{
34 return cpumask_of_cpu(0); 34 return cpumask_of(0);
35} 35}
36 36
37static cpumask_t x2apic_vector_allocation_domain(int cpu) 37static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
38{ 38{
39 cpumask_t domain = CPU_MASK_NONE; 39 cpumask_clear(retmask);
40 cpu_set(cpu, domain); 40 cpumask_set_cpu(cpu, retmask);
41 return domain;
42} 41}
43 42
44static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, 43static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
@@ -54,32 +53,54 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
54 x2apic_icr_write(cfg, apicid); 53 x2apic_icr_write(cfg, apicid);
55} 54}
56 55
57static void x2apic_send_IPI_mask(cpumask_t mask, int vector) 56static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
58{ 57{
59 unsigned long flags; 58 unsigned long flags;
60 unsigned long query_cpu; 59 unsigned long query_cpu;
61 60
62 local_irq_save(flags); 61 local_irq_save(flags);
63 for_each_cpu_mask(query_cpu, mask) { 62 for_each_cpu(query_cpu, mask) {
64 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), 63 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
65 vector, APIC_DEST_PHYSICAL); 64 vector, APIC_DEST_PHYSICAL);
66 } 65 }
67 local_irq_restore(flags); 66 local_irq_restore(flags);
68} 67}
69 68
70static void x2apic_send_IPI_allbutself(int vector) 69static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
70 int vector)
71{ 71{
72 cpumask_t mask = cpu_online_map; 72 unsigned long flags;
73 unsigned long query_cpu;
74 unsigned long this_cpu = smp_processor_id();
75
76 local_irq_save(flags);
77 for_each_cpu(query_cpu, mask) {
78 if (query_cpu != this_cpu)
79 __x2apic_send_IPI_dest(
80 per_cpu(x86_cpu_to_apicid, query_cpu),
81 vector, APIC_DEST_PHYSICAL);
82 }
83 local_irq_restore(flags);
84}
73 85
74 cpu_clear(smp_processor_id(), mask); 86static void x2apic_send_IPI_allbutself(int vector)
87{
88 unsigned long flags;
89 unsigned long query_cpu;
90 unsigned long this_cpu = smp_processor_id();
75 91
76 if (!cpus_empty(mask)) 92 local_irq_save(flags);
77 x2apic_send_IPI_mask(mask, vector); 93 for_each_online_cpu(query_cpu)
94 if (query_cpu != this_cpu)
95 __x2apic_send_IPI_dest(
96 per_cpu(x86_cpu_to_apicid, query_cpu),
97 vector, APIC_DEST_PHYSICAL);
98 local_irq_restore(flags);
78} 99}
79 100
80static void x2apic_send_IPI_all(int vector) 101static void x2apic_send_IPI_all(int vector)
81{ 102{
82 x2apic_send_IPI_mask(cpu_online_map, vector); 103 x2apic_send_IPI_mask(cpu_online_mask, vector);
83} 104}
84 105
85static int x2apic_apic_id_registered(void) 106static int x2apic_apic_id_registered(void)
@@ -87,7 +108,7 @@ static int x2apic_apic_id_registered(void)
87 return 1; 108 return 1;
88} 109}
89 110
90static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) 111static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
91{ 112{
92 int cpu; 113 int cpu;
93 114
@@ -95,13 +116,30 @@ static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
95 * We're using fixed IRQ delivery, can only return one phys APIC ID. 116 * We're using fixed IRQ delivery, can only return one phys APIC ID.
96 * May as well be the first. 117 * May as well be the first.
97 */ 118 */
98 cpu = first_cpu(cpumask); 119 cpu = cpumask_first(cpumask);
99 if ((unsigned)cpu < NR_CPUS) 120 if ((unsigned)cpu < nr_cpu_ids)
100 return per_cpu(x86_cpu_to_apicid, cpu); 121 return per_cpu(x86_cpu_to_apicid, cpu);
101 else 122 else
102 return BAD_APICID; 123 return BAD_APICID;
103} 124}
104 125
126static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
127 const struct cpumask *andmask)
128{
129 int cpu;
130
131 /*
132 * We're using fixed IRQ delivery, can only return one phys APIC ID.
133 * May as well be the first.
134 */
135 for_each_cpu_and(cpu, cpumask, andmask)
136 if (cpumask_test_cpu(cpu, cpu_online_mask))
137 break;
138 if (cpu < nr_cpu_ids)
139 return per_cpu(x86_cpu_to_apicid, cpu);
140 return BAD_APICID;
141}
142
105static unsigned int get_apic_id(unsigned long x) 143static unsigned int get_apic_id(unsigned long x)
106{ 144{
107 unsigned int id; 145 unsigned int id;
@@ -123,12 +161,12 @@ static unsigned int phys_pkg_id(int index_msb)
123 return current_cpu_data.initial_apicid >> index_msb; 161 return current_cpu_data.initial_apicid >> index_msb;
124} 162}
125 163
126void x2apic_send_IPI_self(int vector) 164static void x2apic_send_IPI_self(int vector)
127{ 165{
128 apic_write(APIC_SELF_IPI, vector); 166 apic_write(APIC_SELF_IPI, vector);
129} 167}
130 168
131void init_x2apic_ldr(void) 169static void init_x2apic_ldr(void)
132{ 170{
133 return; 171 return;
134} 172}
@@ -145,8 +183,10 @@ struct genapic apic_x2apic_phys = {
145 .send_IPI_all = x2apic_send_IPI_all, 183 .send_IPI_all = x2apic_send_IPI_all,
146 .send_IPI_allbutself = x2apic_send_IPI_allbutself, 184 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
147 .send_IPI_mask = x2apic_send_IPI_mask, 185 .send_IPI_mask = x2apic_send_IPI_mask,
186 .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
148 .send_IPI_self = x2apic_send_IPI_self, 187 .send_IPI_self = x2apic_send_IPI_self,
149 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, 188 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
189 .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
150 .phys_pkg_id = phys_pkg_id, 190 .phys_pkg_id = phys_pkg_id,
151 .get_apic_id = get_apic_id, 191 .get_apic_id = get_apic_id,
152 .set_apic_id = set_apic_id, 192 .set_apic_id = set_apic_id,
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index dece17289731..b193e082f6ce 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -79,16 +79,15 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);
79 79
80/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 80/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
81 81
82static cpumask_t uv_target_cpus(void) 82static const struct cpumask *uv_target_cpus(void)
83{ 83{
84 return cpumask_of_cpu(0); 84 return cpumask_of(0);
85} 85}
86 86
87static cpumask_t uv_vector_allocation_domain(int cpu) 87static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
88{ 88{
89 cpumask_t domain = CPU_MASK_NONE; 89 cpumask_clear(retmask);
90 cpu_set(cpu, domain); 90 cpumask_set_cpu(cpu, retmask);
91 return domain;
92} 91}
93 92
94int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) 93int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
@@ -127,28 +126,37 @@ static void uv_send_IPI_one(int cpu, int vector)
127 uv_write_global_mmr64(pnode, UVH_IPI_INT, val); 126 uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
128} 127}
129 128
130static void uv_send_IPI_mask(cpumask_t mask, int vector) 129static void uv_send_IPI_mask(const struct cpumask *mask, int vector)
131{ 130{
132 unsigned int cpu; 131 unsigned int cpu;
133 132
134 for_each_possible_cpu(cpu) 133 for_each_cpu(cpu, mask)
135 if (cpu_isset(cpu, mask)) 134 uv_send_IPI_one(cpu, vector);
135}
136
137static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
138{
139 unsigned int cpu;
140 unsigned int this_cpu = smp_processor_id();
141
142 for_each_cpu(cpu, mask)
143 if (cpu != this_cpu)
136 uv_send_IPI_one(cpu, vector); 144 uv_send_IPI_one(cpu, vector);
137} 145}
138 146
139static void uv_send_IPI_allbutself(int vector) 147static void uv_send_IPI_allbutself(int vector)
140{ 148{
141 cpumask_t mask = cpu_online_map; 149 unsigned int cpu;
142 150 unsigned int this_cpu = smp_processor_id();
143 cpu_clear(smp_processor_id(), mask);
144 151
145 if (!cpus_empty(mask)) 152 for_each_online_cpu(cpu)
146 uv_send_IPI_mask(mask, vector); 153 if (cpu != this_cpu)
154 uv_send_IPI_one(cpu, vector);
147} 155}
148 156
149static void uv_send_IPI_all(int vector) 157static void uv_send_IPI_all(int vector)
150{ 158{
151 uv_send_IPI_mask(cpu_online_map, vector); 159 uv_send_IPI_mask(cpu_online_mask, vector);
152} 160}
153 161
154static int uv_apic_id_registered(void) 162static int uv_apic_id_registered(void)
@@ -160,7 +168,7 @@ static void uv_init_apic_ldr(void)
160{ 168{
161} 169}
162 170
163static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) 171static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
164{ 172{
165 int cpu; 173 int cpu;
166 174
@@ -168,13 +176,30 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
168 * We're using fixed IRQ delivery, can only return one phys APIC ID. 176 * We're using fixed IRQ delivery, can only return one phys APIC ID.
169 * May as well be the first. 177 * May as well be the first.
170 */ 178 */
171 cpu = first_cpu(cpumask); 179 cpu = cpumask_first(cpumask);
172 if ((unsigned)cpu < nr_cpu_ids) 180 if ((unsigned)cpu < nr_cpu_ids)
173 return per_cpu(x86_cpu_to_apicid, cpu); 181 return per_cpu(x86_cpu_to_apicid, cpu);
174 else 182 else
175 return BAD_APICID; 183 return BAD_APICID;
176} 184}
177 185
186static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
187 const struct cpumask *andmask)
188{
189 int cpu;
190
191 /*
192 * We're using fixed IRQ delivery, can only return one phys APIC ID.
193 * May as well be the first.
194 */
195 for_each_cpu_and(cpu, cpumask, andmask)
196 if (cpumask_test_cpu(cpu, cpu_online_mask))
197 break;
198 if (cpu < nr_cpu_ids)
199 return per_cpu(x86_cpu_to_apicid, cpu);
200 return BAD_APICID;
201}
202
178static unsigned int get_apic_id(unsigned long x) 203static unsigned int get_apic_id(unsigned long x)
179{ 204{
180 unsigned int id; 205 unsigned int id;
@@ -222,8 +247,10 @@ struct genapic apic_x2apic_uv_x = {
222 .send_IPI_all = uv_send_IPI_all, 247 .send_IPI_all = uv_send_IPI_all,
223 .send_IPI_allbutself = uv_send_IPI_allbutself, 248 .send_IPI_allbutself = uv_send_IPI_allbutself,
224 .send_IPI_mask = uv_send_IPI_mask, 249 .send_IPI_mask = uv_send_IPI_mask,
250 .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself,
225 .send_IPI_self = uv_send_IPI_self, 251 .send_IPI_self = uv_send_IPI_self,
226 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, 252 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
253 .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and,
227 .phys_pkg_id = phys_pkg_id, 254 .phys_pkg_id = phys_pkg_id,
228 .get_apic_id = get_apic_id, 255 .get_apic_id = get_apic_id,
229 .set_apic_id = set_apic_id, 256 .set_apic_id = set_apic_id,
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 388e05a5fc17..b9a4d8c4b935 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -27,7 +27,7 @@
27#include <asm/trampoline.h> 27#include <asm/trampoline.h>
28 28
29/* boot cpu pda */ 29/* boot cpu pda */
30static struct x8664_pda _boot_cpu_pda __read_mostly; 30static struct x8664_pda _boot_cpu_pda;
31 31
32#ifdef CONFIG_SMP 32#ifdef CONFIG_SMP
33/* 33/*
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 845ea097383e..cd759ad90690 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -248,7 +248,7 @@ static void hpet_legacy_clockevent_register(void)
248 * Start hpet with the boot cpu mask and make it 248 * Start hpet with the boot cpu mask and make it
249 * global after the IO_APIC has been initialized. 249 * global after the IO_APIC has been initialized.
250 */ 250 */
251 hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); 251 hpet_clockevent.cpumask = cpumask_of(smp_processor_id());
252 clockevents_register_device(&hpet_clockevent); 252 clockevents_register_device(&hpet_clockevent);
253 global_clock_event = &hpet_clockevent; 253 global_clock_event = &hpet_clockevent;
254 printk(KERN_DEBUG "hpet clockevent registered\n"); 254 printk(KERN_DEBUG "hpet clockevent registered\n");
@@ -303,7 +303,7 @@ static void hpet_set_mode(enum clock_event_mode mode,
303 struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); 303 struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
304 hpet_setup_msi_irq(hdev->irq); 304 hpet_setup_msi_irq(hdev->irq);
305 disable_irq(hdev->irq); 305 disable_irq(hdev->irq);
306 irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu)); 306 irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
307 enable_irq(hdev->irq); 307 enable_irq(hdev->irq);
308 } 308 }
309 break; 309 break;
@@ -451,7 +451,7 @@ static int hpet_setup_irq(struct hpet_dev *dev)
451 return -1; 451 return -1;
452 452
453 disable_irq(dev->irq); 453 disable_irq(dev->irq);
454 irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu)); 454 irq_set_affinity(dev->irq, cpumask_of(dev->cpu));
455 enable_irq(dev->irq); 455 enable_irq(dev->irq);
456 456
457 printk(KERN_DEBUG "hpet: %s irq %d for MSI\n", 457 printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
@@ -502,7 +502,7 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
502 /* 5 usec minimum reprogramming delta. */ 502 /* 5 usec minimum reprogramming delta. */
503 evt->min_delta_ns = 5000; 503 evt->min_delta_ns = 5000;
504 504
505 evt->cpumask = cpumask_of_cpu(hdev->cpu); 505 evt->cpumask = cpumask_of(hdev->cpu);
506 clockevents_register_device(evt); 506 clockevents_register_device(evt);
507} 507}
508 508
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index c1b5e3ece1f2..10f92fb532f3 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -114,7 +114,7 @@ void __init setup_pit_timer(void)
114 * Start pit with the boot cpu mask and make it global after the 114 * Start pit with the boot cpu mask and make it global after the
115 * IO_APIC has been initialized. 115 * IO_APIC has been initialized.
116 */ 116 */
117 pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); 117 pit_clockevent.cpumask = cpumask_of(smp_processor_id());
118 pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, 118 pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
119 pit_clockevent.shift); 119 pit_clockevent.shift);
120 pit_clockevent.max_delta_ns = 120 pit_clockevent.max_delta_ns =
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 74917658b004..69911722b9d3 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -136,8 +136,8 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
136 136
137struct irq_cfg { 137struct irq_cfg {
138 struct irq_pin_list *irq_2_pin; 138 struct irq_pin_list *irq_2_pin;
139 cpumask_t domain; 139 cpumask_var_t domain;
140 cpumask_t old_domain; 140 cpumask_var_t old_domain;
141 unsigned move_cleanup_count; 141 unsigned move_cleanup_count;
142 u8 vector; 142 u8 vector;
143 u8 move_in_progress : 1; 143 u8 move_in_progress : 1;
@@ -152,22 +152,22 @@ static struct irq_cfg irq_cfgx[] = {
152#else 152#else
153static struct irq_cfg irq_cfgx[NR_IRQS] = { 153static struct irq_cfg irq_cfgx[NR_IRQS] = {
154#endif 154#endif
155 [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, 155 [0] = { .vector = IRQ0_VECTOR, },
156 [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, 156 [1] = { .vector = IRQ1_VECTOR, },
157 [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, 157 [2] = { .vector = IRQ2_VECTOR, },
158 [3] = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, 158 [3] = { .vector = IRQ3_VECTOR, },
159 [4] = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, 159 [4] = { .vector = IRQ4_VECTOR, },
160 [5] = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, 160 [5] = { .vector = IRQ5_VECTOR, },
161 [6] = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, 161 [6] = { .vector = IRQ6_VECTOR, },
162 [7] = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, 162 [7] = { .vector = IRQ7_VECTOR, },
163 [8] = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, 163 [8] = { .vector = IRQ8_VECTOR, },
164 [9] = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, 164 [9] = { .vector = IRQ9_VECTOR, },
165 [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, 165 [10] = { .vector = IRQ10_VECTOR, },
166 [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, 166 [11] = { .vector = IRQ11_VECTOR, },
167 [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, 167 [12] = { .vector = IRQ12_VECTOR, },
168 [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, 168 [13] = { .vector = IRQ13_VECTOR, },
169 [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, 169 [14] = { .vector = IRQ14_VECTOR, },
170 [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, 170 [15] = { .vector = IRQ15_VECTOR, },
171}; 171};
172 172
173int __init arch_early_irq_init(void) 173int __init arch_early_irq_init(void)
@@ -183,6 +183,10 @@ int __init arch_early_irq_init(void)
183 for (i = 0; i < count; i++) { 183 for (i = 0; i < count; i++) {
184 desc = irq_to_desc(i); 184 desc = irq_to_desc(i);
185 desc->chip_data = &cfg[i]; 185 desc->chip_data = &cfg[i];
186 alloc_bootmem_cpumask_var(&cfg[i].domain);
187 alloc_bootmem_cpumask_var(&cfg[i].old_domain);
188 if (i < NR_IRQS_LEGACY)
189 cpumask_setall(cfg[i].domain);
186 } 190 }
187 191
188 return 0; 192 return 0;
@@ -209,6 +213,20 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu)
209 node = cpu_to_node(cpu); 213 node = cpu_to_node(cpu);
210 214
211 cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); 215 cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
216 if (cfg) {
217 /* FIXME: needs alloc_cpumask_var_node() */
218 if (!alloc_cpumask_var(&cfg->domain, GFP_ATOMIC)) {
219 kfree(cfg);
220 cfg = NULL;
221 } else if (!alloc_cpumask_var(&cfg->old_domain, GFP_ATOMIC)) {
222 free_cpumask_var(cfg->domain);
223 kfree(cfg);
224 cfg = NULL;
225 } else {
226 cpumask_clear(cfg->domain);
227 cpumask_clear(cfg->old_domain);
228 }
229 }
212 printk(KERN_DEBUG " alloc irq_cfg on cpu %d node %d\n", cpu, node); 230 printk(KERN_DEBUG " alloc irq_cfg on cpu %d node %d\n", cpu, node);
213 231
214 return cfg; 232 return cfg;
@@ -333,13 +351,14 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
333 } 351 }
334} 352}
335 353
336static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) 354static void
355set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
337{ 356{
338 struct irq_cfg *cfg = desc->chip_data; 357 struct irq_cfg *cfg = desc->chip_data;
339 358
340 if (!cfg->move_in_progress) { 359 if (!cfg->move_in_progress) {
341 /* it means that domain is not changed */ 360 /* it means that domain is not changed */
342 if (!cpus_intersects(desc->affinity, mask)) 361 if (!cpumask_intersects(&desc->affinity, mask))
343 cfg->move_desc_pending = 1; 362 cfg->move_desc_pending = 1;
344 } 363 }
345} 364}
@@ -354,7 +373,8 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
354#endif 373#endif
355 374
356#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC 375#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
357static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) 376static inline void
377set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
358{ 378{
359} 379}
360#endif 380#endif
@@ -485,6 +505,26 @@ static void ioapic_mask_entry(int apic, int pin)
485} 505}
486 506
487#ifdef CONFIG_SMP 507#ifdef CONFIG_SMP
508static void send_cleanup_vector(struct irq_cfg *cfg)
509{
510 cpumask_var_t cleanup_mask;
511
512 if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
513 unsigned int i;
514 cfg->move_cleanup_count = 0;
515 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
516 cfg->move_cleanup_count++;
517 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
518 send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
519 } else {
520 cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
521 cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
522 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
523 free_cpumask_var(cleanup_mask);
524 }
525 cfg->move_in_progress = 0;
526}
527
488static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) 528static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
489{ 529{
490 int apic, pin; 530 int apic, pin;
@@ -520,41 +560,55 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
520 } 560 }
521} 561}
522 562
523static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask); 563static int
564assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
524 565
525static void set_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask) 566/*
567 * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid
568 * of that, or returns BAD_APICID and leaves desc->affinity untouched.
569 */
570static unsigned int
571set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
526{ 572{
527 struct irq_cfg *cfg; 573 struct irq_cfg *cfg;
528 unsigned long flags;
529 unsigned int dest;
530 cpumask_t tmp;
531 unsigned int irq; 574 unsigned int irq;
532 575
533 cpus_and(tmp, mask, cpu_online_map); 576 if (!cpumask_intersects(mask, cpu_online_mask))
534 if (cpus_empty(tmp)) 577 return BAD_APICID;
535 return;
536 578
537 irq = desc->irq; 579 irq = desc->irq;
538 cfg = desc->chip_data; 580 cfg = desc->chip_data;
539 if (assign_irq_vector(irq, cfg, mask)) 581 if (assign_irq_vector(irq, cfg, mask))
540 return; 582 return BAD_APICID;
541 583
584 cpumask_and(&desc->affinity, cfg->domain, mask);
542 set_extra_move_desc(desc, mask); 585 set_extra_move_desc(desc, mask);
586 return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask);
587}
543 588
544 cpus_and(tmp, cfg->domain, mask); 589static void
545 dest = cpu_mask_to_apicid(tmp); 590set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
546 /* 591{
547 * Only the high 8 bits are valid. 592 struct irq_cfg *cfg;
548 */ 593 unsigned long flags;
549 dest = SET_APIC_LOGICAL_ID(dest); 594 unsigned int dest;
595 unsigned int irq;
596
597 irq = desc->irq;
598 cfg = desc->chip_data;
550 599
551 spin_lock_irqsave(&ioapic_lock, flags); 600 spin_lock_irqsave(&ioapic_lock, flags);
552 __target_IO_APIC_irq(irq, dest, cfg); 601 dest = set_desc_affinity(desc, mask);
553 desc->affinity = mask; 602 if (dest != BAD_APICID) {
603 /* Only the high 8 bits are valid. */
604 dest = SET_APIC_LOGICAL_ID(dest);
605 __target_IO_APIC_irq(irq, dest, cfg);
606 }
554 spin_unlock_irqrestore(&ioapic_lock, flags); 607 spin_unlock_irqrestore(&ioapic_lock, flags);
555} 608}
556 609
557static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) 610static void
611set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
558{ 612{
559 struct irq_desc *desc; 613 struct irq_desc *desc;
560 614
@@ -652,7 +706,7 @@ static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
652} 706}
653 707
654#ifdef CONFIG_X86_64 708#ifdef CONFIG_X86_64
655void io_apic_sync(struct irq_pin_list *entry) 709static void io_apic_sync(struct irq_pin_list *entry)
656{ 710{
657 /* 711 /*
658 * Synchronize the IO-APIC and the CPU by doing 712 * Synchronize the IO-APIC and the CPU by doing
@@ -1222,7 +1276,8 @@ void unlock_vector_lock(void)
1222 spin_unlock(&vector_lock); 1276 spin_unlock(&vector_lock);
1223} 1277}
1224 1278
1225static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) 1279static int
1280__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
1226{ 1281{
1227 /* 1282 /*
1228 * NOTE! The local APIC isn't very good at handling 1283 * NOTE! The local APIC isn't very good at handling
@@ -1237,49 +1292,49 @@ static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
1237 */ 1292 */
1238 static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; 1293 static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
1239 unsigned int old_vector; 1294 unsigned int old_vector;
1240 int cpu; 1295 int cpu, err;
1296 cpumask_var_t tmp_mask;
1241 1297
1242 if ((cfg->move_in_progress) || cfg->move_cleanup_count) 1298 if ((cfg->move_in_progress) || cfg->move_cleanup_count)
1243 return -EBUSY; 1299 return -EBUSY;
1244 1300
1245 /* Only try and allocate irqs on cpus that are present */ 1301 if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
1246 cpus_and(mask, mask, cpu_online_map); 1302 return -ENOMEM;
1247 1303
1248 old_vector = cfg->vector; 1304 old_vector = cfg->vector;
1249 if (old_vector) { 1305 if (old_vector) {
1250 cpumask_t tmp; 1306 cpumask_and(tmp_mask, mask, cpu_online_mask);
1251 cpus_and(tmp, cfg->domain, mask); 1307 cpumask_and(tmp_mask, cfg->domain, tmp_mask);
1252 if (!cpus_empty(tmp)) 1308 if (!cpumask_empty(tmp_mask)) {
1309 free_cpumask_var(tmp_mask);
1253 return 0; 1310 return 0;
1311 }
1254 } 1312 }
1255 1313
1256 for_each_cpu_mask_nr(cpu, mask) { 1314 /* Only try and allocate irqs on cpus that are present */
1257 cpumask_t domain, new_mask; 1315 err = -ENOSPC;
1316 for_each_cpu_and(cpu, mask, cpu_online_mask) {
1258 int new_cpu; 1317 int new_cpu;
1259 int vector, offset; 1318 int vector, offset;
1260 1319
1261 domain = vector_allocation_domain(cpu); 1320 vector_allocation_domain(cpu, tmp_mask);
1262 cpus_and(new_mask, domain, cpu_online_map);
1263 1321
1264 vector = current_vector; 1322 vector = current_vector;
1265 offset = current_offset; 1323 offset = current_offset;
1266next: 1324next:
1267 vector += 8; 1325 vector += 8;
1268 if (vector >= first_system_vector) { 1326 if (vector >= first_system_vector) {
1269 /* If we run out of vectors on large boxen, must share them. */ 1327 /* If out of vectors on large boxen, must share them. */
1270 offset = (offset + 1) % 8; 1328 offset = (offset + 1) % 8;
1271 vector = FIRST_DEVICE_VECTOR + offset; 1329 vector = FIRST_DEVICE_VECTOR + offset;
1272 } 1330 }
1273 if (unlikely(current_vector == vector)) 1331 if (unlikely(current_vector == vector))
1274 continue; 1332 continue;
1275#ifdef CONFIG_X86_64 1333
1276 if (vector == IA32_SYSCALL_VECTOR) 1334 if (test_bit(vector, used_vectors))
1277 goto next;
1278#else
1279 if (vector == SYSCALL_VECTOR)
1280 goto next; 1335 goto next;
1281#endif 1336
1282 for_each_cpu_mask_nr(new_cpu, new_mask) 1337 for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
1283 if (per_cpu(vector_irq, new_cpu)[vector] != -1) 1338 if (per_cpu(vector_irq, new_cpu)[vector] != -1)
1284 goto next; 1339 goto next;
1285 /* Found one! */ 1340 /* Found one! */
@@ -1287,18 +1342,21 @@ next:
1287 current_offset = offset; 1342 current_offset = offset;
1288 if (old_vector) { 1343 if (old_vector) {
1289 cfg->move_in_progress = 1; 1344 cfg->move_in_progress = 1;
1290 cfg->old_domain = cfg->domain; 1345 cpumask_copy(cfg->old_domain, cfg->domain);
1291 } 1346 }
1292 for_each_cpu_mask_nr(new_cpu, new_mask) 1347 for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
1293 per_cpu(vector_irq, new_cpu)[vector] = irq; 1348 per_cpu(vector_irq, new_cpu)[vector] = irq;
1294 cfg->vector = vector; 1349 cfg->vector = vector;
1295 cfg->domain = domain; 1350 cpumask_copy(cfg->domain, tmp_mask);
1296 return 0; 1351 err = 0;
1352 break;
1297 } 1353 }
1298 return -ENOSPC; 1354 free_cpumask_var(tmp_mask);
1355 return err;
1299} 1356}
1300 1357
1301static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) 1358static int
1359assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
1302{ 1360{
1303 int err; 1361 int err;
1304 unsigned long flags; 1362 unsigned long flags;
@@ -1311,23 +1369,20 @@ static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
1311 1369
1312static void __clear_irq_vector(int irq, struct irq_cfg *cfg) 1370static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
1313{ 1371{
1314 cpumask_t mask;
1315 int cpu, vector; 1372 int cpu, vector;
1316 1373
1317 BUG_ON(!cfg->vector); 1374 BUG_ON(!cfg->vector);
1318 1375
1319 vector = cfg->vector; 1376 vector = cfg->vector;
1320 cpus_and(mask, cfg->domain, cpu_online_map); 1377 for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
1321 for_each_cpu_mask_nr(cpu, mask)
1322 per_cpu(vector_irq, cpu)[vector] = -1; 1378 per_cpu(vector_irq, cpu)[vector] = -1;
1323 1379
1324 cfg->vector = 0; 1380 cfg->vector = 0;
1325 cpus_clear(cfg->domain); 1381 cpumask_clear(cfg->domain);
1326 1382
1327 if (likely(!cfg->move_in_progress)) 1383 if (likely(!cfg->move_in_progress))
1328 return; 1384 return;
1329 cpus_and(mask, cfg->old_domain, cpu_online_map); 1385 for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
1330 for_each_cpu_mask_nr(cpu, mask) {
1331 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; 1386 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
1332 vector++) { 1387 vector++) {
1333 if (per_cpu(vector_irq, cpu)[vector] != irq) 1388 if (per_cpu(vector_irq, cpu)[vector] != irq)
@@ -1350,7 +1405,7 @@ void __setup_vector_irq(int cpu)
1350 /* Mark the inuse vectors */ 1405 /* Mark the inuse vectors */
1351 for_each_irq_desc(irq, desc) { 1406 for_each_irq_desc(irq, desc) {
1352 cfg = desc->chip_data; 1407 cfg = desc->chip_data;
1353 if (!cpu_isset(cpu, cfg->domain)) 1408 if (!cpumask_test_cpu(cpu, cfg->domain))
1354 continue; 1409 continue;
1355 vector = cfg->vector; 1410 vector = cfg->vector;
1356 per_cpu(vector_irq, cpu)[vector] = irq; 1411 per_cpu(vector_irq, cpu)[vector] = irq;
@@ -1362,7 +1417,7 @@ void __setup_vector_irq(int cpu)
1362 continue; 1417 continue;
1363 1418
1364 cfg = irq_cfg(irq); 1419 cfg = irq_cfg(irq);
1365 if (!cpu_isset(cpu, cfg->domain)) 1420 if (!cpumask_test_cpu(cpu, cfg->domain))
1366 per_cpu(vector_irq, cpu)[vector] = -1; 1421 per_cpu(vector_irq, cpu)[vector] = -1;
1367 } 1422 }
1368} 1423}
@@ -1498,18 +1553,17 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de
1498{ 1553{
1499 struct irq_cfg *cfg; 1554 struct irq_cfg *cfg;
1500 struct IO_APIC_route_entry entry; 1555 struct IO_APIC_route_entry entry;
1501 cpumask_t mask; 1556 unsigned int dest;
1502 1557
1503 if (!IO_APIC_IRQ(irq)) 1558 if (!IO_APIC_IRQ(irq))
1504 return; 1559 return;
1505 1560
1506 cfg = desc->chip_data; 1561 cfg = desc->chip_data;
1507 1562
1508 mask = TARGET_CPUS; 1563 if (assign_irq_vector(irq, cfg, TARGET_CPUS))
1509 if (assign_irq_vector(irq, cfg, mask))
1510 return; 1564 return;
1511 1565
1512 cpus_and(mask, cfg->domain, mask); 1566 dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
1513 1567
1514 apic_printk(APIC_VERBOSE,KERN_DEBUG 1568 apic_printk(APIC_VERBOSE,KERN_DEBUG
1515 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " 1569 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
@@ -1519,8 +1573,7 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de
1519 1573
1520 1574
1521 if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, 1575 if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
1522 cpu_mask_to_apicid(mask), trigger, polarity, 1576 dest, trigger, polarity, cfg->vector)) {
1523 cfg->vector)) {
1524 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", 1577 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
1525 mp_ioapics[apic].mp_apicid, pin); 1578 mp_ioapics[apic].mp_apicid, pin);
1526 __clear_irq_vector(irq, cfg); 1579 __clear_irq_vector(irq, cfg);
@@ -2240,7 +2293,7 @@ static int ioapic_retrigger_irq(unsigned int irq)
2240 unsigned long flags; 2293 unsigned long flags;
2241 2294
2242 spin_lock_irqsave(&vector_lock, flags); 2295 spin_lock_irqsave(&vector_lock, flags);
2243 send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); 2296 send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
2244 spin_unlock_irqrestore(&vector_lock, flags); 2297 spin_unlock_irqrestore(&vector_lock, flags);
2245 2298
2246 return 1; 2299 return 1;
@@ -2289,18 +2342,17 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
2289 * as simple as edge triggered migration and we can do the irq migration 2342 * as simple as edge triggered migration and we can do the irq migration
2290 * with a simple atomic update to IO-APIC RTE. 2343 * with a simple atomic update to IO-APIC RTE.
2291 */ 2344 */
2292static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask) 2345static void
2346migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2293{ 2347{
2294 struct irq_cfg *cfg; 2348 struct irq_cfg *cfg;
2295 cpumask_t tmp, cleanup_mask;
2296 struct irte irte; 2349 struct irte irte;
2297 int modify_ioapic_rte; 2350 int modify_ioapic_rte;
2298 unsigned int dest; 2351 unsigned int dest;
2299 unsigned long flags; 2352 unsigned long flags;
2300 unsigned int irq; 2353 unsigned int irq;
2301 2354
2302 cpus_and(tmp, mask, cpu_online_map); 2355 if (!cpumask_intersects(mask, cpu_online_mask))
2303 if (cpus_empty(tmp))
2304 return; 2356 return;
2305 2357
2306 irq = desc->irq; 2358 irq = desc->irq;
@@ -2313,8 +2365,7 @@ static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask)
2313 2365
2314 set_extra_move_desc(desc, mask); 2366 set_extra_move_desc(desc, mask);
2315 2367
2316 cpus_and(tmp, cfg->domain, mask); 2368 dest = cpu_mask_to_apicid_and(cfg->domain, mask);
2317 dest = cpu_mask_to_apicid(tmp);
2318 2369
2319 modify_ioapic_rte = desc->status & IRQ_LEVEL; 2370 modify_ioapic_rte = desc->status & IRQ_LEVEL;
2320 if (modify_ioapic_rte) { 2371 if (modify_ioapic_rte) {
@@ -2331,14 +2382,10 @@ static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask)
2331 */ 2382 */
2332 modify_irte(irq, &irte); 2383 modify_irte(irq, &irte);
2333 2384
2334 if (cfg->move_in_progress) { 2385 if (cfg->move_in_progress)
2335 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); 2386 send_cleanup_vector(cfg);
2336 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
2337 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2338 cfg->move_in_progress = 0;
2339 }
2340 2387
2341 desc->affinity = mask; 2388 cpumask_copy(&desc->affinity, mask);
2342} 2389}
2343 2390
2344static int migrate_irq_remapped_level_desc(struct irq_desc *desc) 2391static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
@@ -2360,11 +2407,11 @@ static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
2360 } 2407 }
2361 2408
2362 /* everthing is clear. we have right of way */ 2409 /* everthing is clear. we have right of way */
2363 migrate_ioapic_irq_desc(desc, desc->pending_mask); 2410 migrate_ioapic_irq_desc(desc, &desc->pending_mask);
2364 2411
2365 ret = 0; 2412 ret = 0;
2366 desc->status &= ~IRQ_MOVE_PENDING; 2413 desc->status &= ~IRQ_MOVE_PENDING;
2367 cpus_clear(desc->pending_mask); 2414 cpumask_clear(&desc->pending_mask);
2368 2415
2369unmask: 2416unmask:
2370 unmask_IO_APIC_irq_desc(desc); 2417 unmask_IO_APIC_irq_desc(desc);
@@ -2389,7 +2436,7 @@ static void ir_irq_migration(struct work_struct *work)
2389 continue; 2436 continue;
2390 } 2437 }
2391 2438
2392 desc->chip->set_affinity(irq, desc->pending_mask); 2439 desc->chip->set_affinity(irq, &desc->pending_mask);
2393 spin_unlock_irqrestore(&desc->lock, flags); 2440 spin_unlock_irqrestore(&desc->lock, flags);
2394 } 2441 }
2395 } 2442 }
@@ -2398,18 +2445,20 @@ static void ir_irq_migration(struct work_struct *work)
2398/* 2445/*
2399 * Migrates the IRQ destination in the process context. 2446 * Migrates the IRQ destination in the process context.
2400 */ 2447 */
2401static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask) 2448static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
2449 const struct cpumask *mask)
2402{ 2450{
2403 if (desc->status & IRQ_LEVEL) { 2451 if (desc->status & IRQ_LEVEL) {
2404 desc->status |= IRQ_MOVE_PENDING; 2452 desc->status |= IRQ_MOVE_PENDING;
2405 desc->pending_mask = mask; 2453 cpumask_copy(&desc->pending_mask, mask);
2406 migrate_irq_remapped_level_desc(desc); 2454 migrate_irq_remapped_level_desc(desc);
2407 return; 2455 return;
2408 } 2456 }
2409 2457
2410 migrate_ioapic_irq_desc(desc, mask); 2458 migrate_ioapic_irq_desc(desc, mask);
2411} 2459}
2412static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) 2460static void set_ir_ioapic_affinity_irq(unsigned int irq,
2461 const struct cpumask *mask)
2413{ 2462{
2414 struct irq_desc *desc = irq_to_desc(irq); 2463 struct irq_desc *desc = irq_to_desc(irq);
2415 2464
@@ -2444,7 +2493,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
2444 if (!cfg->move_cleanup_count) 2493 if (!cfg->move_cleanup_count)
2445 goto unlock; 2494 goto unlock;
2446 2495
2447 if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) 2496 if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
2448 goto unlock; 2497 goto unlock;
2449 2498
2450 __get_cpu_var(vector_irq)[vector] = -1; 2499 __get_cpu_var(vector_irq)[vector] = -1;
@@ -2481,20 +2530,14 @@ static void irq_complete_move(struct irq_desc **descp)
2481 2530
2482 vector = ~get_irq_regs()->orig_ax; 2531 vector = ~get_irq_regs()->orig_ax;
2483 me = smp_processor_id(); 2532 me = smp_processor_id();
2484 if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
2485 cpumask_t cleanup_mask;
2486
2487#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC 2533#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
2488 *descp = desc = move_irq_desc(desc, me); 2534 *descp = desc = move_irq_desc(desc, me);
2489 /* get the new one */ 2535 /* get the new one */
2490 cfg = desc->chip_data; 2536 cfg = desc->chip_data;
2491#endif 2537#endif
2492 2538
2493 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); 2539 if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
2494 cfg->move_cleanup_count = cpus_weight(cleanup_mask); 2540 send_cleanup_vector(cfg);
2495 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2496 cfg->move_in_progress = 0;
2497 }
2498} 2541}
2499#else 2542#else
2500static inline void irq_complete_move(struct irq_desc **descp) {} 2543static inline void irq_complete_move(struct irq_desc **descp) {}
@@ -3216,16 +3259,13 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3216 struct irq_cfg *cfg; 3259 struct irq_cfg *cfg;
3217 int err; 3260 int err;
3218 unsigned dest; 3261 unsigned dest;
3219 cpumask_t tmp;
3220 3262
3221 cfg = irq_cfg(irq); 3263 cfg = irq_cfg(irq);
3222 tmp = TARGET_CPUS; 3264 err = assign_irq_vector(irq, cfg, TARGET_CPUS);
3223 err = assign_irq_vector(irq, cfg, tmp);
3224 if (err) 3265 if (err)
3225 return err; 3266 return err;
3226 3267
3227 cpus_and(tmp, cfg->domain, tmp); 3268 dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
3228 dest = cpu_mask_to_apicid(tmp);
3229 3269
3230#ifdef CONFIG_INTR_REMAP 3270#ifdef CONFIG_INTR_REMAP
3231 if (irq_remapped(irq)) { 3271 if (irq_remapped(irq)) {
@@ -3279,26 +3319,18 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3279} 3319}
3280 3320
3281#ifdef CONFIG_SMP 3321#ifdef CONFIG_SMP
3282static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) 3322static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3283{ 3323{
3284 struct irq_desc *desc = irq_to_desc(irq); 3324 struct irq_desc *desc = irq_to_desc(irq);
3285 struct irq_cfg *cfg; 3325 struct irq_cfg *cfg;
3286 struct msi_msg msg; 3326 struct msi_msg msg;
3287 unsigned int dest; 3327 unsigned int dest;
3288 cpumask_t tmp;
3289 3328
3290 cpus_and(tmp, mask, cpu_online_map); 3329 dest = set_desc_affinity(desc, mask);
3291 if (cpus_empty(tmp)) 3330 if (dest == BAD_APICID)
3292 return; 3331 return;
3293 3332
3294 cfg = desc->chip_data; 3333 cfg = desc->chip_data;
3295 if (assign_irq_vector(irq, cfg, mask))
3296 return;
3297
3298 set_extra_move_desc(desc, mask);
3299
3300 cpus_and(tmp, cfg->domain, mask);
3301 dest = cpu_mask_to_apicid(tmp);
3302 3334
3303 read_msi_msg_desc(desc, &msg); 3335 read_msi_msg_desc(desc, &msg);
3304 3336
@@ -3308,37 +3340,27 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
3308 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3340 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3309 3341
3310 write_msi_msg_desc(desc, &msg); 3342 write_msi_msg_desc(desc, &msg);
3311 desc->affinity = mask;
3312} 3343}
3313#ifdef CONFIG_INTR_REMAP 3344#ifdef CONFIG_INTR_REMAP
3314/* 3345/*
3315 * Migrate the MSI irq to another cpumask. This migration is 3346 * Migrate the MSI irq to another cpumask. This migration is
3316 * done in the process context using interrupt-remapping hardware. 3347 * done in the process context using interrupt-remapping hardware.
3317 */ 3348 */
3318static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) 3349static void
3350ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3319{ 3351{
3320 struct irq_desc *desc = irq_to_desc(irq); 3352 struct irq_desc *desc = irq_to_desc(irq);
3321 struct irq_cfg *cfg; 3353 struct irq_cfg *cfg = desc->chip_data;
3322 unsigned int dest; 3354 unsigned int dest;
3323 cpumask_t tmp, cleanup_mask;
3324 struct irte irte; 3355 struct irte irte;
3325 3356
3326 cpus_and(tmp, mask, cpu_online_map);
3327 if (cpus_empty(tmp))
3328 return;
3329
3330 if (get_irte(irq, &irte)) 3357 if (get_irte(irq, &irte))
3331 return; 3358 return;
3332 3359
3333 cfg = desc->chip_data; 3360 dest = set_desc_affinity(desc, mask);
3334 if (assign_irq_vector(irq, cfg, mask)) 3361 if (dest == BAD_APICID)
3335 return; 3362 return;
3336 3363
3337 set_extra_move_desc(desc, mask);
3338
3339 cpus_and(tmp, cfg->domain, mask);
3340 dest = cpu_mask_to_apicid(tmp);
3341
3342 irte.vector = cfg->vector; 3364 irte.vector = cfg->vector;
3343 irte.dest_id = IRTE_DEST(dest); 3365 irte.dest_id = IRTE_DEST(dest);
3344 3366
@@ -3352,14 +3374,8 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
3352 * at the new destination. So, time to cleanup the previous 3374 * at the new destination. So, time to cleanup the previous
3353 * vector allocation. 3375 * vector allocation.
3354 */ 3376 */
3355 if (cfg->move_in_progress) { 3377 if (cfg->move_in_progress)
3356 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); 3378 send_cleanup_vector(cfg);
3357 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
3358 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
3359 cfg->move_in_progress = 0;
3360 }
3361
3362 desc->affinity = mask;
3363} 3379}
3364 3380
3365#endif 3381#endif
@@ -3550,26 +3566,18 @@ void arch_teardown_msi_irq(unsigned int irq)
3550 3566
3551#ifdef CONFIG_DMAR 3567#ifdef CONFIG_DMAR
3552#ifdef CONFIG_SMP 3568#ifdef CONFIG_SMP
3553static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) 3569static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3554{ 3570{
3555 struct irq_desc *desc = irq_to_desc(irq); 3571 struct irq_desc *desc = irq_to_desc(irq);
3556 struct irq_cfg *cfg; 3572 struct irq_cfg *cfg;
3557 struct msi_msg msg; 3573 struct msi_msg msg;
3558 unsigned int dest; 3574 unsigned int dest;
3559 cpumask_t tmp;
3560 3575
3561 cpus_and(tmp, mask, cpu_online_map); 3576 dest = set_desc_affinity(desc, mask);
3562 if (cpus_empty(tmp)) 3577 if (dest == BAD_APICID)
3563 return; 3578 return;
3564 3579
3565 cfg = desc->chip_data; 3580 cfg = desc->chip_data;
3566 if (assign_irq_vector(irq, cfg, mask))
3567 return;
3568
3569 set_extra_move_desc(desc, mask);
3570
3571 cpus_and(tmp, cfg->domain, mask);
3572 dest = cpu_mask_to_apicid(tmp);
3573 3581
3574 dmar_msi_read(irq, &msg); 3582 dmar_msi_read(irq, &msg);
3575 3583
@@ -3579,7 +3587,6 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
3579 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3587 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3580 3588
3581 dmar_msi_write(irq, &msg); 3589 dmar_msi_write(irq, &msg);
3582 desc->affinity = mask;
3583} 3590}
3584 3591
3585#endif /* CONFIG_SMP */ 3592#endif /* CONFIG_SMP */
@@ -3613,26 +3620,18 @@ int arch_setup_dmar_msi(unsigned int irq)
3613#ifdef CONFIG_HPET_TIMER 3620#ifdef CONFIG_HPET_TIMER
3614 3621
3615#ifdef CONFIG_SMP 3622#ifdef CONFIG_SMP
3616static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) 3623static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3617{ 3624{
3618 struct irq_desc *desc = irq_to_desc(irq); 3625 struct irq_desc *desc = irq_to_desc(irq);
3619 struct irq_cfg *cfg; 3626 struct irq_cfg *cfg;
3620 struct msi_msg msg; 3627 struct msi_msg msg;
3621 unsigned int dest; 3628 unsigned int dest;
3622 cpumask_t tmp;
3623 3629
3624 cpus_and(tmp, mask, cpu_online_map); 3630 dest = set_desc_affinity(desc, mask);
3625 if (cpus_empty(tmp)) 3631 if (dest == BAD_APICID)
3626 return; 3632 return;
3627 3633
3628 cfg = desc->chip_data; 3634 cfg = desc->chip_data;
3629 if (assign_irq_vector(irq, cfg, mask))
3630 return;
3631
3632 set_extra_move_desc(desc, mask);
3633
3634 cpus_and(tmp, cfg->domain, mask);
3635 dest = cpu_mask_to_apicid(tmp);
3636 3635
3637 hpet_msi_read(irq, &msg); 3636 hpet_msi_read(irq, &msg);
3638 3637
@@ -3642,7 +3641,6 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
3642 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3641 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3643 3642
3644 hpet_msi_write(irq, &msg); 3643 hpet_msi_write(irq, &msg);
3645 desc->affinity = mask;
3646} 3644}
3647 3645
3648#endif /* CONFIG_SMP */ 3646#endif /* CONFIG_SMP */
@@ -3697,28 +3695,19 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
3697 write_ht_irq_msg(irq, &msg); 3695 write_ht_irq_msg(irq, &msg);
3698} 3696}
3699 3697
3700static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) 3698static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
3701{ 3699{
3702 struct irq_desc *desc = irq_to_desc(irq); 3700 struct irq_desc *desc = irq_to_desc(irq);
3703 struct irq_cfg *cfg; 3701 struct irq_cfg *cfg;
3704 unsigned int dest; 3702 unsigned int dest;
3705 cpumask_t tmp;
3706 3703
3707 cpus_and(tmp, mask, cpu_online_map); 3704 dest = set_desc_affinity(desc, mask);
3708 if (cpus_empty(tmp)) 3705 if (dest == BAD_APICID)
3709 return; 3706 return;
3710 3707
3711 cfg = desc->chip_data; 3708 cfg = desc->chip_data;
3712 if (assign_irq_vector(irq, cfg, mask))
3713 return;
3714
3715 set_extra_move_desc(desc, mask);
3716
3717 cpus_and(tmp, cfg->domain, mask);
3718 dest = cpu_mask_to_apicid(tmp);
3719 3709
3720 target_ht_irq(irq, dest, cfg->vector); 3710 target_ht_irq(irq, dest, cfg->vector);
3721 desc->affinity = mask;
3722} 3711}
3723 3712
3724#endif 3713#endif
@@ -3738,17 +3727,14 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3738{ 3727{
3739 struct irq_cfg *cfg; 3728 struct irq_cfg *cfg;
3740 int err; 3729 int err;
3741 cpumask_t tmp;
3742 3730
3743 cfg = irq_cfg(irq); 3731 cfg = irq_cfg(irq);
3744 tmp = TARGET_CPUS; 3732 err = assign_irq_vector(irq, cfg, TARGET_CPUS);
3745 err = assign_irq_vector(irq, cfg, tmp);
3746 if (!err) { 3733 if (!err) {
3747 struct ht_irq_msg msg; 3734 struct ht_irq_msg msg;
3748 unsigned dest; 3735 unsigned dest;
3749 3736
3750 cpus_and(tmp, cfg->domain, tmp); 3737 dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
3751 dest = cpu_mask_to_apicid(tmp);
3752 3738
3753 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); 3739 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
3754 3740
@@ -3784,7 +3770,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3784int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, 3770int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3785 unsigned long mmr_offset) 3771 unsigned long mmr_offset)
3786{ 3772{
3787 const cpumask_t *eligible_cpu = get_cpu_mask(cpu); 3773 const struct cpumask *eligible_cpu = cpumask_of(cpu);
3788 struct irq_cfg *cfg; 3774 struct irq_cfg *cfg;
3789 int mmr_pnode; 3775 int mmr_pnode;
3790 unsigned long mmr_value; 3776 unsigned long mmr_value;
@@ -3794,7 +3780,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3794 3780
3795 cfg = irq_cfg(irq); 3781 cfg = irq_cfg(irq);
3796 3782
3797 err = assign_irq_vector(irq, cfg, *eligible_cpu); 3783 err = assign_irq_vector(irq, cfg, eligible_cpu);
3798 if (err != 0) 3784 if (err != 0)
3799 return err; 3785 return err;
3800 3786
@@ -3813,7 +3799,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3813 entry->polarity = 0; 3799 entry->polarity = 0;
3814 entry->trigger = 0; 3800 entry->trigger = 0;
3815 entry->mask = 0; 3801 entry->mask = 0;
3816 entry->dest = cpu_mask_to_apicid(*eligible_cpu); 3802 entry->dest = cpu_mask_to_apicid(eligible_cpu);
3817 3803
3818 mmr_pnode = uv_blade_to_pnode(mmr_blade); 3804 mmr_pnode = uv_blade_to_pnode(mmr_blade);
3819 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); 3805 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
@@ -4024,7 +4010,7 @@ void __init setup_ioapic_dest(void)
4024 int pin, ioapic, irq, irq_entry; 4010 int pin, ioapic, irq, irq_entry;
4025 struct irq_desc *desc; 4011 struct irq_desc *desc;
4026 struct irq_cfg *cfg; 4012 struct irq_cfg *cfg;
4027 cpumask_t mask; 4013 const struct cpumask *mask;
4028 4014
4029 if (skip_ioapic_setup == 1) 4015 if (skip_ioapic_setup == 1)
4030 return; 4016 return;
@@ -4055,7 +4041,7 @@ void __init setup_ioapic_dest(void)
4055 */ 4041 */
4056 if (desc->status & 4042 if (desc->status &
4057 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) 4043 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
4058 mask = desc->affinity; 4044 mask = &desc->affinity;
4059 else 4045 else
4060 mask = TARGET_CPUS; 4046 mask = TARGET_CPUS;
4061 4047
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
index f1c688e46f35..285bbf8831fa 100644
--- a/arch/x86/kernel/ipi.c
+++ b/arch/x86/kernel/ipi.c
@@ -116,18 +116,18 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector)
116/* 116/*
117 * This is only used on smaller machines. 117 * This is only used on smaller machines.
118 */ 118 */
119void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) 119void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector)
120{ 120{
121 unsigned long mask = cpus_addr(cpumask)[0]; 121 unsigned long mask = cpumask_bits(cpumask)[0];
122 unsigned long flags; 122 unsigned long flags;
123 123
124 local_irq_save(flags); 124 local_irq_save(flags);
125 WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); 125 WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
126 __send_IPI_dest_field(mask, vector); 126 __send_IPI_dest_field(mask, vector);
127 local_irq_restore(flags); 127 local_irq_restore(flags);
128} 128}
129 129
130void send_IPI_mask_sequence(cpumask_t mask, int vector) 130void send_IPI_mask_sequence(const struct cpumask *mask, int vector)
131{ 131{
132 unsigned long flags; 132 unsigned long flags;
133 unsigned int query_cpu; 133 unsigned int query_cpu;
@@ -139,12 +139,24 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector)
139 */ 139 */
140 140
141 local_irq_save(flags); 141 local_irq_save(flags);
142 for_each_possible_cpu(query_cpu) { 142 for_each_cpu(query_cpu, mask)
143 if (cpu_isset(query_cpu, mask)) { 143 __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector);
144 local_irq_restore(flags);
145}
146
147void send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
148{
149 unsigned long flags;
150 unsigned int query_cpu;
151 unsigned int this_cpu = smp_processor_id();
152
153 /* See Hack comment above */
154
155 local_irq_save(flags);
156 for_each_cpu(query_cpu, mask)
157 if (query_cpu != this_cpu)
144 __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), 158 __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
145 vector); 159 vector);
146 }
147 }
148 local_irq_restore(flags); 160 local_irq_restore(flags);
149} 161}
150 162
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 3f1d9d18df67..bce53e1352a0 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -9,6 +9,7 @@
9#include <asm/apic.h> 9#include <asm/apic.h>
10#include <asm/io_apic.h> 10#include <asm/io_apic.h>
11#include <asm/smp.h> 11#include <asm/smp.h>
12#include <asm/irq.h>
12 13
13atomic_t irq_err_count; 14atomic_t irq_err_count;
14 15
@@ -190,3 +191,5 @@ u64 arch_irq_stat(void)
190#endif 191#endif
191 return sum; 192 return sum;
192} 193}
194
195EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 119fc9c8ff7f..9dc5588f336a 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -233,27 +233,28 @@ unsigned int do_IRQ(struct pt_regs *regs)
233#ifdef CONFIG_HOTPLUG_CPU 233#ifdef CONFIG_HOTPLUG_CPU
234#include <mach_apic.h> 234#include <mach_apic.h>
235 235
236void fixup_irqs(cpumask_t map) 236/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
237void fixup_irqs(void)
237{ 238{
238 unsigned int irq; 239 unsigned int irq;
239 static int warned; 240 static int warned;
240 struct irq_desc *desc; 241 struct irq_desc *desc;
241 242
242 for_each_irq_desc(irq, desc) { 243 for_each_irq_desc(irq, desc) {
243 cpumask_t mask; 244 const struct cpumask *affinity;
244 245
245 if (!desc) 246 if (!desc)
246 continue; 247 continue;
247 if (irq == 2) 248 if (irq == 2)
248 continue; 249 continue;
249 250
250 cpus_and(mask, desc->affinity, map); 251 affinity = &desc->affinity;
251 if (any_online_cpu(mask) == NR_CPUS) { 252 if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
252 printk("Breaking affinity for irq %i\n", irq); 253 printk("Breaking affinity for irq %i\n", irq);
253 mask = map; 254 affinity = cpu_all_mask;
254 } 255 }
255 if (desc->chip->set_affinity) 256 if (desc->chip->set_affinity)
256 desc->chip->set_affinity(irq, mask); 257 desc->chip->set_affinity(irq, affinity);
257 else if (desc->action && !(warned++)) 258 else if (desc->action && !(warned++))
258 printk("Cannot set affinity for irq %i\n", irq); 259 printk("Cannot set affinity for irq %i\n", irq);
259 } 260 }
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index a174a217eb1a..6383d50f82ea 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -80,16 +80,17 @@ asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
80} 80}
81 81
82#ifdef CONFIG_HOTPLUG_CPU 82#ifdef CONFIG_HOTPLUG_CPU
83void fixup_irqs(cpumask_t map) 83/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
84void fixup_irqs(void)
84{ 85{
85 unsigned int irq; 86 unsigned int irq;
86 static int warned; 87 static int warned;
87 struct irq_desc *desc; 88 struct irq_desc *desc;
88 89
89 for_each_irq_desc(irq, desc) { 90 for_each_irq_desc(irq, desc) {
90 cpumask_t mask;
91 int break_affinity = 0; 91 int break_affinity = 0;
92 int set_affinity = 1; 92 int set_affinity = 1;
93 const struct cpumask *affinity;
93 94
94 if (!desc) 95 if (!desc)
95 continue; 96 continue;
@@ -99,23 +100,23 @@ void fixup_irqs(cpumask_t map)
99 /* interrupt's are disabled at this point */ 100 /* interrupt's are disabled at this point */
100 spin_lock(&desc->lock); 101 spin_lock(&desc->lock);
101 102
103 affinity = &desc->affinity;
102 if (!irq_has_action(irq) || 104 if (!irq_has_action(irq) ||
103 cpus_equal(desc->affinity, map)) { 105 cpumask_equal(affinity, cpu_online_mask)) {
104 spin_unlock(&desc->lock); 106 spin_unlock(&desc->lock);
105 continue; 107 continue;
106 } 108 }
107 109
108 cpus_and(mask, desc->affinity, map); 110 if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
109 if (cpus_empty(mask)) {
110 break_affinity = 1; 111 break_affinity = 1;
111 mask = map; 112 affinity = cpu_all_mask;
112 } 113 }
113 114
114 if (desc->chip->mask) 115 if (desc->chip->mask)
115 desc->chip->mask(irq); 116 desc->chip->mask(irq);
116 117
117 if (desc->chip->set_affinity) 118 if (desc->chip->set_affinity)
118 desc->chip->set_affinity(irq, mask); 119 desc->chip->set_affinity(irq, affinity);
119 else if (!(warned++)) 120 else if (!(warned++))
120 set_affinity = 0; 121 set_affinity = 0;
121 122
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 203384ed2b5d..84723295f88a 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -110,6 +110,18 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
110 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 110 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
111}; 111};
112 112
113int vector_used_by_percpu_irq(unsigned int vector)
114{
115 int cpu;
116
117 for_each_online_cpu(cpu) {
118 if (per_cpu(vector_irq, cpu)[vector] != -1)
119 return 1;
120 }
121
122 return 0;
123}
124
113/* Overridden in paravirt.c */ 125/* Overridden in paravirt.c */
114void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); 126void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
115 127
@@ -146,10 +158,12 @@ void __init native_init_IRQ(void)
146 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); 158 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
147 159
148 /* IPI for single call function */ 160 /* IPI for single call function */
149 set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); 161 alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
162 call_function_single_interrupt);
150 163
151 /* Low priority IPI to cleanup after moving an irq */ 164 /* Low priority IPI to cleanup after moving an irq */
152 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); 165 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
166 set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
153#endif 167#endif
154 168
155#ifdef CONFIG_X86_LOCAL_APIC 169#ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 6190e6ef546c..31ebfe38e96c 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -69,6 +69,18 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
69 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 69 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
70}; 70};
71 71
72int vector_used_by_percpu_irq(unsigned int vector)
73{
74 int cpu;
75
76 for_each_online_cpu(cpu) {
77 if (per_cpu(vector_irq, cpu)[vector] != -1)
78 return 1;
79 }
80
81 return 0;
82}
83
72void __init init_ISA_irqs(void) 84void __init init_ISA_irqs(void)
73{ 85{
74 int i; 86 int i;
@@ -121,6 +133,7 @@ static void __init smp_intr_init(void)
121 133
122 /* Low priority IPI to cleanup after moving an irq */ 134 /* Low priority IPI to cleanup after moving an irq */
123 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); 135 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
136 set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
124#endif 137#endif
125} 138}
126 139
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index e169ae9b6a62..652fce6d2cce 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -89,17 +89,17 @@ static cycle_t kvm_clock_read(void)
89 */ 89 */
90static unsigned long kvm_get_tsc_khz(void) 90static unsigned long kvm_get_tsc_khz(void)
91{ 91{
92 return preset_lpj; 92 struct pvclock_vcpu_time_info *src;
93 src = &per_cpu(hv_clock, 0);
94 return pvclock_tsc_khz(src);
93} 95}
94 96
95static void kvm_get_preset_lpj(void) 97static void kvm_get_preset_lpj(void)
96{ 98{
97 struct pvclock_vcpu_time_info *src;
98 unsigned long khz; 99 unsigned long khz;
99 u64 lpj; 100 u64 lpj;
100 101
101 src = &per_cpu(hv_clock, 0); 102 khz = kvm_get_tsc_khz();
102 khz = pvclock_tsc_khz(src);
103 103
104 lpj = ((u64)khz * 1000); 104 lpj = ((u64)khz * 1000);
105 do_div(lpj, HZ); 105 do_div(lpj, HZ);
@@ -194,5 +194,7 @@ void __init kvmclock_init(void)
194#endif 194#endif
195 kvm_get_preset_lpj(); 195 kvm_get_preset_lpj();
196 clocksource_register(&kvm_clock); 196 clocksource_register(&kvm_clock);
197 pv_info.paravirt_enabled = 1;
198 pv_info.name = "KVM";
197 } 199 }
198} 200}
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index eee32b43fee3..71f1d99a635d 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -12,8 +12,8 @@
12#include <linux/mm.h> 12#include <linux/mm.h>
13#include <linux/smp.h> 13#include <linux/smp.h>
14#include <linux/vmalloc.h> 14#include <linux/vmalloc.h>
15#include <linux/uaccess.h>
15 16
16#include <asm/uaccess.h>
17#include <asm/system.h> 17#include <asm/system.h>
18#include <asm/ldt.h> 18#include <asm/ldt.h>
19#include <asm/desc.h> 19#include <asm/desc.h>
@@ -93,7 +93,7 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
93 if (err < 0) 93 if (err < 0)
94 return err; 94 return err;
95 95
96 for(i = 0; i < old->size; i++) 96 for (i = 0; i < old->size; i++)
97 write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); 97 write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
98 return 0; 98 return 0;
99} 99}
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 3b599518c322..c12314c9e86f 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -287,7 +287,7 @@ static struct clock_event_device mfgpt_clockevent = {
287 .set_mode = mfgpt_set_mode, 287 .set_mode = mfgpt_set_mode,
288 .set_next_event = mfgpt_next_event, 288 .set_next_event = mfgpt_next_event,
289 .rating = 250, 289 .rating = 250,
290 .cpumask = CPU_MASK_ALL, 290 .cpumask = cpu_all_mask,
291 .shift = 32 291 .shift = 32
292}; 292};
293 293
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index efc2f361fe85..666e43df51f9 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -13,8 +13,7 @@
13#include <asm/msr.h> 13#include <asm/msr.h>
14#include <asm/acpi.h> 14#include <asm/acpi.h>
15#include <asm/mmconfig.h> 15#include <asm/mmconfig.h>
16 16#include <asm/pci_x86.h>
17#include "../pci/pci.h"
18 17
19struct pci_hostbridge_probe { 18struct pci_hostbridge_probe {
20 u32 bus; 19 u32 bus;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 45e3b69808ba..c5c5b8df1dbc 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -16,14 +16,14 @@
16#include <linux/bitops.h> 16#include <linux/bitops.h>
17#include <linux/acpi.h> 17#include <linux/acpi.h>
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/smp.h>
20#include <linux/acpi.h>
19 21
20#include <asm/smp.h>
21#include <asm/mtrr.h> 22#include <asm/mtrr.h>
22#include <asm/mpspec.h> 23#include <asm/mpspec.h>
23#include <asm/pgalloc.h> 24#include <asm/pgalloc.h>
24#include <asm/io_apic.h> 25#include <asm/io_apic.h>
25#include <asm/proto.h> 26#include <asm/proto.h>
26#include <asm/acpi.h>
27#include <asm/bios_ebda.h> 27#include <asm/bios_ebda.h>
28#include <asm/e820.h> 28#include <asm/e820.h>
29#include <asm/trampoline.h> 29#include <asm/trampoline.h>
@@ -95,8 +95,8 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
95#endif 95#endif
96 96
97 if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { 97 if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) {
98 set_bit(m->mpc_busid, mp_bus_not_pci); 98 set_bit(m->mpc_busid, mp_bus_not_pci);
99#if defined(CONFIG_EISA) || defined (CONFIG_MCA) 99#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
100 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; 100 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
101#endif 101#endif
102 } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { 102 } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
@@ -104,7 +104,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
104 x86_quirks->mpc_oem_pci_bus(m); 104 x86_quirks->mpc_oem_pci_bus(m);
105 105
106 clear_bit(m->mpc_busid, mp_bus_not_pci); 106 clear_bit(m->mpc_busid, mp_bus_not_pci);
107#if defined(CONFIG_EISA) || defined (CONFIG_MCA) 107#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
108 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; 108 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
109 } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { 109 } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) {
110 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; 110 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 8bd1bf9622a7..45a09ccdc214 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -26,11 +26,10 @@
26#include <linux/kernel_stat.h> 26#include <linux/kernel_stat.h>
27#include <linux/kdebug.h> 27#include <linux/kdebug.h>
28#include <linux/smp.h> 28#include <linux/smp.h>
29#include <linux/nmi.h>
29 30
30#include <asm/i8259.h> 31#include <asm/i8259.h>
31#include <asm/io_apic.h> 32#include <asm/io_apic.h>
32#include <asm/smp.h>
33#include <asm/nmi.h>
34#include <asm/proto.h> 33#include <asm/proto.h>
35#include <asm/timer.h> 34#include <asm/timer.h>
36 35
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index a35eaa379ff6..00c2bcd41463 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -52,7 +52,7 @@ static u32 *iommu_gatt_base; /* Remapping table */
52 * to trigger bugs with some popular PCI cards, in particular 3ware (but 52 * to trigger bugs with some popular PCI cards, in particular 3ware (but
53 * has been also also seen with Qlogic at least). 53 * has been also also seen with Qlogic at least).
54 */ 54 */
55int iommu_fullflush = 1; 55static int iommu_fullflush = 1;
56 56
57/* Allocation bitmap for the remapping area: */ 57/* Allocation bitmap for the remapping area: */
58static DEFINE_SPINLOCK(iommu_bitmap_lock); 58static DEFINE_SPINLOCK(iommu_bitmap_lock);
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 61f718df6eec..bf088c61fa40 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -12,6 +12,8 @@
12#include <asm/proto.h> 12#include <asm/proto.h>
13#include <asm/reboot_fixups.h> 13#include <asm/reboot_fixups.h>
14#include <asm/reboot.h> 14#include <asm/reboot.h>
15#include <asm/pci_x86.h>
16#include <asm/virtext.h>
15 17
16#ifdef CONFIG_X86_32 18#ifdef CONFIG_X86_32
17# include <linux/dmi.h> 19# include <linux/dmi.h>
@@ -23,7 +25,6 @@
23 25
24#include <mach_ipi.h> 26#include <mach_ipi.h>
25 27
26
27/* 28/*
28 * Power off function, if any 29 * Power off function, if any
29 */ 30 */
@@ -39,6 +40,12 @@ int reboot_force;
39static int reboot_cpu = -1; 40static int reboot_cpu = -1;
40#endif 41#endif
41 42
43/* This is set if we need to go through the 'emergency' path.
44 * When machine_emergency_restart() is called, we may be on
45 * an inconsistent state and won't be able to do a clean cleanup
46 */
47static int reboot_emergency;
48
42/* This is set by the PCI code if either type 1 or type 2 PCI is detected */ 49/* This is set by the PCI code if either type 1 or type 2 PCI is detected */
43bool port_cf9_safe = false; 50bool port_cf9_safe = false;
44 51
@@ -368,6 +375,48 @@ static inline void kb_wait(void)
368 } 375 }
369} 376}
370 377
378static void vmxoff_nmi(int cpu, struct die_args *args)
379{
380 cpu_emergency_vmxoff();
381}
382
383/* Use NMIs as IPIs to tell all CPUs to disable virtualization
384 */
385static void emergency_vmx_disable_all(void)
386{
387 /* Just make sure we won't change CPUs while doing this */
388 local_irq_disable();
389
390 /* We need to disable VMX on all CPUs before rebooting, otherwise
391 * we risk hanging up the machine, because the CPU ignore INIT
392 * signals when VMX is enabled.
393 *
394 * We can't take any locks and we may be on an inconsistent
395 * state, so we use NMIs as IPIs to tell the other CPUs to disable
396 * VMX and halt.
397 *
398 * For safety, we will avoid running the nmi_shootdown_cpus()
399 * stuff unnecessarily, but we don't have a way to check
400 * if other CPUs have VMX enabled. So we will call it only if the
401 * CPU we are running on has VMX enabled.
402 *
403 * We will miss cases where VMX is not enabled on all CPUs. This
404 * shouldn't do much harm because KVM always enable VMX on all
405 * CPUs anyway. But we can miss it on the small window where KVM
406 * is still enabling VMX.
407 */
408 if (cpu_has_vmx() && cpu_vmx_enabled()) {
409 /* Disable VMX on this CPU.
410 */
411 cpu_vmxoff();
412
413 /* Halt and disable VMX on the other CPUs */
414 nmi_shootdown_cpus(vmxoff_nmi);
415
416 }
417}
418
419
371void __attribute__((weak)) mach_reboot_fixups(void) 420void __attribute__((weak)) mach_reboot_fixups(void)
372{ 421{
373} 422}
@@ -376,6 +425,9 @@ static void native_machine_emergency_restart(void)
376{ 425{
377 int i; 426 int i;
378 427
428 if (reboot_emergency)
429 emergency_vmx_disable_all();
430
379 /* Tell the BIOS if we want cold or warm reboot */ 431 /* Tell the BIOS if we want cold or warm reboot */
380 *((unsigned short *)__va(0x472)) = reboot_mode; 432 *((unsigned short *)__va(0x472)) = reboot_mode;
381 433
@@ -482,13 +534,19 @@ void native_machine_shutdown(void)
482#endif 534#endif
483} 535}
484 536
537static void __machine_emergency_restart(int emergency)
538{
539 reboot_emergency = emergency;
540 machine_ops.emergency_restart();
541}
542
485static void native_machine_restart(char *__unused) 543static void native_machine_restart(char *__unused)
486{ 544{
487 printk("machine restart\n"); 545 printk("machine restart\n");
488 546
489 if (!reboot_force) 547 if (!reboot_force)
490 machine_shutdown(); 548 machine_shutdown();
491 machine_emergency_restart(); 549 __machine_emergency_restart(0);
492} 550}
493 551
494static void native_machine_halt(void) 552static void native_machine_halt(void)
@@ -532,7 +590,7 @@ void machine_shutdown(void)
532 590
533void machine_emergency_restart(void) 591void machine_emergency_restart(void)
534{ 592{
535 machine_ops.emergency_restart(); 593 __machine_emergency_restart(1);
536} 594}
537 595
538void machine_restart(char *cmd) 596void machine_restart(char *cmd)
@@ -592,10 +650,7 @@ static int crash_nmi_callback(struct notifier_block *self,
592 650
593static void smp_send_nmi_allbutself(void) 651static void smp_send_nmi_allbutself(void)
594{ 652{
595 cpumask_t mask = cpu_online_map; 653 send_IPI_allbutself(NMI_VECTOR);
596 cpu_clear(safe_smp_processor_id(), mask);
597 if (!cpus_empty(mask))
598 send_IPI_mask(mask, NMI_VECTOR);
599} 654}
600 655
601static struct notifier_block crash_nmi_nb = { 656static struct notifier_block crash_nmi_nb = {
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index ae0c0d3bb770..0b63b08e7530 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -152,6 +152,11 @@ void __init setup_per_cpu_areas(void)
152 old_size = PERCPU_ENOUGH_ROOM; 152 old_size = PERCPU_ENOUGH_ROOM;
153 align = max_t(unsigned long, PAGE_SIZE, align); 153 align = max_t(unsigned long, PAGE_SIZE, align);
154 size = roundup(old_size, align); 154 size = roundup(old_size, align);
155
156 printk(KERN_INFO
157 "NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
158 NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
159
155 printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", 160 printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
156 size); 161 size);
157 162
@@ -168,24 +173,24 @@ void __init setup_per_cpu_areas(void)
168 "cpu %d has no node %d or node-local memory\n", 173 "cpu %d has no node %d or node-local memory\n",
169 cpu, node); 174 cpu, node);
170 if (ptr) 175 if (ptr)
171 printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n", 176 printk(KERN_DEBUG
177 "per cpu data for cpu%d at %016lx\n",
172 cpu, __pa(ptr)); 178 cpu, __pa(ptr));
173 } 179 }
174 else { 180 else {
175 ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, 181 ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
176 __pa(MAX_DMA_ADDRESS)); 182 __pa(MAX_DMA_ADDRESS));
177 if (ptr) 183 if (ptr)
178 printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n", 184 printk(KERN_DEBUG
179 cpu, node, __pa(ptr)); 185 "per cpu data for cpu%d on node%d "
186 "at %016lx\n",
187 cpu, node, __pa(ptr));
180 } 188 }
181#endif 189#endif
182 per_cpu_offset(cpu) = ptr - __per_cpu_start; 190 per_cpu_offset(cpu) = ptr - __per_cpu_start;
183 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); 191 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
184 } 192 }
185 193
186 printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
187 NR_CPUS, nr_cpu_ids, nr_node_ids);
188
189 /* Setup percpu data maps */ 194 /* Setup percpu data maps */
190 setup_per_cpu_maps(); 195 setup_per_cpu_maps();
191 196
@@ -282,7 +287,7 @@ static void __cpuinit numa_set_cpumask(int cpu, int enable)
282 else 287 else
283 cpu_clear(cpu, *mask); 288 cpu_clear(cpu, *mask);
284 289
285 cpulist_scnprintf(buf, sizeof(buf), *mask); 290 cpulist_scnprintf(buf, sizeof(buf), mask);
286 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", 291 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
287 enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf); 292 enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf);
288 } 293 }
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 7e558db362c1..beea2649a240 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -118,22 +118,22 @@ static void native_smp_send_reschedule(int cpu)
118 WARN_ON(1); 118 WARN_ON(1);
119 return; 119 return;
120 } 120 }
121 send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); 121 send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
122} 122}
123 123
124void native_send_call_func_single_ipi(int cpu) 124void native_send_call_func_single_ipi(int cpu)
125{ 125{
126 send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR); 126 send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
127} 127}
128 128
129void native_send_call_func_ipi(cpumask_t mask) 129void native_send_call_func_ipi(const struct cpumask *mask)
130{ 130{
131 cpumask_t allbutself; 131 cpumask_t allbutself;
132 132
133 allbutself = cpu_online_map; 133 allbutself = cpu_online_map;
134 cpu_clear(smp_processor_id(), allbutself); 134 cpu_clear(smp_processor_id(), allbutself);
135 135
136 if (cpus_equal(mask, allbutself) && 136 if (cpus_equal(*mask, allbutself) &&
137 cpus_equal(cpu_online_map, cpu_callout_map)) 137 cpus_equal(cpu_online_map, cpu_callout_map))
138 send_IPI_allbutself(CALL_FUNCTION_VECTOR); 138 send_IPI_allbutself(CALL_FUNCTION_VECTOR);
139 else 139 else
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f8500c969442..31869bf5fabd 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -102,14 +102,8 @@ EXPORT_SYMBOL(smp_num_siblings);
102/* Last level cache ID of each logical CPU */ 102/* Last level cache ID of each logical CPU */
103DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; 103DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID;
104 104
105/* bitmap of online cpus */
106cpumask_t cpu_online_map __read_mostly;
107EXPORT_SYMBOL(cpu_online_map);
108
109cpumask_t cpu_callin_map; 105cpumask_t cpu_callin_map;
110cpumask_t cpu_callout_map; 106cpumask_t cpu_callout_map;
111cpumask_t cpu_possible_map;
112EXPORT_SYMBOL(cpu_possible_map);
113 107
114/* representing HT siblings of each logical CPU */ 108/* representing HT siblings of each logical CPU */
115DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); 109DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
@@ -1260,6 +1254,15 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
1260 check_nmi_watchdog(); 1254 check_nmi_watchdog();
1261} 1255}
1262 1256
1257static int __initdata setup_possible_cpus = -1;
1258static int __init _setup_possible_cpus(char *str)
1259{
1260 get_option(&str, &setup_possible_cpus);
1261 return 0;
1262}
1263early_param("possible_cpus", _setup_possible_cpus);
1264
1265
1263/* 1266/*
1264 * cpu_possible_map should be static, it cannot change as cpu's 1267 * cpu_possible_map should be static, it cannot change as cpu's
1265 * are onlined, or offlined. The reason is per-cpu data-structures 1268 * are onlined, or offlined. The reason is per-cpu data-structures
@@ -1272,7 +1275,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
1272 * 1275 *
1273 * Three ways to find out the number of additional hotplug CPUs: 1276 * Three ways to find out the number of additional hotplug CPUs:
1274 * - If the BIOS specified disabled CPUs in ACPI/mptables use that. 1277 * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
1275 * - The user can overwrite it with additional_cpus=NUM 1278 * - The user can overwrite it with possible_cpus=NUM
1276 * - Otherwise don't reserve additional CPUs. 1279 * - Otherwise don't reserve additional CPUs.
1277 * We do this because additional CPUs waste a lot of memory. 1280 * We do this because additional CPUs waste a lot of memory.
1278 * -AK 1281 * -AK
@@ -1285,9 +1288,17 @@ __init void prefill_possible_map(void)
1285 if (!num_processors) 1288 if (!num_processors)
1286 num_processors = 1; 1289 num_processors = 1;
1287 1290
1288 possible = num_processors + disabled_cpus; 1291 if (setup_possible_cpus == -1)
1289 if (possible > NR_CPUS) 1292 possible = num_processors + disabled_cpus;
1290 possible = NR_CPUS; 1293 else
1294 possible = setup_possible_cpus;
1295
1296 if (possible > CONFIG_NR_CPUS) {
1297 printk(KERN_WARNING
1298 "%d Processors exceeds NR_CPUS limit of %d\n",
1299 possible, CONFIG_NR_CPUS);
1300 possible = CONFIG_NR_CPUS;
1301 }
1291 1302
1292 printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", 1303 printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
1293 possible, max_t(int, possible - num_processors, 0)); 1304 possible, max_t(int, possible - num_processors, 0));
@@ -1352,7 +1363,7 @@ void cpu_disable_common(void)
1352 lock_vector_lock(); 1363 lock_vector_lock();
1353 remove_cpu_from_maps(cpu); 1364 remove_cpu_from_maps(cpu);
1354 unlock_vector_lock(); 1365 unlock_vector_lock();
1355 fixup_irqs(cpu_online_map); 1366 fixup_irqs();
1356} 1367}
1357 1368
1358int native_cpu_disable(void) 1369int native_cpu_disable(void)
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index 8da059f949be..ce5054642247 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -163,7 +163,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
163 * We have to send the IPI only to 163 * We have to send the IPI only to
164 * CPUs affected. 164 * CPUs affected.
165 */ 165 */
166 send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR); 166 send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR);
167 167
168 while (!cpus_empty(flush_cpumask)) 168 while (!cpus_empty(flush_cpumask))
169 /* nothing. lockup detection does not belong here */ 169 /* nothing. lockup detection does not belong here */
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index 29887d7081a9..f8be6f1d2e48 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -191,7 +191,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
191 * We have to send the IPI only to 191 * We have to send the IPI only to
192 * CPUs affected. 192 * CPUs affected.
193 */ 193 */
194 send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender); 194 send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender);
195 195
196 while (!cpus_empty(f->flush_cpumask)) 196 while (!cpus_empty(f->flush_cpumask))
197 cpu_relax(); 197 cpu_relax();
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 6a00e5faaa74..f885023167e0 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -582,7 +582,6 @@ static int __init uv_ptc_init(void)
582static struct bau_control * __init uv_table_bases_init(int blade, int node) 582static struct bau_control * __init uv_table_bases_init(int blade, int node)
583{ 583{
584 int i; 584 int i;
585 int *ip;
586 struct bau_msg_status *msp; 585 struct bau_msg_status *msp;
587 struct bau_control *bau_tabp; 586 struct bau_control *bau_tabp;
588 587
@@ -599,13 +598,6 @@ static struct bau_control * __init uv_table_bases_init(int blade, int node)
599 bau_cpubits_clear(&msp->seen_by, (int) 598 bau_cpubits_clear(&msp->seen_by, (int)
600 uv_blade_nr_possible_cpus(blade)); 599 uv_blade_nr_possible_cpus(blade));
601 600
602 bau_tabp->watching =
603 kmalloc_node(sizeof(int) * DEST_NUM_RESOURCES, GFP_KERNEL, node);
604 BUG_ON(!bau_tabp->watching);
605
606 for (i = 0, ip = bau_tabp->watching; i < DEST_Q_SIZE; i++, ip++)
607 *ip = 0;
608
609 uv_bau_table_bases[blade] = bau_tabp; 601 uv_bau_table_bases[blade] = bau_tabp;
610 602
611 return bau_tabp; 603 return bau_tabp;
@@ -628,7 +620,6 @@ uv_table_bases_finish(int blade, int node, int cur_cpu,
628 bcp->bau_msg_head = bau_tablesp->va_queue_first; 620 bcp->bau_msg_head = bau_tablesp->va_queue_first;
629 bcp->va_queue_first = bau_tablesp->va_queue_first; 621 bcp->va_queue_first = bau_tablesp->va_queue_first;
630 bcp->va_queue_last = bau_tablesp->va_queue_last; 622 bcp->va_queue_last = bau_tablesp->va_queue_last;
631 bcp->watching = bau_tablesp->watching;
632 bcp->msg_statuses = bau_tablesp->msg_statuses; 623 bcp->msg_statuses = bau_tablesp->msg_statuses;
633 bcp->descriptor_base = adp; 624 bcp->descriptor_base = adp;
634 } 625 }
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 141907ab6e22..ce6650eb64e9 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -72,9 +72,6 @@
72 72
73#include "cpu/mcheck/mce.h" 73#include "cpu/mcheck/mce.h"
74 74
75DECLARE_BITMAP(used_vectors, NR_VECTORS);
76EXPORT_SYMBOL_GPL(used_vectors);
77
78asmlinkage int system_call(void); 75asmlinkage int system_call(void);
79 76
80/* Do we ignore FPU interrupts ? */ 77/* Do we ignore FPU interrupts ? */
@@ -89,6 +86,9 @@ gate_desc idt_table[256]
89 __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; 86 __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
90#endif 87#endif
91 88
89DECLARE_BITMAP(used_vectors, NR_VECTORS);
90EXPORT_SYMBOL_GPL(used_vectors);
91
92static int ignore_nmis; 92static int ignore_nmis;
93 93
94static inline void conditional_sti(struct pt_regs *regs) 94static inline void conditional_sti(struct pt_regs *regs)
@@ -292,8 +292,10 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
292 tsk->thread.error_code = error_code; 292 tsk->thread.error_code = error_code;
293 tsk->thread.trap_no = 8; 293 tsk->thread.trap_no = 8;
294 294
295 /* This is always a kernel trap and never fixable (and thus must 295 /*
296 never return). */ 296 * This is always a kernel trap and never fixable (and thus must
297 * never return).
298 */
297 for (;;) 299 for (;;)
298 die(str, regs, error_code); 300 die(str, regs, error_code);
299} 301}
@@ -520,9 +522,11 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
520} 522}
521 523
522#ifdef CONFIG_X86_64 524#ifdef CONFIG_X86_64
523/* Help handler running on IST stack to switch back to user stack 525/*
524 for scheduling or signal handling. The actual stack switch is done in 526 * Help handler running on IST stack to switch back to user stack
525 entry.S */ 527 * for scheduling or signal handling. The actual stack switch is done in
528 * entry.S
529 */
526asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) 530asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
527{ 531{
528 struct pt_regs *regs = eregs; 532 struct pt_regs *regs = eregs;
@@ -532,8 +536,10 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
532 /* Exception from user space */ 536 /* Exception from user space */
533 else if (user_mode(eregs)) 537 else if (user_mode(eregs))
534 regs = task_pt_regs(current); 538 regs = task_pt_regs(current);
535 /* Exception from kernel and interrupts are enabled. Move to 539 /*
536 kernel process stack. */ 540 * Exception from kernel and interrupts are enabled. Move to
541 * kernel process stack.
542 */
537 else if (eregs->flags & X86_EFLAGS_IF) 543 else if (eregs->flags & X86_EFLAGS_IF)
538 regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); 544 regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
539 if (eregs != regs) 545 if (eregs != regs)
@@ -685,12 +691,7 @@ void math_error(void __user *ip)
685 cwd = get_fpu_cwd(task); 691 cwd = get_fpu_cwd(task);
686 swd = get_fpu_swd(task); 692 swd = get_fpu_swd(task);
687 693
688 err = swd & ~cwd & 0x3f; 694 err = swd & ~cwd;
689
690#ifdef CONFIG_X86_32
691 if (!err)
692 return;
693#endif
694 695
695 if (err & 0x001) { /* Invalid op */ 696 if (err & 0x001) { /* Invalid op */
696 /* 697 /*
@@ -708,7 +709,11 @@ void math_error(void __user *ip)
708 } else if (err & 0x020) { /* Precision */ 709 } else if (err & 0x020) { /* Precision */
709 info.si_code = FPE_FLTRES; 710 info.si_code = FPE_FLTRES;
710 } else { 711 } else {
711 info.si_code = __SI_FAULT|SI_KERNEL; /* WTF? */ 712 /*
713 * If we're using IRQ 13, or supposedly even some trap 16
714 * implementations, it's possible we get a spurious trap...
715 */
716 return; /* Spurious trap, no error */
712 } 717 }
713 force_sig_info(SIGFPE, &info, task); 718 force_sig_info(SIGFPE, &info, task);
714} 719}
@@ -941,9 +946,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
941 946
942void __init trap_init(void) 947void __init trap_init(void)
943{ 948{
944#ifdef CONFIG_X86_32
945 int i; 949 int i;
946#endif
947 950
948#ifdef CONFIG_EISA 951#ifdef CONFIG_EISA
949 void __iomem *p = early_ioremap(0x0FFFD9, 4); 952 void __iomem *p = early_ioremap(0x0FFFD9, 4);
@@ -1000,11 +1003,15 @@ void __init trap_init(void)
1000 } 1003 }
1001 1004
1002 set_system_trap_gate(SYSCALL_VECTOR, &system_call); 1005 set_system_trap_gate(SYSCALL_VECTOR, &system_call);
1006#endif
1003 1007
1004 /* Reserve all the builtin and the syscall vector: */ 1008 /* Reserve all the builtin and the syscall vector: */
1005 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) 1009 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
1006 set_bit(i, used_vectors); 1010 set_bit(i, used_vectors);
1007 1011
1012#ifdef CONFIG_X86_64
1013 set_bit(IA32_SYSCALL_VECTOR, used_vectors);
1014#else
1008 set_bit(SYSCALL_VECTOR, used_vectors); 1015 set_bit(SYSCALL_VECTOR, used_vectors);
1009#endif 1016#endif
1010 /* 1017 /*
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index 254ee07f8635..c4c1f9e09402 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -226,7 +226,7 @@ static void __devinit vmi_time_init_clockevent(void)
226 /* Upper bound is clockevent's use of ulong for cycle deltas. */ 226 /* Upper bound is clockevent's use of ulong for cycle deltas. */
227 evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt); 227 evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt);
228 evt->min_delta_ns = clockevent_delta2ns(1, evt); 228 evt->min_delta_ns = clockevent_delta2ns(1, evt);
229 evt->cpumask = cpumask_of_cpu(cpu); 229 evt->cpumask = cpumask_of(cpu);
230 230
231 printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n", 231 printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n",
232 evt->name, evt->mult, evt->shift); 232 evt->name, evt->mult, evt->shift);
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 15c3e6999182..2b54fe002e94 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -159,7 +159,7 @@ int save_i387_xstate(void __user *buf)
159 * Restore the extended state if present. Otherwise, restore the FP/SSE 159 * Restore the extended state if present. Otherwise, restore the FP/SSE
160 * state. 160 * state.
161 */ 161 */
162int restore_user_xstate(void __user *buf) 162static int restore_user_xstate(void __user *buf)
163{ 163{
164 struct _fpx_sw_bytes fx_sw_user; 164 struct _fpx_sw_bytes fx_sw_user;
165 u64 mask; 165 u64 mask;
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 59ebd37ad79e..e665d1c623ca 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -603,10 +603,29 @@ void kvm_free_pit(struct kvm *kvm)
603 603
604static void __inject_pit_timer_intr(struct kvm *kvm) 604static void __inject_pit_timer_intr(struct kvm *kvm)
605{ 605{
606 struct kvm_vcpu *vcpu;
607 int i;
608
606 mutex_lock(&kvm->lock); 609 mutex_lock(&kvm->lock);
607 kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); 610 kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
608 kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); 611 kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
609 mutex_unlock(&kvm->lock); 612 mutex_unlock(&kvm->lock);
613
614 /*
615 * Provides NMI watchdog support via Virtual Wire mode.
616 * The route is: PIT -> PIC -> LVT0 in NMI mode.
617 *
618 * Note: Our Virtual Wire implementation is simplified, only
619 * propagating PIT interrupts to all VCPUs when they have set
620 * LVT0 to NMI delivery. Other PIC interrupts are just sent to
621 * VCPU0, and only if its LVT0 is in EXTINT mode.
622 */
623 if (kvm->arch.vapics_in_nmi_mode > 0)
624 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
625 vcpu = kvm->vcpus[i];
626 if (vcpu)
627 kvm_apic_nmi_wd_deliver(vcpu);
628 }
610} 629}
611 630
612void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) 631void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 17e41e165f1a..179dcb0103fd 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -26,10 +26,40 @@
26 * Port from Qemu. 26 * Port from Qemu.
27 */ 27 */
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/bitops.h>
29#include "irq.h" 30#include "irq.h"
30 31
31#include <linux/kvm_host.h> 32#include <linux/kvm_host.h>
32 33
34static void pic_lock(struct kvm_pic *s)
35{
36 spin_lock(&s->lock);
37}
38
39static void pic_unlock(struct kvm_pic *s)
40{
41 struct kvm *kvm = s->kvm;
42 unsigned acks = s->pending_acks;
43 bool wakeup = s->wakeup_needed;
44 struct kvm_vcpu *vcpu;
45
46 s->pending_acks = 0;
47 s->wakeup_needed = false;
48
49 spin_unlock(&s->lock);
50
51 while (acks) {
52 kvm_notify_acked_irq(kvm, __ffs(acks));
53 acks &= acks - 1;
54 }
55
56 if (wakeup) {
57 vcpu = s->kvm->vcpus[0];
58 if (vcpu)
59 kvm_vcpu_kick(vcpu);
60 }
61}
62
33static void pic_clear_isr(struct kvm_kpic_state *s, int irq) 63static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
34{ 64{
35 s->isr &= ~(1 << irq); 65 s->isr &= ~(1 << irq);
@@ -136,17 +166,21 @@ static void pic_update_irq(struct kvm_pic *s)
136 166
137void kvm_pic_update_irq(struct kvm_pic *s) 167void kvm_pic_update_irq(struct kvm_pic *s)
138{ 168{
169 pic_lock(s);
139 pic_update_irq(s); 170 pic_update_irq(s);
171 pic_unlock(s);
140} 172}
141 173
142void kvm_pic_set_irq(void *opaque, int irq, int level) 174void kvm_pic_set_irq(void *opaque, int irq, int level)
143{ 175{
144 struct kvm_pic *s = opaque; 176 struct kvm_pic *s = opaque;
145 177
178 pic_lock(s);
146 if (irq >= 0 && irq < PIC_NUM_PINS) { 179 if (irq >= 0 && irq < PIC_NUM_PINS) {
147 pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); 180 pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
148 pic_update_irq(s); 181 pic_update_irq(s);
149 } 182 }
183 pic_unlock(s);
150} 184}
151 185
152/* 186/*
@@ -172,6 +206,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
172 int irq, irq2, intno; 206 int irq, irq2, intno;
173 struct kvm_pic *s = pic_irqchip(kvm); 207 struct kvm_pic *s = pic_irqchip(kvm);
174 208
209 pic_lock(s);
175 irq = pic_get_irq(&s->pics[0]); 210 irq = pic_get_irq(&s->pics[0]);
176 if (irq >= 0) { 211 if (irq >= 0) {
177 pic_intack(&s->pics[0], irq); 212 pic_intack(&s->pics[0], irq);
@@ -196,6 +231,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
196 intno = s->pics[0].irq_base + irq; 231 intno = s->pics[0].irq_base + irq;
197 } 232 }
198 pic_update_irq(s); 233 pic_update_irq(s);
234 pic_unlock(s);
199 kvm_notify_acked_irq(kvm, irq); 235 kvm_notify_acked_irq(kvm, irq);
200 236
201 return intno; 237 return intno;
@@ -203,7 +239,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
203 239
204void kvm_pic_reset(struct kvm_kpic_state *s) 240void kvm_pic_reset(struct kvm_kpic_state *s)
205{ 241{
206 int irq, irqbase; 242 int irq, irqbase, n;
207 struct kvm *kvm = s->pics_state->irq_request_opaque; 243 struct kvm *kvm = s->pics_state->irq_request_opaque;
208 struct kvm_vcpu *vcpu0 = kvm->vcpus[0]; 244 struct kvm_vcpu *vcpu0 = kvm->vcpus[0];
209 245
@@ -214,8 +250,10 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
214 250
215 for (irq = 0; irq < PIC_NUM_PINS/2; irq++) { 251 for (irq = 0; irq < PIC_NUM_PINS/2; irq++) {
216 if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0)) 252 if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
217 if (s->irr & (1 << irq) || s->isr & (1 << irq)) 253 if (s->irr & (1 << irq) || s->isr & (1 << irq)) {
218 kvm_notify_acked_irq(kvm, irq+irqbase); 254 n = irq + irqbase;
255 s->pics_state->pending_acks |= 1 << n;
256 }
219 } 257 }
220 s->last_irr = 0; 258 s->last_irr = 0;
221 s->irr = 0; 259 s->irr = 0;
@@ -406,6 +444,7 @@ static void picdev_write(struct kvm_io_device *this,
406 printk(KERN_ERR "PIC: non byte write\n"); 444 printk(KERN_ERR "PIC: non byte write\n");
407 return; 445 return;
408 } 446 }
447 pic_lock(s);
409 switch (addr) { 448 switch (addr) {
410 case 0x20: 449 case 0x20:
411 case 0x21: 450 case 0x21:
@@ -418,6 +457,7 @@ static void picdev_write(struct kvm_io_device *this,
418 elcr_ioport_write(&s->pics[addr & 1], addr, data); 457 elcr_ioport_write(&s->pics[addr & 1], addr, data);
419 break; 458 break;
420 } 459 }
460 pic_unlock(s);
421} 461}
422 462
423static void picdev_read(struct kvm_io_device *this, 463static void picdev_read(struct kvm_io_device *this,
@@ -431,6 +471,7 @@ static void picdev_read(struct kvm_io_device *this,
431 printk(KERN_ERR "PIC: non byte read\n"); 471 printk(KERN_ERR "PIC: non byte read\n");
432 return; 472 return;
433 } 473 }
474 pic_lock(s);
434 switch (addr) { 475 switch (addr) {
435 case 0x20: 476 case 0x20:
436 case 0x21: 477 case 0x21:
@@ -444,6 +485,7 @@ static void picdev_read(struct kvm_io_device *this,
444 break; 485 break;
445 } 486 }
446 *(unsigned char *)val = data; 487 *(unsigned char *)val = data;
488 pic_unlock(s);
447} 489}
448 490
449/* 491/*
@@ -459,7 +501,7 @@ static void pic_irq_request(void *opaque, int level)
459 s->output = level; 501 s->output = level;
460 if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { 502 if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) {
461 s->pics[0].isr_ack &= ~(1 << irq); 503 s->pics[0].isr_ack &= ~(1 << irq);
462 kvm_vcpu_kick(vcpu); 504 s->wakeup_needed = true;
463 } 505 }
464} 506}
465 507
@@ -469,6 +511,8 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
469 s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); 511 s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
470 if (!s) 512 if (!s)
471 return NULL; 513 return NULL;
514 spin_lock_init(&s->lock);
515 s->kvm = kvm;
472 s->pics[0].elcr_mask = 0xf8; 516 s->pics[0].elcr_mask = 0xf8;
473 s->pics[1].elcr_mask = 0xde; 517 s->pics[1].elcr_mask = 0xde;
474 s->irq_request = pic_irq_request; 518 s->irq_request = pic_irq_request;
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index f17c8f5bbf31..2bf32a03ceec 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -25,6 +25,7 @@
25#include <linux/mm_types.h> 25#include <linux/mm_types.h>
26#include <linux/hrtimer.h> 26#include <linux/hrtimer.h>
27#include <linux/kvm_host.h> 27#include <linux/kvm_host.h>
28#include <linux/spinlock.h>
28 29
29#include "iodev.h" 30#include "iodev.h"
30#include "ioapic.h" 31#include "ioapic.h"
@@ -59,6 +60,10 @@ struct kvm_kpic_state {
59}; 60};
60 61
61struct kvm_pic { 62struct kvm_pic {
63 spinlock_t lock;
64 bool wakeup_needed;
65 unsigned pending_acks;
66 struct kvm *kvm;
62 struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ 67 struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
63 irq_request_func *irq_request; 68 irq_request_func *irq_request;
64 void *irq_request_opaque; 69 void *irq_request_opaque;
@@ -87,6 +92,7 @@ void kvm_pic_reset(struct kvm_kpic_state *s);
87void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec); 92void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
88void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); 93void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
89void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); 94void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
95void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
90void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); 96void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
91void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu); 97void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
92void __kvm_migrate_timers(struct kvm_vcpu *vcpu); 98void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h
index 65ef0fc2c036..8e5ee99551f6 100644
--- a/arch/x86/kvm/kvm_svm.h
+++ b/arch/x86/kvm/kvm_svm.h
@@ -7,7 +7,7 @@
7#include <linux/kvm_host.h> 7#include <linux/kvm_host.h>
8#include <asm/msr.h> 8#include <asm/msr.h>
9 9
10#include "svm.h" 10#include <asm/svm.h>
11 11
12static const u32 host_save_user_msrs[] = { 12static const u32 host_save_user_msrs[] = {
13#ifdef CONFIG_X86_64 13#ifdef CONFIG_X86_64
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0fc3cab48943..afac68c0815c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -130,6 +130,11 @@ static inline int apic_lvtt_period(struct kvm_lapic *apic)
130 return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC; 130 return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC;
131} 131}
132 132
133static inline int apic_lvt_nmi_mode(u32 lvt_val)
134{
135 return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
136}
137
133static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { 138static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
134 LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */ 139 LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */
135 LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ 140 LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */
@@ -354,6 +359,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
354 359
355 case APIC_DM_NMI: 360 case APIC_DM_NMI:
356 kvm_inject_nmi(vcpu); 361 kvm_inject_nmi(vcpu);
362 kvm_vcpu_kick(vcpu);
357 break; 363 break;
358 364
359 case APIC_DM_INIT: 365 case APIC_DM_INIT:
@@ -380,6 +386,14 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
380 } 386 }
381 break; 387 break;
382 388
389 case APIC_DM_EXTINT:
390 /*
391 * Should only be called by kvm_apic_local_deliver() with LVT0,
392 * before NMI watchdog was enabled. Already handled by
393 * kvm_apic_accept_pic_intr().
394 */
395 break;
396
383 default: 397 default:
384 printk(KERN_ERR "TODO: unsupported delivery mode %x\n", 398 printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
385 delivery_mode); 399 delivery_mode);
@@ -663,6 +677,20 @@ static void start_apic_timer(struct kvm_lapic *apic)
663 apic->timer.period))); 677 apic->timer.period)));
664} 678}
665 679
680static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
681{
682 int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0));
683
684 if (apic_lvt_nmi_mode(lvt0_val)) {
685 if (!nmi_wd_enabled) {
686 apic_debug("Receive NMI setting on APIC_LVT0 "
687 "for cpu %d\n", apic->vcpu->vcpu_id);
688 apic->vcpu->kvm->arch.vapics_in_nmi_mode++;
689 }
690 } else if (nmi_wd_enabled)
691 apic->vcpu->kvm->arch.vapics_in_nmi_mode--;
692}
693
666static void apic_mmio_write(struct kvm_io_device *this, 694static void apic_mmio_write(struct kvm_io_device *this,
667 gpa_t address, int len, const void *data) 695 gpa_t address, int len, const void *data)
668{ 696{
@@ -743,10 +771,11 @@ static void apic_mmio_write(struct kvm_io_device *this,
743 apic_set_reg(apic, APIC_ICR2, val & 0xff000000); 771 apic_set_reg(apic, APIC_ICR2, val & 0xff000000);
744 break; 772 break;
745 773
774 case APIC_LVT0:
775 apic_manage_nmi_watchdog(apic, val);
746 case APIC_LVTT: 776 case APIC_LVTT:
747 case APIC_LVTTHMR: 777 case APIC_LVTTHMR:
748 case APIC_LVTPC: 778 case APIC_LVTPC:
749 case APIC_LVT0:
750 case APIC_LVT1: 779 case APIC_LVT1:
751 case APIC_LVTERR: 780 case APIC_LVTERR:
752 /* TODO: Check vector */ 781 /* TODO: Check vector */
@@ -961,12 +990,26 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu)
961 return 0; 990 return 0;
962} 991}
963 992
964static int __inject_apic_timer_irq(struct kvm_lapic *apic) 993static int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
994{
995 u32 reg = apic_get_reg(apic, lvt_type);
996 int vector, mode, trig_mode;
997
998 if (apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
999 vector = reg & APIC_VECTOR_MASK;
1000 mode = reg & APIC_MODE_MASK;
1001 trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
1002 return __apic_accept_irq(apic, mode, vector, 1, trig_mode);
1003 }
1004 return 0;
1005}
1006
1007void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
965{ 1008{
966 int vector; 1009 struct kvm_lapic *apic = vcpu->arch.apic;
967 1010
968 vector = apic_lvt_vector(apic, APIC_LVTT); 1011 if (apic)
969 return __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0); 1012 kvm_apic_local_deliver(apic, APIC_LVT0);
970} 1013}
971 1014
972static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) 1015static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
@@ -1061,9 +1104,8 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
1061{ 1104{
1062 struct kvm_lapic *apic = vcpu->arch.apic; 1105 struct kvm_lapic *apic = vcpu->arch.apic;
1063 1106
1064 if (apic && apic_lvt_enabled(apic, APIC_LVTT) && 1107 if (apic && atomic_read(&apic->timer.pending) > 0) {
1065 atomic_read(&apic->timer.pending) > 0) { 1108 if (kvm_apic_local_deliver(apic, APIC_LVTT))
1066 if (__inject_apic_timer_irq(apic))
1067 atomic_dec(&apic->timer.pending); 1109 atomic_dec(&apic->timer.pending);
1068 } 1110 }
1069} 1111}
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 410ddbc1aa2e..83f11c7474a1 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -17,7 +17,6 @@
17 * 17 *
18 */ 18 */
19 19
20#include "vmx.h"
21#include "mmu.h" 20#include "mmu.h"
22 21
23#include <linux/kvm_host.h> 22#include <linux/kvm_host.h>
@@ -33,6 +32,7 @@
33#include <asm/page.h> 32#include <asm/page.h>
34#include <asm/cmpxchg.h> 33#include <asm/cmpxchg.h>
35#include <asm/io.h> 34#include <asm/io.h>
35#include <asm/vmx.h>
36 36
37/* 37/*
38 * When setting this variable to true it enables Two-Dimensional-Paging 38 * When setting this variable to true it enables Two-Dimensional-Paging
@@ -168,6 +168,7 @@ static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
168static u64 __read_mostly shadow_user_mask; 168static u64 __read_mostly shadow_user_mask;
169static u64 __read_mostly shadow_accessed_mask; 169static u64 __read_mostly shadow_accessed_mask;
170static u64 __read_mostly shadow_dirty_mask; 170static u64 __read_mostly shadow_dirty_mask;
171static u64 __read_mostly shadow_mt_mask;
171 172
172void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) 173void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
173{ 174{
@@ -183,13 +184,14 @@ void kvm_mmu_set_base_ptes(u64 base_pte)
183EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes); 184EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);
184 185
185void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, 186void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
186 u64 dirty_mask, u64 nx_mask, u64 x_mask) 187 u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask)
187{ 188{
188 shadow_user_mask = user_mask; 189 shadow_user_mask = user_mask;
189 shadow_accessed_mask = accessed_mask; 190 shadow_accessed_mask = accessed_mask;
190 shadow_dirty_mask = dirty_mask; 191 shadow_dirty_mask = dirty_mask;
191 shadow_nx_mask = nx_mask; 192 shadow_nx_mask = nx_mask;
192 shadow_x_mask = x_mask; 193 shadow_x_mask = x_mask;
194 shadow_mt_mask = mt_mask;
193} 195}
194EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); 196EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
195 197
@@ -384,7 +386,9 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn)
384{ 386{
385 int *write_count; 387 int *write_count;
386 388
387 write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); 389 gfn = unalias_gfn(kvm, gfn);
390 write_count = slot_largepage_idx(gfn,
391 gfn_to_memslot_unaliased(kvm, gfn));
388 *write_count += 1; 392 *write_count += 1;
389} 393}
390 394
@@ -392,16 +396,20 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
392{ 396{
393 int *write_count; 397 int *write_count;
394 398
395 write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); 399 gfn = unalias_gfn(kvm, gfn);
400 write_count = slot_largepage_idx(gfn,
401 gfn_to_memslot_unaliased(kvm, gfn));
396 *write_count -= 1; 402 *write_count -= 1;
397 WARN_ON(*write_count < 0); 403 WARN_ON(*write_count < 0);
398} 404}
399 405
400static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn) 406static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn)
401{ 407{
402 struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); 408 struct kvm_memory_slot *slot;
403 int *largepage_idx; 409 int *largepage_idx;
404 410
411 gfn = unalias_gfn(kvm, gfn);
412 slot = gfn_to_memslot_unaliased(kvm, gfn);
405 if (slot) { 413 if (slot) {
406 largepage_idx = slot_largepage_idx(gfn, slot); 414 largepage_idx = slot_largepage_idx(gfn, slot);
407 return *largepage_idx; 415 return *largepage_idx;
@@ -613,7 +621,7 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
613 return NULL; 621 return NULL;
614} 622}
615 623
616static void rmap_write_protect(struct kvm *kvm, u64 gfn) 624static int rmap_write_protect(struct kvm *kvm, u64 gfn)
617{ 625{
618 unsigned long *rmapp; 626 unsigned long *rmapp;
619 u64 *spte; 627 u64 *spte;
@@ -659,8 +667,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
659 spte = rmap_next(kvm, rmapp, spte); 667 spte = rmap_next(kvm, rmapp, spte);
660 } 668 }
661 669
662 if (write_protected) 670 return write_protected;
663 kvm_flush_remote_tlbs(kvm);
664} 671}
665 672
666static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) 673static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
@@ -786,9 +793,11 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
786 sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); 793 sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
787 set_page_private(virt_to_page(sp->spt), (unsigned long)sp); 794 set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
788 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); 795 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
796 INIT_LIST_HEAD(&sp->oos_link);
789 ASSERT(is_empty_shadow_page(sp->spt)); 797 ASSERT(is_empty_shadow_page(sp->spt));
790 sp->slot_bitmap = 0; 798 bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
791 sp->multimapped = 0; 799 sp->multimapped = 0;
800 sp->global = 1;
792 sp->parent_pte = parent_pte; 801 sp->parent_pte = parent_pte;
793 --vcpu->kvm->arch.n_free_mmu_pages; 802 --vcpu->kvm->arch.n_free_mmu_pages;
794 return sp; 803 return sp;
@@ -900,8 +909,9 @@ static void kvm_mmu_update_unsync_bitmap(u64 *spte)
900 struct kvm_mmu_page *sp = page_header(__pa(spte)); 909 struct kvm_mmu_page *sp = page_header(__pa(spte));
901 910
902 index = spte - sp->spt; 911 index = spte - sp->spt;
903 __set_bit(index, sp->unsync_child_bitmap); 912 if (!__test_and_set_bit(index, sp->unsync_child_bitmap))
904 sp->unsync_children = 1; 913 sp->unsync_children++;
914 WARN_ON(!sp->unsync_children);
905} 915}
906 916
907static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) 917static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp)
@@ -928,7 +938,6 @@ static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp)
928 938
929static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 939static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
930{ 940{
931 sp->unsync_children = 1;
932 kvm_mmu_update_parents_unsync(sp); 941 kvm_mmu_update_parents_unsync(sp);
933 return 1; 942 return 1;
934} 943}
@@ -959,38 +968,66 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
959{ 968{
960} 969}
961 970
971#define KVM_PAGE_ARRAY_NR 16
972
973struct kvm_mmu_pages {
974 struct mmu_page_and_offset {
975 struct kvm_mmu_page *sp;
976 unsigned int idx;
977 } page[KVM_PAGE_ARRAY_NR];
978 unsigned int nr;
979};
980
962#define for_each_unsync_children(bitmap, idx) \ 981#define for_each_unsync_children(bitmap, idx) \
963 for (idx = find_first_bit(bitmap, 512); \ 982 for (idx = find_first_bit(bitmap, 512); \
964 idx < 512; \ 983 idx < 512; \
965 idx = find_next_bit(bitmap, 512, idx+1)) 984 idx = find_next_bit(bitmap, 512, idx+1))
966 985
967static int mmu_unsync_walk(struct kvm_mmu_page *sp, 986int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
968 struct kvm_unsync_walk *walker) 987 int idx)
969{ 988{
970 int i, ret; 989 int i;
971 990
972 if (!sp->unsync_children) 991 if (sp->unsync)
973 return 0; 992 for (i=0; i < pvec->nr; i++)
993 if (pvec->page[i].sp == sp)
994 return 0;
995
996 pvec->page[pvec->nr].sp = sp;
997 pvec->page[pvec->nr].idx = idx;
998 pvec->nr++;
999 return (pvec->nr == KVM_PAGE_ARRAY_NR);
1000}
1001
1002static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
1003 struct kvm_mmu_pages *pvec)
1004{
1005 int i, ret, nr_unsync_leaf = 0;
974 1006
975 for_each_unsync_children(sp->unsync_child_bitmap, i) { 1007 for_each_unsync_children(sp->unsync_child_bitmap, i) {
976 u64 ent = sp->spt[i]; 1008 u64 ent = sp->spt[i];
977 1009
978 if (is_shadow_present_pte(ent)) { 1010 if (is_shadow_present_pte(ent) && !is_large_pte(ent)) {
979 struct kvm_mmu_page *child; 1011 struct kvm_mmu_page *child;
980 child = page_header(ent & PT64_BASE_ADDR_MASK); 1012 child = page_header(ent & PT64_BASE_ADDR_MASK);
981 1013
982 if (child->unsync_children) { 1014 if (child->unsync_children) {
983 ret = mmu_unsync_walk(child, walker); 1015 if (mmu_pages_add(pvec, child, i))
984 if (ret) 1016 return -ENOSPC;
1017
1018 ret = __mmu_unsync_walk(child, pvec);
1019 if (!ret)
1020 __clear_bit(i, sp->unsync_child_bitmap);
1021 else if (ret > 0)
1022 nr_unsync_leaf += ret;
1023 else
985 return ret; 1024 return ret;
986 __clear_bit(i, sp->unsync_child_bitmap);
987 } 1025 }
988 1026
989 if (child->unsync) { 1027 if (child->unsync) {
990 ret = walker->entry(child, walker); 1028 nr_unsync_leaf++;
991 __clear_bit(i, sp->unsync_child_bitmap); 1029 if (mmu_pages_add(pvec, child, i))
992 if (ret) 1030 return -ENOSPC;
993 return ret;
994 } 1031 }
995 } 1032 }
996 } 1033 }
@@ -998,7 +1035,17 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp,
998 if (find_first_bit(sp->unsync_child_bitmap, 512) == 512) 1035 if (find_first_bit(sp->unsync_child_bitmap, 512) == 512)
999 sp->unsync_children = 0; 1036 sp->unsync_children = 0;
1000 1037
1001 return 0; 1038 return nr_unsync_leaf;
1039}
1040
1041static int mmu_unsync_walk(struct kvm_mmu_page *sp,
1042 struct kvm_mmu_pages *pvec)
1043{
1044 if (!sp->unsync_children)
1045 return 0;
1046
1047 mmu_pages_add(pvec, sp, 0);
1048 return __mmu_unsync_walk(sp, pvec);
1002} 1049}
1003 1050
1004static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) 1051static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
@@ -1021,10 +1068,18 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
1021 return NULL; 1068 return NULL;
1022} 1069}
1023 1070
1071static void kvm_unlink_unsync_global(struct kvm *kvm, struct kvm_mmu_page *sp)
1072{
1073 list_del(&sp->oos_link);
1074 --kvm->stat.mmu_unsync_global;
1075}
1076
1024static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) 1077static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
1025{ 1078{
1026 WARN_ON(!sp->unsync); 1079 WARN_ON(!sp->unsync);
1027 sp->unsync = 0; 1080 sp->unsync = 0;
1081 if (sp->global)
1082 kvm_unlink_unsync_global(kvm, sp);
1028 --kvm->stat.mmu_unsync; 1083 --kvm->stat.mmu_unsync;
1029} 1084}
1030 1085
@@ -1037,7 +1092,8 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1037 return 1; 1092 return 1;
1038 } 1093 }
1039 1094
1040 rmap_write_protect(vcpu->kvm, sp->gfn); 1095 if (rmap_write_protect(vcpu->kvm, sp->gfn))
1096 kvm_flush_remote_tlbs(vcpu->kvm);
1041 kvm_unlink_unsync_page(vcpu->kvm, sp); 1097 kvm_unlink_unsync_page(vcpu->kvm, sp);
1042 if (vcpu->arch.mmu.sync_page(vcpu, sp)) { 1098 if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
1043 kvm_mmu_zap_page(vcpu->kvm, sp); 1099 kvm_mmu_zap_page(vcpu->kvm, sp);
@@ -1048,30 +1104,89 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1048 return 0; 1104 return 0;
1049} 1105}
1050 1106
1051struct sync_walker { 1107struct mmu_page_path {
1052 struct kvm_vcpu *vcpu; 1108 struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1];
1053 struct kvm_unsync_walk walker; 1109 unsigned int idx[PT64_ROOT_LEVEL-1];
1054}; 1110};
1055 1111
1056static int mmu_sync_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk) 1112#define for_each_sp(pvec, sp, parents, i) \
1113 for (i = mmu_pages_next(&pvec, &parents, -1), \
1114 sp = pvec.page[i].sp; \
1115 i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \
1116 i = mmu_pages_next(&pvec, &parents, i))
1117
1118int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents,
1119 int i)
1057{ 1120{
1058 struct sync_walker *sync_walk = container_of(walk, struct sync_walker, 1121 int n;
1059 walker);
1060 struct kvm_vcpu *vcpu = sync_walk->vcpu;
1061 1122
1062 kvm_sync_page(vcpu, sp); 1123 for (n = i+1; n < pvec->nr; n++) {
1063 return (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock)); 1124 struct kvm_mmu_page *sp = pvec->page[n].sp;
1125
1126 if (sp->role.level == PT_PAGE_TABLE_LEVEL) {
1127 parents->idx[0] = pvec->page[n].idx;
1128 return n;
1129 }
1130
1131 parents->parent[sp->role.level-2] = sp;
1132 parents->idx[sp->role.level-1] = pvec->page[n].idx;
1133 }
1134
1135 return n;
1064} 1136}
1065 1137
1066static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 1138void mmu_pages_clear_parents(struct mmu_page_path *parents)
1067{ 1139{
1068 struct sync_walker walker = { 1140 struct kvm_mmu_page *sp;
1069 .walker = { .entry = mmu_sync_fn, }, 1141 unsigned int level = 0;
1070 .vcpu = vcpu, 1142
1071 }; 1143 do {
1144 unsigned int idx = parents->idx[level];
1145
1146 sp = parents->parent[level];
1147 if (!sp)
1148 return;
1149
1150 --sp->unsync_children;
1151 WARN_ON((int)sp->unsync_children < 0);
1152 __clear_bit(idx, sp->unsync_child_bitmap);
1153 level++;
1154 } while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children);
1155}
1156
1157static void kvm_mmu_pages_init(struct kvm_mmu_page *parent,
1158 struct mmu_page_path *parents,
1159 struct kvm_mmu_pages *pvec)
1160{
1161 parents->parent[parent->role.level-1] = NULL;
1162 pvec->nr = 0;
1163}
1164
1165static void mmu_sync_children(struct kvm_vcpu *vcpu,
1166 struct kvm_mmu_page *parent)
1167{
1168 int i;
1169 struct kvm_mmu_page *sp;
1170 struct mmu_page_path parents;
1171 struct kvm_mmu_pages pages;
1172
1173 kvm_mmu_pages_init(parent, &parents, &pages);
1174 while (mmu_unsync_walk(parent, &pages)) {
1175 int protected = 0;
1072 1176
1073 while (mmu_unsync_walk(sp, &walker.walker)) 1177 for_each_sp(pages, sp, parents, i)
1178 protected |= rmap_write_protect(vcpu->kvm, sp->gfn);
1179
1180 if (protected)
1181 kvm_flush_remote_tlbs(vcpu->kvm);
1182
1183 for_each_sp(pages, sp, parents, i) {
1184 kvm_sync_page(vcpu, sp);
1185 mmu_pages_clear_parents(&parents);
1186 }
1074 cond_resched_lock(&vcpu->kvm->mmu_lock); 1187 cond_resched_lock(&vcpu->kvm->mmu_lock);
1188 kvm_mmu_pages_init(parent, &parents, &pages);
1189 }
1075} 1190}
1076 1191
1077static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, 1192static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
@@ -1129,7 +1244,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
1129 sp->role = role; 1244 sp->role = role;
1130 hlist_add_head(&sp->hash_link, bucket); 1245 hlist_add_head(&sp->hash_link, bucket);
1131 if (!metaphysical) { 1246 if (!metaphysical) {
1132 rmap_write_protect(vcpu->kvm, gfn); 1247 if (rmap_write_protect(vcpu->kvm, gfn))
1248 kvm_flush_remote_tlbs(vcpu->kvm);
1133 account_shadowed(vcpu->kvm, gfn); 1249 account_shadowed(vcpu->kvm, gfn);
1134 } 1250 }
1135 if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) 1251 if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte)
@@ -1153,6 +1269,8 @@ static int walk_shadow(struct kvm_shadow_walk *walker,
1153 if (level == PT32E_ROOT_LEVEL) { 1269 if (level == PT32E_ROOT_LEVEL) {
1154 shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; 1270 shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
1155 shadow_addr &= PT64_BASE_ADDR_MASK; 1271 shadow_addr &= PT64_BASE_ADDR_MASK;
1272 if (!shadow_addr)
1273 return 1;
1156 --level; 1274 --level;
1157 } 1275 }
1158 1276
@@ -1237,33 +1355,29 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
1237 } 1355 }
1238} 1356}
1239 1357
1240struct zap_walker { 1358static int mmu_zap_unsync_children(struct kvm *kvm,
1241 struct kvm_unsync_walk walker; 1359 struct kvm_mmu_page *parent)
1242 struct kvm *kvm;
1243 int zapped;
1244};
1245
1246static int mmu_zap_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk)
1247{ 1360{
1248 struct zap_walker *zap_walk = container_of(walk, struct zap_walker, 1361 int i, zapped = 0;
1249 walker); 1362 struct mmu_page_path parents;
1250 kvm_mmu_zap_page(zap_walk->kvm, sp); 1363 struct kvm_mmu_pages pages;
1251 zap_walk->zapped = 1;
1252 return 0;
1253}
1254 1364
1255static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *sp) 1365 if (parent->role.level == PT_PAGE_TABLE_LEVEL)
1256{
1257 struct zap_walker walker = {
1258 .walker = { .entry = mmu_zap_fn, },
1259 .kvm = kvm,
1260 .zapped = 0,
1261 };
1262
1263 if (sp->role.level == PT_PAGE_TABLE_LEVEL)
1264 return 0; 1366 return 0;
1265 mmu_unsync_walk(sp, &walker.walker); 1367
1266 return walker.zapped; 1368 kvm_mmu_pages_init(parent, &parents, &pages);
1369 while (mmu_unsync_walk(parent, &pages)) {
1370 struct kvm_mmu_page *sp;
1371
1372 for_each_sp(pages, sp, parents, i) {
1373 kvm_mmu_zap_page(kvm, sp);
1374 mmu_pages_clear_parents(&parents);
1375 }
1376 zapped += pages.nr;
1377 kvm_mmu_pages_init(parent, &parents, &pages);
1378 }
1379
1380 return zapped;
1267} 1381}
1268 1382
1269static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) 1383static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
@@ -1362,7 +1476,7 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
1362 int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn)); 1476 int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn));
1363 struct kvm_mmu_page *sp = page_header(__pa(pte)); 1477 struct kvm_mmu_page *sp = page_header(__pa(pte));
1364 1478
1365 __set_bit(slot, &sp->slot_bitmap); 1479 __set_bit(slot, sp->slot_bitmap);
1366} 1480}
1367 1481
1368static void mmu_convert_notrap(struct kvm_mmu_page *sp) 1482static void mmu_convert_notrap(struct kvm_mmu_page *sp)
@@ -1393,6 +1507,110 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
1393 return page; 1507 return page;
1394} 1508}
1395 1509
1510/*
1511 * The function is based on mtrr_type_lookup() in
1512 * arch/x86/kernel/cpu/mtrr/generic.c
1513 */
1514static int get_mtrr_type(struct mtrr_state_type *mtrr_state,
1515 u64 start, u64 end)
1516{
1517 int i;
1518 u64 base, mask;
1519 u8 prev_match, curr_match;
1520 int num_var_ranges = KVM_NR_VAR_MTRR;
1521
1522 if (!mtrr_state->enabled)
1523 return 0xFF;
1524
1525 /* Make end inclusive end, instead of exclusive */
1526 end--;
1527
1528 /* Look in fixed ranges. Just return the type as per start */
1529 if (mtrr_state->have_fixed && (start < 0x100000)) {
1530 int idx;
1531
1532 if (start < 0x80000) {
1533 idx = 0;
1534 idx += (start >> 16);
1535 return mtrr_state->fixed_ranges[idx];
1536 } else if (start < 0xC0000) {
1537 idx = 1 * 8;
1538 idx += ((start - 0x80000) >> 14);
1539 return mtrr_state->fixed_ranges[idx];
1540 } else if (start < 0x1000000) {
1541 idx = 3 * 8;
1542 idx += ((start - 0xC0000) >> 12);
1543 return mtrr_state->fixed_ranges[idx];
1544 }
1545 }
1546
1547 /*
1548 * Look in variable ranges
1549 * Look of multiple ranges matching this address and pick type
1550 * as per MTRR precedence
1551 */
1552 if (!(mtrr_state->enabled & 2))
1553 return mtrr_state->def_type;
1554
1555 prev_match = 0xFF;
1556 for (i = 0; i < num_var_ranges; ++i) {
1557 unsigned short start_state, end_state;
1558
1559 if (!(mtrr_state->var_ranges[i].mask_lo & (1 << 11)))
1560 continue;
1561
1562 base = (((u64)mtrr_state->var_ranges[i].base_hi) << 32) +
1563 (mtrr_state->var_ranges[i].base_lo & PAGE_MASK);
1564 mask = (((u64)mtrr_state->var_ranges[i].mask_hi) << 32) +
1565 (mtrr_state->var_ranges[i].mask_lo & PAGE_MASK);
1566
1567 start_state = ((start & mask) == (base & mask));
1568 end_state = ((end & mask) == (base & mask));
1569 if (start_state != end_state)
1570 return 0xFE;
1571
1572 if ((start & mask) != (base & mask))
1573 continue;
1574
1575 curr_match = mtrr_state->var_ranges[i].base_lo & 0xff;
1576 if (prev_match == 0xFF) {
1577 prev_match = curr_match;
1578 continue;
1579 }
1580
1581 if (prev_match == MTRR_TYPE_UNCACHABLE ||
1582 curr_match == MTRR_TYPE_UNCACHABLE)
1583 return MTRR_TYPE_UNCACHABLE;
1584
1585 if ((prev_match == MTRR_TYPE_WRBACK &&
1586 curr_match == MTRR_TYPE_WRTHROUGH) ||
1587 (prev_match == MTRR_TYPE_WRTHROUGH &&
1588 curr_match == MTRR_TYPE_WRBACK)) {
1589 prev_match = MTRR_TYPE_WRTHROUGH;
1590 curr_match = MTRR_TYPE_WRTHROUGH;
1591 }
1592
1593 if (prev_match != curr_match)
1594 return MTRR_TYPE_UNCACHABLE;
1595 }
1596
1597 if (prev_match != 0xFF)
1598 return prev_match;
1599
1600 return mtrr_state->def_type;
1601}
1602
1603static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
1604{
1605 u8 mtrr;
1606
1607 mtrr = get_mtrr_type(&vcpu->arch.mtrr_state, gfn << PAGE_SHIFT,
1608 (gfn << PAGE_SHIFT) + PAGE_SIZE);
1609 if (mtrr == 0xfe || mtrr == 0xff)
1610 mtrr = MTRR_TYPE_WRBACK;
1611 return mtrr;
1612}
1613
1396static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 1614static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1397{ 1615{
1398 unsigned index; 1616 unsigned index;
@@ -1409,9 +1627,15 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1409 if (s->role.word != sp->role.word) 1627 if (s->role.word != sp->role.word)
1410 return 1; 1628 return 1;
1411 } 1629 }
1412 kvm_mmu_mark_parents_unsync(vcpu, sp);
1413 ++vcpu->kvm->stat.mmu_unsync; 1630 ++vcpu->kvm->stat.mmu_unsync;
1414 sp->unsync = 1; 1631 sp->unsync = 1;
1632
1633 if (sp->global) {
1634 list_add(&sp->oos_link, &vcpu->kvm->arch.oos_global_pages);
1635 ++vcpu->kvm->stat.mmu_unsync_global;
1636 } else
1637 kvm_mmu_mark_parents_unsync(vcpu, sp);
1638
1415 mmu_convert_notrap(sp); 1639 mmu_convert_notrap(sp);
1416 return 0; 1640 return 0;
1417} 1641}
@@ -1437,11 +1661,24 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
1437static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, 1661static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1438 unsigned pte_access, int user_fault, 1662 unsigned pte_access, int user_fault,
1439 int write_fault, int dirty, int largepage, 1663 int write_fault, int dirty, int largepage,
1440 gfn_t gfn, pfn_t pfn, bool speculative, 1664 int global, gfn_t gfn, pfn_t pfn, bool speculative,
1441 bool can_unsync) 1665 bool can_unsync)
1442{ 1666{
1443 u64 spte; 1667 u64 spte;
1444 int ret = 0; 1668 int ret = 0;
1669 u64 mt_mask = shadow_mt_mask;
1670 struct kvm_mmu_page *sp = page_header(__pa(shadow_pte));
1671
1672 if (!(vcpu->arch.cr4 & X86_CR4_PGE))
1673 global = 0;
1674 if (!global && sp->global) {
1675 sp->global = 0;
1676 if (sp->unsync) {
1677 kvm_unlink_unsync_global(vcpu->kvm, sp);
1678 kvm_mmu_mark_parents_unsync(vcpu, sp);
1679 }
1680 }
1681
1445 /* 1682 /*
1446 * We don't set the accessed bit, since we sometimes want to see 1683 * We don't set the accessed bit, since we sometimes want to see
1447 * whether the guest actually used the pte (in order to detect 1684 * whether the guest actually used the pte (in order to detect
@@ -1460,6 +1697,11 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1460 spte |= shadow_user_mask; 1697 spte |= shadow_user_mask;
1461 if (largepage) 1698 if (largepage)
1462 spte |= PT_PAGE_SIZE_MASK; 1699 spte |= PT_PAGE_SIZE_MASK;
1700 if (mt_mask) {
1701 mt_mask = get_memory_type(vcpu, gfn) <<
1702 kvm_x86_ops->get_mt_mask_shift();
1703 spte |= mt_mask;
1704 }
1463 1705
1464 spte |= (u64)pfn << PAGE_SHIFT; 1706 spte |= (u64)pfn << PAGE_SHIFT;
1465 1707
@@ -1474,6 +1716,15 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1474 1716
1475 spte |= PT_WRITABLE_MASK; 1717 spte |= PT_WRITABLE_MASK;
1476 1718
1719 /*
1720 * Optimization: for pte sync, if spte was writable the hash
1721 * lookup is unnecessary (and expensive). Write protection
1722 * is responsibility of mmu_get_page / kvm_sync_page.
1723 * Same reasoning can be applied to dirty page accounting.
1724 */
1725 if (!can_unsync && is_writeble_pte(*shadow_pte))
1726 goto set_pte;
1727
1477 if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { 1728 if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
1478 pgprintk("%s: found shadow page for %lx, marking ro\n", 1729 pgprintk("%s: found shadow page for %lx, marking ro\n",
1479 __func__, gfn); 1730 __func__, gfn);
@@ -1495,8 +1746,8 @@ set_pte:
1495static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, 1746static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1496 unsigned pt_access, unsigned pte_access, 1747 unsigned pt_access, unsigned pte_access,
1497 int user_fault, int write_fault, int dirty, 1748 int user_fault, int write_fault, int dirty,
1498 int *ptwrite, int largepage, gfn_t gfn, 1749 int *ptwrite, int largepage, int global,
1499 pfn_t pfn, bool speculative) 1750 gfn_t gfn, pfn_t pfn, bool speculative)
1500{ 1751{
1501 int was_rmapped = 0; 1752 int was_rmapped = 0;
1502 int was_writeble = is_writeble_pte(*shadow_pte); 1753 int was_writeble = is_writeble_pte(*shadow_pte);
@@ -1529,7 +1780,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1529 } 1780 }
1530 } 1781 }
1531 if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, 1782 if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
1532 dirty, largepage, gfn, pfn, speculative, true)) { 1783 dirty, largepage, global, gfn, pfn, speculative, true)) {
1533 if (write_fault) 1784 if (write_fault)
1534 *ptwrite = 1; 1785 *ptwrite = 1;
1535 kvm_x86_ops->tlb_flush(vcpu); 1786 kvm_x86_ops->tlb_flush(vcpu);
@@ -1586,7 +1837,7 @@ static int direct_map_entry(struct kvm_shadow_walk *_walk,
1586 || (walk->largepage && level == PT_DIRECTORY_LEVEL)) { 1837 || (walk->largepage && level == PT_DIRECTORY_LEVEL)) {
1587 mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL, 1838 mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL,
1588 0, walk->write, 1, &walk->pt_write, 1839 0, walk->write, 1, &walk->pt_write,
1589 walk->largepage, gfn, walk->pfn, false); 1840 walk->largepage, 0, gfn, walk->pfn, false);
1590 ++vcpu->stat.pf_fixed; 1841 ++vcpu->stat.pf_fixed;
1591 return 1; 1842 return 1;
1592 } 1843 }
@@ -1773,6 +2024,15 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
1773 } 2024 }
1774} 2025}
1775 2026
2027static void mmu_sync_global(struct kvm_vcpu *vcpu)
2028{
2029 struct kvm *kvm = vcpu->kvm;
2030 struct kvm_mmu_page *sp, *n;
2031
2032 list_for_each_entry_safe(sp, n, &kvm->arch.oos_global_pages, oos_link)
2033 kvm_sync_page(vcpu, sp);
2034}
2035
1776void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) 2036void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
1777{ 2037{
1778 spin_lock(&vcpu->kvm->mmu_lock); 2038 spin_lock(&vcpu->kvm->mmu_lock);
@@ -1780,6 +2040,13 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
1780 spin_unlock(&vcpu->kvm->mmu_lock); 2040 spin_unlock(&vcpu->kvm->mmu_lock);
1781} 2041}
1782 2042
2043void kvm_mmu_sync_global(struct kvm_vcpu *vcpu)
2044{
2045 spin_lock(&vcpu->kvm->mmu_lock);
2046 mmu_sync_global(vcpu);
2047 spin_unlock(&vcpu->kvm->mmu_lock);
2048}
2049
1783static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) 2050static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
1784{ 2051{
1785 return vaddr; 2052 return vaddr;
@@ -2178,7 +2445,8 @@ static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn)
2178} 2445}
2179 2446
2180void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, 2447void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
2181 const u8 *new, int bytes) 2448 const u8 *new, int bytes,
2449 bool guest_initiated)
2182{ 2450{
2183 gfn_t gfn = gpa >> PAGE_SHIFT; 2451 gfn_t gfn = gpa >> PAGE_SHIFT;
2184 struct kvm_mmu_page *sp; 2452 struct kvm_mmu_page *sp;
@@ -2204,15 +2472,17 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
2204 kvm_mmu_free_some_pages(vcpu); 2472 kvm_mmu_free_some_pages(vcpu);
2205 ++vcpu->kvm->stat.mmu_pte_write; 2473 ++vcpu->kvm->stat.mmu_pte_write;
2206 kvm_mmu_audit(vcpu, "pre pte write"); 2474 kvm_mmu_audit(vcpu, "pre pte write");
2207 if (gfn == vcpu->arch.last_pt_write_gfn 2475 if (guest_initiated) {
2208 && !last_updated_pte_accessed(vcpu)) { 2476 if (gfn == vcpu->arch.last_pt_write_gfn
2209 ++vcpu->arch.last_pt_write_count; 2477 && !last_updated_pte_accessed(vcpu)) {
2210 if (vcpu->arch.last_pt_write_count >= 3) 2478 ++vcpu->arch.last_pt_write_count;
2211 flooded = 1; 2479 if (vcpu->arch.last_pt_write_count >= 3)
2212 } else { 2480 flooded = 1;
2213 vcpu->arch.last_pt_write_gfn = gfn; 2481 } else {
2214 vcpu->arch.last_pt_write_count = 1; 2482 vcpu->arch.last_pt_write_gfn = gfn;
2215 vcpu->arch.last_pte_updated = NULL; 2483 vcpu->arch.last_pt_write_count = 1;
2484 vcpu->arch.last_pte_updated = NULL;
2485 }
2216 } 2486 }
2217 index = kvm_page_table_hashfn(gfn); 2487 index = kvm_page_table_hashfn(gfn);
2218 bucket = &vcpu->kvm->arch.mmu_page_hash[index]; 2488 bucket = &vcpu->kvm->arch.mmu_page_hash[index];
@@ -2352,9 +2622,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
2352 2622
2353void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) 2623void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
2354{ 2624{
2355 spin_lock(&vcpu->kvm->mmu_lock);
2356 vcpu->arch.mmu.invlpg(vcpu, gva); 2625 vcpu->arch.mmu.invlpg(vcpu, gva);
2357 spin_unlock(&vcpu->kvm->mmu_lock);
2358 kvm_mmu_flush_tlb(vcpu); 2626 kvm_mmu_flush_tlb(vcpu);
2359 ++vcpu->stat.invlpg; 2627 ++vcpu->stat.invlpg;
2360} 2628}
@@ -2451,7 +2719,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
2451 int i; 2719 int i;
2452 u64 *pt; 2720 u64 *pt;
2453 2721
2454 if (!test_bit(slot, &sp->slot_bitmap)) 2722 if (!test_bit(slot, sp->slot_bitmap))
2455 continue; 2723 continue;
2456 2724
2457 pt = sp->spt; 2725 pt = sp->spt;
@@ -2860,8 +3128,8 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)
2860 if (sp->role.metaphysical) 3128 if (sp->role.metaphysical)
2861 continue; 3129 continue;
2862 3130
2863 slot = gfn_to_memslot(vcpu->kvm, sp->gfn);
2864 gfn = unalias_gfn(vcpu->kvm, sp->gfn); 3131 gfn = unalias_gfn(vcpu->kvm, sp->gfn);
3132 slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn);
2865 rmapp = &slot->rmap[gfn - slot->base_gfn]; 3133 rmapp = &slot->rmap[gfn - slot->base_gfn];
2866 if (*rmapp) 3134 if (*rmapp)
2867 printk(KERN_ERR "%s: (%s) shadow page has writable" 3135 printk(KERN_ERR "%s: (%s) shadow page has writable"
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 84eee43bbe74..9fd78b6e17ad 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -82,6 +82,7 @@ struct shadow_walker {
82 int *ptwrite; 82 int *ptwrite;
83 pfn_t pfn; 83 pfn_t pfn;
84 u64 *sptep; 84 u64 *sptep;
85 gpa_t pte_gpa;
85}; 86};
86 87
87static gfn_t gpte_to_gfn(pt_element_t gpte) 88static gfn_t gpte_to_gfn(pt_element_t gpte)
@@ -222,7 +223,7 @@ walk:
222 if (ret) 223 if (ret)
223 goto walk; 224 goto walk;
224 pte |= PT_DIRTY_MASK; 225 pte |= PT_DIRTY_MASK;
225 kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte)); 226 kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 0);
226 walker->ptes[walker->level - 1] = pte; 227 walker->ptes[walker->level - 1] = pte;
227 } 228 }
228 229
@@ -274,7 +275,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
274 return; 275 return;
275 kvm_get_pfn(pfn); 276 kvm_get_pfn(pfn);
276 mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, 277 mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
277 gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), 278 gpte & PT_DIRTY_MASK, NULL, largepage,
279 gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte),
278 pfn, true); 280 pfn, true);
279} 281}
280 282
@@ -301,8 +303,9 @@ static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw,
301 mmu_set_spte(vcpu, sptep, access, gw->pte_access & access, 303 mmu_set_spte(vcpu, sptep, access, gw->pte_access & access,
302 sw->user_fault, sw->write_fault, 304 sw->user_fault, sw->write_fault,
303 gw->ptes[gw->level-1] & PT_DIRTY_MASK, 305 gw->ptes[gw->level-1] & PT_DIRTY_MASK,
304 sw->ptwrite, sw->largepage, gw->gfn, sw->pfn, 306 sw->ptwrite, sw->largepage,
305 false); 307 gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
308 gw->gfn, sw->pfn, false);
306 sw->sptep = sptep; 309 sw->sptep = sptep;
307 return 1; 310 return 1;
308 } 311 }
@@ -466,10 +469,22 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw,
466 struct kvm_vcpu *vcpu, u64 addr, 469 struct kvm_vcpu *vcpu, u64 addr,
467 u64 *sptep, int level) 470 u64 *sptep, int level)
468{ 471{
472 struct shadow_walker *sw =
473 container_of(_sw, struct shadow_walker, walker);
469 474
470 if (level == PT_PAGE_TABLE_LEVEL) { 475 /* FIXME: properly handle invlpg on large guest pages */
471 if (is_shadow_present_pte(*sptep)) 476 if (level == PT_PAGE_TABLE_LEVEL ||
477 ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
478 struct kvm_mmu_page *sp = page_header(__pa(sptep));
479
480 sw->pte_gpa = (sp->gfn << PAGE_SHIFT);
481 sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
482
483 if (is_shadow_present_pte(*sptep)) {
472 rmap_remove(vcpu->kvm, sptep); 484 rmap_remove(vcpu->kvm, sptep);
485 if (is_large_pte(*sptep))
486 --vcpu->kvm->stat.lpages;
487 }
473 set_shadow_pte(sptep, shadow_trap_nonpresent_pte); 488 set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
474 return 1; 489 return 1;
475 } 490 }
@@ -480,11 +495,26 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw,
480 495
481static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) 496static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
482{ 497{
498 pt_element_t gpte;
483 struct shadow_walker walker = { 499 struct shadow_walker walker = {
484 .walker = { .entry = FNAME(shadow_invlpg_entry), }, 500 .walker = { .entry = FNAME(shadow_invlpg_entry), },
501 .pte_gpa = -1,
485 }; 502 };
486 503
504 spin_lock(&vcpu->kvm->mmu_lock);
487 walk_shadow(&walker.walker, vcpu, gva); 505 walk_shadow(&walker.walker, vcpu, gva);
506 spin_unlock(&vcpu->kvm->mmu_lock);
507 if (walker.pte_gpa == -1)
508 return;
509 if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte,
510 sizeof(pt_element_t)))
511 return;
512 if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) {
513 if (mmu_topup_memory_caches(vcpu))
514 return;
515 kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte,
516 sizeof(pt_element_t), 0);
517 }
488} 518}
489 519
490static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) 520static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
@@ -580,7 +610,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
580 nr_present++; 610 nr_present++;
581 pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); 611 pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
582 set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, 612 set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
583 is_dirty_pte(gpte), 0, gfn, 613 is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn,
584 spte_to_pfn(sp->spt[i]), true, false); 614 spte_to_pfn(sp->spt[i]), true, false);
585 } 615 }
586 616
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 9c4ce657d963..1452851ae258 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -28,6 +28,8 @@
28 28
29#include <asm/desc.h> 29#include <asm/desc.h>
30 30
31#include <asm/virtext.h>
32
31#define __ex(x) __kvm_handle_fault_on_reboot(x) 33#define __ex(x) __kvm_handle_fault_on_reboot(x)
32 34
33MODULE_AUTHOR("Qumranet"); 35MODULE_AUTHOR("Qumranet");
@@ -245,34 +247,19 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
245 247
246static int has_svm(void) 248static int has_svm(void)
247{ 249{
248 uint32_t eax, ebx, ecx, edx; 250 const char *msg;
249
250 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
251 printk(KERN_INFO "has_svm: not amd\n");
252 return 0;
253 }
254 251
255 cpuid(0x80000000, &eax, &ebx, &ecx, &edx); 252 if (!cpu_has_svm(&msg)) {
256 if (eax < SVM_CPUID_FUNC) { 253 printk(KERN_INFO "has_svn: %s\n", msg);
257 printk(KERN_INFO "has_svm: can't execute cpuid_8000000a\n");
258 return 0; 254 return 0;
259 } 255 }
260 256
261 cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
262 if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) {
263 printk(KERN_DEBUG "has_svm: svm not available\n");
264 return 0;
265 }
266 return 1; 257 return 1;
267} 258}
268 259
269static void svm_hardware_disable(void *garbage) 260static void svm_hardware_disable(void *garbage)
270{ 261{
271 uint64_t efer; 262 cpu_svm_disable();
272
273 wrmsrl(MSR_VM_HSAVE_PA, 0);
274 rdmsrl(MSR_EFER, efer);
275 wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
276} 263}
277 264
278static void svm_hardware_enable(void *garbage) 265static void svm_hardware_enable(void *garbage)
@@ -772,6 +759,22 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
772 var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; 759 var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
773 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; 760 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
774 var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; 761 var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
762
763 /*
764 * SVM always stores 0 for the 'G' bit in the CS selector in
765 * the VMCB on a VMEXIT. This hurts cross-vendor migration:
766 * Intel's VMENTRY has a check on the 'G' bit.
767 */
768 if (seg == VCPU_SREG_CS)
769 var->g = s->limit > 0xfffff;
770
771 /*
772 * Work around a bug where the busy flag in the tr selector
773 * isn't exposed
774 */
775 if (seg == VCPU_SREG_TR)
776 var->type |= 0x2;
777
775 var->unusable = !var->present; 778 var->unusable = !var->present;
776} 779}
777 780
@@ -1099,6 +1102,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1099 rep = (io_info & SVM_IOIO_REP_MASK) != 0; 1102 rep = (io_info & SVM_IOIO_REP_MASK) != 0;
1100 down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; 1103 down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0;
1101 1104
1105 skip_emulated_instruction(&svm->vcpu);
1102 return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); 1106 return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
1103} 1107}
1104 1108
@@ -1912,6 +1916,11 @@ static int get_npt_level(void)
1912#endif 1916#endif
1913} 1917}
1914 1918
1919static int svm_get_mt_mask_shift(void)
1920{
1921 return 0;
1922}
1923
1915static struct kvm_x86_ops svm_x86_ops = { 1924static struct kvm_x86_ops svm_x86_ops = {
1916 .cpu_has_kvm_support = has_svm, 1925 .cpu_has_kvm_support = has_svm,
1917 .disabled_by_bios = is_disabled, 1926 .disabled_by_bios = is_disabled,
@@ -1967,6 +1976,7 @@ static struct kvm_x86_ops svm_x86_ops = {
1967 1976
1968 .set_tss_addr = svm_set_tss_addr, 1977 .set_tss_addr = svm_set_tss_addr,
1969 .get_tdp_level = get_npt_level, 1978 .get_tdp_level = get_npt_level,
1979 .get_mt_mask_shift = svm_get_mt_mask_shift,
1970}; 1980};
1971 1981
1972static int __init svm_init(void) 1982static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a4018b01e1f9..6259d7467648 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -16,7 +16,6 @@
16 */ 16 */
17 17
18#include "irq.h" 18#include "irq.h"
19#include "vmx.h"
20#include "mmu.h" 19#include "mmu.h"
21 20
22#include <linux/kvm_host.h> 21#include <linux/kvm_host.h>
@@ -31,6 +30,8 @@
31 30
32#include <asm/io.h> 31#include <asm/io.h>
33#include <asm/desc.h> 32#include <asm/desc.h>
33#include <asm/vmx.h>
34#include <asm/virtext.h>
34 35
35#define __ex(x) __kvm_handle_fault_on_reboot(x) 36#define __ex(x) __kvm_handle_fault_on_reboot(x)
36 37
@@ -90,6 +91,11 @@ struct vcpu_vmx {
90 } rmode; 91 } rmode;
91 int vpid; 92 int vpid;
92 bool emulation_required; 93 bool emulation_required;
94
95 /* Support for vnmi-less CPUs */
96 int soft_vnmi_blocked;
97 ktime_t entry_time;
98 s64 vnmi_blocked_time;
93}; 99};
94 100
95static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) 101static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -122,7 +128,7 @@ static struct vmcs_config {
122 u32 vmentry_ctrl; 128 u32 vmentry_ctrl;
123} vmcs_config; 129} vmcs_config;
124 130
125struct vmx_capability { 131static struct vmx_capability {
126 u32 ept; 132 u32 ept;
127 u32 vpid; 133 u32 vpid;
128} vmx_capability; 134} vmx_capability;
@@ -957,6 +963,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
957 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data); 963 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data);
958 964
959 break; 965 break;
966 case MSR_IA32_CR_PAT:
967 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
968 vmcs_write64(GUEST_IA32_PAT, data);
969 vcpu->arch.pat = data;
970 break;
971 }
972 /* Otherwise falls through to kvm_set_msr_common */
960 default: 973 default:
961 vmx_load_host_state(vmx); 974 vmx_load_host_state(vmx);
962 msr = find_msr_entry(vmx, msr_index); 975 msr = find_msr_entry(vmx, msr_index);
@@ -1032,8 +1045,7 @@ static int vmx_get_irq(struct kvm_vcpu *vcpu)
1032 1045
1033static __init int cpu_has_kvm_support(void) 1046static __init int cpu_has_kvm_support(void)
1034{ 1047{
1035 unsigned long ecx = cpuid_ecx(1); 1048 return cpu_has_vmx();
1036 return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
1037} 1049}
1038 1050
1039static __init int vmx_disabled_by_bios(void) 1051static __init int vmx_disabled_by_bios(void)
@@ -1079,13 +1091,22 @@ static void vmclear_local_vcpus(void)
1079 __vcpu_clear(vmx); 1091 __vcpu_clear(vmx);
1080} 1092}
1081 1093
1082static void hardware_disable(void *garbage) 1094
1095/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot()
1096 * tricks.
1097 */
1098static void kvm_cpu_vmxoff(void)
1083{ 1099{
1084 vmclear_local_vcpus();
1085 asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); 1100 asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
1086 write_cr4(read_cr4() & ~X86_CR4_VMXE); 1101 write_cr4(read_cr4() & ~X86_CR4_VMXE);
1087} 1102}
1088 1103
1104static void hardware_disable(void *garbage)
1105{
1106 vmclear_local_vcpus();
1107 kvm_cpu_vmxoff();
1108}
1109
1089static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, 1110static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
1090 u32 msr, u32 *result) 1111 u32 msr, u32 *result)
1091{ 1112{
@@ -1176,12 +1197,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
1176#ifdef CONFIG_X86_64 1197#ifdef CONFIG_X86_64
1177 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; 1198 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
1178#endif 1199#endif
1179 opt = 0; 1200 opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT;
1180 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, 1201 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
1181 &_vmexit_control) < 0) 1202 &_vmexit_control) < 0)
1182 return -EIO; 1203 return -EIO;
1183 1204
1184 min = opt = 0; 1205 min = 0;
1206 opt = VM_ENTRY_LOAD_IA32_PAT;
1185 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, 1207 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
1186 &_vmentry_control) < 0) 1208 &_vmentry_control) < 0)
1187 return -EIO; 1209 return -EIO;
@@ -2087,8 +2109,9 @@ static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr)
2087 */ 2109 */
2088static int vmx_vcpu_setup(struct vcpu_vmx *vmx) 2110static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2089{ 2111{
2090 u32 host_sysenter_cs; 2112 u32 host_sysenter_cs, msr_low, msr_high;
2091 u32 junk; 2113 u32 junk;
2114 u64 host_pat;
2092 unsigned long a; 2115 unsigned long a;
2093 struct descriptor_table dt; 2116 struct descriptor_table dt;
2094 int i; 2117 int i;
@@ -2176,6 +2199,20 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2176 rdmsrl(MSR_IA32_SYSENTER_EIP, a); 2199 rdmsrl(MSR_IA32_SYSENTER_EIP, a);
2177 vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */ 2200 vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */
2178 2201
2202 if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
2203 rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
2204 host_pat = msr_low | ((u64) msr_high << 32);
2205 vmcs_write64(HOST_IA32_PAT, host_pat);
2206 }
2207 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
2208 rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
2209 host_pat = msr_low | ((u64) msr_high << 32);
2210 /* Write the default value follow host pat */
2211 vmcs_write64(GUEST_IA32_PAT, host_pat);
2212 /* Keep arch.pat sync with GUEST_IA32_PAT */
2213 vmx->vcpu.arch.pat = host_pat;
2214 }
2215
2179 for (i = 0; i < NR_VMX_MSR; ++i) { 2216 for (i = 0; i < NR_VMX_MSR; ++i) {
2180 u32 index = vmx_msr_index[i]; 2217 u32 index = vmx_msr_index[i];
2181 u32 data_low, data_high; 2218 u32 data_low, data_high;
@@ -2230,6 +2267,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2230 2267
2231 vmx->vcpu.arch.rmode.active = 0; 2268 vmx->vcpu.arch.rmode.active = 0;
2232 2269
2270 vmx->soft_vnmi_blocked = 0;
2271
2233 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); 2272 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
2234 kvm_set_cr8(&vmx->vcpu, 0); 2273 kvm_set_cr8(&vmx->vcpu, 0);
2235 msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; 2274 msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
@@ -2335,6 +2374,29 @@ out:
2335 return ret; 2374 return ret;
2336} 2375}
2337 2376
2377static void enable_irq_window(struct kvm_vcpu *vcpu)
2378{
2379 u32 cpu_based_vm_exec_control;
2380
2381 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
2382 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
2383 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
2384}
2385
2386static void enable_nmi_window(struct kvm_vcpu *vcpu)
2387{
2388 u32 cpu_based_vm_exec_control;
2389
2390 if (!cpu_has_virtual_nmis()) {
2391 enable_irq_window(vcpu);
2392 return;
2393 }
2394
2395 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
2396 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
2397 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
2398}
2399
2338static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) 2400static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
2339{ 2401{
2340 struct vcpu_vmx *vmx = to_vmx(vcpu); 2402 struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2358,10 +2420,54 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
2358 2420
2359static void vmx_inject_nmi(struct kvm_vcpu *vcpu) 2421static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
2360{ 2422{
2423 struct vcpu_vmx *vmx = to_vmx(vcpu);
2424
2425 if (!cpu_has_virtual_nmis()) {
2426 /*
2427 * Tracking the NMI-blocked state in software is built upon
2428 * finding the next open IRQ window. This, in turn, depends on
2429 * well-behaving guests: They have to keep IRQs disabled at
2430 * least as long as the NMI handler runs. Otherwise we may
2431 * cause NMI nesting, maybe breaking the guest. But as this is
2432 * highly unlikely, we can live with the residual risk.
2433 */
2434 vmx->soft_vnmi_blocked = 1;
2435 vmx->vnmi_blocked_time = 0;
2436 }
2437
2438 ++vcpu->stat.nmi_injections;
2439 if (vcpu->arch.rmode.active) {
2440 vmx->rmode.irq.pending = true;
2441 vmx->rmode.irq.vector = NMI_VECTOR;
2442 vmx->rmode.irq.rip = kvm_rip_read(vcpu);
2443 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
2444 NMI_VECTOR | INTR_TYPE_SOFT_INTR |
2445 INTR_INFO_VALID_MASK);
2446 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
2447 kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
2448 return;
2449 }
2361 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 2450 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
2362 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); 2451 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
2363} 2452}
2364 2453
2454static void vmx_update_window_states(struct kvm_vcpu *vcpu)
2455{
2456 u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
2457
2458 vcpu->arch.nmi_window_open =
2459 !(guest_intr & (GUEST_INTR_STATE_STI |
2460 GUEST_INTR_STATE_MOV_SS |
2461 GUEST_INTR_STATE_NMI));
2462 if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
2463 vcpu->arch.nmi_window_open = 0;
2464
2465 vcpu->arch.interrupt_window_open =
2466 ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
2467 !(guest_intr & (GUEST_INTR_STATE_STI |
2468 GUEST_INTR_STATE_MOV_SS)));
2469}
2470
2365static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) 2471static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
2366{ 2472{
2367 int word_index = __ffs(vcpu->arch.irq_summary); 2473 int word_index = __ffs(vcpu->arch.irq_summary);
@@ -2374,40 +2480,49 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
2374 kvm_queue_interrupt(vcpu, irq); 2480 kvm_queue_interrupt(vcpu, irq);
2375} 2481}
2376 2482
2377
2378static void do_interrupt_requests(struct kvm_vcpu *vcpu, 2483static void do_interrupt_requests(struct kvm_vcpu *vcpu,
2379 struct kvm_run *kvm_run) 2484 struct kvm_run *kvm_run)
2380{ 2485{
2381 u32 cpu_based_vm_exec_control; 2486 vmx_update_window_states(vcpu);
2382
2383 vcpu->arch.interrupt_window_open =
2384 ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
2385 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
2386 2487
2387 if (vcpu->arch.interrupt_window_open && 2488 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
2388 vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending) 2489 if (vcpu->arch.interrupt.pending) {
2389 kvm_do_inject_irq(vcpu); 2490 enable_nmi_window(vcpu);
2491 } else if (vcpu->arch.nmi_window_open) {
2492 vcpu->arch.nmi_pending = false;
2493 vcpu->arch.nmi_injected = true;
2494 } else {
2495 enable_nmi_window(vcpu);
2496 return;
2497 }
2498 }
2499 if (vcpu->arch.nmi_injected) {
2500 vmx_inject_nmi(vcpu);
2501 if (vcpu->arch.nmi_pending)
2502 enable_nmi_window(vcpu);
2503 else if (vcpu->arch.irq_summary
2504 || kvm_run->request_interrupt_window)
2505 enable_irq_window(vcpu);
2506 return;
2507 }
2390 2508
2391 if (vcpu->arch.interrupt_window_open && vcpu->arch.interrupt.pending) 2509 if (vcpu->arch.interrupt_window_open) {
2392 vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); 2510 if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
2511 kvm_do_inject_irq(vcpu);
2393 2512
2394 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); 2513 if (vcpu->arch.interrupt.pending)
2514 vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
2515 }
2395 if (!vcpu->arch.interrupt_window_open && 2516 if (!vcpu->arch.interrupt_window_open &&
2396 (vcpu->arch.irq_summary || kvm_run->request_interrupt_window)) 2517 (vcpu->arch.irq_summary || kvm_run->request_interrupt_window))
2397 /* 2518 enable_irq_window(vcpu);
2398 * Interrupts blocked. Wait for unblock.
2399 */
2400 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
2401 else
2402 cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
2403 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
2404} 2519}
2405 2520
2406static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) 2521static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
2407{ 2522{
2408 int ret; 2523 int ret;
2409 struct kvm_userspace_memory_region tss_mem = { 2524 struct kvm_userspace_memory_region tss_mem = {
2410 .slot = 8, 2525 .slot = TSS_PRIVATE_MEMSLOT,
2411 .guest_phys_addr = addr, 2526 .guest_phys_addr = addr,
2412 .memory_size = PAGE_SIZE * 3, 2527 .memory_size = PAGE_SIZE * 3,
2413 .flags = 0, 2528 .flags = 0,
@@ -2492,7 +2607,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2492 set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary); 2607 set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
2493 } 2608 }
2494 2609
2495 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */ 2610 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
2496 return 1; /* already handled by vmx_vcpu_run() */ 2611 return 1; /* already handled by vmx_vcpu_run() */
2497 2612
2498 if (is_no_device(intr_info)) { 2613 if (is_no_device(intr_info)) {
@@ -2581,6 +2696,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2581 rep = (exit_qualification & 32) != 0; 2696 rep = (exit_qualification & 32) != 0;
2582 port = exit_qualification >> 16; 2697 port = exit_qualification >> 16;
2583 2698
2699 skip_emulated_instruction(vcpu);
2584 return kvm_emulate_pio(vcpu, kvm_run, in, size, port); 2700 return kvm_emulate_pio(vcpu, kvm_run, in, size, port);
2585} 2701}
2586 2702
@@ -2767,6 +2883,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
2767 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); 2883 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
2768 2884
2769 KVMTRACE_0D(PEND_INTR, vcpu, handler); 2885 KVMTRACE_0D(PEND_INTR, vcpu, handler);
2886 ++vcpu->stat.irq_window_exits;
2770 2887
2771 /* 2888 /*
2772 * If the user space waits to inject interrupts, exit as soon as 2889 * If the user space waits to inject interrupts, exit as soon as
@@ -2775,7 +2892,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
2775 if (kvm_run->request_interrupt_window && 2892 if (kvm_run->request_interrupt_window &&
2776 !vcpu->arch.irq_summary) { 2893 !vcpu->arch.irq_summary) {
2777 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; 2894 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
2778 ++vcpu->stat.irq_window_exits;
2779 return 0; 2895 return 0;
2780 } 2896 }
2781 return 1; 2897 return 1;
@@ -2832,6 +2948,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2832 2948
2833static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 2949static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2834{ 2950{
2951 struct vcpu_vmx *vmx = to_vmx(vcpu);
2835 unsigned long exit_qualification; 2952 unsigned long exit_qualification;
2836 u16 tss_selector; 2953 u16 tss_selector;
2837 int reason; 2954 int reason;
@@ -2839,6 +2956,15 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2839 exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 2956 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
2840 2957
2841 reason = (u32)exit_qualification >> 30; 2958 reason = (u32)exit_qualification >> 30;
2959 if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected &&
2960 (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
2961 (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK)
2962 == INTR_TYPE_NMI_INTR) {
2963 vcpu->arch.nmi_injected = false;
2964 if (cpu_has_virtual_nmis())
2965 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
2966 GUEST_INTR_STATE_NMI);
2967 }
2842 tss_selector = exit_qualification; 2968 tss_selector = exit_qualification;
2843 2969
2844 return kvm_task_switch(vcpu, tss_selector, reason); 2970 return kvm_task_switch(vcpu, tss_selector, reason);
@@ -2927,16 +3053,12 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
2927 while (!guest_state_valid(vcpu)) { 3053 while (!guest_state_valid(vcpu)) {
2928 err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); 3054 err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
2929 3055
2930 switch (err) { 3056 if (err == EMULATE_DO_MMIO)
2931 case EMULATE_DONE: 3057 break;
2932 break; 3058
2933 case EMULATE_DO_MMIO: 3059 if (err != EMULATE_DONE) {
2934 kvm_report_emulation_failure(vcpu, "mmio"); 3060 kvm_report_emulation_failure(vcpu, "emulation failure");
2935 /* TODO: Handle MMIO */ 3061 return;
2936 return;
2937 default:
2938 kvm_report_emulation_failure(vcpu, "emulation failure");
2939 return;
2940 } 3062 }
2941 3063
2942 if (signal_pending(current)) 3064 if (signal_pending(current))
@@ -2948,8 +3070,10 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
2948 local_irq_disable(); 3070 local_irq_disable();
2949 preempt_disable(); 3071 preempt_disable();
2950 3072
2951 /* Guest state should be valid now, no more emulation should be needed */ 3073 /* Guest state should be valid now except if we need to
2952 vmx->emulation_required = 0; 3074 * emulate an MMIO */
3075 if (guest_state_valid(vcpu))
3076 vmx->emulation_required = 0;
2953} 3077}
2954 3078
2955/* 3079/*
@@ -2996,6 +3120,11 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2996 KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu), 3120 KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu),
2997 (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit); 3121 (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit);
2998 3122
3123 /* If we need to emulate an MMIO from handle_invalid_guest_state
3124 * we just return 0 */
3125 if (vmx->emulation_required && emulate_invalid_guest_state)
3126 return 0;
3127
2999 /* Access CR3 don't cause VMExit in paging mode, so we need 3128 /* Access CR3 don't cause VMExit in paging mode, so we need
3000 * to sync with guest real CR3. */ 3129 * to sync with guest real CR3. */
3001 if (vm_need_ept() && is_paging(vcpu)) { 3130 if (vm_need_ept() && is_paging(vcpu)) {
@@ -3012,9 +3141,32 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
3012 3141
3013 if ((vectoring_info & VECTORING_INFO_VALID_MASK) && 3142 if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
3014 (exit_reason != EXIT_REASON_EXCEPTION_NMI && 3143 (exit_reason != EXIT_REASON_EXCEPTION_NMI &&
3015 exit_reason != EXIT_REASON_EPT_VIOLATION)) 3144 exit_reason != EXIT_REASON_EPT_VIOLATION &&
3016 printk(KERN_WARNING "%s: unexpected, valid vectoring info and " 3145 exit_reason != EXIT_REASON_TASK_SWITCH))
3017 "exit reason is 0x%x\n", __func__, exit_reason); 3146 printk(KERN_WARNING "%s: unexpected, valid vectoring info "
3147 "(0x%x) and exit reason is 0x%x\n",
3148 __func__, vectoring_info, exit_reason);
3149
3150 if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) {
3151 if (vcpu->arch.interrupt_window_open) {
3152 vmx->soft_vnmi_blocked = 0;
3153 vcpu->arch.nmi_window_open = 1;
3154 } else if (vmx->vnmi_blocked_time > 1000000000LL &&
3155 vcpu->arch.nmi_pending) {
3156 /*
3157 * This CPU don't support us in finding the end of an
3158 * NMI-blocked window if the guest runs with IRQs
3159 * disabled. So we pull the trigger after 1 s of
3160 * futile waiting, but inform the user about this.
3161 */
3162 printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
3163 "state on VCPU %d after 1 s timeout\n",
3164 __func__, vcpu->vcpu_id);
3165 vmx->soft_vnmi_blocked = 0;
3166 vmx->vcpu.arch.nmi_window_open = 1;
3167 }
3168 }
3169
3018 if (exit_reason < kvm_vmx_max_exit_handlers 3170 if (exit_reason < kvm_vmx_max_exit_handlers
3019 && kvm_vmx_exit_handlers[exit_reason]) 3171 && kvm_vmx_exit_handlers[exit_reason])
3020 return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); 3172 return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
@@ -3042,51 +3194,6 @@ static void update_tpr_threshold(struct kvm_vcpu *vcpu)
3042 vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4); 3194 vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4);
3043} 3195}
3044 3196
3045static void enable_irq_window(struct kvm_vcpu *vcpu)
3046{
3047 u32 cpu_based_vm_exec_control;
3048
3049 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
3050 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
3051 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
3052}
3053
3054static void enable_nmi_window(struct kvm_vcpu *vcpu)
3055{
3056 u32 cpu_based_vm_exec_control;
3057
3058 if (!cpu_has_virtual_nmis())
3059 return;
3060
3061 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
3062 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
3063 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
3064}
3065
3066static int vmx_nmi_enabled(struct kvm_vcpu *vcpu)
3067{
3068 u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
3069 return !(guest_intr & (GUEST_INTR_STATE_NMI |
3070 GUEST_INTR_STATE_MOV_SS |
3071 GUEST_INTR_STATE_STI));
3072}
3073
3074static int vmx_irq_enabled(struct kvm_vcpu *vcpu)
3075{
3076 u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
3077 return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS |
3078 GUEST_INTR_STATE_STI)) &&
3079 (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
3080}
3081
3082static void enable_intr_window(struct kvm_vcpu *vcpu)
3083{
3084 if (vcpu->arch.nmi_pending)
3085 enable_nmi_window(vcpu);
3086 else if (kvm_cpu_has_interrupt(vcpu))
3087 enable_irq_window(vcpu);
3088}
3089
3090static void vmx_complete_interrupts(struct vcpu_vmx *vmx) 3197static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
3091{ 3198{
3092 u32 exit_intr_info; 3199 u32 exit_intr_info;
@@ -3109,7 +3216,9 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
3109 if (unblock_nmi && vector != DF_VECTOR) 3216 if (unblock_nmi && vector != DF_VECTOR)
3110 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, 3217 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
3111 GUEST_INTR_STATE_NMI); 3218 GUEST_INTR_STATE_NMI);
3112 } 3219 } else if (unlikely(vmx->soft_vnmi_blocked))
3220 vmx->vnmi_blocked_time +=
3221 ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
3113 3222
3114 idt_vectoring_info = vmx->idt_vectoring_info; 3223 idt_vectoring_info = vmx->idt_vectoring_info;
3115 idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; 3224 idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
@@ -3147,26 +3256,29 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
3147{ 3256{
3148 update_tpr_threshold(vcpu); 3257 update_tpr_threshold(vcpu);
3149 3258
3150 if (cpu_has_virtual_nmis()) { 3259 vmx_update_window_states(vcpu);
3151 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { 3260
3152 if (vcpu->arch.interrupt.pending) { 3261 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
3153 enable_nmi_window(vcpu); 3262 if (vcpu->arch.interrupt.pending) {
3154 } else if (vmx_nmi_enabled(vcpu)) { 3263 enable_nmi_window(vcpu);
3155 vcpu->arch.nmi_pending = false; 3264 } else if (vcpu->arch.nmi_window_open) {
3156 vcpu->arch.nmi_injected = true; 3265 vcpu->arch.nmi_pending = false;
3157 } else { 3266 vcpu->arch.nmi_injected = true;
3158 enable_intr_window(vcpu); 3267 } else {
3159 return; 3268 enable_nmi_window(vcpu);
3160 }
3161 }
3162 if (vcpu->arch.nmi_injected) {
3163 vmx_inject_nmi(vcpu);
3164 enable_intr_window(vcpu);
3165 return; 3269 return;
3166 } 3270 }
3167 } 3271 }
3272 if (vcpu->arch.nmi_injected) {
3273 vmx_inject_nmi(vcpu);
3274 if (vcpu->arch.nmi_pending)
3275 enable_nmi_window(vcpu);
3276 else if (kvm_cpu_has_interrupt(vcpu))
3277 enable_irq_window(vcpu);
3278 return;
3279 }
3168 if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) { 3280 if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) {
3169 if (vmx_irq_enabled(vcpu)) 3281 if (vcpu->arch.interrupt_window_open)
3170 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu)); 3282 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
3171 else 3283 else
3172 enable_irq_window(vcpu); 3284 enable_irq_window(vcpu);
@@ -3174,6 +3286,8 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
3174 if (vcpu->arch.interrupt.pending) { 3286 if (vcpu->arch.interrupt.pending) {
3175 vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); 3287 vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
3176 kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr); 3288 kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
3289 if (kvm_cpu_has_interrupt(vcpu))
3290 enable_irq_window(vcpu);
3177 } 3291 }
3178} 3292}
3179 3293
@@ -3213,6 +3327,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3213 struct vcpu_vmx *vmx = to_vmx(vcpu); 3327 struct vcpu_vmx *vmx = to_vmx(vcpu);
3214 u32 intr_info; 3328 u32 intr_info;
3215 3329
3330 /* Record the guest's net vcpu time for enforced NMI injections. */
3331 if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
3332 vmx->entry_time = ktime_get();
3333
3216 /* Handle invalid guest state instead of entering VMX */ 3334 /* Handle invalid guest state instead of entering VMX */
3217 if (vmx->emulation_required && emulate_invalid_guest_state) { 3335 if (vmx->emulation_required && emulate_invalid_guest_state) {
3218 handle_invalid_guest_state(vcpu, kvm_run); 3336 handle_invalid_guest_state(vcpu, kvm_run);
@@ -3327,9 +3445,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3327 if (vmx->rmode.irq.pending) 3445 if (vmx->rmode.irq.pending)
3328 fixup_rmode_irq(vmx); 3446 fixup_rmode_irq(vmx);
3329 3447
3330 vcpu->arch.interrupt_window_open = 3448 vmx_update_window_states(vcpu);
3331 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
3332 (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0;
3333 3449
3334 asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); 3450 asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
3335 vmx->launched = 1; 3451 vmx->launched = 1;
@@ -3337,7 +3453,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3337 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 3453 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
3338 3454
3339 /* We need to handle NMIs before interrupts are enabled */ 3455 /* We need to handle NMIs before interrupts are enabled */
3340 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 && 3456 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
3341 (intr_info & INTR_INFO_VALID_MASK)) { 3457 (intr_info & INTR_INFO_VALID_MASK)) {
3342 KVMTRACE_0D(NMI, vcpu, handler); 3458 KVMTRACE_0D(NMI, vcpu, handler);
3343 asm("int $2"); 3459 asm("int $2");
@@ -3455,6 +3571,11 @@ static int get_ept_level(void)
3455 return VMX_EPT_DEFAULT_GAW + 1; 3571 return VMX_EPT_DEFAULT_GAW + 1;
3456} 3572}
3457 3573
3574static int vmx_get_mt_mask_shift(void)
3575{
3576 return VMX_EPT_MT_EPTE_SHIFT;
3577}
3578
3458static struct kvm_x86_ops vmx_x86_ops = { 3579static struct kvm_x86_ops vmx_x86_ops = {
3459 .cpu_has_kvm_support = cpu_has_kvm_support, 3580 .cpu_has_kvm_support = cpu_has_kvm_support,
3460 .disabled_by_bios = vmx_disabled_by_bios, 3581 .disabled_by_bios = vmx_disabled_by_bios,
@@ -3510,6 +3631,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
3510 3631
3511 .set_tss_addr = vmx_set_tss_addr, 3632 .set_tss_addr = vmx_set_tss_addr,
3512 .get_tdp_level = get_ept_level, 3633 .get_tdp_level = get_ept_level,
3634 .get_mt_mask_shift = vmx_get_mt_mask_shift,
3513}; 3635};
3514 3636
3515static int __init vmx_init(void) 3637static int __init vmx_init(void)
@@ -3566,10 +3688,10 @@ static int __init vmx_init(void)
3566 bypass_guest_pf = 0; 3688 bypass_guest_pf = 0;
3567 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | 3689 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
3568 VMX_EPT_WRITABLE_MASK | 3690 VMX_EPT_WRITABLE_MASK |
3569 VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT |
3570 VMX_EPT_IGMT_BIT); 3691 VMX_EPT_IGMT_BIT);
3571 kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, 3692 kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
3572 VMX_EPT_EXECUTABLE_MASK); 3693 VMX_EPT_EXECUTABLE_MASK,
3694 VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
3573 kvm_enable_tdp(); 3695 kvm_enable_tdp();
3574 } else 3696 } else
3575 kvm_disable_tdp(); 3697 kvm_disable_tdp();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f1f8ff2f1fa2..0e6aa8141dcd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -39,6 +39,7 @@
39#include <asm/uaccess.h> 39#include <asm/uaccess.h>
40#include <asm/msr.h> 40#include <asm/msr.h>
41#include <asm/desc.h> 41#include <asm/desc.h>
42#include <asm/mtrr.h>
42 43
43#define MAX_IO_MSRS 256 44#define MAX_IO_MSRS 256
44#define CR0_RESERVED_BITS \ 45#define CR0_RESERVED_BITS \
@@ -86,6 +87,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
86 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 87 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
87 { "hypercalls", VCPU_STAT(hypercalls) }, 88 { "hypercalls", VCPU_STAT(hypercalls) },
88 { "request_irq", VCPU_STAT(request_irq_exits) }, 89 { "request_irq", VCPU_STAT(request_irq_exits) },
90 { "request_nmi", VCPU_STAT(request_nmi_exits) },
89 { "irq_exits", VCPU_STAT(irq_exits) }, 91 { "irq_exits", VCPU_STAT(irq_exits) },
90 { "host_state_reload", VCPU_STAT(host_state_reload) }, 92 { "host_state_reload", VCPU_STAT(host_state_reload) },
91 { "efer_reload", VCPU_STAT(efer_reload) }, 93 { "efer_reload", VCPU_STAT(efer_reload) },
@@ -93,6 +95,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
93 { "insn_emulation", VCPU_STAT(insn_emulation) }, 95 { "insn_emulation", VCPU_STAT(insn_emulation) },
94 { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, 96 { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
95 { "irq_injections", VCPU_STAT(irq_injections) }, 97 { "irq_injections", VCPU_STAT(irq_injections) },
98 { "nmi_injections", VCPU_STAT(nmi_injections) },
96 { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, 99 { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
97 { "mmu_pte_write", VM_STAT(mmu_pte_write) }, 100 { "mmu_pte_write", VM_STAT(mmu_pte_write) },
98 { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, 101 { "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
@@ -101,6 +104,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
101 { "mmu_recycled", VM_STAT(mmu_recycled) }, 104 { "mmu_recycled", VM_STAT(mmu_recycled) },
102 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, 105 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
103 { "mmu_unsync", VM_STAT(mmu_unsync) }, 106 { "mmu_unsync", VM_STAT(mmu_unsync) },
107 { "mmu_unsync_global", VM_STAT(mmu_unsync_global) },
104 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, 108 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
105 { "largepages", VM_STAT(lpages) }, 109 { "largepages", VM_STAT(lpages) },
106 { NULL } 110 { NULL }
@@ -312,6 +316,7 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
312 kvm_x86_ops->set_cr0(vcpu, cr0); 316 kvm_x86_ops->set_cr0(vcpu, cr0);
313 vcpu->arch.cr0 = cr0; 317 vcpu->arch.cr0 = cr0;
314 318
319 kvm_mmu_sync_global(vcpu);
315 kvm_mmu_reset_context(vcpu); 320 kvm_mmu_reset_context(vcpu);
316 return; 321 return;
317} 322}
@@ -355,6 +360,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
355 } 360 }
356 kvm_x86_ops->set_cr4(vcpu, cr4); 361 kvm_x86_ops->set_cr4(vcpu, cr4);
357 vcpu->arch.cr4 = cr4; 362 vcpu->arch.cr4 = cr4;
363 kvm_mmu_sync_global(vcpu);
358 kvm_mmu_reset_context(vcpu); 364 kvm_mmu_reset_context(vcpu);
359} 365}
360EXPORT_SYMBOL_GPL(kvm_set_cr4); 366EXPORT_SYMBOL_GPL(kvm_set_cr4);
@@ -449,7 +455,7 @@ static u32 msrs_to_save[] = {
449 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, 455 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
450#endif 456#endif
451 MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, 457 MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
452 MSR_IA32_PERF_STATUS, 458 MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT
453}; 459};
454 460
455static unsigned num_msrs_to_save; 461static unsigned num_msrs_to_save;
@@ -648,10 +654,38 @@ static bool msr_mtrr_valid(unsigned msr)
648 654
649static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) 655static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
650{ 656{
657 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
658
651 if (!msr_mtrr_valid(msr)) 659 if (!msr_mtrr_valid(msr))
652 return 1; 660 return 1;
653 661
654 vcpu->arch.mtrr[msr - 0x200] = data; 662 if (msr == MSR_MTRRdefType) {
663 vcpu->arch.mtrr_state.def_type = data;
664 vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
665 } else if (msr == MSR_MTRRfix64K_00000)
666 p[0] = data;
667 else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
668 p[1 + msr - MSR_MTRRfix16K_80000] = data;
669 else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
670 p[3 + msr - MSR_MTRRfix4K_C0000] = data;
671 else if (msr == MSR_IA32_CR_PAT)
672 vcpu->arch.pat = data;
673 else { /* Variable MTRRs */
674 int idx, is_mtrr_mask;
675 u64 *pt;
676
677 idx = (msr - 0x200) / 2;
678 is_mtrr_mask = msr - 0x200 - 2 * idx;
679 if (!is_mtrr_mask)
680 pt =
681 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
682 else
683 pt =
684 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
685 *pt = data;
686 }
687
688 kvm_mmu_reset_context(vcpu);
655 return 0; 689 return 0;
656} 690}
657 691
@@ -747,10 +781,37 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
747 781
748static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 782static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
749{ 783{
784 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
785
750 if (!msr_mtrr_valid(msr)) 786 if (!msr_mtrr_valid(msr))
751 return 1; 787 return 1;
752 788
753 *pdata = vcpu->arch.mtrr[msr - 0x200]; 789 if (msr == MSR_MTRRdefType)
790 *pdata = vcpu->arch.mtrr_state.def_type +
791 (vcpu->arch.mtrr_state.enabled << 10);
792 else if (msr == MSR_MTRRfix64K_00000)
793 *pdata = p[0];
794 else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
795 *pdata = p[1 + msr - MSR_MTRRfix16K_80000];
796 else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
797 *pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
798 else if (msr == MSR_IA32_CR_PAT)
799 *pdata = vcpu->arch.pat;
800 else { /* Variable MTRRs */
801 int idx, is_mtrr_mask;
802 u64 *pt;
803
804 idx = (msr - 0x200) / 2;
805 is_mtrr_mask = msr - 0x200 - 2 * idx;
806 if (!is_mtrr_mask)
807 pt =
808 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
809 else
810 pt =
811 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
812 *pdata = *pt;
813 }
814
754 return 0; 815 return 0;
755} 816}
756 817
@@ -903,7 +964,6 @@ int kvm_dev_ioctl_check_extension(long ext)
903 case KVM_CAP_IRQCHIP: 964 case KVM_CAP_IRQCHIP:
904 case KVM_CAP_HLT: 965 case KVM_CAP_HLT:
905 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: 966 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
906 case KVM_CAP_USER_MEMORY:
907 case KVM_CAP_SET_TSS_ADDR: 967 case KVM_CAP_SET_TSS_ADDR:
908 case KVM_CAP_EXT_CPUID: 968 case KVM_CAP_EXT_CPUID:
909 case KVM_CAP_CLOCKSOURCE: 969 case KVM_CAP_CLOCKSOURCE:
@@ -1188,6 +1248,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1188 int t, times = entry->eax & 0xff; 1248 int t, times = entry->eax & 0xff;
1189 1249
1190 entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; 1250 entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
1251 entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
1191 for (t = 1; t < times && *nent < maxnent; ++t) { 1252 for (t = 1; t < times && *nent < maxnent; ++t) {
1192 do_cpuid_1_ent(&entry[t], function, 0); 1253 do_cpuid_1_ent(&entry[t], function, 0);
1193 entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; 1254 entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
@@ -1218,7 +1279,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1218 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1279 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
1219 /* read more entries until level_type is zero */ 1280 /* read more entries until level_type is zero */
1220 for (i = 1; *nent < maxnent; ++i) { 1281 for (i = 1; *nent < maxnent; ++i) {
1221 level_type = entry[i - 1].ecx & 0xff; 1282 level_type = entry[i - 1].ecx & 0xff00;
1222 if (!level_type) 1283 if (!level_type)
1223 break; 1284 break;
1224 do_cpuid_1_ent(&entry[i], function, i); 1285 do_cpuid_1_ent(&entry[i], function, i);
@@ -1318,6 +1379,15 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
1318 return 0; 1379 return 0;
1319} 1380}
1320 1381
1382static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
1383{
1384 vcpu_load(vcpu);
1385 kvm_inject_nmi(vcpu);
1386 vcpu_put(vcpu);
1387
1388 return 0;
1389}
1390
1321static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu, 1391static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
1322 struct kvm_tpr_access_ctl *tac) 1392 struct kvm_tpr_access_ctl *tac)
1323{ 1393{
@@ -1377,6 +1447,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
1377 r = 0; 1447 r = 0;
1378 break; 1448 break;
1379 } 1449 }
1450 case KVM_NMI: {
1451 r = kvm_vcpu_ioctl_nmi(vcpu);
1452 if (r)
1453 goto out;
1454 r = 0;
1455 break;
1456 }
1380 case KVM_SET_CPUID: { 1457 case KVM_SET_CPUID: {
1381 struct kvm_cpuid __user *cpuid_arg = argp; 1458 struct kvm_cpuid __user *cpuid_arg = argp;
1382 struct kvm_cpuid cpuid; 1459 struct kvm_cpuid cpuid;
@@ -1968,7 +2045,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
1968 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); 2045 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
1969 if (ret < 0) 2046 if (ret < 0)
1970 return 0; 2047 return 0;
1971 kvm_mmu_pte_write(vcpu, gpa, val, bytes); 2048 kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1);
1972 return 1; 2049 return 1;
1973} 2050}
1974 2051
@@ -2404,8 +2481,6 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2404 val = kvm_register_read(vcpu, VCPU_REGS_RAX); 2481 val = kvm_register_read(vcpu, VCPU_REGS_RAX);
2405 memcpy(vcpu->arch.pio_data, &val, 4); 2482 memcpy(vcpu->arch.pio_data, &val, 4);
2406 2483
2407 kvm_x86_ops->skip_emulated_instruction(vcpu);
2408
2409 pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in); 2484 pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
2410 if (pio_dev) { 2485 if (pio_dev) {
2411 kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data); 2486 kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
@@ -2541,7 +2616,7 @@ int kvm_arch_init(void *opaque)
2541 kvm_mmu_set_nonpresent_ptes(0ull, 0ull); 2616 kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
2542 kvm_mmu_set_base_ptes(PT_PRESENT_MASK); 2617 kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
2543 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, 2618 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
2544 PT_DIRTY_MASK, PT64_NX_MASK, 0); 2619 PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);
2545 return 0; 2620 return 0;
2546 2621
2547out: 2622out:
@@ -2729,7 +2804,7 @@ static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
2729 2804
2730 e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT; 2805 e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
2731 /* when no next entry is found, the current entry[i] is reselected */ 2806 /* when no next entry is found, the current entry[i] is reselected */
2732 for (j = i + 1; j == i; j = (j + 1) % nent) { 2807 for (j = i + 1; ; j = (j + 1) % nent) {
2733 struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j]; 2808 struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
2734 if (ej->function == e->function) { 2809 if (ej->function == e->function) {
2735 ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; 2810 ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
@@ -2973,7 +3048,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2973 pr_debug("vcpu %d received sipi with vector # %x\n", 3048 pr_debug("vcpu %d received sipi with vector # %x\n",
2974 vcpu->vcpu_id, vcpu->arch.sipi_vector); 3049 vcpu->vcpu_id, vcpu->arch.sipi_vector);
2975 kvm_lapic_reset(vcpu); 3050 kvm_lapic_reset(vcpu);
2976 r = kvm_x86_ops->vcpu_reset(vcpu); 3051 r = kvm_arch_vcpu_reset(vcpu);
2977 if (r) 3052 if (r)
2978 return r; 3053 return r;
2979 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 3054 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -3275,9 +3350,9 @@ static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
3275 kvm_desct->padding = 0; 3350 kvm_desct->padding = 0;
3276} 3351}
3277 3352
3278static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu, 3353static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu,
3279 u16 selector, 3354 u16 selector,
3280 struct descriptor_table *dtable) 3355 struct descriptor_table *dtable)
3281{ 3356{
3282 if (selector & 1 << 2) { 3357 if (selector & 1 << 2) {
3283 struct kvm_segment kvm_seg; 3358 struct kvm_segment kvm_seg;
@@ -3302,7 +3377,7 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3302 struct descriptor_table dtable; 3377 struct descriptor_table dtable;
3303 u16 index = selector >> 3; 3378 u16 index = selector >> 3;
3304 3379
3305 get_segment_descritptor_dtable(vcpu, selector, &dtable); 3380 get_segment_descriptor_dtable(vcpu, selector, &dtable);
3306 3381
3307 if (dtable.limit < index * 8 + 7) { 3382 if (dtable.limit < index * 8 + 7) {
3308 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); 3383 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
@@ -3321,7 +3396,7 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3321 struct descriptor_table dtable; 3396 struct descriptor_table dtable;
3322 u16 index = selector >> 3; 3397 u16 index = selector >> 3;
3323 3398
3324 get_segment_descritptor_dtable(vcpu, selector, &dtable); 3399 get_segment_descriptor_dtable(vcpu, selector, &dtable);
3325 3400
3326 if (dtable.limit < index * 8 + 7) 3401 if (dtable.limit < index * 8 + 7)
3327 return 1; 3402 return 1;
@@ -3900,6 +3975,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
3900 /* We do fxsave: this must be aligned. */ 3975 /* We do fxsave: this must be aligned. */
3901 BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF); 3976 BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF);
3902 3977
3978 vcpu->arch.mtrr_state.have_fixed = 1;
3903 vcpu_load(vcpu); 3979 vcpu_load(vcpu);
3904 r = kvm_arch_vcpu_reset(vcpu); 3980 r = kvm_arch_vcpu_reset(vcpu);
3905 if (r == 0) 3981 if (r == 0)
@@ -3925,6 +4001,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
3925 4001
3926int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) 4002int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
3927{ 4003{
4004 vcpu->arch.nmi_pending = false;
4005 vcpu->arch.nmi_injected = false;
4006
3928 return kvm_x86_ops->vcpu_reset(vcpu); 4007 return kvm_x86_ops->vcpu_reset(vcpu);
3929} 4008}
3930 4009
@@ -4012,6 +4091,7 @@ struct kvm *kvm_arch_create_vm(void)
4012 return ERR_PTR(-ENOMEM); 4091 return ERR_PTR(-ENOMEM);
4013 4092
4014 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 4093 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
4094 INIT_LIST_HEAD(&kvm->arch.oos_global_pages);
4015 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); 4095 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
4016 4096
4017 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ 4097 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
@@ -4048,8 +4128,8 @@ static void kvm_free_vcpus(struct kvm *kvm)
4048 4128
4049void kvm_arch_destroy_vm(struct kvm *kvm) 4129void kvm_arch_destroy_vm(struct kvm *kvm)
4050{ 4130{
4051 kvm_iommu_unmap_guest(kvm);
4052 kvm_free_all_assigned_devices(kvm); 4131 kvm_free_all_assigned_devices(kvm);
4132 kvm_iommu_unmap_guest(kvm);
4053 kvm_free_pit(kvm); 4133 kvm_free_pit(kvm);
4054 kfree(kvm->arch.vpic); 4134 kfree(kvm->arch.vpic);
4055 kfree(kvm->arch.vioapic); 4135 kfree(kvm->arch.vioapic);
@@ -4127,7 +4207,8 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
4127int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 4207int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
4128{ 4208{
4129 return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE 4209 return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
4130 || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED; 4210 || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
4211 || vcpu->arch.nmi_pending;
4131} 4212}
4132 4213
4133static void vcpu_kick_intr(void *info) 4214static void vcpu_kick_intr(void *info)
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index ea051173b0da..d174db7a3370 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -58,6 +58,7 @@
58#define SrcMem32 (4<<4) /* Memory operand (32-bit). */ 58#define SrcMem32 (4<<4) /* Memory operand (32-bit). */
59#define SrcImm (5<<4) /* Immediate operand. */ 59#define SrcImm (5<<4) /* Immediate operand. */
60#define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */ 60#define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */
61#define SrcOne (7<<4) /* Implied '1' */
61#define SrcMask (7<<4) 62#define SrcMask (7<<4)
62/* Generic ModRM decode. */ 63/* Generic ModRM decode. */
63#define ModRM (1<<7) 64#define ModRM (1<<7)
@@ -70,17 +71,23 @@
70#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ 71#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */
71#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ 72#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */
72#define GroupMask 0xff /* Group number stored in bits 0:7 */ 73#define GroupMask 0xff /* Group number stored in bits 0:7 */
74/* Source 2 operand type */
75#define Src2None (0<<29)
76#define Src2CL (1<<29)
77#define Src2ImmByte (2<<29)
78#define Src2One (3<<29)
79#define Src2Mask (7<<29)
73 80
74enum { 81enum {
75 Group1_80, Group1_81, Group1_82, Group1_83, 82 Group1_80, Group1_81, Group1_82, Group1_83,
76 Group1A, Group3_Byte, Group3, Group4, Group5, Group7, 83 Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
77}; 84};
78 85
79static u16 opcode_table[256] = { 86static u32 opcode_table[256] = {
80 /* 0x00 - 0x07 */ 87 /* 0x00 - 0x07 */
81 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 88 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
82 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 89 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
83 0, 0, 0, 0, 90 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
84 /* 0x08 - 0x0F */ 91 /* 0x08 - 0x0F */
85 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 92 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
86 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 93 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
@@ -195,7 +202,7 @@ static u16 opcode_table[256] = {
195 ImplicitOps, ImplicitOps, Group | Group4, Group | Group5, 202 ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
196}; 203};
197 204
198static u16 twobyte_table[256] = { 205static u32 twobyte_table[256] = {
199 /* 0x00 - 0x0F */ 206 /* 0x00 - 0x0F */
200 0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0, 207 0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0,
201 ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 208 ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
@@ -230,9 +237,14 @@ static u16 twobyte_table[256] = {
230 /* 0x90 - 0x9F */ 237 /* 0x90 - 0x9F */
231 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 238 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
232 /* 0xA0 - 0xA7 */ 239 /* 0xA0 - 0xA7 */
233 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0, 240 0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
241 DstMem | SrcReg | Src2ImmByte | ModRM,
242 DstMem | SrcReg | Src2CL | ModRM, 0, 0,
234 /* 0xA8 - 0xAF */ 243 /* 0xA8 - 0xAF */
235 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, ModRM, 0, 244 0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
245 DstMem | SrcReg | Src2ImmByte | ModRM,
246 DstMem | SrcReg | Src2CL | ModRM,
247 ModRM, 0,
236 /* 0xB0 - 0xB7 */ 248 /* 0xB0 - 0xB7 */
237 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0, 249 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0,
238 DstMem | SrcReg | ModRM | BitOp, 250 DstMem | SrcReg | ModRM | BitOp,
@@ -253,7 +265,7 @@ static u16 twobyte_table[256] = {
253 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 265 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
254}; 266};
255 267
256static u16 group_table[] = { 268static u32 group_table[] = {
257 [Group1_80*8] = 269 [Group1_80*8] =
258 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 270 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
259 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 271 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
@@ -297,9 +309,9 @@ static u16 group_table[] = {
297 SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp, 309 SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp,
298}; 310};
299 311
300static u16 group2_table[] = { 312static u32 group2_table[] = {
301 [Group7*8] = 313 [Group7*8] =
302 SrcNone | ModRM, 0, 0, 0, 314 SrcNone | ModRM, 0, 0, SrcNone | ModRM,
303 SrcNone | ModRM | DstMem | Mov, 0, 315 SrcNone | ModRM | DstMem | Mov, 0,
304 SrcMem16 | ModRM | Mov, 0, 316 SrcMem16 | ModRM | Mov, 0,
305}; 317};
@@ -359,49 +371,48 @@ static u16 group2_table[] = {
359 "andl %"_msk",%"_LO32 _tmp"; " \ 371 "andl %"_msk",%"_LO32 _tmp"; " \
360 "orl %"_LO32 _tmp",%"_sav"; " 372 "orl %"_LO32 _tmp",%"_sav"; "
361 373
374#ifdef CONFIG_X86_64
375#define ON64(x) x
376#else
377#define ON64(x)
378#endif
379
380#define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix) \
381 do { \
382 __asm__ __volatile__ ( \
383 _PRE_EFLAGS("0", "4", "2") \
384 _op _suffix " %"_x"3,%1; " \
385 _POST_EFLAGS("0", "4", "2") \
386 : "=m" (_eflags), "=m" ((_dst).val), \
387 "=&r" (_tmp) \
388 : _y ((_src).val), "i" (EFLAGS_MASK)); \
389 } while (0)
390
391
362/* Raw emulation: instruction has two explicit operands. */ 392/* Raw emulation: instruction has two explicit operands. */
363#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \ 393#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
364 do { \ 394 do { \
365 unsigned long _tmp; \ 395 unsigned long _tmp; \
366 \ 396 \
367 switch ((_dst).bytes) { \ 397 switch ((_dst).bytes) { \
368 case 2: \ 398 case 2: \
369 __asm__ __volatile__ ( \ 399 ____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \
370 _PRE_EFLAGS("0", "4", "2") \ 400 break; \
371 _op"w %"_wx"3,%1; " \ 401 case 4: \
372 _POST_EFLAGS("0", "4", "2") \ 402 ____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \
373 : "=m" (_eflags), "=m" ((_dst).val), \ 403 break; \
374 "=&r" (_tmp) \ 404 case 8: \
375 : _wy ((_src).val), "i" (EFLAGS_MASK)); \ 405 ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q")); \
376 break; \ 406 break; \
377 case 4: \ 407 } \
378 __asm__ __volatile__ ( \
379 _PRE_EFLAGS("0", "4", "2") \
380 _op"l %"_lx"3,%1; " \
381 _POST_EFLAGS("0", "4", "2") \
382 : "=m" (_eflags), "=m" ((_dst).val), \
383 "=&r" (_tmp) \
384 : _ly ((_src).val), "i" (EFLAGS_MASK)); \
385 break; \
386 case 8: \
387 __emulate_2op_8byte(_op, _src, _dst, \
388 _eflags, _qx, _qy); \
389 break; \
390 } \
391 } while (0) 408 } while (0)
392 409
393#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \ 410#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
394 do { \ 411 do { \
395 unsigned long __tmp; \ 412 unsigned long _tmp; \
396 switch ((_dst).bytes) { \ 413 switch ((_dst).bytes) { \
397 case 1: \ 414 case 1: \
398 __asm__ __volatile__ ( \ 415 ____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b"); \
399 _PRE_EFLAGS("0", "4", "2") \
400 _op"b %"_bx"3,%1; " \
401 _POST_EFLAGS("0", "4", "2") \
402 : "=m" (_eflags), "=m" ((_dst).val), \
403 "=&r" (__tmp) \
404 : _by ((_src).val), "i" (EFLAGS_MASK)); \
405 break; \ 416 break; \
406 default: \ 417 default: \
407 __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ 418 __emulate_2op_nobyte(_op, _src, _dst, _eflags, \
@@ -425,71 +436,68 @@ static u16 group2_table[] = {
425 __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ 436 __emulate_2op_nobyte(_op, _src, _dst, _eflags, \
426 "w", "r", _LO32, "r", "", "r") 437 "w", "r", _LO32, "r", "", "r")
427 438
428/* Instruction has only one explicit operand (no source operand). */ 439/* Instruction has three operands and one operand is stored in ECX register */
429#define emulate_1op(_op, _dst, _eflags) \ 440#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \
430 do { \ 441 do { \
431 unsigned long _tmp; \ 442 unsigned long _tmp; \
432 \ 443 _type _clv = (_cl).val; \
433 switch ((_dst).bytes) { \ 444 _type _srcv = (_src).val; \
434 case 1: \ 445 _type _dstv = (_dst).val; \
435 __asm__ __volatile__ ( \ 446 \
436 _PRE_EFLAGS("0", "3", "2") \ 447 __asm__ __volatile__ ( \
437 _op"b %1; " \ 448 _PRE_EFLAGS("0", "5", "2") \
438 _POST_EFLAGS("0", "3", "2") \ 449 _op _suffix " %4,%1 \n" \
439 : "=m" (_eflags), "=m" ((_dst).val), \ 450 _POST_EFLAGS("0", "5", "2") \
440 "=&r" (_tmp) \ 451 : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \
441 : "i" (EFLAGS_MASK)); \ 452 : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \
442 break; \ 453 ); \
443 case 2: \ 454 \
444 __asm__ __volatile__ ( \ 455 (_cl).val = (unsigned long) _clv; \
445 _PRE_EFLAGS("0", "3", "2") \ 456 (_src).val = (unsigned long) _srcv; \
446 _op"w %1; " \ 457 (_dst).val = (unsigned long) _dstv; \
447 _POST_EFLAGS("0", "3", "2") \
448 : "=m" (_eflags), "=m" ((_dst).val), \
449 "=&r" (_tmp) \
450 : "i" (EFLAGS_MASK)); \
451 break; \
452 case 4: \
453 __asm__ __volatile__ ( \
454 _PRE_EFLAGS("0", "3", "2") \
455 _op"l %1; " \
456 _POST_EFLAGS("0", "3", "2") \
457 : "=m" (_eflags), "=m" ((_dst).val), \
458 "=&r" (_tmp) \
459 : "i" (EFLAGS_MASK)); \
460 break; \
461 case 8: \
462 __emulate_1op_8byte(_op, _dst, _eflags); \
463 break; \
464 } \
465 } while (0) 458 } while (0)
466 459
467/* Emulate an instruction with quadword operands (x86/64 only). */ 460#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \
468#if defined(CONFIG_X86_64) 461 do { \
469#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) \ 462 switch ((_dst).bytes) { \
470 do { \ 463 case 2: \
471 __asm__ __volatile__ ( \ 464 __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
472 _PRE_EFLAGS("0", "4", "2") \ 465 "w", unsigned short); \
473 _op"q %"_qx"3,%1; " \ 466 break; \
474 _POST_EFLAGS("0", "4", "2") \ 467 case 4: \
475 : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ 468 __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
476 : _qy ((_src).val), "i" (EFLAGS_MASK)); \ 469 "l", unsigned int); \
470 break; \
471 case 8: \
472 ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
473 "q", unsigned long)); \
474 break; \
475 } \
477 } while (0) 476 } while (0)
478 477
479#define __emulate_1op_8byte(_op, _dst, _eflags) \ 478#define __emulate_1op(_op, _dst, _eflags, _suffix) \
480 do { \ 479 do { \
481 __asm__ __volatile__ ( \ 480 unsigned long _tmp; \
482 _PRE_EFLAGS("0", "3", "2") \ 481 \
483 _op"q %1; " \ 482 __asm__ __volatile__ ( \
484 _POST_EFLAGS("0", "3", "2") \ 483 _PRE_EFLAGS("0", "3", "2") \
485 : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ 484 _op _suffix " %1; " \
486 : "i" (EFLAGS_MASK)); \ 485 _POST_EFLAGS("0", "3", "2") \
486 : "=m" (_eflags), "+m" ((_dst).val), \
487 "=&r" (_tmp) \
488 : "i" (EFLAGS_MASK)); \
487 } while (0) 489 } while (0)
488 490
489#elif defined(__i386__) 491/* Instruction has only one explicit operand (no source operand). */
490#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) 492#define emulate_1op(_op, _dst, _eflags) \
491#define __emulate_1op_8byte(_op, _dst, _eflags) 493 do { \
492#endif /* __i386__ */ 494 switch ((_dst).bytes) { \
495 case 1: __emulate_1op(_op, _dst, _eflags, "b"); break; \
496 case 2: __emulate_1op(_op, _dst, _eflags, "w"); break; \
497 case 4: __emulate_1op(_op, _dst, _eflags, "l"); break; \
498 case 8: ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \
499 } \
500 } while (0)
493 501
494/* Fetch next part of the instruction being emulated. */ 502/* Fetch next part of the instruction being emulated. */
495#define insn_fetch(_type, _size, _eip) \ 503#define insn_fetch(_type, _size, _eip) \
@@ -1041,6 +1049,33 @@ done_prefixes:
1041 c->src.bytes = 1; 1049 c->src.bytes = 1;
1042 c->src.val = insn_fetch(s8, 1, c->eip); 1050 c->src.val = insn_fetch(s8, 1, c->eip);
1043 break; 1051 break;
1052 case SrcOne:
1053 c->src.bytes = 1;
1054 c->src.val = 1;
1055 break;
1056 }
1057
1058 /*
1059 * Decode and fetch the second source operand: register, memory
1060 * or immediate.
1061 */
1062 switch (c->d & Src2Mask) {
1063 case Src2None:
1064 break;
1065 case Src2CL:
1066 c->src2.bytes = 1;
1067 c->src2.val = c->regs[VCPU_REGS_RCX] & 0x8;
1068 break;
1069 case Src2ImmByte:
1070 c->src2.type = OP_IMM;
1071 c->src2.ptr = (unsigned long *)c->eip;
1072 c->src2.bytes = 1;
1073 c->src2.val = insn_fetch(u8, 1, c->eip);
1074 break;
1075 case Src2One:
1076 c->src2.bytes = 1;
1077 c->src2.val = 1;
1078 break;
1044 } 1079 }
1045 1080
1046 /* Decode and fetch the destination operand: register or memory. */ 1081 /* Decode and fetch the destination operand: register or memory. */
@@ -1100,20 +1135,33 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
1100 c->regs[VCPU_REGS_RSP]); 1135 c->regs[VCPU_REGS_RSP]);
1101} 1136}
1102 1137
1103static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, 1138static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1104 struct x86_emulate_ops *ops) 1139 struct x86_emulate_ops *ops)
1105{ 1140{
1106 struct decode_cache *c = &ctxt->decode; 1141 struct decode_cache *c = &ctxt->decode;
1107 int rc; 1142 int rc;
1108 1143
1109 rc = ops->read_std(register_address(c, ss_base(ctxt), 1144 rc = ops->read_emulated(register_address(c, ss_base(ctxt),
1110 c->regs[VCPU_REGS_RSP]), 1145 c->regs[VCPU_REGS_RSP]),
1111 &c->dst.val, c->dst.bytes, ctxt->vcpu); 1146 &c->src.val, c->src.bytes, ctxt->vcpu);
1112 if (rc != 0) 1147 if (rc != 0)
1113 return rc; 1148 return rc;
1114 1149
1115 register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->dst.bytes); 1150 register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.bytes);
1151 return rc;
1152}
1153
1154static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
1155 struct x86_emulate_ops *ops)
1156{
1157 struct decode_cache *c = &ctxt->decode;
1158 int rc;
1116 1159
1160 c->src.bytes = c->dst.bytes;
1161 rc = emulate_pop(ctxt, ops);
1162 if (rc != 0)
1163 return rc;
1164 c->dst.val = c->src.val;
1117 return 0; 1165 return 0;
1118} 1166}
1119 1167
@@ -1415,24 +1463,15 @@ special_insn:
1415 emulate_1op("dec", c->dst, ctxt->eflags); 1463 emulate_1op("dec", c->dst, ctxt->eflags);
1416 break; 1464 break;
1417 case 0x50 ... 0x57: /* push reg */ 1465 case 0x50 ... 0x57: /* push reg */
1418 c->dst.type = OP_MEM; 1466 emulate_push(ctxt);
1419 c->dst.bytes = c->op_bytes;
1420 c->dst.val = c->src.val;
1421 register_address_increment(c, &c->regs[VCPU_REGS_RSP],
1422 -c->op_bytes);
1423 c->dst.ptr = (void *) register_address(
1424 c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]);
1425 break; 1467 break;
1426 case 0x58 ... 0x5f: /* pop reg */ 1468 case 0x58 ... 0x5f: /* pop reg */
1427 pop_instruction: 1469 pop_instruction:
1428 if ((rc = ops->read_std(register_address(c, ss_base(ctxt), 1470 c->src.bytes = c->op_bytes;
1429 c->regs[VCPU_REGS_RSP]), c->dst.ptr, 1471 rc = emulate_pop(ctxt, ops);
1430 c->op_bytes, ctxt->vcpu)) != 0) 1472 if (rc != 0)
1431 goto done; 1473 goto done;
1432 1474 c->dst.val = c->src.val;
1433 register_address_increment(c, &c->regs[VCPU_REGS_RSP],
1434 c->op_bytes);
1435 c->dst.type = OP_NONE; /* Disable writeback. */
1436 break; 1475 break;
1437 case 0x63: /* movsxd */ 1476 case 0x63: /* movsxd */
1438 if (ctxt->mode != X86EMUL_MODE_PROT64) 1477 if (ctxt->mode != X86EMUL_MODE_PROT64)
@@ -1591,7 +1630,9 @@ special_insn:
1591 emulate_push(ctxt); 1630 emulate_push(ctxt);
1592 break; 1631 break;
1593 case 0x9d: /* popf */ 1632 case 0x9d: /* popf */
1633 c->dst.type = OP_REG;
1594 c->dst.ptr = (unsigned long *) &ctxt->eflags; 1634 c->dst.ptr = (unsigned long *) &ctxt->eflags;
1635 c->dst.bytes = c->op_bytes;
1595 goto pop_instruction; 1636 goto pop_instruction;
1596 case 0xa0 ... 0xa1: /* mov */ 1637 case 0xa0 ... 0xa1: /* mov */
1597 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; 1638 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
@@ -1689,7 +1730,9 @@ special_insn:
1689 emulate_grp2(ctxt); 1730 emulate_grp2(ctxt);
1690 break; 1731 break;
1691 case 0xc3: /* ret */ 1732 case 0xc3: /* ret */
1733 c->dst.type = OP_REG;
1692 c->dst.ptr = &c->eip; 1734 c->dst.ptr = &c->eip;
1735 c->dst.bytes = c->op_bytes;
1693 goto pop_instruction; 1736 goto pop_instruction;
1694 case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */ 1737 case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */
1695 mov: 1738 mov:
@@ -1778,7 +1821,7 @@ special_insn:
1778 c->eip = saved_eip; 1821 c->eip = saved_eip;
1779 goto cannot_emulate; 1822 goto cannot_emulate;
1780 } 1823 }
1781 return 0; 1824 break;
1782 case 0xf4: /* hlt */ 1825 case 0xf4: /* hlt */
1783 ctxt->vcpu->arch.halt_request = 1; 1826 ctxt->vcpu->arch.halt_request = 1;
1784 break; 1827 break;
@@ -1999,12 +2042,20 @@ twobyte_insn:
1999 c->src.val &= (c->dst.bytes << 3) - 1; 2042 c->src.val &= (c->dst.bytes << 3) - 1;
2000 emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags); 2043 emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags);
2001 break; 2044 break;
2045 case 0xa4: /* shld imm8, r, r/m */
2046 case 0xa5: /* shld cl, r, r/m */
2047 emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
2048 break;
2002 case 0xab: 2049 case 0xab:
2003 bts: /* bts */ 2050 bts: /* bts */
2004 /* only subword offset */ 2051 /* only subword offset */
2005 c->src.val &= (c->dst.bytes << 3) - 1; 2052 c->src.val &= (c->dst.bytes << 3) - 1;
2006 emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags); 2053 emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
2007 break; 2054 break;
2055 case 0xac: /* shrd imm8, r, r/m */
2056 case 0xad: /* shrd cl, r, r/m */
2057 emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags);
2058 break;
2008 case 0xae: /* clflush */ 2059 case 0xae: /* clflush */
2009 break; 2060 break;
2010 case 0xb0 ... 0xb1: /* cmpxchg */ 2061 case 0xb0 ... 0xb1: /* cmpxchg */
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 50a779264bb1..a7ed208f81e3 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -738,7 +738,7 @@ static void lguest_time_init(void)
738 738
739 /* We can't set cpumask in the initializer: damn C limitations! Set it 739 /* We can't set cpumask in the initializer: damn C limitations! Set it
740 * here and register our timer device. */ 740 * here and register our timer device. */
741 lguest_clockevent.cpumask = cpumask_of_cpu(0); 741 lguest_clockevent.cpumask = cpumask_of(0);
742 clockevents_register_device(&lguest_clockevent); 742 clockevents_register_device(&lguest_clockevent);
743 743
744 /* Finally, we unblock the timer interrupt. */ 744 /* Finally, we unblock the timer interrupt. */
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c
index 37b9ae4d44c5..df167f265622 100644
--- a/arch/x86/mach-default/setup.c
+++ b/arch/x86/mach-default/setup.c
@@ -133,29 +133,28 @@ void __init time_init_hook(void)
133 **/ 133 **/
134void mca_nmi_hook(void) 134void mca_nmi_hook(void)
135{ 135{
136 /* If I recall correctly, there's a whole bunch of other things that 136 /*
137 * If I recall correctly, there's a whole bunch of other things that
137 * we can do to check for NMI problems, but that's all I know about 138 * we can do to check for NMI problems, but that's all I know about
138 * at the moment. 139 * at the moment.
139 */ 140 */
140 141 pr_warning("NMI generated from unknown source!\n");
141 printk("NMI generated from unknown source!\n");
142} 142}
143#endif 143#endif
144 144
145static __init int no_ipi_broadcast(char *str) 145static __init int no_ipi_broadcast(char *str)
146{ 146{
147 get_option(&str, &no_broadcast); 147 get_option(&str, &no_broadcast);
148 printk ("Using %s mode\n", no_broadcast ? "No IPI Broadcast" : 148 pr_info("Using %s mode\n",
149 "IPI Broadcast"); 149 no_broadcast ? "No IPI Broadcast" : "IPI Broadcast");
150 return 1; 150 return 1;
151} 151}
152
153__setup("no_ipi_broadcast=", no_ipi_broadcast); 152__setup("no_ipi_broadcast=", no_ipi_broadcast);
154 153
155static int __init print_ipi_mode(void) 154static int __init print_ipi_mode(void)
156{ 155{
157 printk ("Using IPI %s mode\n", no_broadcast ? "No-Shortcut" : 156 pr_info("Using IPI %s mode\n",
158 "Shortcut"); 157 no_broadcast ? "No-Shortcut" : "Shortcut");
159 return 0; 158 return 0;
160} 159}
161 160
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index 3624a364b7f3..bc4c7840b2a8 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -42,9 +42,10 @@ static const struct dmi_system_id bigsmp_dmi_table[] = {
42 { } 42 { }
43}; 43};
44 44
45static cpumask_t vector_allocation_domain(int cpu) 45static void vector_allocation_domain(int cpu, cpumask_t *retmask)
46{ 46{
47 return cpumask_of_cpu(cpu); 47 cpus_clear(*retmask);
48 cpu_set(cpu, *retmask);
48} 49}
49 50
50static int probe_bigsmp(void) 51static int probe_bigsmp(void)
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 7b4e6d0d1690..4ba5ccaa1584 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -87,7 +87,7 @@ static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
87} 87}
88#endif 88#endif
89 89
90static cpumask_t vector_allocation_domain(int cpu) 90static void vector_allocation_domain(int cpu, cpumask_t *retmask)
91{ 91{
92 /* Careful. Some cpus do not strictly honor the set of cpus 92 /* Careful. Some cpus do not strictly honor the set of cpus
93 * specified in the interrupt destination when using lowest 93 * specified in the interrupt destination when using lowest
@@ -97,8 +97,7 @@ static cpumask_t vector_allocation_domain(int cpu)
97 * deliver interrupts to the wrong hyperthread when only one 97 * deliver interrupts to the wrong hyperthread when only one
98 * hyperthread was specified in the interrupt desitination. 98 * hyperthread was specified in the interrupt desitination.
99 */ 99 */
100 cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; 100 *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
101 return domain;
102} 101}
103 102
104struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000); 103struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000);
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c
index 71a309b122e6..511d7941364f 100644
--- a/arch/x86/mach-generic/numaq.c
+++ b/arch/x86/mach-generic/numaq.c
@@ -38,7 +38,7 @@ static int acpi_madt_oem_check(char *oem_id, char *oem_table_id)
38 return 0; 38 return 0;
39} 39}
40 40
41static cpumask_t vector_allocation_domain(int cpu) 41static void vector_allocation_domain(int cpu, cpumask_t *retmask)
42{ 42{
43 /* Careful. Some cpus do not strictly honor the set of cpus 43 /* Careful. Some cpus do not strictly honor the set of cpus
44 * specified in the interrupt destination when using lowest 44 * specified in the interrupt destination when using lowest
@@ -48,8 +48,7 @@ static cpumask_t vector_allocation_domain(int cpu)
48 * deliver interrupts to the wrong hyperthread when only one 48 * deliver interrupts to the wrong hyperthread when only one
49 * hyperthread was specified in the interrupt desitination. 49 * hyperthread was specified in the interrupt desitination.
50 */ 50 */
51 cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; 51 *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
52 return domain;
53} 52}
54 53
55struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq); 54struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq);
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c
index 2c6d234e0009..2821ffc188b5 100644
--- a/arch/x86/mach-generic/summit.c
+++ b/arch/x86/mach-generic/summit.c
@@ -24,7 +24,7 @@ static int probe_summit(void)
24 return 0; 24 return 0;
25} 25}
26 26
27static cpumask_t vector_allocation_domain(int cpu) 27static void vector_allocation_domain(int cpu, cpumask_t *retmask)
28{ 28{
29 /* Careful. Some cpus do not strictly honor the set of cpus 29 /* Careful. Some cpus do not strictly honor the set of cpus
30 * specified in the interrupt destination when using lowest 30 * specified in the interrupt destination when using lowest
@@ -34,8 +34,7 @@ static cpumask_t vector_allocation_domain(int cpu)
34 * deliver interrupts to the wrong hyperthread when only one 34 * deliver interrupts to the wrong hyperthread when only one
35 * hyperthread was specified in the interrupt desitination. 35 * hyperthread was specified in the interrupt desitination.
36 */ 36 */
37 cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; 37 *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
38 return domain;
39} 38}
40 39
41struct genapic apic_summit = APIC_INIT("summit", probe_summit); 40struct genapic apic_summit = APIC_INIT("summit", probe_summit);
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 52145007bd7e..a5bc05492b1e 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -63,11 +63,6 @@ static int voyager_extended_cpus = 1;
63/* Used for the invalidate map that's also checked in the spinlock */ 63/* Used for the invalidate map that's also checked in the spinlock */
64static volatile unsigned long smp_invalidate_needed; 64static volatile unsigned long smp_invalidate_needed;
65 65
66/* Bitmask of currently online CPUs - used by setup.c for
67 /proc/cpuinfo, visible externally but still physical */
68cpumask_t cpu_online_map = CPU_MASK_NONE;
69EXPORT_SYMBOL(cpu_online_map);
70
71/* Bitmask of CPUs present in the system - exported by i386_syms.c, used 66/* Bitmask of CPUs present in the system - exported by i386_syms.c, used
72 * by scheduler but indexed physically */ 67 * by scheduler but indexed physically */
73cpumask_t phys_cpu_present_map = CPU_MASK_NONE; 68cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
@@ -218,8 +213,6 @@ static cpumask_t smp_commenced_mask = CPU_MASK_NONE;
218/* This is for the new dynamic CPU boot code */ 213/* This is for the new dynamic CPU boot code */
219cpumask_t cpu_callin_map = CPU_MASK_NONE; 214cpumask_t cpu_callin_map = CPU_MASK_NONE;
220cpumask_t cpu_callout_map = CPU_MASK_NONE; 215cpumask_t cpu_callout_map = CPU_MASK_NONE;
221cpumask_t cpu_possible_map = CPU_MASK_NONE;
222EXPORT_SYMBOL(cpu_possible_map);
223 216
224/* The per processor IRQ masks (these are usually kept in sync) */ 217/* The per processor IRQ masks (these are usually kept in sync) */
225static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned; 218static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned;
@@ -679,7 +672,7 @@ void __init smp_boot_cpus(void)
679 672
680 /* loop over all the extended VIC CPUs and boot them. The 673 /* loop over all the extended VIC CPUs and boot them. The
681 * Quad CPUs must be bootstrapped by their extended VIC cpu */ 674 * Quad CPUs must be bootstrapped by their extended VIC cpu */
682 for (i = 0; i < NR_CPUS; i++) { 675 for (i = 0; i < nr_cpu_ids; i++) {
683 if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map)) 676 if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map))
684 continue; 677 continue;
685 do_boot_cpu(i); 678 do_boot_cpu(i);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8655b5bb0963..f99a6c6c432e 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -435,8 +435,12 @@ static void __init set_highmem_pages_init(void)
435#endif /* !CONFIG_NUMA */ 435#endif /* !CONFIG_NUMA */
436 436
437#else 437#else
438# define permanent_kmaps_init(pgd_base) do { } while (0) 438static inline void permanent_kmaps_init(pgd_t *pgd_base)
439# define set_highmem_pages_init() do { } while (0) 439{
440}
441static inline void set_highmem_pages_init(void)
442{
443}
440#endif /* CONFIG_HIGHMEM */ 444#endif /* CONFIG_HIGHMEM */
441 445
442void __init native_pagetable_setup_start(pgd_t *base) 446void __init native_pagetable_setup_start(pgd_t *base)
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index cebcbf152d46..71a14f89f89e 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -278,7 +278,7 @@ void __init numa_init_array(void)
278 int rr, i; 278 int rr, i;
279 279
280 rr = first_node(node_online_map); 280 rr = first_node(node_online_map);
281 for (i = 0; i < NR_CPUS; i++) { 281 for (i = 0; i < nr_cpu_ids; i++) {
282 if (early_cpu_to_node(i) != NUMA_NO_NODE) 282 if (early_cpu_to_node(i) != NUMA_NO_NODE)
283 continue; 283 continue;
284 numa_set_node(i, rr); 284 numa_set_node(i, rr);
@@ -549,7 +549,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn)
549 memnodemap[0] = 0; 549 memnodemap[0] = 0;
550 node_set_online(0); 550 node_set_online(0);
551 node_set(0, node_possible_map); 551 node_set(0, node_possible_map);
552 for (i = 0; i < NR_CPUS; i++) 552 for (i = 0; i < nr_cpu_ids; i++)
553 numa_set_node(i, 0); 553 numa_set_node(i, 0);
554 e820_register_active_regions(0, start_pfn, last_pfn); 554 e820_register_active_regions(0, start_pfn, last_pfn);
555 setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT); 555 setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT);
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 51c0a2fc14fe..09737c8af074 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -382,7 +382,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
382 if (!node_online(i)) 382 if (!node_online(i))
383 setup_node_bootmem(i, nodes[i].start, nodes[i].end); 383 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
384 384
385 for (i = 0; i < NR_CPUS; i++) { 385 for (i = 0; i < nr_cpu_ids; i++) {
386 int node = early_cpu_to_node(i); 386 int node = early_cpu_to_node(i);
387 387
388 if (node == NUMA_NO_NODE) 388 if (node == NUMA_NO_NODE)
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 1d88d2b39771..9e5752fe4d15 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -4,7 +4,7 @@
4#include <linux/irq.h> 4#include <linux/irq.h>
5#include <linux/dmi.h> 5#include <linux/dmi.h>
6#include <asm/numa.h> 6#include <asm/numa.h>
7#include "pci.h" 7#include <asm/pci_x86.h>
8 8
9struct pci_root_info { 9struct pci_root_info {
10 char *name; 10 char *name;
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 22e057665e55..9bb09823b362 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -2,7 +2,7 @@
2#include <linux/pci.h> 2#include <linux/pci.h>
3#include <linux/topology.h> 3#include <linux/topology.h>
4#include <linux/cpu.h> 4#include <linux/cpu.h>
5#include "pci.h" 5#include <asm/pci_x86.h>
6 6
7#ifdef CONFIG_X86_64 7#ifdef CONFIG_X86_64
8#include <asm/pci-direct.h> 8#include <asm/pci-direct.h>
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index bb1a01f089e2..62ddb73e09ed 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -14,8 +14,7 @@
14#include <asm/segment.h> 14#include <asm/segment.h>
15#include <asm/io.h> 15#include <asm/io.h>
16#include <asm/smp.h> 16#include <asm/smp.h>
17 17#include <asm/pci_x86.h>
18#include "pci.h"
19 18
20unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | 19unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
21 PCI_PROBE_MMCONF; 20 PCI_PROBE_MMCONF;
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c
index 9a5af6c8fbe9..bd13c3e4c6db 100644
--- a/arch/x86/pci/direct.c
+++ b/arch/x86/pci/direct.c
@@ -5,7 +5,7 @@
5#include <linux/pci.h> 5#include <linux/pci.h>
6#include <linux/init.h> 6#include <linux/init.h>
7#include <linux/dmi.h> 7#include <linux/dmi.h>
8#include "pci.h" 8#include <asm/pci_x86.h>
9 9
10/* 10/*
11 * Functions for accessing PCI base (first 256 bytes) and extended 11 * Functions for accessing PCI base (first 256 bytes) and extended
diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c
index 86631ccbc25a..f6adf2c6d751 100644
--- a/arch/x86/pci/early.c
+++ b/arch/x86/pci/early.c
@@ -2,7 +2,7 @@
2#include <linux/pci.h> 2#include <linux/pci.h>
3#include <asm/pci-direct.h> 3#include <asm/pci-direct.h>
4#include <asm/io.h> 4#include <asm/io.h>
5#include "pci.h" 5#include <asm/pci_x86.h>
6 6
7/* Direct PCI access. This is used for PCI accesses in early boot before 7/* Direct PCI access. This is used for PCI accesses in early boot before
8 the PCI subsystem works. */ 8 the PCI subsystem works. */
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 2051dc96b8e9..7d388d5cf548 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -6,8 +6,7 @@
6#include <linux/dmi.h> 6#include <linux/dmi.h>
7#include <linux/pci.h> 7#include <linux/pci.h>
8#include <linux/init.h> 8#include <linux/init.h>
9#include "pci.h" 9#include <asm/pci_x86.h>
10
11 10
12static void __devinit pci_fixup_i450nx(struct pci_dev *d) 11static void __devinit pci_fixup_i450nx(struct pci_dev *d)
13{ 12{
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 844df0cbbd3e..e51bf2cda4b0 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -34,8 +34,8 @@
34 34
35#include <asm/pat.h> 35#include <asm/pat.h>
36#include <asm/e820.h> 36#include <asm/e820.h>
37#include <asm/pci_x86.h>
37 38
38#include "pci.h"
39 39
40static int 40static int
41skip_isa_ioresource_align(struct pci_dev *dev) { 41skip_isa_ioresource_align(struct pci_dev *dev) {
diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c
index d6c950f81858..bec3b048e72b 100644
--- a/arch/x86/pci/init.c
+++ b/arch/x86/pci/init.c
@@ -1,6 +1,6 @@
1#include <linux/pci.h> 1#include <linux/pci.h>
2#include <linux/init.h> 2#include <linux/init.h>
3#include "pci.h" 3#include <asm/pci_x86.h>
4 4
5/* arch_initcall has too random ordering, so call the initializers 5/* arch_initcall has too random ordering, so call the initializers
6 in the right sequence from here. */ 6 in the right sequence from here. */
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index bf69dbe08bff..373b9afe6d44 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -16,8 +16,7 @@
16#include <asm/io_apic.h> 16#include <asm/io_apic.h>
17#include <linux/irq.h> 17#include <linux/irq.h>
18#include <linux/acpi.h> 18#include <linux/acpi.h>
19 19#include <asm/pci_x86.h>
20#include "pci.h"
21 20
22#define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24)) 21#define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24))
23#define PIRQ_VERSION 0x0100 22#define PIRQ_VERSION 0x0100
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index b722dd481b39..f1065b129e9c 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -3,7 +3,7 @@
3 */ 3 */
4#include <linux/init.h> 4#include <linux/init.h>
5#include <linux/pci.h> 5#include <linux/pci.h>
6#include "pci.h" 6#include <asm/pci_x86.h>
7 7
8/* 8/*
9 * Discover remaining PCI buses in case there are peer host bridges. 9 * Discover remaining PCI buses in case there are peer host bridges.
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 654a2234f8f3..89bf9242c80a 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -15,8 +15,7 @@
15#include <linux/acpi.h> 15#include <linux/acpi.h>
16#include <linux/bitmap.h> 16#include <linux/bitmap.h>
17#include <asm/e820.h> 17#include <asm/e820.h>
18 18#include <asm/pci_x86.h>
19#include "pci.h"
20 19
21/* aperture is up to 256MB but BIOS may reserve less */ 20/* aperture is up to 256MB but BIOS may reserve less */
22#define MMCONFIG_APER_MIN (2 * 1024*1024) 21#define MMCONFIG_APER_MIN (2 * 1024*1024)
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c
index f3c761dce695..8b2d561046a3 100644
--- a/arch/x86/pci/mmconfig_32.c
+++ b/arch/x86/pci/mmconfig_32.c
@@ -13,7 +13,7 @@
13#include <linux/init.h> 13#include <linux/init.h>
14#include <linux/acpi.h> 14#include <linux/acpi.h>
15#include <asm/e820.h> 15#include <asm/e820.h>
16#include "pci.h" 16#include <asm/pci_x86.h>
17 17
18/* Assume systems with more busses have correct MCFG */ 18/* Assume systems with more busses have correct MCFG */
19#define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) 19#define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG))
diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c
index a1994163c99d..30007ffc8e11 100644
--- a/arch/x86/pci/mmconfig_64.c
+++ b/arch/x86/pci/mmconfig_64.c
@@ -10,8 +10,7 @@
10#include <linux/acpi.h> 10#include <linux/acpi.h>
11#include <linux/bitmap.h> 11#include <linux/bitmap.h>
12#include <asm/e820.h> 12#include <asm/e820.h>
13 13#include <asm/pci_x86.h>
14#include "pci.h"
15 14
16/* Static virtual mapping of the MMCONFIG aperture */ 15/* Static virtual mapping of the MMCONFIG aperture */
17struct mmcfg_virt { 16struct mmcfg_virt {
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c
index 1177845d3186..2089354968a2 100644
--- a/arch/x86/pci/numaq_32.c
+++ b/arch/x86/pci/numaq_32.c
@@ -7,7 +7,7 @@
7#include <linux/nodemask.h> 7#include <linux/nodemask.h>
8#include <mach_apic.h> 8#include <mach_apic.h>
9#include <asm/mpspec.h> 9#include <asm/mpspec.h>
10#include "pci.h" 10#include <asm/pci_x86.h>
11 11
12#define XQUAD_PORTIO_BASE 0xfe400000 12#define XQUAD_PORTIO_BASE 0xfe400000
13#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ 13#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */
diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c
index e11e9e803d5f..b889d824f7c6 100644
--- a/arch/x86/pci/olpc.c
+++ b/arch/x86/pci/olpc.c
@@ -29,7 +29,7 @@
29#include <linux/init.h> 29#include <linux/init.h>
30#include <asm/olpc.h> 30#include <asm/olpc.h>
31#include <asm/geode.h> 31#include <asm/geode.h>
32#include "pci.h" 32#include <asm/pci_x86.h>
33 33
34/* 34/*
35 * In the tables below, the first two line (8 longwords) are the 35 * In the tables below, the first two line (8 longwords) are the
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index 37472fc6f729..b82cae970dfd 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -6,9 +6,8 @@
6#include <linux/init.h> 6#include <linux/init.h>
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/uaccess.h> 8#include <linux/uaccess.h>
9#include "pci.h" 9#include <asm/pci_x86.h>
10#include "pci-functions.h" 10#include <asm/mach-default/pci-functions.h>
11
12 11
13/* BIOS32 signature: "_32_" */ 12/* BIOS32 signature: "_32_" */
14#define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) 13#define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c
index 42f4cb19faca..16d0c0eb0d19 100644
--- a/arch/x86/pci/visws.c
+++ b/arch/x86/pci/visws.c
@@ -9,11 +9,10 @@
9#include <linux/init.h> 9#include <linux/init.h>
10 10
11#include <asm/setup.h> 11#include <asm/setup.h>
12#include <asm/pci_x86.h>
12#include <asm/visws/cobalt.h> 13#include <asm/visws/cobalt.h>
13#include <asm/visws/lithium.h> 14#include <asm/visws/lithium.h>
14 15
15#include "pci.h"
16
17static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; } 16static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; }
18static void pci_visws_disable_irq(struct pci_dev *dev) { } 17static void pci_visws_disable_irq(struct pci_dev *dev) { }
19 18
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 773d68d3e912..503c240e26c7 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1082,7 +1082,7 @@ static void drop_other_mm_ref(void *info)
1082 1082
1083static void xen_drop_mm_ref(struct mm_struct *mm) 1083static void xen_drop_mm_ref(struct mm_struct *mm)
1084{ 1084{
1085 cpumask_t mask; 1085 cpumask_var_t mask;
1086 unsigned cpu; 1086 unsigned cpu;
1087 1087
1088 if (current->active_mm == mm) { 1088 if (current->active_mm == mm) {
@@ -1094,7 +1094,16 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
1094 } 1094 }
1095 1095
1096 /* Get the "official" set of cpus referring to our pagetable. */ 1096 /* Get the "official" set of cpus referring to our pagetable. */
1097 mask = mm->cpu_vm_mask; 1097 if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
1098 for_each_online_cpu(cpu) {
1099 if (!cpumask_test_cpu(cpu, &mm->cpu_vm_mask)
1100 && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
1101 continue;
1102 smp_call_function_single(cpu, drop_other_mm_ref, mm, 1);
1103 }
1104 return;
1105 }
1106 cpumask_copy(mask, &mm->cpu_vm_mask);
1098 1107
1099 /* It's possible that a vcpu may have a stale reference to our 1108 /* It's possible that a vcpu may have a stale reference to our
1100 cr3, because its in lazy mode, and it hasn't yet flushed 1109 cr3, because its in lazy mode, and it hasn't yet flushed
@@ -1103,11 +1112,12 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
1103 if needed. */ 1112 if needed. */
1104 for_each_online_cpu(cpu) { 1113 for_each_online_cpu(cpu) {
1105 if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd)) 1114 if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
1106 cpu_set(cpu, mask); 1115 cpumask_set_cpu(cpu, mask);
1107 } 1116 }
1108 1117
1109 if (!cpus_empty(mask)) 1118 if (!cpumask_empty(mask))
1110 smp_call_function_mask(mask, drop_other_mm_ref, mm, 1); 1119 smp_call_function_many(mask, drop_other_mm_ref, mm, 1);
1120 free_cpumask_var(mask);
1111} 1121}
1112#else 1122#else
1113static void xen_drop_mm_ref(struct mm_struct *mm) 1123static void xen_drop_mm_ref(struct mm_struct *mm)
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index acd9b6705e02..c44e2069c7c7 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -33,7 +33,7 @@
33#include "xen-ops.h" 33#include "xen-ops.h"
34#include "mmu.h" 34#include "mmu.h"
35 35
36cpumask_t xen_cpu_initialized_map; 36cpumask_var_t xen_cpu_initialized_map;
37 37
38static DEFINE_PER_CPU(int, resched_irq); 38static DEFINE_PER_CPU(int, resched_irq);
39static DEFINE_PER_CPU(int, callfunc_irq); 39static DEFINE_PER_CPU(int, callfunc_irq);
@@ -158,7 +158,7 @@ static void __init xen_fill_possible_map(void)
158{ 158{
159 int i, rc; 159 int i, rc;
160 160
161 for (i = 0; i < NR_CPUS; i++) { 161 for (i = 0; i < nr_cpu_ids; i++) {
162 rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); 162 rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
163 if (rc >= 0) { 163 if (rc >= 0) {
164 num_processors++; 164 num_processors++;
@@ -192,11 +192,14 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
192 if (xen_smp_intr_init(0)) 192 if (xen_smp_intr_init(0))
193 BUG(); 193 BUG();
194 194
195 xen_cpu_initialized_map = cpumask_of_cpu(0); 195 if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL))
196 panic("could not allocate xen_cpu_initialized_map\n");
197
198 cpumask_copy(xen_cpu_initialized_map, cpumask_of(0));
196 199
197 /* Restrict the possible_map according to max_cpus. */ 200 /* Restrict the possible_map according to max_cpus. */
198 while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) { 201 while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
199 for (cpu = NR_CPUS - 1; !cpu_possible(cpu); cpu--) 202 for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--)
200 continue; 203 continue;
201 cpu_clear(cpu, cpu_possible_map); 204 cpu_clear(cpu, cpu_possible_map);
202 } 205 }
@@ -221,7 +224,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
221 struct vcpu_guest_context *ctxt; 224 struct vcpu_guest_context *ctxt;
222 struct desc_struct *gdt; 225 struct desc_struct *gdt;
223 226
224 if (cpu_test_and_set(cpu, xen_cpu_initialized_map)) 227 if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
225 return 0; 228 return 0;
226 229
227 ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); 230 ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
@@ -408,24 +411,23 @@ static void xen_smp_send_reschedule(int cpu)
408 xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); 411 xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
409} 412}
410 413
411static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector) 414static void xen_send_IPI_mask(const struct cpumask *mask,
415 enum ipi_vector vector)
412{ 416{
413 unsigned cpu; 417 unsigned cpu;
414 418
415 cpus_and(mask, mask, cpu_online_map); 419 for_each_cpu_and(cpu, mask, cpu_online_mask)
416
417 for_each_cpu_mask_nr(cpu, mask)
418 xen_send_IPI_one(cpu, vector); 420 xen_send_IPI_one(cpu, vector);
419} 421}
420 422
421static void xen_smp_send_call_function_ipi(cpumask_t mask) 423static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
422{ 424{
423 int cpu; 425 int cpu;
424 426
425 xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); 427 xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
426 428
427 /* Make sure other vcpus get a chance to run if they need to. */ 429 /* Make sure other vcpus get a chance to run if they need to. */
428 for_each_cpu_mask_nr(cpu, mask) { 430 for_each_cpu(cpu, mask) {
429 if (xen_vcpu_stolen(cpu)) { 431 if (xen_vcpu_stolen(cpu)) {
430 HYPERVISOR_sched_op(SCHEDOP_yield, 0); 432 HYPERVISOR_sched_op(SCHEDOP_yield, 0);
431 break; 433 break;
@@ -435,7 +437,8 @@ static void xen_smp_send_call_function_ipi(cpumask_t mask)
435 437
436static void xen_smp_send_call_function_single_ipi(int cpu) 438static void xen_smp_send_call_function_single_ipi(int cpu)
437{ 439{
438 xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR); 440 xen_send_IPI_mask(cpumask_of(cpu),
441 XEN_CALL_FUNCTION_SINGLE_VECTOR);
439} 442}
440 443
441static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) 444static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 2a234db5949b..212ffe012b76 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -35,7 +35,8 @@ void xen_post_suspend(int suspend_cancelled)
35 pfn_to_mfn(xen_start_info->console.domU.mfn); 35 pfn_to_mfn(xen_start_info->console.domU.mfn);
36 } else { 36 } else {
37#ifdef CONFIG_SMP 37#ifdef CONFIG_SMP
38 xen_cpu_initialized_map = cpu_online_map; 38 BUG_ON(xen_cpu_initialized_map == NULL);
39 cpumask_copy(xen_cpu_initialized_map, cpu_online_mask);
39#endif 40#endif
40 xen_vcpu_restore(); 41 xen_vcpu_restore();
41 } 42 }
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index c9f7cda48ed7..14f240623497 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -132,8 +132,7 @@ static void do_stolen_accounting(void)
132 *snap = state; 132 *snap = state;
133 133
134 /* Add the appropriate number of ticks of stolen time, 134 /* Add the appropriate number of ticks of stolen time,
135 including any left-overs from last time. Passing NULL to 135 including any left-overs from last time. */
136 account_steal_time accounts the time as stolen. */
137 stolen = runnable + offline + __get_cpu_var(residual_stolen); 136 stolen = runnable + offline + __get_cpu_var(residual_stolen);
138 137
139 if (stolen < 0) 138 if (stolen < 0)
@@ -141,11 +140,10 @@ static void do_stolen_accounting(void)
141 140
142 ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); 141 ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
143 __get_cpu_var(residual_stolen) = stolen; 142 __get_cpu_var(residual_stolen) = stolen;
144 account_steal_time(NULL, ticks); 143 account_steal_ticks(ticks);
145 144
146 /* Add the appropriate number of ticks of blocked time, 145 /* Add the appropriate number of ticks of blocked time,
147 including any left-overs from last time. Passing idle to 146 including any left-overs from last time. */
148 account_steal_time accounts the time as idle/wait. */
149 blocked += __get_cpu_var(residual_blocked); 147 blocked += __get_cpu_var(residual_blocked);
150 148
151 if (blocked < 0) 149 if (blocked < 0)
@@ -153,7 +151,7 @@ static void do_stolen_accounting(void)
153 151
154 ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked); 152 ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
155 __get_cpu_var(residual_blocked) = blocked; 153 __get_cpu_var(residual_blocked) = blocked;
156 account_steal_time(idle_task(smp_processor_id()), ticks); 154 account_idle_ticks(ticks);
157} 155}
158 156
159/* 157/*
@@ -437,7 +435,7 @@ void xen_setup_timer(int cpu)
437 evt = &per_cpu(xen_clock_events, cpu); 435 evt = &per_cpu(xen_clock_events, cpu);
438 memcpy(evt, xen_clockevent, sizeof(*evt)); 436 memcpy(evt, xen_clockevent, sizeof(*evt));
439 437
440 evt->cpumask = cpumask_of_cpu(cpu); 438 evt->cpumask = cpumask_of(cpu);
441 evt->irq = irq; 439 evt->irq = irq;
442 440
443 setup_runstate_info(cpu); 441 setup_runstate_info(cpu);
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 9e1afae8461f..c1f8faf0a2c5 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -58,7 +58,7 @@ void __init xen_init_spinlocks(void);
58__cpuinit void xen_init_lock_cpu(int cpu); 58__cpuinit void xen_init_lock_cpu(int cpu);
59void xen_uninit_lock_cpu(int cpu); 59void xen_uninit_lock_cpu(int cpu);
60 60
61extern cpumask_t xen_cpu_initialized_map; 61extern cpumask_var_t xen_cpu_initialized_map;
62#else 62#else
63static inline void xen_smp_init(void) {} 63static inline void xen_smp_init(void) {}
64#endif 64#endif